commit 63b2f6358e4bfb756076bb53195eb60bb5028640
parent eaf9a863d705bf0ae62d6eda5346205a0480df7c
Author: Matsuda Kenji <info@mtkn.jp>
Date: Sat, 1 Apr 2023 09:18:20 +0900
add bincrc
Diffstat:
M | Makefile | | | 23 | +++++++++++++++++++---- |
A | bincrc.c | | | 85 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | boot2/boot2.S | | | 66 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | boot2/boot2_w25q.S | | | 287 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | boot2/bs2_default.bin | | | 0 | |
5 files changed, 457 insertions(+), 4 deletions(-)
diff --git a/Makefile b/Makefile
@@ -1,6 +1,7 @@
AS = arm-none-eabi-as
LD = arm-none-eabi-ld
CC = arm-none-eabi-gcc
+OBJCOPY = arm-none-eabi-objcopy
ELF2UF2 = ../pico-sdk/build/elf2uf2/elf2uf2
MCPU = -mcpu=cortex-m0plus
@@ -15,12 +16,24 @@ clean:
rm -f *.o
rm -f *.elf
rm -f *.uf2
+ rm -f bincrc
+ rm -f boot2/*.o
+# rm -f boot2/*.bin
+ rm -f boot2/boot2_crc.S
start.o: start.s
$(AS) $(ASFLAGS) -o start.o start.s
-boot2.o: boot2/bs2_default_padded_checksummed.S
- $(AS) $(ASFLAGS) -o boot2.o boot2/bs2_default_padded_checksummed.S
+#boot2_crc.o: boot2/bs2_default_padded_checksummed.S
+# $(AS) $(ASFLAGS) -o boot2_crc.o boot2/bs2_default_padded_checksummed.S
+boot2/boot2_crc.S: boot2/bs2_default.bin
+ ./bincrc boot2/bs2_default.bin boot2/boot2_crc.S
+boot2_crc.o: boot2/boot2_crc.S
+ $(AS) $(ASFLAGS) -o boot2_crc.o boot2/boot2_crc.S
+#boot2/boot2_crc.S: boot2/boot2.S bincrc
+# $(AS) $(ASFLAGS) -o boot2/boot2.o boot2/boot2.S
+# $(OBJCOPY) -O binary boot2/boot2.o boot2/boot2.bin
+# ./bincrc boot2/boot2.bin boot2/boot2_crc.S
main.o: main.c
$(CC) $(CFLAGS) -o main.o main.c
@@ -28,8 +41,8 @@ main.o: main.c
as.o: mach.s
$(AS) $(ASFLAGS) -o as.o mach.s
-led.elf: start.o boot2.o main.o as.o
- $(LD) $(LFLAGS) -o led.elf -T memmap.ld start.o boot2.o as.o main.o
+led.elf: start.o boot2_crc.o main.o as.o
+ $(LD) $(LFLAGS) -o led.elf -T memmap.ld start.o boot2_crc.o as.o main.o
led.uf2: led.elf
$(ELF2UF2) led.elf led.uf2
@@ -38,3 +51,5 @@ flash: led.uf2
mount /dev/disk/by-label/RPI-RP2 /mnt
cp led.uf2 /mnt
+bincrc: bincrc.c
+ tcc -o bincrc bincrc.c
diff --git a/bincrc.c b/bincrc.c
@@ -0,0 +1,85 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+
+void
+printb(uint32_t i)
+{
+ for (int j = 31; j >= 0; j--) {
+ if (i >> j & 1) {
+ printf("1");
+ } else {
+ printf("0");
+ }
+ }
+ printf("\n");
+}
+
+uint32_t
+crc32(uint8_t *idata, size_t len)
+{
+ uint32_t pol = 0x04C11DB7;
+ uint32_t c = 0xFFFFFFFF;
+ uint32_t b;
+
+ for (int i = 0; i < len; i++) {
+ b = idata[i] << 24;
+ c ^= b;
+ for (int j = 0; j < 8; j++) {
+ c = c >> 31 & 1 ? c << 1 ^ pol : c << 1;
+ }
+ }
+
+ return c;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int src, dst;
+ size_t isize, osize = 256;
+ uint8_t idata[osize];
+ uint32_t cs;
+
+ if(argc != 3) {
+ fprintf(stderr, "Usage: %s src dst\n", argv[0]);
+ exit(1);
+ }
+
+ if((src = open(argv[1], O_RDONLY)) < 0) {
+ fprintf(stderr, "Could not open %s.\n", argv[1]);
+ exit(1);
+ }
+
+ isize = read(src, idata, osize);
+ for (int i = isize; i < osize - 4; i++)
+ idata[i] = 0;
+
+ close(src);
+
+ if((dst = open(argv[2], O_CREAT | O_WRONLY, 0777)) < 0) {
+ fprintf(stderr, "Could not open %s.\n", argv[2]);
+ exit(1);
+ }
+
+ cs = crc32(idata, osize - 4);
+ for (int i = 0; i < 4; i++)
+ idata[osize - 4 + i] = cs >> i * 8;
+
+ dprintf(dst, ".cpu cortex-m0plus\n");
+ dprintf(dst, ".thumb\n\n");
+ dprintf(dst, ".section .boot2, \"ax\"\n\n");
+ for (int i = 0; i < osize / 16; i++) {
+ dprintf(dst, ".byte ");
+ for (int j = 0; j < 16; j++) {
+ dprintf(dst, "0x%02x%s", idata[i * 16 + j],
+ j == 15 ? "\n" : ", ");
+ }
+ }
+
+ close(dst);
+ return 0;
+}
diff --git a/boot2/boot2.S b/boot2/boot2.S
@@ -0,0 +1,66 @@
+.cpu cortex-m0
+.thumb
+
+.equ XIP_BASE, 0x10000000
+.equ XIP_SSI_BASE, 0x18000000
+.equ PPB_BASE, 0xe0000000
+
+.section .text
+
+.global _stage2_boot
+.type _stage2_boot,%function
+.thumb_func
+_stage2_boot:
+ push {lr}
+
+ ldr r3, =XIP_SSI_BASE
+
+ mov r1, #0
+ str r1, [r3, #0x8] // SSI_SSIENR
+
+ mov r1, #0x4
+ str r1, [r3, #0x14] // SSI_BAUDR
+
+ mov r1, #0x0
+ lsl r1, r1, #21
+ mov r2, #31
+ lsl r2, r2, #16
+ add r1, r1, r2
+ mov r2, #0x3
+ lsl r2, r2, #8
+ add r1, r1, r2
+ str r1, [r3, #0x0] // SSI_CTRLR0
+
+ mov r1, #0x03 // read command
+ lsl r1, r1, #24
+ mov r2, #2 // 8bit instruction
+ lsl r2, r2, #8
+ add r1, r1, r2
+ mov r2, #6 // 24bit address length
+ lsl r2, r2, #2
+ add r1, r1, r2
+ ldr r0, =0xf4
+ add r0, r0, r3 // SSI_SPI_CTRLR0
+ str r1, [r0, #0x0]
+
+ mov r1, #0x0
+ str r1, [r3, #0x0] // SSI_CTRLR1_OFFSET
+
+ mov r1, #1
+ str r1, [r3, #0x8] // SSI_SSIENR_OFFSET
+
+ pop {r0}
+ cmp r0, #0
+ beq vector_into_flash
+ bx r0
+vector_into_flash:
+ ldr r0, =XIP_BASE
+ ldr r2, =0x100
+ add r0, r0, r2
+ ldr r1, =PPB_BASE
+ ldr r2, =0xed08 // M0PLUS_VTOR
+ add r1, r1, r2
+ str r0, [r1, #0x0]
+ ldmia r0, {r0, r1}
+ msr msp, r0
+ bx r1
diff --git a/boot2/boot2_w25q.S b/boot2/boot2_w25q.S
@@ -0,0 +1,287 @@
+// ----------------------------------------------------------------------------
+// Second stage boot code
+// Copyright (c) 2019-2021 Raspberry Pi (Trading) Ltd.
+// SPDX-License-Identifier: BSD-3-Clause
+//
+// Device: Winbond W25Q080
+// Also supports W25Q16JV (which has some different SR instructions)
+// Also supports AT25SF081
+// Also supports S25FL132K0
+//
+// Description: Configures W25Q080 to run in Quad I/O continuous read XIP mode
+//
+// Details: * Check status register 2 to determine if QSPI mode is enabled,
+// and perform an SR2 programming cycle if necessary.
+// * Use SSI to perform a dummy 0xEB read command, with the mode
+// continuation bits set, so that the flash will not require
+// 0xEB instruction prefix on subsequent reads.
+// * Configure SSI to write address, mode bits, but no instruction.
+// SSI + flash are now jointly in a state where continuous reads
+// can take place.
+// * Jump to exit pointer passed in via lr. Bootrom passes null,
+// in which case this code uses a default 256 byte flash offset
+//
+// Building: * This code must be position-independent, and use stack only
+// * The code will be padded to a size of 256 bytes, including a
+// 4-byte checksum. Therefore code size cannot exceed 252 bytes.
+// ----------------------------------------------------------------------------
+
+#include "pico/asm_helper.S"
+#include "hardware/regs/addressmap.h"
+#include "hardware/regs/ssi.h"
+#include "hardware/regs/pads_qspi.h"
+
+// ----------------------------------------------------------------------------
+// Config section
+// ----------------------------------------------------------------------------
+// It should be possible to support most flash devices by modifying this section
+
+// The serial flash interface will run at clk_sys/PICO_FLASH_SPI_CLKDIV.
+// This must be a positive, even integer.
+// The bootrom is very conservative with SPI frequency, but here we should be
+// as aggressive as possible.
+
+#ifndef PICO_FLASH_SPI_CLKDIV
+#define PICO_FLASH_SPI_CLKDIV 4
+#endif
+#if PICO_FLASH_SPI_CLKDIV & 1
+#error PICO_FLASH_SPI_CLKDIV must be even
+#endif
+
+// Define interface width: single/dual/quad IO
+#define FRAME_FORMAT SSI_CTRLR0_SPI_FRF_VALUE_QUAD
+
+// For W25Q080 this is the "Read data fast quad IO" instruction:
+#define CMD_READ 0xeb
+
+// "Mode bits" are 8 special bits sent immediately after
+// the address bits in a "Read Data Fast Quad I/O" command sequence.
+// On W25Q080, the four LSBs are don't care, and if MSBs == 0xa, the
+// next read does not require the 0xeb instruction prefix.
+#define MODE_CONTINUOUS_READ 0xa0
+
+// The number of address + mode bits, divided by 4 (always 4, not function of
+// interface width).
+#define ADDR_L 8
+
+// How many clocks of Hi-Z following the mode bits. For W25Q080, 4 dummy cycles
+// are required.
+#define WAIT_CYCLES 4
+
+// If defined, we will read status reg, compare to SREG_DATA, and overwrite
+// with our value if the SR doesn't match.
+// We do a two-byte write to SR1 (01h cmd) rather than a one-byte write to
+// SR2 (31h cmd) as the latter command isn't supported by WX25Q080.
+// This isn't great because it will remove block protections.
+// A better solution is to use a volatile SR write if your device supports it.
+#define PROGRAM_STATUS_REG
+
+#define CMD_WRITE_ENABLE 0x06
+#define CMD_READ_STATUS 0x05
+#define CMD_READ_STATUS2 0x35
+#define CMD_WRITE_STATUS 0x01
+#define SREG_DATA 0x02 // Enable quad-SPI mode
+
+// ----------------------------------------------------------------------------
+// Start of 2nd Stage Boot Code
+// ----------------------------------------------------------------------------
+
+.syntax unified
+.cpu cortex-m0plus
+.thumb
+
+.section .text
+
+// The exit point is passed in lr. If entered from bootrom, this will be the
+// flash address immediately following this second stage (0x10000100).
+// Otherwise it will be a return address -- second stage being called as a
+// function by user code, after copying out of XIP region. r3 holds SSI base,
+// r0...2 used as temporaries. Other GPRs not used.
+.global _stage2_boot
+.type _stage2_boot,%function
+.thumb_func
+_stage2_boot:
+ push {lr}
+
+ // Set pad configuration:
+ // - SCLK 8mA drive, no slew limiting
+ // - SDx disable input Schmitt to reduce delay
+
+ ldr r3, =PADS_QSPI_BASE
+ movs r0, #(2 << PADS_QSPI_GPIO_QSPI_SCLK_DRIVE_LSB | PADS_QSPI_GPIO_QSPI_SCLK_SLEWFAST_BITS)
+ str r0, [r3, #PADS_QSPI_GPIO_QSPI_SCLK_OFFSET]
+ ldr r0, [r3, #PADS_QSPI_GPIO_QSPI_SD0_OFFSET]
+ movs r1, #PADS_QSPI_GPIO_QSPI_SD0_SCHMITT_BITS
+ bics r0, r1
+ str r0, [r3, #PADS_QSPI_GPIO_QSPI_SD0_OFFSET]
+ str r0, [r3, #PADS_QSPI_GPIO_QSPI_SD1_OFFSET]
+ str r0, [r3, #PADS_QSPI_GPIO_QSPI_SD2_OFFSET]
+ str r0, [r3, #PADS_QSPI_GPIO_QSPI_SD3_OFFSET]
+
+ ldr r3, =XIP_SSI_BASE
+
+ // Disable SSI to allow further config
+ movs r1, #0
+ str r1, [r3, #SSI_SSIENR_OFFSET]
+
+ // Set baud rate
+ movs r1, #PICO_FLASH_SPI_CLKDIV
+ str r1, [r3, #SSI_BAUDR_OFFSET]
+
+ // Set 1-cycle sample delay. If PICO_FLASH_SPI_CLKDIV == 2 then this means,
+ // if the flash launches data on SCLK posedge, we capture it at the time that
+ // the next SCLK posedge is launched. This is shortly before that posedge
+ // arrives at the flash, so data hold time should be ok. For
+ // PICO_FLASH_SPI_CLKDIV > 2 this pretty much has no effect.
+
+ movs r1, #1
+ movs r2, #SSI_RX_SAMPLE_DLY_OFFSET // == 0xf0 so need 8 bits of offset significance
+ str r1, [r3, r2]
+
+
+// On QSPI parts we usually need a 01h SR-write command to enable QSPI mode
+// (i.e. turn WPn and HOLDn into IO2/IO3)
+#ifdef PROGRAM_STATUS_REG
+program_sregs:
+#define CTRL0_SPI_TXRX \
+ (7 << SSI_CTRLR0_DFS_32_LSB) | /* 8 bits per data frame */ \
+ (SSI_CTRLR0_TMOD_VALUE_TX_AND_RX << SSI_CTRLR0_TMOD_LSB)
+
+ ldr r1, =(CTRL0_SPI_TXRX)
+ str r1, [r3, #SSI_CTRLR0_OFFSET]
+
+ // Enable SSI and select slave 0
+ movs r1, #1
+ str r1, [r3, #SSI_SSIENR_OFFSET]
+
+ // Check whether SR needs updating
+ movs r0, #CMD_READ_STATUS2
+ bl read_flash_sreg
+ movs r2, #SREG_DATA
+ cmp r0, r2
+ beq skip_sreg_programming
+
+ // Send write enable command
+ movs r1, #CMD_WRITE_ENABLE
+ str r1, [r3, #SSI_DR0_OFFSET]
+
+ // Poll for completion and discard RX
+ bl wait_ssi_ready
+ ldr r1, [r3, #SSI_DR0_OFFSET]
+
+ // Send status write command followed by data bytes
+ movs r1, #CMD_WRITE_STATUS
+ str r1, [r3, #SSI_DR0_OFFSET]
+ movs r0, #0
+ str r0, [r3, #SSI_DR0_OFFSET]
+ str r2, [r3, #SSI_DR0_OFFSET]
+
+ bl wait_ssi_ready
+ ldr r1, [r3, #SSI_DR0_OFFSET]
+ ldr r1, [r3, #SSI_DR0_OFFSET]
+ ldr r1, [r3, #SSI_DR0_OFFSET]
+
+ // Poll status register for write completion
+1:
+ movs r0, #CMD_READ_STATUS
+ bl read_flash_sreg
+ movs r1, #1
+ tst r0, r1
+ bne 1b
+
+skip_sreg_programming:
+
+ // Disable SSI again so that it can be reconfigured
+ movs r1, #0
+ str r1, [r3, #SSI_SSIENR_OFFSET]
+#endif
+
+// Currently the flash expects an 8 bit serial command prefix on every
+// transfer, which is a waste of cycles. Perform a dummy Fast Read Quad I/O
+// command, with mode bits set such that the flash will not expect a serial
+// command prefix on *subsequent* transfers. We don't care about the results
+// of the read, the important part is the mode bits.
+
+dummy_read:
+#define CTRLR0_ENTER_XIP \
+ (FRAME_FORMAT /* Quad I/O mode */ \
+ << SSI_CTRLR0_SPI_FRF_LSB) | \
+ (31 << SSI_CTRLR0_DFS_32_LSB) | /* 32 data bits */ \
+ (SSI_CTRLR0_TMOD_VALUE_EEPROM_READ /* Send INST/ADDR, Receive Data */ \
+ << SSI_CTRLR0_TMOD_LSB)
+
+ ldr r1, =(CTRLR0_ENTER_XIP)
+ str r1, [r3, #SSI_CTRLR0_OFFSET]
+
+ movs r1, #0x0 // NDF=0 (single 32b read)
+ str r1, [r3, #SSI_CTRLR1_OFFSET]
+
+#define SPI_CTRLR0_ENTER_XIP \
+ (ADDR_L << SSI_SPI_CTRLR0_ADDR_L_LSB) | /* Address + mode bits */ \
+ (WAIT_CYCLES << SSI_SPI_CTRLR0_WAIT_CYCLES_LSB) | /* Hi-Z dummy clocks following address + mode */ \
+ (SSI_SPI_CTRLR0_INST_L_VALUE_8B \
+ << SSI_SPI_CTRLR0_INST_L_LSB) | /* 8-bit instruction */ \
+ (SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_1C2A /* Send Command in serial mode then address in Quad I/O mode */ \
+ << SSI_SPI_CTRLR0_TRANS_TYPE_LSB)
+
+ ldr r1, =(SPI_CTRLR0_ENTER_XIP)
+ ldr r0, =(XIP_SSI_BASE + SSI_SPI_CTRLR0_OFFSET) // SPI_CTRL0 Register
+ str r1, [r0]
+
+ movs r1, #1 // Re-enable SSI
+ str r1, [r3, #SSI_SSIENR_OFFSET]
+
+ movs r1, #CMD_READ
+ str r1, [r3, #SSI_DR0_OFFSET] // Push SPI command into TX FIFO
+ movs r1, #MODE_CONTINUOUS_READ // 32-bit: 24 address bits (we don't care, so 0) and M[7:4]=1010
+ str r1, [r3, #SSI_DR0_OFFSET] // Push Address into TX FIFO - this will trigger the transaction
+
+ // Poll for completion
+ bl wait_ssi_ready
+
+// The flash is in a state where we can blast addresses in parallel, and get
+// parallel data back. Now configure the SSI to translate XIP bus accesses
+// into QSPI transfers of this form.
+
+ movs r1, #0
+ str r1, [r3, #SSI_SSIENR_OFFSET] // Disable SSI (and clear FIFO) to allow further config
+
+// Note that the INST_L field is used to select what XIP data gets pushed into
+// the TX FIFO:
+// INST_L_0_BITS {ADDR[23:0],XIP_CMD[7:0]} Load "mode bits" into XIP_CMD
+// Anything else {XIP_CMD[7:0],ADDR[23:0]} Load SPI command into XIP_CMD
+configure_ssi:
+#define SPI_CTRLR0_XIP \
+ (MODE_CONTINUOUS_READ /* Mode bits to keep flash in continuous read mode */ \
+ << SSI_SPI_CTRLR0_XIP_CMD_LSB) | \
+ (ADDR_L << SSI_SPI_CTRLR0_ADDR_L_LSB) | /* Total number of address + mode bits */ \
+ (WAIT_CYCLES << SSI_SPI_CTRLR0_WAIT_CYCLES_LSB) | /* Hi-Z dummy clocks following address + mode */ \
+ (SSI_SPI_CTRLR0_INST_L_VALUE_NONE /* Do not send a command, instead send XIP_CMD as mode bits after address */ \
+ << SSI_SPI_CTRLR0_INST_L_LSB) | \
+ (SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_2C2A /* Send Address in Quad I/O mode (and Command but that is zero bits long) */ \
+ << SSI_SPI_CTRLR0_TRANS_TYPE_LSB)
+
+ ldr r1, =(SPI_CTRLR0_XIP)
+ ldr r0, =(XIP_SSI_BASE + SSI_SPI_CTRLR0_OFFSET)
+ str r1, [r0]
+
+ movs r1, #1
+ str r1, [r3, #SSI_SSIENR_OFFSET] // Re-enable SSI
+
+// Bus accesses to the XIP window will now be transparently serviced by the
+// external flash on cache miss. We are ready to run code from flash.
+
+// Pull in standard exit routine
+#include "boot2_helpers/exit_from_boot2.S"
+
+// Common functions
+#include "boot2_helpers/wait_ssi_ready.S"
+#ifdef PROGRAM_STATUS_REG
+#include "boot2_helpers/read_flash_sreg.S"
+#endif
+
+.global literals
+literals:
+.ltorg
+
+.end
diff --git a/boot2/bs2_default.bin b/boot2/bs2_default.bin
Binary files differ.