rp2040

RP2040 Programming without SDK
Log | Files | Refs

commit 63b2f6358e4bfb756076bb53195eb60bb5028640
parent eaf9a863d705bf0ae62d6eda5346205a0480df7c
Author: Matsuda Kenji <info@mtkn.jp>
Date:   Sat,  1 Apr 2023 09:18:20 +0900

add bincrc

Diffstat:
M Makefile | 23 +++++++++++++++++++----
A bincrc.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A boot2/boot2.S | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A boot2/boot2_w25q.S | 287 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A boot2/bs2_default.bin | 0
5 files changed, 457 insertions(+), 4 deletions(-)

diff --git a/Makefile b/Makefile
@@ -1,6 +1,7 @@
 AS = arm-none-eabi-as
 LD = arm-none-eabi-ld
 CC = arm-none-eabi-gcc
+OBJCOPY = arm-none-eabi-objcopy
 ELF2UF2 = ../pico-sdk/build/elf2uf2/elf2uf2
 
 MCPU = -mcpu=cortex-m0plus
@@ -15,12 +16,24 @@ clean:
 	rm -f *.o
 	rm -f *.elf
 	rm -f *.uf2
+	rm -f bincrc
+	rm -f boot2/*.o
+#	rm -f boot2/*.bin
+	rm -f boot2/boot2_crc.S
 
 start.o: start.s
 	$(AS) $(ASFLAGS) -o start.o start.s
 
-boot2.o: boot2/bs2_default_padded_checksummed.S
-	$(AS) $(ASFLAGS) -o boot2.o boot2/bs2_default_padded_checksummed.S
+#boot2_crc.o: boot2/bs2_default_padded_checksummed.S
+#	$(AS) $(ASFLAGS) -o boot2_crc.o boot2/bs2_default_padded_checksummed.S
+boot2/boot2_crc.S: boot2/bs2_default.bin bincrc	# the recipe runs ./bincrc, so build it first
+	./bincrc boot2/bs2_default.bin boot2/boot2_crc.S
+boot2_crc.o: boot2/boot2_crc.S
+	$(AS) $(ASFLAGS) -o boot2_crc.o boot2/boot2_crc.S
+#boot2/boot2_crc.S: boot2/boot2.S bincrc
+#	$(AS) $(ASFLAGS) -o boot2/boot2.o boot2/boot2.S
+#	$(OBJCOPY) -O binary boot2/boot2.o boot2/boot2.bin
+#	./bincrc boot2/boot2.bin boot2/boot2_crc.S
 
 main.o: main.c
 	$(CC) $(CFLAGS) -o main.o main.c
@@ -28,8 +41,8 @@ main.o: main.c
 as.o: mach.s
 	$(AS) $(ASFLAGS) -o as.o mach.s
 
-led.elf: start.o boot2.o main.o as.o
-	$(LD) $(LFLAGS) -o led.elf -T memmap.ld start.o boot2.o as.o main.o
+led.elf: start.o boot2_crc.o main.o as.o
+	$(LD) $(LFLAGS) -o led.elf -T memmap.ld start.o boot2_crc.o as.o main.o
 
 led.uf2: led.elf
 	$(ELF2UF2) led.elf led.uf2
@@ -38,3 +51,5 @@ flash: led.uf2
 	mount /dev/disk/by-label/RPI-RP2 /mnt
 	cp led.uf2 /mnt
 
+bincrc: bincrc.c
+	tcc -o bincrc bincrc.c
diff --git a/bincrc.c b/bincrc.c
@@ -0,0 +1,85 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+/* Debug helper: print the 32 bits of i, most significant bit first. */
+void
+printb(uint32_t i)
+{
+	for (int j = 31; j >= 0; j--)
+		putchar(i >> j & 1 ? '1' : '0');
+	putchar('\n');
+}
+
+/* CRC-32, MSB first: polynomial 0x04C11DB7, initial 0xFFFFFFFF, no final XOR. */
+uint32_t
+crc32(uint8_t *idata, size_t len)
+{
+	uint32_t pol = 0x04C11DB7;
+	uint32_t c = 0xFFFFFFFF;
+
+	for (size_t i = 0; i < len; i++) {
+		/* Cast before shifting: the promoted int overflows for bytes >= 0x80. */
+		c ^= (uint32_t)idata[i] << 24;
+		for (int j = 0; j < 8; j++)
+			c = c >> 31 & 1 ? c << 1 ^ pol : c << 1;
+	}
+
+	return c;
+}
+
+/* Usage: bincrc src dst.  Zero-pads src to 252 bytes, appends its CRC and */
+/* writes the 256 bytes to dst as assembler .byte lines in section .boot2. */
+int
+main(int argc, char *argv[])
+{
+	int src, dst;
+	size_t isize, osize = 256;
+	uint8_t idata[osize];
+	uint32_t cs;
+
+	if(argc != 3) {
+		fprintf(stderr, "Usage: %s src dst\n", argv[0]);
+		exit(1);
+	}
+
+	if((src = open(argv[1], O_RDONLY)) < 0) {
+		fprintf(stderr, "Could not open %s.\n", argv[1]);
+		exit(1);
+	}
+
+	isize = read(src, idata, osize);
+	close(src);
+	/* A failed read() (-1) wraps to SIZE_MAX here, so one test covers both */
+	/* an I/O error and an image too large to leave room for the CRC. */
+	if (isize > osize - 4) {
+		fprintf(stderr, "%s: read failed or larger than %zu bytes.\n", argv[1], osize - 4);
+		exit(1);
+	}
+	for (size_t i = isize; i < osize - 4; i++)
+		idata[i] = 0;
+
+	/* O_TRUNC so that a longer, older dst leaves no stale tail behind. */
+	if((dst = open(argv[2], O_CREAT | O_WRONLY | O_TRUNC, 0644)) < 0) {
+		fprintf(stderr, "Could not open %s.\n", argv[2]);
+		exit(1);
+	}
+
+	cs = crc32(idata, osize - 4);
+	for (int i = 0; i < 4; i++)	/* least significant CRC byte first */
+		idata[osize - 4 + i] = cs >> i * 8;
+
+	dprintf(dst, ".cpu cortex-m0plus\n");
+	dprintf(dst, ".thumb\n\n");
+	dprintf(dst, ".section .boot2, \"ax\"\n\n");
+	for (size_t i = 0; i < osize / 16; i++) {
+		dprintf(dst, ".byte ");
+		for (int j = 0; j < 16; j++)
+			dprintf(dst, "0x%02x%s", idata[i * 16 + j], j == 15 ? "\n" : ", ");
+	}
+
+	close(dst);
+	return 0;
+}
diff --git a/boot2/boot2.S b/boot2/boot2.S
@@ -0,0 +1,66 @@
+.cpu cortex-m0
+.thumb
+// Stage-2 boot: program the XIP SSI, then jump to the caller or into flash.
+.equ XIP_BASE, 0x10000000
+.equ XIP_SSI_BASE, 0x18000000
+.equ PPB_BASE, 0xe0000000
+
+.section .text
+// In: lr = address to return to, or 0 to start the image at XIP_BASE + 0x100.
+.global _stage2_boot
+.type _stage2_boot,%function
+.thumb_func
+_stage2_boot:
+	push {lr}
+
+	ldr r3, =XIP_SSI_BASE // r3 = SSI base for the register stores below
+
+	mov r1, #0
+	str r1, [r3, #0x8] // SSI_SSIENR
+
+	mov r1, #0x4
+	str r1, [r3, #0x14] // SSI_BAUDR
+	// r1 = (0 << 21) | (31 << 16) | (3 << 8)
+	mov r1, #0x0
+	lsl r1, r1, #21
+	mov r2, #31
+	lsl r2, r2, #16
+	add r1, r1, r2
+	mov r2, #0x3
+	lsl r2, r2, #8
+	add r1, r1, r2
+	str r1, [r3, #0x0] // SSI_CTRLR0
+	// r1 = (0x03 << 24) | (2 << 8) | (6 << 2)
+	mov r1, #0x03 // read command
+	lsl r1, r1, #24
+	mov r2, #2 // 8bit instruction
+	lsl r2, r2, #8
+	add r1, r1, r2
+	mov r2, #6 // 24bit address length
+	lsl r2, r2, #2
+	add r1, r1, r2
+	ldr r0, =0xf4
+	add r0, r0, r3 // SSI_SPI_CTRLR0
+	str r1, [r0, #0x0]
+
+	mov r1, #0x0
+	str r1, [r3, #0x4] // SSI_CTRLR1 is at 0x4; 0x0 would overwrite SSI_CTRLR0
+
+	mov r1, #1
+	str r1, [r3, #0x8] // SSI_SSIENR
+
+	pop {r0} // r0 = lr saved on entry
+	cmp r0, #0
+	beq vector_into_flash
+	bx r0
+vector_into_flash:
+	ldr r0, =XIP_BASE
+	ldr r2, =0x100
+	add r0, r0, r2 // r0 = XIP_BASE + 0x100
+	ldr r1, =PPB_BASE
+	ldr r2, =0xed08 // M0PLUS_VTOR
+	add r1, r1, r2
+	str r0, [r1, #0x0] // VTOR = XIP_BASE + 0x100
+	ldmia r0, {r0, r1} // r0 = word 0, r1 = word 1 of that table
+	msr msp, r0
+	bx r1
diff --git a/boot2/boot2_w25q.S b/boot2/boot2_w25q.S
@@ -0,0 +1,287 @@
+// ----------------------------------------------------------------------------
+// Second stage boot code
+// Copyright (c) 2019-2021 Raspberry Pi (Trading) Ltd.
+// SPDX-License-Identifier: BSD-3-Clause
+//
+// Device:      Winbond W25Q080
+//              Also supports W25Q16JV (which has some different SR instructions)
+//              Also supports AT25SF081
+//              Also supports S25FL132K0
+//
+// Description: Configures W25Q080 to run in Quad I/O continuous read XIP mode
+//
+// Details:     * Check status register 2 to determine if QSPI mode is enabled,
+//                and perform an SR2 programming cycle if necessary.
+//              * Use SSI to perform a dummy 0xEB read command, with the mode
+//                continuation bits set, so that the flash will not require
+//                0xEB instruction prefix on subsequent reads.
+//              * Configure SSI to write address, mode bits, but no instruction.
+//                SSI + flash are now jointly in a state where continuous reads
+//                can take place.
+//              * Jump to exit pointer passed in via lr. Bootrom passes null,
+//                in which case this code uses a default 256 byte flash offset
+//
+// Building:    * This code must be position-independent, and use stack only
+//              * The code will be padded to a size of 256 bytes, including a
+//                4-byte checksum. Therefore code size cannot exceed 252 bytes.
+// ----------------------------------------------------------------------------
+
+#include "pico/asm_helper.S"
+#include "hardware/regs/addressmap.h"
+#include "hardware/regs/ssi.h"
+#include "hardware/regs/pads_qspi.h"
+
+// ----------------------------------------------------------------------------
+// Config section
+// ----------------------------------------------------------------------------
+// It should be possible to support most flash devices by modifying this section
+
+// The serial flash interface will run at clk_sys/PICO_FLASH_SPI_CLKDIV.
+// This must be a positive, even integer.
+// The bootrom is very conservative with SPI frequency, but here we should be
+// as aggressive as possible.
+
+#ifndef PICO_FLASH_SPI_CLKDIV
+#define PICO_FLASH_SPI_CLKDIV 4
+#endif
+#if PICO_FLASH_SPI_CLKDIV & 1
+#error PICO_FLASH_SPI_CLKDIV must be even
+#endif
+
+// Define interface width: single/dual/quad IO
+#define FRAME_FORMAT SSI_CTRLR0_SPI_FRF_VALUE_QUAD
+
+// For W25Q080 this is the "Read data fast quad IO" instruction:
+#define CMD_READ 0xeb
+
+// "Mode bits" are 8 special bits sent immediately after
+// the address bits in a "Read Data Fast Quad I/O" command sequence.
+// On W25Q080, the four LSBs are don't care, and if MSBs == 0xa, the
+// next read does not require the 0xeb instruction prefix.
+#define MODE_CONTINUOUS_READ 0xa0
+
+// The number of address + mode bits, divided by 4 (always 4, not function of
+// interface width).
+#define ADDR_L 8
+
+// How many clocks of Hi-Z following the mode bits. For W25Q080, 4 dummy cycles
+// are required.
+#define WAIT_CYCLES 4
+
+// If defined, we will read status reg, compare to SREG_DATA, and overwrite
+// with our value if the SR doesn't match.
+// We do a two-byte write to SR1 (01h cmd) rather than a one-byte write to
+// SR2 (31h cmd) as the latter command isn't supported by WX25Q080.
+// This isn't great because it will remove block protections.
+// A better solution is to use a volatile SR write if your device supports it.
+#define PROGRAM_STATUS_REG
+
+#define CMD_WRITE_ENABLE 0x06
+#define CMD_READ_STATUS 0x05
+#define CMD_READ_STATUS2 0x35
+#define CMD_WRITE_STATUS 0x01
+#define SREG_DATA 0x02  // Enable quad-SPI mode
+
+// ----------------------------------------------------------------------------
+// Start of 2nd Stage Boot Code
+// ----------------------------------------------------------------------------
+
+.syntax unified
+.cpu cortex-m0plus
+.thumb
+
+.section .text
+
+// The exit point is passed in lr. If entered from bootrom, this will be the
+// flash address immediately following this second stage (0x10000100).
+// Otherwise it will be a return address -- second stage being called as a
+// function by user code, after copying out of XIP region. r3 holds SSI base,
+// r0...2 used as temporaries. Other GPRs not used.
+.global _stage2_boot
+.type _stage2_boot,%function
+.thumb_func
+_stage2_boot:
+    push {lr}                       // keep the exit pointer described above on the stack
+
+    // Set pad configuration:
+    // - SCLK 8mA drive, no slew limiting
+    // - SDx disable input Schmitt to reduce delay
+
+    ldr r3, =PADS_QSPI_BASE
+    movs r0, #(2 << PADS_QSPI_GPIO_QSPI_SCLK_DRIVE_LSB | PADS_QSPI_GPIO_QSPI_SCLK_SLEWFAST_BITS)
+    str r0, [r3, #PADS_QSPI_GPIO_QSPI_SCLK_OFFSET]
+    ldr r0, [r3, #PADS_QSPI_GPIO_QSPI_SD0_OFFSET]
+    movs r1, #PADS_QSPI_GPIO_QSPI_SD0_SCHMITT_BITS
+    bics r0, r1
+    str r0, [r3, #PADS_QSPI_GPIO_QSPI_SD0_OFFSET]
+    str r0, [r3, #PADS_QSPI_GPIO_QSPI_SD1_OFFSET] // the value derived from SD0 is reused for SD1..SD3
+    str r0, [r3, #PADS_QSPI_GPIO_QSPI_SD2_OFFSET]
+    str r0, [r3, #PADS_QSPI_GPIO_QSPI_SD3_OFFSET]
+
+    ldr r3, =XIP_SSI_BASE           // from here on r3 is the SSI base
+
+    // Disable SSI to allow further config
+    movs r1, #0
+    str r1, [r3, #SSI_SSIENR_OFFSET]
+
+    // Set baud rate
+    movs r1, #PICO_FLASH_SPI_CLKDIV
+    str r1, [r3, #SSI_BAUDR_OFFSET]
+
+    // Set 1-cycle sample delay. If PICO_FLASH_SPI_CLKDIV == 2 then this means,
+    // if the flash launches data on SCLK posedge, we capture it at the time that
+    // the next SCLK posedge is launched. This is shortly before that posedge
+    // arrives at the flash, so data hold time should be ok. For
+    // PICO_FLASH_SPI_CLKDIV > 2 this pretty much has no effect.
+
+    movs r1, #1
+    movs r2, #SSI_RX_SAMPLE_DLY_OFFSET  // == 0xf0 so need 8 bits of offset significance
+    str r1, [r3, r2]
+
+
+// On QSPI parts we usually need a 01h SR-write command to enable QSPI mode
+// (i.e. turn WPn and HOLDn into IO2/IO3)
+#ifdef PROGRAM_STATUS_REG
+program_sregs:
+#define CTRL0_SPI_TXRX \
+    (7 << SSI_CTRLR0_DFS_32_LSB) | /* 8 bits per data frame */ \
+    (SSI_CTRLR0_TMOD_VALUE_TX_AND_RX << SSI_CTRLR0_TMOD_LSB)
+
+    ldr r1, =(CTRL0_SPI_TXRX)
+    str r1, [r3, #SSI_CTRLR0_OFFSET]
+
+     // Enable SSI and select slave 0
+    movs r1, #1
+    str r1, [r3, #SSI_SSIENR_OFFSET]
+
+    // Check whether SR needs updating
+    movs r0, #CMD_READ_STATUS2
+    bl read_flash_sreg              // helper is included below; presumably returns the register in r0
+    movs r2, #SREG_DATA
+    cmp r0, r2
+    beq skip_sreg_programming
+
+    // Send write enable command
+    movs r1, #CMD_WRITE_ENABLE
+    str r1, [r3, #SSI_DR0_OFFSET]
+
+    // Poll for completion and discard RX
+    bl wait_ssi_ready
+    ldr r1, [r3, #SSI_DR0_OFFSET]
+
+    // Send status write command followed by data bytes
+    movs r1, #CMD_WRITE_STATUS
+    str r1, [r3, #SSI_DR0_OFFSET]
+    movs r0, #0
+    str r0, [r3, #SSI_DR0_OFFSET]   // first data byte: 0
+    str r2, [r3, #SSI_DR0_OFFSET]   // second data byte: SREG_DATA (assumes wait_ssi_ready left r2 intact -- confirm)
+
+    bl wait_ssi_ready
+    ldr r1, [r3, #SSI_DR0_OFFSET]   // three reads, presumably one per byte written above; results discarded
+    ldr r1, [r3, #SSI_DR0_OFFSET]
+    ldr r1, [r3, #SSI_DR0_OFFSET]
+
+    // Poll status register for write completion
+1:
+    movs r0, #CMD_READ_STATUS
+    bl read_flash_sreg
+    movs r1, #1
+    tst r0, r1
+    bne 1b                          // repeat while bit 0 of the value read is set
+
+skip_sreg_programming:
+
+    // Disable SSI again so that it can be reconfigured
+    movs r1, #0
+    str r1, [r3, #SSI_SSIENR_OFFSET]
+#endif
+
+// Currently the flash expects an 8 bit serial command prefix on every
+// transfer, which is a waste of cycles. Perform a dummy Fast Read Quad I/O
+// command, with mode bits set such that the flash will not expect a serial
+// command prefix on *subsequent* transfers. We don't care about the results
+// of the read, the important part is the mode bits.
+
+dummy_read:
+#define CTRLR0_ENTER_XIP \
+    (FRAME_FORMAT                          /* Quad I/O mode */                \
+        << SSI_CTRLR0_SPI_FRF_LSB) |                                          \
+    (31 << SSI_CTRLR0_DFS_32_LSB)  |       /* 32 data bits */                 \
+    (SSI_CTRLR0_TMOD_VALUE_EEPROM_READ     /* Send INST/ADDR, Receive Data */ \
+        << SSI_CTRLR0_TMOD_LSB)
+
+    ldr r1, =(CTRLR0_ENTER_XIP)
+    str r1, [r3, #SSI_CTRLR0_OFFSET]
+
+    movs r1, #0x0                    // NDF=0 (single 32b read)
+    str r1, [r3, #SSI_CTRLR1_OFFSET]
+
+#define SPI_CTRLR0_ENTER_XIP \
+    (ADDR_L << SSI_SPI_CTRLR0_ADDR_L_LSB) |     /* Address + mode bits */ \
+    (WAIT_CYCLES << SSI_SPI_CTRLR0_WAIT_CYCLES_LSB) | /* Hi-Z dummy clocks following address + mode */ \
+    (SSI_SPI_CTRLR0_INST_L_VALUE_8B \
+        << SSI_SPI_CTRLR0_INST_L_LSB) |        /* 8-bit instruction */ \
+    (SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_1C2A      /* Send Command in serial mode then address in Quad I/O mode */ \
+        << SSI_SPI_CTRLR0_TRANS_TYPE_LSB)
+
+    ldr r1, =(SPI_CTRLR0_ENTER_XIP)
+    ldr r0, =(XIP_SSI_BASE + SSI_SPI_CTRLR0_OFFSET)  // SPI_CTRL0 Register
+    str r1, [r0]
+
+    movs r1, #1                      // Re-enable SSI
+    str r1, [r3, #SSI_SSIENR_OFFSET]
+
+    movs r1, #CMD_READ
+    str r1, [r3, #SSI_DR0_OFFSET]   // Push SPI command into TX FIFO
+    movs r1, #MODE_CONTINUOUS_READ   // 32-bit: 24 address bits (we don't care, so 0) and M[7:4]=1010
+    str r1, [r3, #SSI_DR0_OFFSET]   // Push Address into TX FIFO - this will trigger the transaction
+
+    // Poll for completion
+    bl wait_ssi_ready
+
+// The flash is in a state where we can blast addresses in parallel, and get
+// parallel data back. Now configure the SSI to translate XIP bus accesses
+// into QSPI transfers of this form.
+
+    movs r1, #0
+    str r1, [r3, #SSI_SSIENR_OFFSET]   // Disable SSI (and clear FIFO) to allow further config
+
+// Note that the INST_L field is used to select what XIP data gets pushed into
+// the TX FIFO:
+//      INST_L_0_BITS   {ADDR[23:0],XIP_CMD[7:0]}       Load "mode bits" into XIP_CMD
+//      Anything else   {XIP_CMD[7:0],ADDR[23:0]}       Load SPI command into XIP_CMD
+configure_ssi:
+#define SPI_CTRLR0_XIP \
+    (MODE_CONTINUOUS_READ                      /* Mode bits to keep flash in continuous read mode */ \
+        << SSI_SPI_CTRLR0_XIP_CMD_LSB) | \
+    (ADDR_L << SSI_SPI_CTRLR0_ADDR_L_LSB) |    /* Total number of address + mode bits */ \
+    (WAIT_CYCLES << SSI_SPI_CTRLR0_WAIT_CYCLES_LSB) |    /* Hi-Z dummy clocks following address + mode */ \
+    (SSI_SPI_CTRLR0_INST_L_VALUE_NONE          /* Do not send a command, instead send XIP_CMD as mode bits after address */ \
+        << SSI_SPI_CTRLR0_INST_L_LSB) | \
+    (SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_2C2A      /* Send Address in Quad I/O mode (and Command but that is zero bits long) */ \
+        << SSI_SPI_CTRLR0_TRANS_TYPE_LSB)
+
+    ldr r1, =(SPI_CTRLR0_XIP)
+    ldr r0, =(XIP_SSI_BASE + SSI_SPI_CTRLR0_OFFSET)
+    str r1, [r0]
+
+    movs r1, #1
+    str r1, [r3, #SSI_SSIENR_OFFSET]   // Re-enable SSI
+
+// Bus accesses to the XIP window will now be transparently serviced by the
+// external flash on cache miss. We are ready to run code from flash.
+
+// Pull in standard exit routine
+#include "boot2_helpers/exit_from_boot2.S"
+
+// Common functions
+#include "boot2_helpers/wait_ssi_ready.S"
+#ifdef PROGRAM_STATUS_REG
+#include "boot2_helpers/read_flash_sreg.S"
+#endif
+
+.global literals
+literals:
+.ltorg
+
+.end
diff --git a/boot2/bs2_default.bin b/boot2/bs2_default.bin
Binary files differ.