From fe6b92f8b700db5310164339e71110953a59e110 Mon Sep 17 00:00:00 2001 From: "Luke T. Shumaker" Date: Sun, 2 Mar 2025 13:38:44 -0700 Subject: libhw: w5500: Re-add SPI validation This reverts commit 063f263f84d517c6497e7ca37f503956bad7075a. --- libhw/w5500.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/libhw/w5500.c b/libhw/w5500.c index b4ad86b..fafc846 100644 --- a/libhw/w5500.c +++ b/libhw/w5500.c @@ -312,6 +312,15 @@ void _w5500_init(struct w5500 *chip, }; } + /* Validate that SPI works correctly. */ + for (uint16_t a = 0; a < 0x100; a++) { + w5500ll_write_sock_reg(chip->spidev, 0, mode, a); + uint8_t b = w5500ll_read_sock_reg(chip->spidev, 0, mode); + if (b != a) + errorf("SPI to W5500 does not appear to be functional: wrote:%d != read:%d", a, b); + } + w5500ll_write_sock_reg(chip->spidev, 0, mode, 0); + /* Initialize the hardware. */ gpioirq_set_and_enable_exclusive_handler(pin_intr, GPIO_IRQ_EDGE_FALL, w5500_intrhandler, chip); gpio_set_dir(chip->pin_reset, GPIO_OUT); -- cgit v1.2.3-2-g168b From c63584e6bcb934d84ef277cdc4619763a41f1bbe Mon Sep 17 00:00:00 2001 From: "Luke T. 
Shumaker" Date: Sun, 2 Mar 2025 13:43:36 -0700 Subject: libhw: w5500: Add a VALIDATE_SPI toggle --- cmd/sbc_harness/config/config.h | 2 +- libhw/w5500.c | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/cmd/sbc_harness/config/config.h b/cmd/sbc_harness/config/config.h index f9c7df2..0fa95ae 100644 --- a/cmd/sbc_harness/config/config.h +++ b/cmd/sbc_harness/config/config.h @@ -22,8 +22,8 @@ #define CONFIG_W5500_LOCAL_PORT_MIN 32768 #define CONFIG_W5500_LOCAL_PORT_MAX 60999 +#define CONFIG_W5500_VALIDATE_SPI 1 /* bool */ #define CONFIG_W5500_DEBUG 1 /* bool */ - #define CONFIG_W5500_LL_DEBUG 0 /* bool */ /* 9P *************************************************************************/ diff --git a/libhw/w5500.c b/libhw/w5500.c index fafc846..0f15785 100644 --- a/libhw/w5500.c +++ b/libhw/w5500.c @@ -96,6 +96,9 @@ #ifndef CONFIG_W5500_LOCAL_PORT_MAX #error config.h must define CONFIG_W5500_LOCAL_PORT_MAX #endif +#ifndef CONFIG_W5500_VALIDATE_SPI + #error config.h must define CONFIG_W5500_VALIDATE_SPI +#endif #ifndef CONFIG_W5500_DEBUG #error config.h must define CONFIG_W5500_DEBUG #endif @@ -312,14 +315,21 @@ void _w5500_init(struct w5500 *chip, }; } +#if CONFIG_W5500_VALIDATE_SPI /* Validate that SPI works correctly. */ + bool spi_ok = true; for (uint16_t a = 0; a < 0x100; a++) { w5500ll_write_sock_reg(chip->spidev, 0, mode, a); uint8_t b = w5500ll_read_sock_reg(chip->spidev, 0, mode); - if (b != a) - errorf("SPI to W5500 does not appear to be functional: wrote:%d != read:%d", a, b); + if (b != a) { + errorf("SPI to W5500 does not appear to be functional: wrote:0x%02"PRIx16" != read:0x%02"PRIx8, a, b); + spi_ok = false; + } } + if (!spi_ok) + __lm_abort(); w5500ll_write_sock_reg(chip->spidev, 0, mode, 0); +#endif /* Initialize the hardware. 
*/ gpioirq_set_and_enable_exclusive_handler(pin_intr, GPIO_IRQ_EDGE_FALL, w5500_intrhandler, chip); -- cgit v1.2.3-2-g168b From 5a3d7bafd47067e9659c5773e371e796e6d3585b Mon Sep 17 00:00:00 2001 From: "Luke T. Shumaker" Date: Mon, 24 Feb 2025 22:54:30 -0700 Subject: libhw: rp2040_hwspi: Pull out a assert_4distinct macro --- libhw/rp2040_hwspi.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/libhw/rp2040_hwspi.c b/libhw/rp2040_hwspi.c index 8c87666..bdfaa62 100644 --- a/libhw/rp2040_hwspi.c +++ b/libhw/rp2040_hwspi.c @@ -15,6 +15,14 @@ LO_IMPLEMENTATION_C(io_duplex_readwriter, struct rp2040_hwspi, rp2040_hwspi, static) LO_IMPLEMENTATION_C(spi, struct rp2040_hwspi, rp2040_hwspi, static) +#define assert_4distinct(a, b, c, d) \ + assert(a != b); \ + assert(a != c); \ + assert(a != d); \ + assert(b != c); \ + assert(b != d); \ + assert(c != d); + void _rp2040_hwspi_init(struct rp2040_hwspi *self, enum rp2040_hwspi_instance inst_num, enum spi_mode mode, @@ -29,12 +37,7 @@ void _rp2040_hwspi_init(struct rp2040_hwspi *self, assert(self); assert(baudrate_hz); - assert(pin_miso != pin_mosi); - assert(pin_miso != pin_clk); - assert(pin_miso != pin_cs); - assert(pin_mosi != pin_clk); - assert(pin_mosi != pin_cs); - assert(pin_clk != pin_cs); + assert_4distinct(pin_miso, pin_mosi, pin_clk, pin_cs); /* Regarding the constraints on pin assignments: see the * RP2040 datasheet, table 2, in §1.4.3 "GPIO Functions". */ -- cgit v1.2.3-2-g168b From 6354a440b24d9a0b157c8c5571403f09dd04c245 Mon Sep 17 00:00:00 2001 From: "Luke T. 
Shumaker" Date: Sun, 2 Mar 2025 21:16:35 -0700 Subject: libhw: Update comments and asserts about clock rate --- cmd/sbc_harness/config/config.h | 4 ++++ cmd/sbc_harness/main.c | 10 +++++----- libhw/rp2040_hwspi.c | 24 +++++++++++++++++++++--- libhw/rp2040_include/libhw/rp2040_hwspi.h | 22 ++++++++++++++++------ libhw/w5500.c | 6 +++--- 5 files changed, 49 insertions(+), 17 deletions(-) diff --git a/cmd/sbc_harness/config/config.h b/cmd/sbc_harness/config/config.h index 0fa95ae..e923ac8 100644 --- a/cmd/sbc_harness/config/config.h +++ b/cmd/sbc_harness/config/config.h @@ -9,6 +9,10 @@ #include /* for size_t */ +/* RP2040 *********************************************************************/ + +#define CONFIG_RP2040_SPI_DEBUG 1 /* bool */ + /* W5500 **********************************************************************/ /** diff --git a/cmd/sbc_harness/main.c b/cmd/sbc_harness/main.c index 6f1d0ca..6fa76bd 100644 --- a/cmd/sbc_harness/main.c +++ b/cmd/sbc_harness/main.c @@ -168,11 +168,11 @@ COROUTINE init_cr(void *) { rp2040_hwspi_init(&globals.dev_spi, "W5500", RP2040_HWSPI_0, SPI_MODE_0, /* the W5500 supports mode 0 or mode 3 */ - 60*1000*1000, /* as close to the W5500's max rate of 80MHz as we can without hwspi borking */ - 16, /* PIN_MISO */ - 19, /* PIN_MOSI */ - 18, /* PIN_CLK */ - 17); /* PIN_CS */ + 31250000, /* min(w5500, hwspi); w5500=80MHz; hwspi=31.25MHz, see rp2040_hwspi.h for a comment about why this is so low */ + 16, /* PIN_MISO */ + 19, /* PIN_MOSI */ + 18, /* PIN_CLK */ + 17);/* PIN_CS */ w5500_init(&globals.dev_w5500, "W5500", lo_box_rp2040_hwspi_as_spi(&globals.dev_spi), 21, /* PIN_INTR */ diff --git a/libhw/rp2040_hwspi.c b/libhw/rp2040_hwspi.c index bdfaa62..ac46451 100644 --- a/libhw/rp2040_hwspi.c +++ b/libhw/rp2040_hwspi.c @@ -4,14 +4,26 @@ * SPDX-License-Identifier: AGPL-3.0-or-later */ -#include /* pico-sdk:hardware_spi */ -#include /* pico-sdk:hardware_gpio */ +#include /* for PRIu{n} */ + +#include /* for clock_get_hz() and clk_peri */ 
+#include +#include #include +#define LOG_NAME RP2040_SPI +#include + #define IMPLEMENTATION_FOR_LIBHW_RP2040_HWSPI_H YES #include +#include "config.h" + +#ifndef CONFIG_RP2040_SPI_DEBUG + #error config.h must define CONFIG_RP2040_SPI_DEBUG (bool) +#endif + LO_IMPLEMENTATION_C(io_duplex_readwriter, struct rp2040_hwspi, rp2040_hwspi, static) LO_IMPLEMENTATION_C(spi, struct rp2040_hwspi, rp2040_hwspi, static) @@ -34,9 +46,13 @@ void _rp2040_hwspi_init(struct rp2040_hwspi *self, /* Be not weary: This is but 12 lines of actual code; and many * lines of comments and assert()s. */ spi_inst_t *inst; + uint actual_baudrate_hz; assert(self); assert(baudrate_hz); + uint32_t clk_peri_hz = clock_get_hz(clk_peri); + debugf("clk_peri = %"PRIu32"Hz", clk_peri_hz); + assert(baudrate_hz*2 <= clk_peri_hz); assert_4distinct(pin_miso, pin_mosi, pin_clk, pin_cs); /* Regarding the constraints on pin assignments: see the @@ -60,7 +76,9 @@ void _rp2040_hwspi_init(struct rp2040_hwspi *self, assert_notreached("invalid hwspi instance number"); } - spi_init(inst, baudrate_hz); + actual_baudrate_hz = spi_init(inst, baudrate_hz); + debugf("baudrate = %uHz", actual_baudrate_hz); + assert(actual_baudrate_hz == baudrate_hz); spi_set_format(inst, 8, (mode & 0b10) ? SPI_CPOL_1 : SPI_CPOL_0, (mode & 0b01) ? SPI_CPHA_1 : SPI_CPHA_0, diff --git a/libhw/rp2040_include/libhw/rp2040_hwspi.h b/libhw/rp2040_include/libhw/rp2040_hwspi.h index 76e0709..a1d85d4 100644 --- a/libhw/rp2040_include/libhw/rp2040_hwspi.h +++ b/libhw/rp2040_include/libhw/rp2040_hwspi.h @@ -48,12 +48,22 @@ LO_IMPLEMENTATION_H(spi, struct rp2040_hwspi, rp2040_hwspi) * the CPU. This is because the PL022 has a maximum of 16-bit frames, * but we need to be able to do *at least* 32-bit frames (and ideally, * much larger). By managing it ourselves, we can just keep CS pulled - * low extra-long, making the frame extra-long. 
However, this means - * that we can't SPI so fast that the CPU can't do things in time; - * experimentally much faster than 60MHz seems to be when I start - * getting mangled messages. We wouldn't have this speed limit with a - * PIO-based SPI driver, because it could toggle CLK and CS in - * lock-step with receiving data from the FIFO. + * low extra-long, making the frame extra-long. + * + * Restrictions on baudrate: + * + * - The PL022 requires that the baudrate is an even-number fraction + * of clk_peri. + * + This implies that the maximum baudrate is clk_peri/2. + * + Pico-SDK's default clk_peri is 125MHz, max is 200MHz. + * - The CS-from-GPIO hack above means that we can't go so fast + * that the CPU can't do things in time. + * + Experimentally: + * | clk_sys=125MHz | baud=31.25MHz | works OK | + * | clk_sys=125MHz | baud=62.5 MHz | messages get shifted right a bit | + * + * Both of these restrictions ought to be avoidable by using a + * PIO-based SPI driver instead of this PL022-based driver. */ #define rp2040_hwspi_init(self, name, \ inst_num, mode, baudrate_hz, \ diff --git a/libhw/w5500.c b/libhw/w5500.c index 0f15785..c4d36f3 100644 --- a/libhw/w5500.c +++ b/libhw/w5500.c @@ -348,9 +348,9 @@ static void w5500_post_reset(struct w5500 *chip) { w5500ll_write_common_reg(chip->spidev, eth_addr, chip->hwaddr); /* The RP2040 needs a 1/sys_clk hysteresis between interrupts - * for us to notice them. At the maximum-rated clock-rate of - * 133MHz, that means 7.5ns (but the sbc-harness overclocks - * the RP2040, so we could get away with even shorter). + * for us to notice them. At the default clock-rate of + * 125MHz, that means 8ns; and at the maximum-rated clock-rate + * of 200MHz, that means 5ns. * * If intlevel is non-zero, then the hysteresis is * (intlevel+1)*4/(150MHz), or (intlevel+1)*26.7ns; so even -- cgit v1.2.3-2-g168b From 55df5f277fe53b964361fb5e305ca574082afd91 Mon Sep 17 00:00:00 2001 From: "Luke T. 
Shumaker" Date: Sun, 2 Mar 2025 23:56:15 -0700 Subject: gdb-helpers: rp2040-show-interrupts: Split into several methods --- gdb-helpers/rp2040.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/gdb-helpers/rp2040.py b/gdb-helpers/rp2040.py index 30a936a..ec719e8 100644 --- a/gdb-helpers/rp2040.py +++ b/gdb-helpers/rp2040.py @@ -1,6 +1,6 @@ # gdb-helpers/rp2040.py - GDB helpers for the RP2040 CPU. # -# Copyright (C) 2024 Luke T. Shumaker +# Copyright (C) 2024-2025 Luke T. Shumaker # SPDX-License-Identifier: AGPL-3.0-or-later import gdb @@ -96,15 +96,13 @@ class RP2040ShowInterrupts(gdb.Command): ) def invoke(self, arg: str, from_tty: bool) -> None: + self.arm_cortex_m0plus_mmregisters() + self.arm_cortex_m0plus_registers() + print() + + def arm_cortex_m0plus_mmregisters(self) -> None: base: int = 0xE0000000 icsr = read_mmreg(base + 0xED04) - psr = read_reg("xPSR") - # ║├┤║├┤├┤║├┤║║├┤├──┤║║║├──┤ - # 10987654321098765432109876543210 (dec bitnum) - # 3 2 1 0 - # ║├┤║├┤├┤║├┤║║├┤├──┤║║║├──┤ - # fedcba9876543210fedcba9876543210 (hex bitnum) - # 1 0 print( f""" ARM Cortex-M0+ memory-mapped registers: @@ -142,7 +140,13 @@ AIRCR : {fmt32(read_mmreg(base+0xed0c)) } Application Interrupt and Reset Co ╓sleep_deep s_ev_on_pend╖ ║╓sleep_on_exit SCR : {fmt32(read_mmreg(base+0xed10)) } System Control +""" + ) + def arm_cortex_m0plus_registers(self) -> None: + psr = read_reg("xPSR") + print( + f""" ARM Cortex-M0+ processor core registers: ╓pm (0=normal, 1=top priority) -- cgit v1.2.3-2-g168b From ab32648720ea3c256b79b867fa2992b4601b4214 Mon Sep 17 00:00:00 2001 From: "Luke T. 
Shumaker" Date: Mon, 3 Mar 2025 00:42:47 -0700 Subject: gdb-helpers: rp2040-show-interrupts: Fuss with output --- gdb-helpers/rp2040.py | 54 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/gdb-helpers/rp2040.py b/gdb-helpers/rp2040.py index ec719e8..9e10d73 100644 --- a/gdb-helpers/rp2040.py +++ b/gdb-helpers/rp2040.py @@ -18,6 +18,23 @@ def fmt32(x: int) -> str: return "0b" + bin(x)[2:].rjust(32, "0") +def box(title: str, content: str) -> str: + width = 80 + + lines = content.split("\n") + while len(lines) and lines[0] == "": + lines = lines[1:] + while len(lines) and lines[-1] == "": + lines = lines[:-1] + lines = ["", *lines, ""] + + ret = "┏━[" + title + "]" + ("━" * (width - len(title) - 5)) + "┓\n" + for line in content.split("\n"): + ret += f"┃ {line.ljust(width-4)} ┃\n" + ret += "┗" + ("━" * (width - 2)) + "┛" + return ret + + def read_prio(addr: int) -> str: prios: list[int] = 32 * [0] for regnum in range(0, 8): @@ -96,20 +113,19 @@ class RP2040ShowInterrupts(gdb.Command): ) def invoke(self, arg: str, from_tty: bool) -> None: - self.arm_cortex_m0plus_mmregisters() self.arm_cortex_m0plus_registers() - print() + self.arm_cortex_m0plus_mmregisters() def arm_cortex_m0plus_mmregisters(self) -> None: base: int = 0xE0000000 icsr = read_mmreg(base + 0xED04) print( - f""" -ARM Cortex-M0+ memory-mapped registers: - + box( + "ARM Cortex-M0+ memory-mapped registers", + f""" clocks╖ ┌SIO SPI┐ ║ │╓QSPI - UART┐│ ║ │║╓bank0 ╓XIP + UART┐│ ║ │║╓GPIO ╓XIP ADC╖ ││ ║ │║║┌DMA ║╓USB I2C┐ ║ ││ ║ │║║│ ┌PIO║║╓PWM RTC╖├┐║┌┤├┐║┌┤║║├┐├──┐║║║┌──┬timers @@ -140,27 +156,29 @@ AIRCR : {fmt32(read_mmreg(base+0xed0c)) } Application Interrupt and Reset Co ╓sleep_deep s_ev_on_pend╖ ║╓sleep_on_exit SCR : {fmt32(read_mmreg(base+0xed10)) } System Control -""" +""", + ) ) def arm_cortex_m0plus_registers(self) -> None: psr = read_reg("xPSR") print( - f""" -ARM Cortex-M0+ processor core registers: - + box( + "ARM Cortex-M0+ 
processor-core registers", + f""" ╓pm (0=normal, 1=top priority) PRIMASK : {fmt32(read_reg('primask')) } Priority Mask - [C]arry╖╓o[V]erflow - [Z]ero╖║║ ╓[T]humb ╓require [a]lignment - [N]egative╖║║║ ║ exec ║ ┌interrupt - app╫╫╫╢ ╟───────┴──────╢┌────┴──┐ + app exec intr + ┌┴─┐ ┌───────┴──────┐┌───┴───┐ xPSR : {fmt32(psr) } {{Application,Execution,Interrupt}} Program Status - └────┬──┘ - └{psr&0x1FF} ({exception_names[psr&0x1FF]}) - -""" + ║║║║ ║ ║└───┬───┘ + [N]egative╜║║║ ║ ║ └{psr&0x1FF} ({exception_names[psr&0x1FF]}) + [Z]ero╜║║ ╙[T]humb ╙require [a]lignment + [C]arry╜║ + o[V]erflow╜ +""", + ) ) -- cgit v1.2.3-2-g168b From 95d821dc2767a4b144dba30977dab35825594e06 Mon Sep 17 00:00:00 2001 From: "Luke T. Shumaker" Date: Mon, 3 Mar 2025 23:23:40 -0700 Subject: notes.md: Link to W5500 datasheet versions --- notes.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/notes.md b/notes.md index 8afdb78..497363e 100644 --- a/notes.md +++ b/notes.md @@ -1,7 +1,7 @@ @@ -140,6 +140,19 @@ sections" wut does that mean 8 16-bit values in both the TX buffer and the RX buffer +| Ver. 1.0.0 | 2013-08-01 | https://www.alldatasheet.com/datasheet-pdf/view/554784/ETC2/W5500.html https://www.alldatasheet.com/pdfjsview/web/viewer.html?file=//www.alldatasheet.com/datasheet-pdf/view/554784/ETC2/W5500/+_44J97VwSw9bZYvAB+/datasheet.pdf | +| Ver. 1.0.1 | 2013-09-13 | | +| Ver. 1.0.2 | 2013-11-14 | https://cdn.sparkfun.com/datasheets/Dev/Arduino/Shields/W5500_datasheet_v1.0.2_1.pdf | +| Ver. 1.0.3 | 2014-05-29 | | +| Ver. 1.0.4 | 2014-06-13 | | +| Ver. 1.0.5 | 2014-11-10 | | +| Ver. 1.0.6 | 2014-12-30 | | +| Ver. 1.0.7 | 2016-02-24 | | +| Ver. 1.0.8 | 2017-05-19 | https://docs.wiznet.io/img/products/w5500/w5500_ds_v108e.pdf (on-page version and date are wrong) | +| Ver. 1.0.9 | 2019-05-22 | https://docs.wiznet.io/img/products/w5500/w5500_ds_v109e.pdf | +| Ver. 
1.1.0 | 2022-12-17 | https://docs.wiznet.io/img/products/w5500/W5500_ds_v110e.pdf | + +https://github.com/Bodmer/TFT_eSPI/discussions/2432 ---- -- cgit v1.2.3-2-g168b From fb73355711c99003c559df48164a1ce6db93cff9 Mon Sep 17 00:00:00 2001 From: "Luke T. Shumaker" Date: Tue, 4 Mar 2025 00:05:35 -0700 Subject: libhw: rp2040_hwspi: Add more config knobs --- cmd/sbc_harness/main.c | 2 ++ libhw/rp2040_hwspi.c | 15 +++++++++++++-- libhw/rp2040_include/libhw/rp2040_hwspi.h | 30 +++++++++++++++++++++--------- 3 files changed, 36 insertions(+), 11 deletions(-) diff --git a/cmd/sbc_harness/main.c b/cmd/sbc_harness/main.c index 6fa76bd..a3351fc 100644 --- a/cmd/sbc_harness/main.c +++ b/cmd/sbc_harness/main.c @@ -169,6 +169,8 @@ COROUTINE init_cr(void *) { rp2040_hwspi_init(&globals.dev_spi, "W5500", RP2040_HWSPI_0, SPI_MODE_0, /* the W5500 supports mode 0 or mode 3 */ 31250000, /* min(w5500, hwspi); w5500=80MHz; hwspi=31.25MHz, see rp2040_hwspi.h for a comment about why this is so low */ + 30, /* W5500 datasheet says min(T_CS = SCSn High Time) = 30ns */ + 0, /* bogus data to write when doing a read */ 16, /* PIN_MISO */ 19, /* PIN_MOSI */ 18, /* PIN_CLK */ diff --git a/libhw/rp2040_hwspi.c b/libhw/rp2040_hwspi.c index ac46451..8dd49d6 100644 --- a/libhw/rp2040_hwspi.c +++ b/libhw/rp2040_hwspi.c @@ -18,6 +18,8 @@ #define IMPLEMENTATION_FOR_LIBHW_RP2040_HWSPI_H YES #include +#include + #include "config.h" #ifndef CONFIG_RP2040_SPI_DEBUG @@ -39,6 +41,8 @@ void _rp2040_hwspi_init(struct rp2040_hwspi *self, enum rp2040_hwspi_instance inst_num, enum spi_mode mode, uint baudrate_hz, + uint64_t min_delay_ns, + uint8_t bogus_data, uint pin_miso, uint pin_mosi, uint pin_clk, @@ -100,9 +104,12 @@ void _rp2040_hwspi_init(struct rp2040_hwspi *self, gpio_set_dir(pin_cs, GPIO_OUT); gpio_put(pin_cs, 1); - /* Return. */ + /* Initialize self. 
*/ self->inst = inst; + self->min_delay_ns = min_delay_ns; + self->bogus_data = bogus_data; self->pin_cs = pin_cs; + self->dead_until_ns = 0; } static void rp2040_hwspi_readwritev(struct rp2040_hwspi *self, const struct duplex_iovec *iov, int iovcnt) { @@ -113,6 +120,9 @@ static void rp2040_hwspi_readwritev(struct rp2040_hwspi *self, const struct dupl assert(iov); assert(iovcnt); + uint64_t now = LO_CALL(bootclock, get_time_ns); + if (now < self->dead_until_ns) + sleep_until_ns(self->dead_until_ns); gpio_put(self->pin_cs, 0); /* TODO: Replace blocking reads+writes with DMA. */ for (int i = 0; i < iovcnt; i++) { @@ -121,9 +131,10 @@ static void rp2040_hwspi_readwritev(struct rp2040_hwspi *self, const struct dupl else if (iov[i].iov_write_src) spi_write_blocking(inst, iov[i].iov_write_src, iov[i].iov_len); else if (iov[i].iov_read_dst) - spi_read_blocking(inst, 0, iov[i].iov_read_dst, iov[i].iov_len); + spi_read_blocking(inst, self->bogus_data, iov[i].iov_read_dst, iov[i].iov_len); else assert_notreached("duplex_iovec is neither read nor write"); } gpio_put(self->pin_cs, 1); + self->dead_until_ns = LO_CALL(bootclock, get_time_ns) + self->min_delay_ns; } diff --git a/libhw/rp2040_include/libhw/rp2040_hwspi.h b/libhw/rp2040_include/libhw/rp2040_hwspi.h index a1d85d4..fef1dbd 100644 --- a/libhw/rp2040_include/libhw/rp2040_hwspi.h +++ b/libhw/rp2040_include/libhw/rp2040_hwspi.h @@ -20,9 +20,14 @@ enum rp2040_hwspi_instance { struct rp2040_hwspi { BEGIN_PRIVATE(LIBHW_RP2040_HWSPI_H) + /* const */ LM_IF(IS_IMPLEMENTATION_FOR(LIBHW_RP2040_HWSPI_H))(spi_inst_t)(void) *inst; + uint64_t min_delay_ns; + uint8_t bogus_data; + uint pin_cs; - uint pin_cs; + /* mutable */ + uint64_t dead_until_ns; END_PRIVATE(LIBHW_RP2040_HWSPI_H) }; LO_IMPLEMENTATION_H(io_duplex_readwriter, struct rp2040_hwspi, rp2040_hwspi) @@ -31,14 +36,17 @@ LO_IMPLEMENTATION_H(spi, struct rp2040_hwspi, rp2040_hwspi) /** * Initialize an instance of `struct rp2040_hwspi`. 
* - * @param self : struct rp2040_hwspi : the structure to initialize - * @param name : char * : a name for the SPI port; to include in the bininfo - * @param inst_num : enum rp2040_hwspi_instance : the PL220 instance number; RP2040_HWSPI_{0,1} - * @param mode : enum spi_mode : the SPI mode; SPI_MODE_{0..3} - * @param pin_miso : uint : pin number; 0, 4, 16, or 20 for _HWSPI_0; 8, 12, 24, or 28 for _HWSPI_1 - * @param pin_mosi : uint : pin number; 3, 7, 19, or 23 for _HWSPI_0; 11, 15, or 27 for _HWSPI_1 - * @param pin_clk : uint : pin number; 2, 6, 18, or 22 for _HWSPI_0; 10, 14, or 26 for _HWSPI_1 - * @param pin_cs : uint : pin number; any unused GPIO pin + * @param self : struct rp2040_hwspi : the structure to initialize + * @param name : char * : a name for the SPI port; to include in the bininfo + * @param inst_num : enum rp2040_hwspi_instance : the PL022 instance number; RP2040_HWSPI_{0,1} + * @param mode : enum spi_mode : the SPI mode; SPI_MODE_{0..3} + * @param baudrate_hz : uint : baudrate in Hz + * @param min_delay_ns: uint64_t : minimum time for pin_cs to be high between messages + * @param bogus_data : uint8_t : bogus data to write when .iov_write_src is NULL + * @param pin_miso : uint : pin number; 0, 4, 16, or 20 for _HWSPI_0; 8, 12, 24, or 28 for _HWSPI_1 + * @param pin_mosi : uint : pin number; 3, 7, 19, or 23 for _HWSPI_0; 11, 15, or 27 for _HWSPI_1 + * @param pin_clk : uint : pin number; 2, 6, 18, or 22 for _HWSPI_0; 10, 14, or 26 for _HWSPI_1 + * @param pin_cs : uint : pin number; any unused GPIO pin * * There is no bit-order argument; the RP2040's hardware SPI always * uses MSB-first bit order. 
@@ -67,6 +75,7 @@ LO_IMPLEMENTATION_H(spi, struct rp2040_hwspi, rp2040_hwspi) */ #define rp2040_hwspi_init(self, name, \ inst_num, mode, baudrate_hz, \ + min_delay_ns, bogus_data, \ pin_miso, pin_mosi, pin_clk, pin_cs) \ do { \ bi_decl(bi_4pins_with_names(pin_miso, name" SPI MISO", \ @@ -75,12 +84,15 @@ LO_IMPLEMENTATION_H(spi, struct rp2040_hwspi, rp2040_hwspi) pin_mosi, name" SPI CS")); \ _rp2040_hwspi_init(self, \ inst_num, mode, baudrate_hz, \ + min_delay_ns, bogus_data, \ pin_miso, pin_mosi, pin_clk, pin_cs); \ } while(0) void _rp2040_hwspi_init(struct rp2040_hwspi *self, enum rp2040_hwspi_instance inst_num, enum spi_mode mode, uint baudrate_hz, + uint64_t min_delay_ns, + uint8_t bogus_data, uint pin_miso, uint pin_mosi, uint pin_clk, -- cgit v1.2.3-2-g168b From c336bf7f2205131c86e6d2991770a2c150d85ca9 Mon Sep 17 00:00:00 2001 From: "Luke T. Shumaker" Date: Mon, 24 Feb 2025 22:54:30 -0700 Subject: libhw: rp2040_hwspi: Use DMA --- cmd/sbc_harness/main.c | 3 +- gdb-helpers/rp2040.py | 112 +++++++++++++++++++++++++ libhw/rp2040_dma.h | 115 ++++++++++++++++++++++++++ libhw/rp2040_hwspi.c | 131 ++++++++++++++++++++++++++---- libhw/rp2040_include/libhw/rp2040_hwspi.h | 17 +++- 5 files changed, 358 insertions(+), 20 deletions(-) create mode 100644 libhw/rp2040_dma.h diff --git a/cmd/sbc_harness/main.c b/cmd/sbc_harness/main.c index a3351fc..8e2c5ee 100644 --- a/cmd/sbc_harness/main.c +++ b/cmd/sbc_harness/main.c @@ -174,7 +174,8 @@ COROUTINE init_cr(void *) { 16, /* PIN_MISO */ 19, /* PIN_MOSI */ 18, /* PIN_CLK */ - 17);/* PIN_CS */ + 17, /* PIN_CS */ + 0, 1, 2, 3); /* DMA channels */ w5500_init(&globals.dev_w5500, "W5500", lo_box_rp2040_hwspi_as_spi(&globals.dev_spi), 21, /* PIN_INTR */ diff --git a/gdb-helpers/rp2040.py b/gdb-helpers/rp2040.py index 9e10d73..983e13b 100644 --- a/gdb-helpers/rp2040.py +++ b/gdb-helpers/rp2040.py @@ -3,6 +3,8 @@ # Copyright (C) 2024-2025 Luke T. 
Shumaker # SPDX-License-Identifier: AGPL-3.0-or-later +import typing + import gdb @@ -115,6 +117,7 @@ class RP2040ShowInterrupts(gdb.Command): def invoke(self, arg: str, from_tty: bool) -> None: self.arm_cortex_m0plus_registers() self.arm_cortex_m0plus_mmregisters() + self.rp2040_dma_mmregisters() def arm_cortex_m0plus_mmregisters(self) -> None: base: int = 0xE0000000 @@ -181,5 +184,114 @@ xPSR : {fmt32(psr) } {{Application,Execution,Interrupt} ) ) + def rp2040_dma_mmregisters(self) -> None: + base: int = 0x50000000 + + def fmt12(x: int) -> str: + s = fmt32(x) + return s[:-12] + "_" + s[-12:] + + print( + box( + "RP2040 DMA memory-mapped registers", + f""" + + 8 4 0 + ┌──┴───┴───┤ +INTR : {fmt12(read_mmreg(base + 0x400))} Raw + │ │ │ │ +INTE0: {fmt12(read_mmreg(base + 0x404))} IRQ_DMA_0 Enable +INTF0: {fmt12(read_mmreg(base + 0x408))} IRQ_DMA_0 Force +INTS0: {fmt12(read_mmreg(base + 0x40c))} IRQ_DMA_0 Status + │ │ │ │ +INTE1: {fmt12(read_mmreg(base + 0x414))} IRQ_DMA_1 Enable +INTF1: {fmt12(read_mmreg(base + 0x418))} IRQ_DMA_1 Force +INTS1: {fmt12(read_mmreg(base + 0x41c))} IRQ_DMA_1 Status +""", + ) + ) + RP2040ShowInterrupts() + + +class RP2040ShowDMA(gdb.Command): + """Show the RP2040's DMA control registers.""" + + def __init__(self) -> None: + super(RP2040ShowDMA, self).__init__("rp2040-show-dma", gdb.COMMAND_USER) + + def invoke(self, arg: str, from_tty: bool) -> None: + base: int = 0x50000000 + u32_size: int = 4 + + nchan = read_mmreg(base + 0x448) + + def chreg( + ch: int, + name: typing.Literal[ + "read_addr", + "write_addr", + "trans_count", + "ctrl", + "dbg_ctdreq", + "dbg_tcr", + ], + ) -> int: + fieldcnt: int = 4 * 4 + fieldnum: int + debug = False + match name: + case "read_addr": + fieldnum = 0 + case "write_addr": + fieldnum = 1 + case "trans_count": + fieldnum = 2 + case "ctrl": + fieldnum = 4 + case "dbg_ctdreq": + fieldnum = 0 + debug = True + case "dbg_tcr": + fieldnum = 1 + debug = True + return read_mmreg( + base + + (0x800 if debug else 0) + 
+ (ch * u32_size * fieldcnt) + + (u32_size * fieldnum) + ) + + def ctrl(ch: int) -> str: + s = fmt32(chreg(ch, "ctrl")) + return s[:10] + "_" + s[10:] + + def chaddr(ch: int, name: typing.Literal["read", "write"]) -> str: + val = chreg(ch, name + "_addr") # type: ignore + if val == 0: + return "NULL " + return f"0x{val:08x}" + + ret = f""" + ╓sniff_enable + ║╓bswap + ║║╓irq_quiet + ║║║ ┌treq_sel + ║║║ │ ┌chain_to + ║║║ │ │ ╓ring_sel + ║║║ │ │ ║ ┌ring_size + ║║║ │ │ ║ │ ╓incr_write + busy╖ ║║║ │ │ ║ │ ║╓incr_read +write_err╖ ║ ║║║ │ │ ║ │ ║║┌data_size +read_err╖║ ║ ║║║ │ │ ║ │ ║║│ ╓high_priority +ahb_err╖║║ ║ ║║║ │ │ ║ │ ║║│ ║╓enable + ║║║ ║ ║║║ │ │ ║ │ ║║│ ║║ trans_cnt + ║║║ ║ ║║║┌─┴──┐┌┴─┐║┌┴─┐║║├┐║║ read_addr write_addr cur/reload +""" + for ch in range(0, nchan): + ret += f"{ch: 3}: {ctrl(ch)} {chaddr(ch, 'read')} {chaddr(ch, 'write')} {chreg(ch, 'trans_count')}/{chreg(ch, 'dbg_tcr')}\n" + print(box("RP2040 DMA channels", ret)) + + +RP2040ShowDMA() diff --git a/libhw/rp2040_dma.h b/libhw/rp2040_dma.h new file mode 100644 index 0000000..e4b44ff --- /dev/null +++ b/libhw/rp2040_dma.h @@ -0,0 +1,115 @@ +/* libhw/rp2040_dma.h - Utilities for using DMA on the RP2040 (replaces ) + * + * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (C) 2025 Luke T. 
Shumaker + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +#ifndef _LIBHW_RP2040_DMA_H_ +#define _LIBHW_RP2040_DMA_H_ + +#include +#include /* for uint32_t */ + +#include /* for DREQ_* for use with DMA_CTRL_TREQ_SEL() */ +#include /* for dma_hw, dma_channel_hw_t, DMA_NUM_CHANNELS */ + +#include /* for LM_FLOORLOG2() */ + +/* Borrowed from *********************************************/ + +static inline dma_channel_hw_t *dma_channel_hw_addr(uint channel) { + assert(channel < NUM_DMA_CHANNELS); + return &dma_hw->ch[channel]; +} + +enum dma_channel_transfer_size { + DMA_SIZE_8 = 0, ///< Byte transfer (8 bits) + DMA_SIZE_16 = 1, ///< Half word transfer (16 bits) + DMA_SIZE_32 = 2 ///< Word transfer (32 bits) +}; + +static inline bool dma_channel_is_busy(uint channel) { + assert(channel < NUM_DMA_CHANNELS); + return dma_hw->ch[channel].al1_ctrl & DMA_CH0_CTRL_TRIG_BUSY_BITS; +} + +/* Our own code ***************************************************************/ + +#define DMA_CTRL_ENABLE (1<<0) +#define DMA_CTRL_HI_PRIO (1<<1) +#define DMA_CTRL_DATA_SIZE(sz) ((sz)<<2) +#define DMA_CTRL_INCR_READ (1<<4) +#define DMA_CTRL_INCR_WRITE (1<<5) +#define _DMA_CTRL_RING_BITS(b) ((b)<<6) +#define _DMA_CTRL_RING_RD (0) +#define _DMA_CTRL_RING_WR (1<<10) +#define DMA_CTRL_RING(rdwr, bits) (_DMA_CTRL_RING_##rdwr | _DMA_CTRL_RING_BITS(bits)) +#define DMA_CTRL_CHAIN_TO(ch) ((ch)<<11) +#define DMA_CTRL_TREQ_SEL(dreq) ((dreq)<<15) +#define DMA_CTRL_IRQ_QUIET (1<<21) +#define DMA_CTRL_BSWAP (1<<22) +#define DMA_CTRL_SNIFF_EN (1<<23) + +/* | elem | val | name */ +#define READ_ADDR /*|*/volatile const void/*|*/ * /*|*/read_addr +#define WRITE_ADDR /*|*/volatile void/*|*/ * /*|*/write_addr +#define TRANS_COUNT /*|*/ /*|*/uint32_t/*|*/trans_count +#define CTRL /*|*/ /*|*/uint32_t/*|*/ctrl + +/* { +0x0 ; +0x4 ; +0x8 ; +0xC (Trigger) */ +struct dma_alias0 { READ_ADDR ; WRITE_ADDR ; TRANS_COUNT ; CTRL ; }; +struct dma_alias1 { CTRL ; READ_ADDR ; WRITE_ADDR ; TRANS_COUNT ; }; +struct 
dma_alias2 { CTRL ; TRANS_COUNT ; READ_ADDR ; WRITE_ADDR ; }; +struct dma_alias3 { CTRL ; WRITE_ADDR ; TRANS_COUNT ; READ_ADDR ; }; +struct dma_alias0_short2 { TRANS_COUNT ; CTRL ; }; +struct dma_alias1_short2 { WRITE_ADDR ; TRANS_COUNT ; }; +struct dma_alias2_short2 { READ_ADDR ; WRITE_ADDR ; }; +struct dma_alias3_short2 { TRANS_COUNT ; READ_ADDR ; }; +struct dma_alias0_short3 { CTRL ; }; +struct dma_alias1_short3 { TRANS_COUNT ; }; +struct dma_alias2_short3 { WRITE_ADDR ; }; +struct dma_alias3_short3 { READ_ADDR ; }; + +#undef CTRL +#undef TRANS_COUNT +#undef WRITE_ADDR +#undef READ_ADDR + +#define DMA_CHAN_ADDR(CH, TYP) ((TYP *volatile)_Generic((TYP){}, \ + struct dma_alias0: &dma_channel_hw_addr(CH)->read_addr, \ + struct dma_alias1: &dma_channel_hw_addr(CH)->al1_ctrl, \ + struct dma_alias2: &dma_channel_hw_addr(CH)->al2_ctrl, \ + struct dma_alias3: &dma_channel_hw_addr(CH)->al3_ctrl, \ + struct dma_alias0_short2: &dma_channel_hw_addr(CH)->transfer_count, \ + struct dma_alias1_short2: &dma_channel_hw_addr(CH)->al1_write_addr, \ + struct dma_alias2_short2: &dma_channel_hw_addr(CH)->al2_read_addr, \ + struct dma_alias3_short2: &dma_channel_hw_addr(CH)->al3_transfer_count, \ + struct dma_alias0_short3: &dma_channel_hw_addr(CH)->ctrl_trig, \ + struct dma_alias1_short3: &dma_channel_hw_addr(CH)->al1_transfer_count_trig, \ + struct dma_alias2_short3: &dma_channel_hw_addr(CH)->al2_write_addr_trig, \ + struct dma_alias3_short3: &dma_channel_hw_addr(CH)->al3_read_addr_trig)) + +#define DMA_CHAN_WR_TRANS_COUNT(TYP) \ + (sizeof(TYP)/4) + +#define DMA_CHAN_WR_CTRL(TYP) ( DMA_CTRL_DATA_SIZE(DMA_SIZE_32) \ + | DMA_CTRL_INCR_WRITE \ + | DMA_CTRL_RING(WR, LM_FLOORLOG2(sizeof(TYP))) \ + ) + +#define DMA_NONTRIGGER(CH, FIELD) (DMA_CHAN_ADDR(CH, _DMA_NONTRIGGER_##FIELD)->FIELD) +#define _DMA_NONTRIGGER_read_addr struct dma_alias0 +#define _DMA_NONTRIGGER_write_addr struct dma_alias0 +#define _DMA_NONTRIGGER_trans_count struct dma_alias0 +#define _DMA_NONTRIGGER_ctrl struct 
dma_alias1 + +#define DMA_TRIGGER(CH, FIELD) (DMA_CHAN_ADDR(CH, _DMA_TRIGGER_##FIELD)->FIELD) +#define _DMA_TRIGGER_read_addr struct dma_alias3 +#define _DMA_TRIGGER_write_addr struct dma_alias2 +#define _DMA_TRIGGER_trans_count struct dma_alias1 +#define _DMA_TRIGGER_ctrl struct dma_alias0 + +#endif /* _LIBHW_RP2040_DMA_H_ */ diff --git a/libhw/rp2040_hwspi.c b/libhw/rp2040_hwspi.c index 8dd49d6..1c4e096 100644 --- a/libhw/rp2040_hwspi.c +++ b/libhw/rp2040_hwspi.c @@ -4,12 +4,14 @@ * SPDX-License-Identifier: AGPL-3.0-or-later */ +#include #include /* for PRIu{n} */ #include /* for clock_get_hz() and clk_peri */ #include #include +#include #include #define LOG_NAME RP2040_SPI @@ -20,6 +22,8 @@ #include +#include "rp2040_dma.h" + #include "config.h" #ifndef CONFIG_RP2040_SPI_DEBUG @@ -46,7 +50,12 @@ void _rp2040_hwspi_init(struct rp2040_hwspi *self, uint pin_miso, uint pin_mosi, uint pin_clk, - uint pin_cs) { + uint pin_cs, + uint dma1, + uint dma2, + uint dma3, + uint dma4) +{ /* Be not weary: This is but 12 lines of actual code; and many * lines of comments and assert()s. */ spi_inst_t *inst; @@ -58,6 +67,7 @@ void _rp2040_hwspi_init(struct rp2040_hwspi *self, debugf("clk_peri = %"PRIu32"Hz", clk_peri_hz); assert(baudrate_hz*2 <= clk_peri_hz); assert_4distinct(pin_miso, pin_mosi, pin_clk, pin_cs); + assert_4distinct(dma1, dma2, dma3, dma4); /* Regarding the constraints on pin assignments: see the * RP2040 datasheet, table 2, in §1.4.3 "GPIO Functions". 
*/ @@ -109,32 +119,121 @@ void _rp2040_hwspi_init(struct rp2040_hwspi *self, self->min_delay_ns = min_delay_ns; self->bogus_data = bogus_data; self->pin_cs = pin_cs; + self->dma_tx_ctrl = dma1; + self->dma_rx_ctrl = dma2; + self->dma_tx_data = dma3; + self->dma_rx_data = dma4; self->dead_until_ns = 0; } static void rp2040_hwspi_readwritev(struct rp2040_hwspi *self, const struct duplex_iovec *iov, int iovcnt) { assert(self); - spi_inst_t *inst = self->inst; - - assert(inst); + assert(self->inst); assert(iov); - assert(iovcnt); + assert(iovcnt > 0); + /* At this time, I have no intention to run SPI faster than + * 80MHz (= 80Mb/s = 10MB/s). If we ran the CPU at just + * 100MHz (we'll be running it faster than that, maybe even + * 200MHz), that means we'd have 10 clock cycles to send each + * byte. + * + * This affords us substantial simplifications, like being + * able to afford 4-cycle changeovers between DMA blocks, and + * not having to worry about alignment because we can just use + * DMA_SIZE_8. + */ + + uint8_t bogus_rx_dst; + + int pruned_iovcnt = 0; + for (int i = 0; i < iovcnt; i++) + if (iov[i].iov_len) + pruned_iovcnt++; + if (!pruned_iovcnt) + return; + + /* For tx_data_blocks, it doesn't really matter which aliases + * we choose: + * - None of our fields can be NULL (so no + * false-termination). + * - Moving const fields first so they don't have to be + * re-programmed each time isn't possible for us because there need + * to be at least 2 const fields, and we only have 1 + * (read_addr for rx_data_blocks, and write_addr for + * tx_data_blocks). + * + * But for rx_data_blocks, we need ctrl to be the trigger + * register so that the DMA_CTRL_IRQ_QUIET flag isn't cleared + * before we get to the trigger; and while for tx_data_blocks + * it doesn't really matter, the inverse would be nice. 
+ */ + struct dma_alias1 *tx_data_blocks = alloca(sizeof(struct dma_alias1)*(pruned_iovcnt+1)); + struct dma_alias0 *rx_data_blocks = alloca(sizeof(struct dma_alias0)*(pruned_iovcnt+1)); + + for (int i = 0, j = 0; i < iovcnt; i++) { + if (!iov[i].iov_len) + continue; + tx_data_blocks[j] = (typeof(tx_data_blocks[0])){ + .read_addr = iov[i].iov_write_src ?: &self->bogus_data, + .write_addr = &spi_get_hw(self->inst)->dr, + .trans_count = iov[i].iov_len, + .ctrl = (DMA_CTRL_ENABLE + | DMA_CTRL_DATA_SIZE(DMA_SIZE_8) + | (iov[i].iov_write_src ? DMA_CTRL_INCR_READ : 0) + | DMA_CTRL_CHAIN_TO(self->dma_tx_ctrl) + | DMA_CTRL_TREQ_SEL(SPI_DREQ_NUM(self->inst, true)) + | DMA_CTRL_IRQ_QUIET), + }; + rx_data_blocks[j] = (typeof(rx_data_blocks[0])){ + .read_addr = &spi_get_hw(self->inst)->dr, + .write_addr = iov[i].iov_read_dst ?: &bogus_rx_dst, + .trans_count = iov[i].iov_len, + .ctrl = (DMA_CTRL_ENABLE + | DMA_CTRL_DATA_SIZE(DMA_SIZE_8) + | (iov[i].iov_read_dst ? DMA_CTRL_INCR_WRITE : 0) + | DMA_CTRL_CHAIN_TO(self->dma_rx_ctrl) + | DMA_CTRL_TREQ_SEL(SPI_DREQ_NUM(self->inst, false)) + | DMA_CTRL_IRQ_QUIET), + }; + j++; + } + tx_data_blocks[pruned_iovcnt] = (typeof(tx_data_blocks[0])){0}; + rx_data_blocks[pruned_iovcnt] = (typeof(rx_data_blocks[0])){0}; + + /* Set up ctrl. 
*/ + DMA_NONTRIGGER(self->dma_tx_ctrl, read_addr) = tx_data_blocks; + DMA_NONTRIGGER(self->dma_tx_ctrl, write_addr) = DMA_CHAN_ADDR(self->dma_tx_data, typeof(tx_data_blocks[0])); + DMA_NONTRIGGER(self->dma_tx_ctrl, trans_count) = DMA_CHAN_WR_TRANS_COUNT(typeof(tx_data_blocks[0])); + DMA_NONTRIGGER(self->dma_tx_ctrl, ctrl) = (DMA_CTRL_ENABLE + | DMA_CHAN_WR_CTRL(typeof(tx_data_blocks[0])) + | DMA_CTRL_INCR_READ + | DMA_CTRL_CHAIN_TO(self->dma_tx_data) + | DMA_CTRL_TREQ_SEL(DREQ_FORCE) + | DMA_CTRL_IRQ_QUIET); + DMA_NONTRIGGER(self->dma_rx_ctrl, read_addr) = rx_data_blocks; + DMA_NONTRIGGER(self->dma_rx_ctrl, write_addr) = DMA_CHAN_ADDR(self->dma_rx_data, typeof(rx_data_blocks[0])); + DMA_NONTRIGGER(self->dma_rx_ctrl, trans_count) = DMA_CHAN_WR_TRANS_COUNT(typeof(rx_data_blocks[0])); + DMA_NONTRIGGER(self->dma_rx_ctrl, ctrl) = (DMA_CTRL_ENABLE + | DMA_CHAN_WR_CTRL(typeof(rx_data_blocks[0])) + | DMA_CTRL_INCR_READ + | DMA_CTRL_CHAIN_TO(self->dma_rx_data) + | DMA_CTRL_TREQ_SEL(DREQ_FORCE) + | DMA_CTRL_IRQ_QUIET); + + /* Run. */ uint64_t now = LO_CALL(bootclock, get_time_ns); if (now < self->dead_until_ns) sleep_until_ns(self->dead_until_ns); + /* TODO: Use interrupts instead of busy-polling. */ gpio_put(self->pin_cs, 0); - /* TODO: Replace blocking reads+writes with DMA. 
*/ - for (int i = 0; i < iovcnt; i++) { - if (iov[i].iov_write_src && iov[i].iov_read_dst) - spi_write_read_blocking(inst, iov[i].iov_write_src, iov[i].iov_read_dst, iov[i].iov_len); - else if (iov[i].iov_write_src) - spi_write_blocking(inst, iov[i].iov_write_src, iov[i].iov_len); - else if (iov[i].iov_read_dst) - spi_read_blocking(inst, self->bogus_data, iov[i].iov_read_dst, iov[i].iov_len); - else - assert_notreached("duplex_iovec is neither read nor write"); - } + dma_hw->multi_channel_trigger = (1u<<self->dma_tx_ctrl) | (1u<<self->dma_rx_ctrl); + while (dma_channel_is_busy(self->dma_tx_ctrl) + || dma_channel_is_busy(self->dma_tx_data) + || dma_channel_is_busy(self->dma_rx_ctrl) + || dma_channel_is_busy(self->dma_rx_data)) + tight_loop_contents(); + __compiler_memory_barrier(); gpio_put(self->pin_cs, 1); self->dead_until_ns = LO_CALL(bootclock, get_time_ns) + self->min_delay_ns; } diff --git a/libhw/rp2040_include/libhw/rp2040_hwspi.h b/libhw/rp2040_include/libhw/rp2040_hwspi.h index fef1dbd..f90c1af 100644 --- a/libhw/rp2040_include/libhw/rp2040_hwspi.h +++ b/libhw/rp2040_include/libhw/rp2040_hwspi.h @@ -25,6 +25,10 @@ struct rp2040_hwspi { uint64_t min_delay_ns; uint8_t bogus_data; uint pin_cs; + uint dma_tx_data; + uint dma_tx_ctrl; + uint dma_rx_data; + uint dma_rx_ctrl; /* mutable */ uint64_t dead_until_ns; @@ -47,6 +51,7 @@ LO_IMPLEMENTATION_H(spi, struct rp2040_hwspi, rp2040_hwspi) * @param pin_mosi : uint : pin number; 3, 7, 19, or 23 for _HWSPI_0; 11, 15, or 27 for _HWSPI_1 * @param pin_clk : uint : pin number; 2, 6, 18, or 22 for _HWSPI_0; 10, 14, or 26 for _HWSPI_1 * @param pin_cs : uint : pin number; any unused GPIO pin + * @param dma{1-4} : uint : DMA channel; any unused channel * * There is no bit-order argument; the RP2040's hardware SPI always * uses MSB-first bit order.
@@ -76,7 +81,8 @@ LO_IMPLEMENTATION_H(spi, struct rp2040_hwspi, rp2040_hwspi) #define rp2040_hwspi_init(self, name, \ inst_num, mode, baudrate_hz, \ min_delay_ns, bogus_data, \ - pin_miso, pin_mosi, pin_clk, pin_cs) \ + pin_miso, pin_mosi, pin_clk, pin_cs, \ + dma1, dma2, dma3, dma4) \ do { \ bi_decl(bi_4pins_with_names(pin_miso, name" SPI MISO", \ pin_mosi, name" SPI MOSI", \ @@ -85,7 +91,8 @@ LO_IMPLEMENTATION_H(spi, struct rp2040_hwspi, rp2040_hwspi) _rp2040_hwspi_init(self, \ inst_num, mode, baudrate_hz, \ min_delay_ns, bogus_data, \ - pin_miso, pin_mosi, pin_clk, pin_cs); \ + pin_miso, pin_mosi, pin_clk, pin_cs, \ + dma1, dma2, dma3, dma4); \ } while(0) void _rp2040_hwspi_init(struct rp2040_hwspi *self, enum rp2040_hwspi_instance inst_num, @@ -96,6 +103,10 @@ void _rp2040_hwspi_init(struct rp2040_hwspi *self, uint pin_miso, uint pin_mosi, uint pin_clk, - uint pin_cs); + uint pin_cs, + uint dma1, + uint dma2, + uint dma3, + uint dma4); #endif /* _LIBHW_RP2040_HWSPI_H_ */ -- cgit v1.2.3-2-g168b From 7a1f764cda070de396fcb186e1f7457ff2704ba4 Mon Sep 17 00:00:00 2001 From: "Luke T. 
Shumaker" Date: Sun, 2 Mar 2025 23:22:40 -0700 Subject: Bump the CPU clock speed, wring a few more MHz out of the hwspi --- cmd/sbc_harness/CMakeLists.txt | 10 ++++++++++ cmd/sbc_harness/main.c | 2 +- libhw/rp2040_include/libhw/rp2040_hwspi.h | 4 ++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/cmd/sbc_harness/CMakeLists.txt b/cmd/sbc_harness/CMakeLists.txt index 081a5fc..d7923c2 100644 --- a/cmd/sbc_harness/CMakeLists.txt +++ b/cmd/sbc_harness/CMakeLists.txt @@ -31,6 +31,16 @@ target_link_libraries(sbc_harness_objs pico_minimize_runtime(sbc_harness_objs INCLUDE PRINTF PRINTF_MINIMAL PRINTF_LONG_LONG PRINTF_PTRDIFF_T ) +target_compile_definitions(sbc_harness_objs PRIVATE + #PICO_USE_FASTEST_SUPPORTED_CLOCK=1 + + # Calculated by `./3rd-party/pico-sdk/src/rp2_common/hardware_clocks/scripts/vcocalc.py --cmake-only 170` + PLL_SYS_REFDIV=2 + PLL_SYS_VCO_FREQ_HZ=1530000000 + PLL_SYS_POSTDIV1=3 + PLL_SYS_POSTDIV2=3 + SYS_CLK_HZ=170000000 +) suppress_tinyusb_warnings() diff --git a/cmd/sbc_harness/main.c b/cmd/sbc_harness/main.c index 8e2c5ee..c932ec0 100644 --- a/cmd/sbc_harness/main.c +++ b/cmd/sbc_harness/main.c @@ -168,7 +168,7 @@ COROUTINE init_cr(void *) { rp2040_hwspi_init(&globals.dev_spi, "W5500", RP2040_HWSPI_0, SPI_MODE_0, /* the W5500 supports mode 0 or mode 3 */ - 31250000, /* min(w5500, hwspi); w5500=80MHz; hwspi=31.25MHz, see rp2040_hwspi.h for a comment about why this is so low */ + 42500000, /* min(w5500, hwspi); w5500=80MHz; hwspi=42.5MHz, see rp2040_hwspi.h for a comment about why this is so low */ 30, /* W5500 datasheet says min(T_CS = SCSn High Time) = 30ns */ 0, /* bogus write write data when doing a read */ 16, /* PIN_MISO */ diff --git a/libhw/rp2040_include/libhw/rp2040_hwspi.h b/libhw/rp2040_include/libhw/rp2040_hwspi.h index f90c1af..a76a2c8 100644 --- a/libhw/rp2040_include/libhw/rp2040_hwspi.h +++ b/libhw/rp2040_include/libhw/rp2040_hwspi.h @@ -73,6 +73,10 @@ LO_IMPLEMENTATION_H(spi, struct rp2040_hwspi, rp2040_hwspi) * that 
the CPU can't do things in time. * + Experimentally: * | clk_sys=125MHz | baud=31.25MHz | works OK | + * | clk_sys=160MHz | baud=40 MHz | works OK | + * | clk_sys=170MHz | baud=42.5 MHz | works OK | + * | clk_sys=180MHz | baud=45 MHz | mangled in funny ways? | + * | clk_sys=200MHz | baud=50 MHz | messages get shifted right a bit | * | clk_sys=125MHz | baud=62.5 MHz | messages get shifted right a bit | * * Both of these restrictions aught to be avoidable by using a -- cgit v1.2.3-2-g168b From 3f49a57b99e7fe5aafa73e70ed146d98b1ae174c Mon Sep 17 00:00:00 2001 From: "Luke T. Shumaker" Date: Mon, 24 Feb 2025 22:54:30 -0700 Subject: libhw: rp2040_hwspi: Use interrupts instead of busy-polling --- build-aux/stack.c.gen | 5 +++ libhw/CMakeLists.txt | 1 + libhw/rp2040_dma.c | 56 +++++++++++++++++++++++++++++++ libhw/rp2040_dma.h | 23 ++++++++++--- libhw/rp2040_hwspi.c | 21 +++++++----- libhw/rp2040_include/libhw/rp2040_hwspi.h | 2 ++ 6 files changed, 95 insertions(+), 13 deletions(-) create mode 100644 libhw/rp2040_dma.c diff --git a/build-aux/stack.c.gen b/build-aux/stack.c.gen index 66d837a..60f51fe 100755 --- a/build-aux/stack.c.gen +++ b/build-aux/stack.c.gen @@ -505,6 +505,7 @@ class LibHWPlugin: "hostclock_handle_sig_alarm", "hostnet_handle_sig_io", "gpioirq_handler", + "dmairq_handler", ] def extra_nodes(self) -> typing.Collection[Node]: @@ -527,6 +528,10 @@ class LibHWPlugin: return [ "w5500_intrhandler", ], False + if "/rp2040_dmairq.c:" in loc and "handler->fn" in line: + return [ + "rp2040_hwspi_intrhandler", + ], False return None def skip_call(self, chain: list[str], call: str) -> bool: diff --git a/libhw/CMakeLists.txt b/libhw/CMakeLists.txt index bd92e04..242a3fa 100644 --- a/libhw/CMakeLists.txt +++ b/libhw/CMakeLists.txt @@ -14,6 +14,7 @@ if (PICO_PLATFORM STREQUAL "rp2040") libcr_ipc ) target_sources(libhw INTERFACE + rp2040_dma.c rp2040_gpioirq.c rp2040_hwspi.c rp2040_hwtimer.c diff --git a/libhw/rp2040_dma.c b/libhw/rp2040_dma.c new file mode 100644 index 
0000000..dfbf136 --- /dev/null +++ b/libhw/rp2040_dma.c @@ -0,0 +1,56 @@ +/* libhw/rp2040_dma.c - Utilities for sharing the DMA IRQs + * + * Copyright (C) 2025 Luke T. Shumaker + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +#include + +#include /* for irq_set_exclusive_handler() */ + +#include "rp2040_dma.h" + +struct dmairq_handler_entry { + dmairq_handler_t fn; + void *arg; +}; +struct dmairq_handler_entry dmairq_handlers[NUM_DMA_CHANNELS] = {0}; + +bool dmairq_initialized[NUM_DMA_IRQS] = {0}; + +static void dmairq_handler(void) { + enum dmairq irq = __get_current_exception() - VTABLE_FIRST_IRQ; + size_t irq_idx = irq - DMAIRQ_0; + assert(irq_idx < NUM_DMA_IRQS); + + uint32_t regval = dma_hw->irq_ctrl[irq_idx].ints; + for (uint channel = 0; channel < NUM_DMA_CHANNELS; channel++) { + if (regval & 1u<fn) + handler->fn(handler->arg, irq, channel); + } + } + /* acknowledge irq */ + dma_hw->intr = regval; +} + +void dmairq_set_and_enable_exclusive_handler(enum dmairq irq, uint channel, dmairq_handler_t fn, void *arg) { + assert(irq == DMAIRQ_0 || irq == DMAIRQ_1); + assert(channel < NUM_DMA_CHANNELS); + assert(fn); + + assert(dmairq_handlers[channel].fn == NULL); + + dmairq_handlers[channel].fn = fn; + dmairq_handlers[channel].arg = arg; + + size_t irq_idx = irq - DMAIRQ_0; + hw_set_bits(&dma_hw->irq_ctrl[irq_idx].inte, 1u<ch[channel].al1_ctrl & DMA_CH0_CTRL_TRIG_BUSY_BITS; -} - /* Our own code ***************************************************************/ +enum dmairq { + DMAIRQ_0 = DMA_IRQ_0, + DMAIRQ_1 = DMA_IRQ_1, +}; + +typedef void (*dmairq_handler_t)(void *arg, enum dmairq irq, uint channel); + +/** + * Register `fn(arg, ...)` to be called when `channel` completes or + * has a NULL trigger (depending on the channel's configuration). + * + * Your handler does not need to acknowledge the IRQ; that will be + * done for you after your handler is called. + * + * It is illegal to enable the same channel on more than one IRQ. 
+ */ +void dmairq_set_and_enable_exclusive_handler(enum dmairq irq, uint channel, dmairq_handler_t fn, void *arg); + #define DMA_CTRL_ENABLE (1<<0) #define DMA_CTRL_HI_PRIO (1<<1) #define DMA_CTRL_DATA_SIZE(sz) ((sz)<<2) diff --git a/libhw/rp2040_hwspi.c b/libhw/rp2040_hwspi.c index 1c4e096..f747b1e 100644 --- a/libhw/rp2040_hwspi.c +++ b/libhw/rp2040_hwspi.c @@ -33,6 +33,12 @@ LO_IMPLEMENTATION_C(io_duplex_readwriter, struct rp2040_hwspi, rp2040_hwspi, static) LO_IMPLEMENTATION_C(spi, struct rp2040_hwspi, rp2040_hwspi, static) +static void rp2040_hwspi_intrhandler(void *_self, enum dmairq LM_UNUSED(irq), uint LM_UNUSED(channel)) { + struct rp2040_hwspi *self = _self; + gpio_put(self->pin_cs, 1); + cr_sema_signal_from_intrhandler(&self->sema); +} + #define assert_4distinct(a, b, c, d) \ assert(a != b); \ assert(a != c); \ @@ -124,6 +130,10 @@ void _rp2040_hwspi_init(struct rp2040_hwspi *self, self->dma_tx_data = dma3; self->dma_rx_data = dma4; self->dead_until_ns = 0; + self->sema = (cr_sema_t){0}; + + /* Initialize the interrupt handler. */ + dmairq_set_and_enable_exclusive_handler(DMAIRQ_0, self->dma_rx_data, rp2040_hwspi_intrhandler, self); } static void rp2040_hwspi_readwritev(struct rp2040_hwspi *self, const struct duplex_iovec *iov, int iovcnt) { @@ -225,15 +235,10 @@ static void rp2040_hwspi_readwritev(struct rp2040_hwspi *self, const struct dupl uint64_t now = LO_CALL(bootclock, get_time_ns); if (now < self->dead_until_ns) sleep_until_ns(self->dead_until_ns); - /* TODO: Use interrupts instead of busy-polling. 
*/ + bool saved = cr_save_and_disable_interrupts(); gpio_put(self->pin_cs, 0); dma_hw->multi_channel_trigger = (1u<<self->dma_tx_ctrl) | (1u<<self->dma_rx_ctrl); - while (dma_channel_is_busy(self->dma_tx_ctrl) - || dma_channel_is_busy(self->dma_tx_data) - || dma_channel_is_busy(self->dma_rx_ctrl) - || dma_channel_is_busy(self->dma_rx_data)) - tight_loop_contents(); - __compiler_memory_barrier(); - gpio_put(self->pin_cs, 1); + cr_restore_interrupts(saved); + cr_sema_wait(&self->sema); self->dead_until_ns = LO_CALL(bootclock, get_time_ns) + self->min_delay_ns; } diff --git a/libhw/rp2040_include/libhw/rp2040_hwspi.h b/libhw/rp2040_include/libhw/rp2040_hwspi.h index a76a2c8..eb54cdc 100644 --- a/libhw/rp2040_include/libhw/rp2040_hwspi.h +++ b/libhw/rp2040_include/libhw/rp2040_hwspi.h @@ -9,6 +9,7 @@ #include /* for bi_* */ +#include #include #include @@ -32,6 +33,7 @@ struct rp2040_hwspi { /* mutable */ uint64_t dead_until_ns; + cr_sema_t sema; END_PRIVATE(LIBHW_RP2040_HWSPI_H) }; LO_IMPLEMENTATION_H(io_duplex_readwriter, struct rp2040_hwspi, rp2040_hwspi) -- cgit v1.2.3-2-g168b