From 5a3d7bafd47067e9659c5773e371e796e6d3585b Mon Sep 17 00:00:00 2001 From: "Luke T. Shumaker" Date: Mon, 24 Feb 2025 22:54:30 -0700 Subject: libhw: rp2040_hwspi: Pull out a assert_4distinct macro --- libhw/rp2040_hwspi.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'libhw/rp2040_hwspi.c') diff --git a/libhw/rp2040_hwspi.c b/libhw/rp2040_hwspi.c index 8c87666..bdfaa62 100644 --- a/libhw/rp2040_hwspi.c +++ b/libhw/rp2040_hwspi.c @@ -15,6 +15,14 @@ LO_IMPLEMENTATION_C(io_duplex_readwriter, struct rp2040_hwspi, rp2040_hwspi, static) LO_IMPLEMENTATION_C(spi, struct rp2040_hwspi, rp2040_hwspi, static) +#define assert_4distinct(a, b, c, d) \ + assert(a != b); \ + assert(a != c); \ + assert(a != d); \ + assert(b != c); \ + assert(b != d); \ + assert(c != d); + void _rp2040_hwspi_init(struct rp2040_hwspi *self, enum rp2040_hwspi_instance inst_num, enum spi_mode mode, @@ -29,12 +37,7 @@ void _rp2040_hwspi_init(struct rp2040_hwspi *self, assert(self); assert(baudrate_hz); - assert(pin_miso != pin_mosi); - assert(pin_miso != pin_clk); - assert(pin_miso != pin_cs); - assert(pin_mosi != pin_clk); - assert(pin_mosi != pin_cs); - assert(pin_clk != pin_cs); + assert_4distinct(pin_miso, pin_mosi, pin_clk, pin_cs); /* Regarding the constraints on pin assignments: see the * RP2040 datasheet, table 2, in §1.4.3 "GPIO Functions". */ -- cgit v1.2.3-2-g168b From 6354a440b24d9a0b157c8c5571403f09dd04c245 Mon Sep 17 00:00:00 2001 From: "Luke T. Shumaker" Date: Sun, 2 Mar 2025 21:16:35 -0700 Subject: libhw: Update comments and asserts about clock rate --- libhw/rp2040_hwspi.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'libhw/rp2040_hwspi.c') diff --git a/libhw/rp2040_hwspi.c b/libhw/rp2040_hwspi.c index bdfaa62..ac46451 100644 --- a/libhw/rp2040_hwspi.c +++ b/libhw/rp2040_hwspi.c @@ -4,14 +4,26 @@ * SPDX-License-Identifier: AGPL-3.0-or-later */ -#include /* pico-sdk:hardware_spi */ -#include /* pico-sdk:hardware_gpio */ +#include /* for PRIu{n} */ + +#include /* for clock_get_hz() and clk_peri */ +#include +#include #include +#define LOG_NAME RP2040_SPI +#include + #define IMPLEMENTATION_FOR_LIBHW_RP2040_HWSPI_H YES #include +#include "config.h" + +#ifndef CONFIG_RP2040_SPI_DEBUG + #error config.h must define CONFIG_RP2040_SPI_DEBUG (bool) +#endif + LO_IMPLEMENTATION_C(io_duplex_readwriter, struct rp2040_hwspi, rp2040_hwspi, static) LO_IMPLEMENTATION_C(spi, struct rp2040_hwspi, rp2040_hwspi, static) @@ -34,9 +46,13 @@ void _rp2040_hwspi_init(struct rp2040_hwspi *self, /* Be not weary: This is but 12 lines of actual code; and many * lines of comments and assert()s. */ spi_inst_t *inst; + uint actual_baudrate_hz; assert(self); assert(baudrate_hz); + uint32_t clk_peri_hz = clock_get_hz(clk_peri); + debugf("clk_peri = %"PRIu32"Hz", clk_peri_hz); + assert(baudrate_hz*2 <= clk_peri_hz); assert_4distinct(pin_miso, pin_mosi, pin_clk, pin_cs); /* Regarding the constraints on pin assignments: see the @@ -60,7 +76,9 @@ void _rp2040_hwspi_init(struct rp2040_hwspi *self, assert_notreached("invalid hwspi instance number"); } - spi_init(inst, baudrate_hz); + actual_baudrate_hz = spi_init(inst, baudrate_hz); + debugf("baudrate = %uHz", actual_baudrate_hz); + assert(actual_baudrate_hz == baudrate_hz); spi_set_format(inst, 8, (mode & 0b10) ? SPI_CPOL_1 : SPI_CPOL_0, (mode & 0b01) ? SPI_CPHA_1 : SPI_CPHA_0, -- cgit v1.2.3-2-g168b From fb73355711c99003c559df48164a1ce6db93cff9 Mon Sep 17 00:00:00 2001 From: "Luke T. Shumaker" Date: Tue, 4 Mar 2025 00:05:35 -0700 Subject: libhw: rp2040_hwspi: Add more config knobs --- libhw/rp2040_hwspi.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'libhw/rp2040_hwspi.c') diff --git a/libhw/rp2040_hwspi.c b/libhw/rp2040_hwspi.c index ac46451..8dd49d6 100644 --- a/libhw/rp2040_hwspi.c +++ b/libhw/rp2040_hwspi.c @@ -18,6 +18,8 @@ #define IMPLEMENTATION_FOR_LIBHW_RP2040_HWSPI_H YES #include +#include + #include "config.h" #ifndef CONFIG_RP2040_SPI_DEBUG @@ -39,6 +41,8 @@ void _rp2040_hwspi_init(struct rp2040_hwspi *self, enum rp2040_hwspi_instance inst_num, enum spi_mode mode, uint baudrate_hz, + uint64_t min_delay_ns, + uint8_t bogus_data, uint pin_miso, uint pin_mosi, uint pin_clk, @@ -100,9 +104,12 @@ void _rp2040_hwspi_init(struct rp2040_hwspi *self, gpio_set_dir(pin_cs, GPIO_OUT); gpio_put(pin_cs, 1); - /* Return. */ + /* Initialize self. */ self->inst = inst; + self->min_delay_ns = min_delay_ns; + self->bogus_data = bogus_data; self->pin_cs = pin_cs; + self->dead_until_ns = 0; } static void rp2040_hwspi_readwritev(struct rp2040_hwspi *self, const struct duplex_iovec *iov, int iovcnt) { @@ -113,6 +120,9 @@ static void rp2040_hwspi_readwritev(struct rp2040_hwspi *self, const struct dupl assert(iov); assert(iovcnt); + uint64_t now = LO_CALL(bootclock, get_time_ns); + if (now < self->dead_until_ns) + sleep_until_ns(self->dead_until_ns); gpio_put(self->pin_cs, 0); /* TODO: Replace blocking reads+writes with DMA. */ for (int i = 0; i < iovcnt; i++) { @@ -121,9 +131,10 @@ static void rp2040_hwspi_readwritev(struct rp2040_hwspi *self, const struct dupl else if (iov[i].iov_write_src) spi_write_blocking(inst, iov[i].iov_write_src, iov[i].iov_len); else if (iov[i].iov_read_dst) - spi_read_blocking(inst, 0, iov[i].iov_read_dst, iov[i].iov_len); + spi_read_blocking(inst, self->bogus_data, iov[i].iov_read_dst, iov[i].iov_len); else assert_notreached("duplex_iovec is neither read nor write"); } gpio_put(self->pin_cs, 1); + self->dead_until_ns = LO_CALL(bootclock, get_time_ns) + self->min_delay_ns; } -- cgit v1.2.3-2-g168b From c336bf7f2205131c86e6d2991770a2c150d85ca9 Mon Sep 17 00:00:00 2001 From: "Luke T. Shumaker" Date: Mon, 24 Feb 2025 22:54:30 -0700 Subject: libhw: rp2040_hwspi: Use DMA --- libhw/rp2040_hwspi.c | 131 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 115 insertions(+), 16 deletions(-) (limited to 'libhw/rp2040_hwspi.c') diff --git a/libhw/rp2040_hwspi.c b/libhw/rp2040_hwspi.c index 8dd49d6..1c4e096 100644 --- a/libhw/rp2040_hwspi.c +++ b/libhw/rp2040_hwspi.c @@ -4,12 +4,14 @@ * SPDX-License-Identifier: AGPL-3.0-or-later */ +#include #include /* for PRIu{n} */ #include /* for clock_get_hz() and clk_peri */ #include #include +#include #include #define LOG_NAME RP2040_SPI @@ -20,6 +22,8 @@ #include +#include "rp2040_dma.h" + #include "config.h" #ifndef CONFIG_RP2040_SPI_DEBUG @@ -46,7 +50,12 @@ void _rp2040_hwspi_init(struct rp2040_hwspi *self, uint pin_miso, uint pin_mosi, uint pin_clk, - uint pin_cs) { + uint pin_cs, + uint dma1, + uint dma2, + uint dma3, + uint dma4) +{ /* Be not weary: This is but 12 lines of actual code; and many * lines of comments and assert()s. */ spi_inst_t *inst; @@ -58,6 +67,7 @@ void _rp2040_hwspi_init(struct rp2040_hwspi *self, debugf("clk_peri = %"PRIu32"Hz", clk_peri_hz); assert(baudrate_hz*2 <= clk_peri_hz); assert_4distinct(pin_miso, pin_mosi, pin_clk, pin_cs); + assert_4distinct(dma1, dma2, dma3, dma4); /* Regarding the constraints on pin assignments: see the * RP2040 datasheet, table 2, in §1.4.3 "GPIO Functions". */ @@ -109,32 +119,121 @@ void _rp2040_hwspi_init(struct rp2040_hwspi *self, self->min_delay_ns = min_delay_ns; self->bogus_data = bogus_data; self->pin_cs = pin_cs; + self->dma_tx_ctrl = dma1; + self->dma_rx_ctrl = dma2; + self->dma_tx_data = dma3; + self->dma_rx_data = dma4; self->dead_until_ns = 0; } static void rp2040_hwspi_readwritev(struct rp2040_hwspi *self, const struct duplex_iovec *iov, int iovcnt) { assert(self); - spi_inst_t *inst = self->inst; - - assert(inst); + assert(self->inst); assert(iov); - assert(iovcnt); + assert(iovcnt > 0); + /* At this time, I have no intention to run SPI faster than + * 80MHz (= 80Mb/s = 10MB/s). If we ran the CPU at just + * 100MHz (we'll be running it faster than that, maybe even + * 200MHz), that means we'd have 10 clock cycles to send each + * byte. + * + * This affords us substantial simplifications, like being + * able to afford 4-cycle changeovers between DMA blocks, and + * not having to worry about alignment because we can just use + * DMA_SIZE_8. + */ + + uint8_t bogus_rx_dst; + + int pruned_iovcnt = 0; + for (int i = 0; i < iovcnt; i++) + if (iov[i].iov_len) + pruned_iovcnt++; + if (!pruned_iovcnt) + return; + + /* For tx_data_blocks, it doesn't really matter which aliases + * we choose: + * - None of our fields can be NULL (so no + * false-termination). + * - Moving const fields first so they don't have to be + * re-programmed each time isn't possible for us there need + * to be at least 2 const fields, and we only have 1 + * (read_addr for rx_data_blocks, and write_addr for + * tx_data_blocks). + * + * But for rx_data_blocks, we need ctrl to be the trigger + * register so that the DMA_CTRL_IRQ_QUIET flag isn't cleared + * before we get to the trigger; and while for tx_data_blocks + * it doesn't really matter, the inverse would be nice. + */ + struct dma_alias1 *tx_data_blocks = alloca(sizeof(struct dma_alias1)*(pruned_iovcnt+1)); + struct dma_alias0 *rx_data_blocks = alloca(sizeof(struct dma_alias0)*(pruned_iovcnt+1)); + + for (int i = 0, j = 0; i < iovcnt; i++) { + if (!iov[i].iov_len) + continue; + tx_data_blocks[j] = (typeof(tx_data_blocks[0])){ + .read_addr = iov[i].iov_write_src ?: &self->bogus_data, + .write_addr = &spi_get_hw(self->inst)->dr, + .trans_count = iov[i].iov_len, + .ctrl = (DMA_CTRL_ENABLE + | DMA_CTRL_DATA_SIZE(DMA_SIZE_8) + | (iov[i].iov_write_src ? DMA_CTRL_INCR_READ : 0) + | DMA_CTRL_CHAIN_TO(self->dma_tx_ctrl) + | DMA_CTRL_TREQ_SEL(SPI_DREQ_NUM(self->inst, true)) + | DMA_CTRL_IRQ_QUIET), + }; + rx_data_blocks[j] = (typeof(rx_data_blocks[0])){ + .read_addr = &spi_get_hw(self->inst)->dr, + .write_addr = iov[i].iov_read_dst ?: &bogus_rx_dst, + .trans_count = iov[i].iov_len, + .ctrl = (DMA_CTRL_ENABLE + | DMA_CTRL_DATA_SIZE(DMA_SIZE_8) + | (iov[i].iov_read_dst ? DMA_CTRL_INCR_WRITE : 0) + | DMA_CTRL_CHAIN_TO(self->dma_rx_ctrl) + | DMA_CTRL_TREQ_SEL(SPI_DREQ_NUM(self->inst, false)) + | DMA_CTRL_IRQ_QUIET), + }; + j++; + } + tx_data_blocks[pruned_iovcnt] = (typeof(tx_data_blocks[0])){0}; + rx_data_blocks[pruned_iovcnt] = (typeof(rx_data_blocks[0])){0}; + + /* Set up ctrl. */ + DMA_NONTRIGGER(self->dma_tx_ctrl, read_addr) = tx_data_blocks; + DMA_NONTRIGGER(self->dma_tx_ctrl, write_addr) = DMA_CHAN_ADDR(self->dma_tx_data, typeof(tx_data_blocks[0])); + DMA_NONTRIGGER(self->dma_tx_ctrl, trans_count) = DMA_CHAN_WR_TRANS_COUNT(typeof(tx_data_blocks[0])); + DMA_NONTRIGGER(self->dma_tx_ctrl, ctrl) = (DMA_CTRL_ENABLE + | DMA_CHAN_WR_CTRL(typeof(tx_data_blocks[0])) + | DMA_CTRL_INCR_READ + | DMA_CTRL_CHAIN_TO(self->dma_tx_data) + | DMA_CTRL_TREQ_SEL(DREQ_FORCE) + | DMA_CTRL_IRQ_QUIET); + DMA_NONTRIGGER(self->dma_rx_ctrl, read_addr) = rx_data_blocks; + DMA_NONTRIGGER(self->dma_rx_ctrl, write_addr) = DMA_CHAN_ADDR(self->dma_rx_data, typeof(rx_data_blocks[0])); + DMA_NONTRIGGER(self->dma_rx_ctrl, trans_count) = DMA_CHAN_WR_TRANS_COUNT(typeof(rx_data_blocks[0])); + DMA_NONTRIGGER(self->dma_rx_ctrl, ctrl) = (DMA_CTRL_ENABLE + | DMA_CHAN_WR_CTRL(typeof(rx_data_blocks[0])) + | DMA_CTRL_INCR_READ + | DMA_CTRL_CHAIN_TO(self->dma_rx_data) + | DMA_CTRL_TREQ_SEL(DREQ_FORCE) + | DMA_CTRL_IRQ_QUIET); + + /* Run. */ uint64_t now = LO_CALL(bootclock, get_time_ns); if (now < self->dead_until_ns) sleep_until_ns(self->dead_until_ns); + /* TODO: Use interrupts instead of busy-polling. */ gpio_put(self->pin_cs, 0); - /* TODO: Replace blocking reads+writes with DMA. */ - for (int i = 0; i < iovcnt; i++) { - if (iov[i].iov_write_src && iov[i].iov_read_dst) - spi_write_read_blocking(inst, iov[i].iov_write_src, iov[i].iov_read_dst, iov[i].iov_len); - else if (iov[i].iov_write_src) - spi_write_blocking(inst, iov[i].iov_write_src, iov[i].iov_len); - else if (iov[i].iov_read_dst) - spi_read_blocking(inst, self->bogus_data, iov[i].iov_read_dst, iov[i].iov_len); - else - assert_notreached("duplex_iovec is neither read nor write"); - } + dma_hw->multi_channel_trigger = (1u<dma_tx_ctrl) | (1u<dma_rx_ctrl); + while (dma_channel_is_busy(self->dma_tx_ctrl) + || dma_channel_is_busy(self->dma_tx_data) + || dma_channel_is_busy(self->dma_rx_ctrl) + || dma_channel_is_busy(self->dma_rx_data)) + tight_loop_contents(); + __compiler_memory_barrier(); gpio_put(self->pin_cs, 1); self->dead_until_ns = LO_CALL(bootclock, get_time_ns) + self->min_delay_ns; } -- cgit v1.2.3-2-g168b From 3f49a57b99e7fe5aafa73e70ed146d98b1ae174c Mon Sep 17 00:00:00 2001 From: "Luke T. Shumaker" Date: Mon, 24 Feb 2025 22:54:30 -0700 Subject: libhw: rp2040_hwspi: Use interrupts instead of busy-polling --- libhw/rp2040_hwspi.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'libhw/rp2040_hwspi.c') diff --git a/libhw/rp2040_hwspi.c b/libhw/rp2040_hwspi.c index 1c4e096..f747b1e 100644 --- a/libhw/rp2040_hwspi.c +++ b/libhw/rp2040_hwspi.c @@ -33,6 +33,12 @@ LO_IMPLEMENTATION_C(io_duplex_readwriter, struct rp2040_hwspi, rp2040_hwspi, static) LO_IMPLEMENTATION_C(spi, struct rp2040_hwspi, rp2040_hwspi, static) +static void rp2040_hwspi_intrhandler(void *_self, enum dmairq LM_UNUSED(irq), uint LM_UNUSED(channel)) { + struct rp2040_hwspi *self = _self; + gpio_put(self->pin_cs, 1); + cr_sema_signal_from_intrhandler(&self->sema); +} + #define assert_4distinct(a, b, c, d) \ assert(a != b); \ assert(a != c); \ @@ -124,6 +130,10 @@ void _rp2040_hwspi_init(struct rp2040_hwspi *self, self->dma_tx_data = dma3; self->dma_rx_data = dma4; self->dead_until_ns = 0; + self->sema = (cr_sema_t){0}; + + /* Initialize the interrupt handler. */ + dmairq_set_and_enable_exclusive_handler(DMAIRQ_0, self->dma_rx_data, rp2040_hwspi_intrhandler, self); } static void rp2040_hwspi_readwritev(struct rp2040_hwspi *self, const struct duplex_iovec *iov, int iovcnt) { @@ -225,15 +235,10 @@ static void rp2040_hwspi_readwritev(struct rp2040_hwspi *self, const struct dupl uint64_t now = LO_CALL(bootclock, get_time_ns); if (now < self->dead_until_ns) sleep_until_ns(self->dead_until_ns); - /* TODO: Use interrupts instead of busy-polling. */ + bool saved = cr_save_and_disable_interrupts(); gpio_put(self->pin_cs, 0); dma_hw->multi_channel_trigger = (1u<dma_tx_ctrl) | (1u<dma_rx_ctrl); - while (dma_channel_is_busy(self->dma_tx_ctrl) - || dma_channel_is_busy(self->dma_tx_data) - || dma_channel_is_busy(self->dma_rx_ctrl) - || dma_channel_is_busy(self->dma_rx_data)) - tight_loop_contents(); - __compiler_memory_barrier(); - gpio_put(self->pin_cs, 1); + cr_restore_interrupts(saved); + cr_sema_wait(&self->sema); self->dead_until_ns = LO_CALL(bootclock, get_time_ns) + self->min_delay_ns; } -- cgit v1.2.3-2-g168b