diff options
Diffstat (limited to 'libhw/rp2040_hwspi.c')
-rw-r--r-- | libhw/rp2040_hwspi.c | 131 |
1 files changed, 115 insertions, 16 deletions
diff --git a/libhw/rp2040_hwspi.c b/libhw/rp2040_hwspi.c index 8dd49d6..1c4e096 100644 --- a/libhw/rp2040_hwspi.c +++ b/libhw/rp2040_hwspi.c @@ -4,12 +4,14 @@ * SPDX-License-Identifier: AGPL-3.0-or-later */ +#include <alloca.h> #include <inttypes.h> /* for PRIu{n} */ #include <hardware/clocks.h> /* for clock_get_hz() and clk_peri */ #include <hardware/gpio.h> #include <hardware/spi.h> +#include <libcr/coroutine.h> #include <libmisc/assert.h> #define LOG_NAME RP2040_SPI @@ -20,6 +22,8 @@ #include <libhw/generic/alarmclock.h> +#include "rp2040_dma.h" + #include "config.h" #ifndef CONFIG_RP2040_SPI_DEBUG @@ -46,7 +50,12 @@ void _rp2040_hwspi_init(struct rp2040_hwspi *self, uint pin_miso, uint pin_mosi, uint pin_clk, - uint pin_cs) { + uint pin_cs, + uint dma1, + uint dma2, + uint dma3, + uint dma4) +{ /* Be not weary: This is but 12 lines of actual code; and many * lines of comments and assert()s. */ spi_inst_t *inst; @@ -58,6 +67,7 @@ void _rp2040_hwspi_init(struct rp2040_hwspi *self, debugf("clk_peri = %"PRIu32"Hz", clk_peri_hz); assert(baudrate_hz*2 <= clk_peri_hz); assert_4distinct(pin_miso, pin_mosi, pin_clk, pin_cs); + assert_4distinct(dma1, dma2, dma3, dma4); /* Regarding the constraints on pin assignments: see the * RP2040 datasheet, table 2, in ยง1.4.3 "GPIO Functions". */ @@ -109,32 +119,121 @@ void _rp2040_hwspi_init(struct rp2040_hwspi *self, self->min_delay_ns = min_delay_ns; self->bogus_data = bogus_data; self->pin_cs = pin_cs; + self->dma_tx_ctrl = dma1; + self->dma_rx_ctrl = dma2; + self->dma_tx_data = dma3; + self->dma_rx_data = dma4; self->dead_until_ns = 0; } static void rp2040_hwspi_readwritev(struct rp2040_hwspi *self, const struct duplex_iovec *iov, int iovcnt) { assert(self); - spi_inst_t *inst = self->inst; - - assert(inst); + assert(self->inst); assert(iov); - assert(iovcnt); + assert(iovcnt > 0); + /* At this time, I have no intention to run SPI faster than + * 80MHz (= 80Mb/s = 10MB/s). If we ran the CPU at just + * 100MHz (we'll be running it faster than that, maybe even + * 200MHz), that means we'd have 10 clock cycles to send each + * byte. + * + * This affords us substantial simplifications, like being + * able to afford 4-cycle changeovers between DMA blocks, and + * not having to worry about alignment because we can just use + * DMA_SIZE_8. + */ + + uint8_t bogus_rx_dst; + + int pruned_iovcnt = 0; + for (int i = 0; i < iovcnt; i++) + if (iov[i].iov_len) + pruned_iovcnt++; + if (!pruned_iovcnt) + return; + + /* For tx_data_blocks, it doesn't really matter which aliases + * we choose: + * - None of our fields can be NULL (so no + * false-termination). + * - Moving const fields first so they don't have to be + * re-programmed each time isn't possible for us there need + * to be at least 2 const fields, and we only have 1 + * (read_addr for rx_data_blocks, and write_addr for + * tx_data_blocks). + * + * But for rx_data_blocks, we need ctrl to be the trigger + * register so that the DMA_CTRL_IRQ_QUIET flag isn't cleared + * before we get to the trigger; and while for tx_data_blocks + * it doesn't really matter, the inverse would be nice. + */ + struct dma_alias1 *tx_data_blocks = alloca(sizeof(struct dma_alias1)*(pruned_iovcnt+1)); + struct dma_alias0 *rx_data_blocks = alloca(sizeof(struct dma_alias0)*(pruned_iovcnt+1)); + + for (int i = 0, j = 0; i < iovcnt; i++) { + if (!iov[i].iov_len) + continue; + tx_data_blocks[j] = (typeof(tx_data_blocks[0])){ + .read_addr = iov[i].iov_write_src ?: &self->bogus_data, + .write_addr = &spi_get_hw(self->inst)->dr, + .trans_count = iov[i].iov_len, + .ctrl = (DMA_CTRL_ENABLE + | DMA_CTRL_DATA_SIZE(DMA_SIZE_8) + | (iov[i].iov_write_src ? DMA_CTRL_INCR_READ : 0) + | DMA_CTRL_CHAIN_TO(self->dma_tx_ctrl) + | DMA_CTRL_TREQ_SEL(SPI_DREQ_NUM(self->inst, true)) + | DMA_CTRL_IRQ_QUIET), + }; + rx_data_blocks[j] = (typeof(rx_data_blocks[0])){ + .read_addr = &spi_get_hw(self->inst)->dr, + .write_addr = iov[i].iov_read_dst ?: &bogus_rx_dst, + .trans_count = iov[i].iov_len, + .ctrl = (DMA_CTRL_ENABLE + | DMA_CTRL_DATA_SIZE(DMA_SIZE_8) + | (iov[i].iov_read_dst ? DMA_CTRL_INCR_WRITE : 0) + | DMA_CTRL_CHAIN_TO(self->dma_rx_ctrl) + | DMA_CTRL_TREQ_SEL(SPI_DREQ_NUM(self->inst, false)) + | DMA_CTRL_IRQ_QUIET), + }; + j++; + } + tx_data_blocks[pruned_iovcnt] = (typeof(tx_data_blocks[0])){0}; + rx_data_blocks[pruned_iovcnt] = (typeof(rx_data_blocks[0])){0}; + + /* Set up ctrl. */ + DMA_NONTRIGGER(self->dma_tx_ctrl, read_addr) = tx_data_blocks; + DMA_NONTRIGGER(self->dma_tx_ctrl, write_addr) = DMA_CHAN_ADDR(self->dma_tx_data, typeof(tx_data_blocks[0])); + DMA_NONTRIGGER(self->dma_tx_ctrl, trans_count) = DMA_CHAN_WR_TRANS_COUNT(typeof(tx_data_blocks[0])); + DMA_NONTRIGGER(self->dma_tx_ctrl, ctrl) = (DMA_CTRL_ENABLE + | DMA_CHAN_WR_CTRL(typeof(tx_data_blocks[0])) + | DMA_CTRL_INCR_READ + | DMA_CTRL_CHAIN_TO(self->dma_tx_data) + | DMA_CTRL_TREQ_SEL(DREQ_FORCE) + | DMA_CTRL_IRQ_QUIET); + DMA_NONTRIGGER(self->dma_rx_ctrl, read_addr) = rx_data_blocks; + DMA_NONTRIGGER(self->dma_rx_ctrl, write_addr) = DMA_CHAN_ADDR(self->dma_rx_data, typeof(rx_data_blocks[0])); + DMA_NONTRIGGER(self->dma_rx_ctrl, trans_count) = DMA_CHAN_WR_TRANS_COUNT(typeof(rx_data_blocks[0])); + DMA_NONTRIGGER(self->dma_rx_ctrl, ctrl) = (DMA_CTRL_ENABLE + | DMA_CHAN_WR_CTRL(typeof(rx_data_blocks[0])) + | DMA_CTRL_INCR_READ + | DMA_CTRL_CHAIN_TO(self->dma_rx_data) + | DMA_CTRL_TREQ_SEL(DREQ_FORCE) + | DMA_CTRL_IRQ_QUIET); + + /* Run. */ uint64_t now = LO_CALL(bootclock, get_time_ns); if (now < self->dead_until_ns) sleep_until_ns(self->dead_until_ns); + /* TODO: Use interrupts instead of busy-polling. */ gpio_put(self->pin_cs, 0); - /* TODO: Replace blocking reads+writes with DMA. */ - for (int i = 0; i < iovcnt; i++) { - if (iov[i].iov_write_src && iov[i].iov_read_dst) - spi_write_read_blocking(inst, iov[i].iov_write_src, iov[i].iov_read_dst, iov[i].iov_len); - else if (iov[i].iov_write_src) - spi_write_blocking(inst, iov[i].iov_write_src, iov[i].iov_len); - else if (iov[i].iov_read_dst) - spi_read_blocking(inst, self->bogus_data, iov[i].iov_read_dst, iov[i].iov_len); - else - assert_notreached("duplex_iovec is neither read nor write"); - } + dma_hw->multi_channel_trigger = (1u<<self->dma_tx_ctrl) | (1u<<self->dma_rx_ctrl); + while (dma_channel_is_busy(self->dma_tx_ctrl) + || dma_channel_is_busy(self->dma_tx_data) + || dma_channel_is_busy(self->dma_rx_ctrl) + || dma_channel_is_busy(self->dma_rx_data)) + tight_loop_contents(); + __compiler_memory_barrier(); gpio_put(self->pin_cs, 1); self->dead_until_ns = LO_CALL(bootclock, get_time_ns) + self->min_delay_ns; } |