summary refs log tree commit diff
path: root/libhw/rp2040_hwspi.c
diff options
context:
space:
mode:
Diffstat (limited to 'libhw/rp2040_hwspi.c')
-rw-r--r-- libhw/rp2040_hwspi.c 131
1 files changed, 115 insertions, 16 deletions
diff --git a/libhw/rp2040_hwspi.c b/libhw/rp2040_hwspi.c
index 8dd49d6..1c4e096 100644
--- a/libhw/rp2040_hwspi.c
+++ b/libhw/rp2040_hwspi.c
@@ -4,12 +4,14 @@
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
+#include <alloca.h>
#include <inttypes.h> /* for PRIu{n} */
#include <hardware/clocks.h> /* for clock_get_hz() and clk_peri */
#include <hardware/gpio.h>
#include <hardware/spi.h>
+#include <libcr/coroutine.h>
#include <libmisc/assert.h>
#define LOG_NAME RP2040_SPI
@@ -20,6 +22,8 @@
#include <libhw/generic/alarmclock.h>
+#include "rp2040_dma.h"
+
#include "config.h"
#ifndef CONFIG_RP2040_SPI_DEBUG
@@ -46,7 +50,12 @@ void _rp2040_hwspi_init(struct rp2040_hwspi *self,
uint pin_miso,
uint pin_mosi,
uint pin_clk,
- uint pin_cs) {
+ uint pin_cs,
+ uint dma1,
+ uint dma2,
+ uint dma3,
+ uint dma4)
+{
/* Be not weary: This is but 12 lines of actual code; and many
* lines of comments and assert()s. */
spi_inst_t *inst;
@@ -58,6 +67,7 @@ void _rp2040_hwspi_init(struct rp2040_hwspi *self,
debugf("clk_peri = %"PRIu32"Hz", clk_peri_hz);
assert(baudrate_hz*2 <= clk_peri_hz);
assert_4distinct(pin_miso, pin_mosi, pin_clk, pin_cs);
+ assert_4distinct(dma1, dma2, dma3, dma4);
/* Regarding the constraints on pin assignments: see the
* RP2040 datasheet, table 2, in §1.4.3 "GPIO Functions". */
@@ -109,32 +119,121 @@ void _rp2040_hwspi_init(struct rp2040_hwspi *self,
self->min_delay_ns = min_delay_ns;
self->bogus_data = bogus_data;
self->pin_cs = pin_cs;
+ self->dma_tx_ctrl = dma1;
+ self->dma_rx_ctrl = dma2;
+ self->dma_tx_data = dma3;
+ self->dma_rx_data = dma4;
self->dead_until_ns = 0;
}
static void rp2040_hwspi_readwritev(struct rp2040_hwspi *self, const struct duplex_iovec *iov, int iovcnt) {
assert(self);
- spi_inst_t *inst = self->inst;
-
- assert(inst);
+ assert(self->inst);
assert(iov);
- assert(iovcnt);
+ assert(iovcnt > 0);
+ /* At this time, I have no intention to run SPI faster than
+ * 80MHz (= 80Mb/s = 10MB/s). If we ran the CPU at just
+ * 100MHz (we'll be running it faster than that, maybe even
+ * 200MHz), that means we'd have 10 clock cycles to send each
+ * byte.
+ *
+ * This affords us substantial simplifications, like being
+ * able to afford 4-cycle changeovers between DMA blocks, and
+ * not having to worry about alignment because we can just use
+ * DMA_SIZE_8.
+ */
+
+ uint8_t bogus_rx_dst;
+
+ int pruned_iovcnt = 0;
+ for (int i = 0; i < iovcnt; i++)
+ if (iov[i].iov_len)
+ pruned_iovcnt++;
+ if (!pruned_iovcnt)
+ return;
+
+ /* For tx_data_blocks, it doesn't really matter which aliases
+ * we choose:
+ * - None of our fields can be NULL (so no
+ * false-termination).
+ * - Moving const fields first so they don't have to be
+ * re-programmed each time isn't possible for us; there would
+ * need to be at least 2 const fields, and we only have 1
+ * (read_addr for rx_data_blocks, and write_addr for
+ * tx_data_blocks).
+ *
+ * But for rx_data_blocks, we need ctrl to be the trigger
+ * register so that the DMA_CTRL_IRQ_QUIET flag isn't cleared
+ * before we get to the trigger; and while for tx_data_blocks
+ * it doesn't really matter, the inverse would be nice.
+ */
+ struct dma_alias1 *tx_data_blocks = alloca(sizeof(struct dma_alias1)*(pruned_iovcnt+1));
+ struct dma_alias0 *rx_data_blocks = alloca(sizeof(struct dma_alias0)*(pruned_iovcnt+1));
+
+ for (int i = 0, j = 0; i < iovcnt; i++) {
+ if (!iov[i].iov_len)
+ continue;
+ tx_data_blocks[j] = (typeof(tx_data_blocks[0])){
+ .read_addr = iov[i].iov_write_src ?: &self->bogus_data,
+ .write_addr = &spi_get_hw(self->inst)->dr,
+ .trans_count = iov[i].iov_len,
+ .ctrl = (DMA_CTRL_ENABLE
+ | DMA_CTRL_DATA_SIZE(DMA_SIZE_8)
+ | (iov[i].iov_write_src ? DMA_CTRL_INCR_READ : 0)
+ | DMA_CTRL_CHAIN_TO(self->dma_tx_ctrl)
+ | DMA_CTRL_TREQ_SEL(SPI_DREQ_NUM(self->inst, true))
+ | DMA_CTRL_IRQ_QUIET),
+ };
+ rx_data_blocks[j] = (typeof(rx_data_blocks[0])){
+ .read_addr = &spi_get_hw(self->inst)->dr,
+ .write_addr = iov[i].iov_read_dst ?: &bogus_rx_dst,
+ .trans_count = iov[i].iov_len,
+ .ctrl = (DMA_CTRL_ENABLE
+ | DMA_CTRL_DATA_SIZE(DMA_SIZE_8)
+ | (iov[i].iov_read_dst ? DMA_CTRL_INCR_WRITE : 0)
+ | DMA_CTRL_CHAIN_TO(self->dma_rx_ctrl)
+ | DMA_CTRL_TREQ_SEL(SPI_DREQ_NUM(self->inst, false))
+ | DMA_CTRL_IRQ_QUIET),
+ };
+ j++;
+ }
+ tx_data_blocks[pruned_iovcnt] = (typeof(tx_data_blocks[0])){0};
+ rx_data_blocks[pruned_iovcnt] = (typeof(rx_data_blocks[0])){0};
+
+ /* Set up ctrl. */
+ DMA_NONTRIGGER(self->dma_tx_ctrl, read_addr) = tx_data_blocks;
+ DMA_NONTRIGGER(self->dma_tx_ctrl, write_addr) = DMA_CHAN_ADDR(self->dma_tx_data, typeof(tx_data_blocks[0]));
+ DMA_NONTRIGGER(self->dma_tx_ctrl, trans_count) = DMA_CHAN_WR_TRANS_COUNT(typeof(tx_data_blocks[0]));
+ DMA_NONTRIGGER(self->dma_tx_ctrl, ctrl) = (DMA_CTRL_ENABLE
+ | DMA_CHAN_WR_CTRL(typeof(tx_data_blocks[0]))
+ | DMA_CTRL_INCR_READ
+ | DMA_CTRL_CHAIN_TO(self->dma_tx_data)
+ | DMA_CTRL_TREQ_SEL(DREQ_FORCE)
+ | DMA_CTRL_IRQ_QUIET);
+ DMA_NONTRIGGER(self->dma_rx_ctrl, read_addr) = rx_data_blocks;
+ DMA_NONTRIGGER(self->dma_rx_ctrl, write_addr) = DMA_CHAN_ADDR(self->dma_rx_data, typeof(rx_data_blocks[0]));
+ DMA_NONTRIGGER(self->dma_rx_ctrl, trans_count) = DMA_CHAN_WR_TRANS_COUNT(typeof(rx_data_blocks[0]));
+ DMA_NONTRIGGER(self->dma_rx_ctrl, ctrl) = (DMA_CTRL_ENABLE
+ | DMA_CHAN_WR_CTRL(typeof(rx_data_blocks[0]))
+ | DMA_CTRL_INCR_READ
+ | DMA_CTRL_CHAIN_TO(self->dma_rx_data)
+ | DMA_CTRL_TREQ_SEL(DREQ_FORCE)
+ | DMA_CTRL_IRQ_QUIET);
+
+ /* Run. */
uint64_t now = LO_CALL(bootclock, get_time_ns);
if (now < self->dead_until_ns)
sleep_until_ns(self->dead_until_ns);
+ /* TODO: Use interrupts instead of busy-polling. */
gpio_put(self->pin_cs, 0);
- /* TODO: Replace blocking reads+writes with DMA. */
- for (int i = 0; i < iovcnt; i++) {
- if (iov[i].iov_write_src && iov[i].iov_read_dst)
- spi_write_read_blocking(inst, iov[i].iov_write_src, iov[i].iov_read_dst, iov[i].iov_len);
- else if (iov[i].iov_write_src)
- spi_write_blocking(inst, iov[i].iov_write_src, iov[i].iov_len);
- else if (iov[i].iov_read_dst)
- spi_read_blocking(inst, self->bogus_data, iov[i].iov_read_dst, iov[i].iov_len);
- else
- assert_notreached("duplex_iovec is neither read nor write");
- }
+ dma_hw->multi_channel_trigger = (1u<<self->dma_tx_ctrl) | (1u<<self->dma_rx_ctrl);
+ while (dma_channel_is_busy(self->dma_tx_ctrl)
+ || dma_channel_is_busy(self->dma_tx_data)
+ || dma_channel_is_busy(self->dma_rx_ctrl)
+ || dma_channel_is_busy(self->dma_rx_data))
+ tight_loop_contents();
+ __compiler_memory_barrier();
gpio_put(self->pin_cs, 1);
self->dead_until_ns = LO_CALL(bootclock, get_time_ns) + self->min_delay_ns;
}