/* libhw/rp2040_hwspi.c - implementation for the RP2040's ARM Primecell SSP (PL022) * * Copyright (C) 2024-2025 Luke T. Shumaker * SPDX-License-Identifier: AGPL-3.0-or-later */ #include #include /* for PRIu{n} */ #include /* for clock_get_hz() and clk_peri */ #include #include #include #include #define LOG_NAME RP2040_SPI #include #define IMPLEMENTATION_FOR_LIBHW_RP2040_HWSPI_H YES #include #include #include "rp2040_dma.h" #include "config.h" #ifndef CONFIG_RP2040_SPI_DEBUG #error config.h must define CONFIG_RP2040_SPI_DEBUG (bool) #endif LO_IMPLEMENTATION_C(io_duplex_readwriter, struct rp2040_hwspi, rp2040_hwspi, static) LO_IMPLEMENTATION_C(spi, struct rp2040_hwspi, rp2040_hwspi, static) static void rp2040_hwspi_intrhandler(void *_self, enum dmairq LM_UNUSED(irq), uint LM_UNUSED(channel)) { struct rp2040_hwspi *self = _self; gpio_put(self->pin_cs, 1); cr_sema_signal_from_intrhandler(&self->sema); } #define assert_4distinct(a, b, c, d) \ assert(a != b); \ assert(a != c); \ assert(a != d); \ assert(b != c); \ assert(b != d); \ assert(c != d); void _rp2040_hwspi_init(struct rp2040_hwspi *self, enum rp2040_hwspi_instance inst_num, enum spi_mode mode, uint baudrate_hz, uint64_t min_delay_ns, uint8_t bogus_data, uint pin_miso, uint pin_mosi, uint pin_clk, uint pin_cs, uint dma1, uint dma2, uint dma3, uint dma4) { /* Be not weary: This is but 12 lines of actual code; and many * lines of comments and assert()s. */ spi_inst_t *inst; uint actual_baudrate_hz; assert(self); assert(baudrate_hz); uint32_t clk_peri_hz = clock_get_hz(clk_peri); debugf("clk_peri = %"PRIu32"Hz", clk_peri_hz); assert(baudrate_hz*2 <= clk_peri_hz); assert_4distinct(pin_miso, pin_mosi, pin_clk, pin_cs); assert_4distinct(dma1, dma2, dma3, dma4); /* Regarding the constraints on pin assignments: see the * RP2040 datasheet, table 2, in §1.4.3 "GPIO Functions". */ switch (inst_num) { case RP2040_HWSPI_0: inst = spi0; assert(pin_miso == 0 || pin_miso == 4 || pin_miso == 16 || pin_miso == 20); /*assert(pin_cs == 1 || pin_cs == 5 || pin_cs == 17 || pin_cs == 21);*/ assert(pin_clk == 2 || pin_clk == 6 || pin_clk == 18 || pin_clk == 22); assert(pin_mosi == 3 || pin_mosi == 7 || pin_mosi == 19 || pin_mosi == 23); break; case RP2040_HWSPI_1: inst = spi1; assert(pin_miso == 8 || pin_miso == 12 || pin_miso == 24 || pin_miso == 28); /*assert(pin_cs == 9 || pin_cs == 13 || pin_cs == 25 || pin_cs == 29);*/ assert(pin_clk == 10 || pin_clk == 14 || pin_clk == 26); assert(pin_mosi == 11 || pin_mosi == 15 || pin_mosi == 27); break; default: assert_notreached("invalid hwspi instance number"); } actual_baudrate_hz = spi_init(inst, baudrate_hz); debugf("baudrate = %uHz", actual_baudrate_hz); assert(actual_baudrate_hz == baudrate_hz); spi_set_format(inst, 8, (mode & 0b10) ? SPI_CPOL_1 : SPI_CPOL_0, (mode & 0b01) ? SPI_CPHA_1 : SPI_CPHA_0, SPI_MSB_FIRST); /* Connect the pins to the PL022; set them each to "function * 1" (again, see the RP2040 datasheet, table 2, in §1.4.3 * "GPIO Functions"). * * ("GPIO_FUNC_SPI" is how the pico-sdk spells "function 1", * since on the RP2040 all of the "function 1" functions are * some part of SPI.) */ gpio_set_function(pin_clk, GPIO_FUNC_SPI); gpio_set_function(pin_mosi, GPIO_FUNC_SPI); gpio_set_function(pin_miso, GPIO_FUNC_SPI); /* Initialize the CS pin for software control. */ gpio_init(pin_cs); gpio_set_dir(pin_cs, GPIO_OUT); gpio_put(pin_cs, 1); /* Initialize self. */ self->inst = inst; self->min_delay_ns = min_delay_ns; self->bogus_data = bogus_data; self->pin_cs = pin_cs; self->dma_tx_ctrl = dma1; self->dma_rx_ctrl = dma2; self->dma_tx_data = dma3; self->dma_rx_data = dma4; self->dead_until_ns = 0; self->sema = (cr_sema_t){0}; /* Initialize the interrupt handler. */ dmairq_set_and_enable_exclusive_handler(DMAIRQ_0, self->dma_rx_data, rp2040_hwspi_intrhandler, self); } static void rp2040_hwspi_readwritev(struct rp2040_hwspi *self, const struct duplex_iovec *iov, int iovcnt) { assert(self); assert(self->inst); assert(iov); assert(iovcnt > 0); /* At this time, I have no intention to run SPI faster than * 80MHz (= 80Mb/s = 10MB/s). If we ran the CPU at just * 100MHz (we'll be running it faster than that, maybe even * 200MHz), that means we'd have 10 clock cycles to send each * byte. * * This affords us substantial simplifications, like being * able to afford 4-cycle changeovers between DMA blocks, and * not having to worry about alignment because we can just use * DMA_SIZE_8. */ uint8_t bogus_rx_dst; int pruned_iovcnt = 0; for (int i = 0; i < iovcnt; i++) if (iov[i].iov_len) pruned_iovcnt++; if (!pruned_iovcnt) return; /* For tx_data_blocks, it doesn't really matter which aliases * we choose: * - None of our fields can be NULL (so no * false-termination). * - Moving const fields first so they don't have to be * re-programmed each time isn't possible for us there need * to be at least 2 const fields, and we only have 1 * (read_addr for rx_data_blocks, and write_addr for * tx_data_blocks). * * But for rx_data_blocks, we need ctrl to be the trigger * register so that the DMA_CTRL_IRQ_QUIET flag isn't cleared * before we get to the trigger; and while for tx_data_blocks * it doesn't really matter, the inverse would be nice. */ struct dma_alias1 *tx_data_blocks = alloca(sizeof(struct dma_alias1)*(pruned_iovcnt+1)); struct dma_alias0 *rx_data_blocks = alloca(sizeof(struct dma_alias0)*(pruned_iovcnt+1)); for (int i = 0, j = 0; i < iovcnt; i++) { if (!iov[i].iov_len) continue; tx_data_blocks[j] = (typeof(tx_data_blocks[0])){ .read_addr = iov[i].iov_write_src ?: &self->bogus_data, .write_addr = &spi_get_hw(self->inst)->dr, .trans_count = iov[i].iov_len, .ctrl = (DMA_CTRL_ENABLE | DMA_CTRL_DATA_SIZE(DMA_SIZE_8) | (iov[i].iov_write_src ? DMA_CTRL_INCR_READ : 0) | DMA_CTRL_CHAIN_TO(self->dma_tx_ctrl) | DMA_CTRL_TREQ_SEL(SPI_DREQ_NUM(self->inst, true)) | DMA_CTRL_IRQ_QUIET), }; rx_data_blocks[j] = (typeof(rx_data_blocks[0])){ .read_addr = &spi_get_hw(self->inst)->dr, .write_addr = iov[i].iov_read_dst ?: &bogus_rx_dst, .trans_count = iov[i].iov_len, .ctrl = (DMA_CTRL_ENABLE | DMA_CTRL_DATA_SIZE(DMA_SIZE_8) | (iov[i].iov_read_dst ? DMA_CTRL_INCR_WRITE : 0) | DMA_CTRL_CHAIN_TO(self->dma_rx_ctrl) | DMA_CTRL_TREQ_SEL(SPI_DREQ_NUM(self->inst, false)) | DMA_CTRL_IRQ_QUIET), }; j++; } tx_data_blocks[pruned_iovcnt] = (typeof(tx_data_blocks[0])){0}; rx_data_blocks[pruned_iovcnt] = (typeof(rx_data_blocks[0])){0}; /* Set up ctrl. */ DMA_NONTRIGGER(self->dma_tx_ctrl, read_addr) = tx_data_blocks; DMA_NONTRIGGER(self->dma_tx_ctrl, write_addr) = DMA_CHAN_ADDR(self->dma_tx_data, typeof(tx_data_blocks[0])); DMA_NONTRIGGER(self->dma_tx_ctrl, trans_count) = DMA_CHAN_WR_TRANS_COUNT(typeof(tx_data_blocks[0])); DMA_NONTRIGGER(self->dma_tx_ctrl, ctrl) = (DMA_CTRL_ENABLE | DMA_CHAN_WR_CTRL(typeof(tx_data_blocks[0])) | DMA_CTRL_INCR_READ | DMA_CTRL_CHAIN_TO(self->dma_tx_data) | DMA_CTRL_TREQ_SEL(DREQ_FORCE) | DMA_CTRL_IRQ_QUIET); DMA_NONTRIGGER(self->dma_rx_ctrl, read_addr) = rx_data_blocks; DMA_NONTRIGGER(self->dma_rx_ctrl, write_addr) = DMA_CHAN_ADDR(self->dma_rx_data, typeof(rx_data_blocks[0])); DMA_NONTRIGGER(self->dma_rx_ctrl, trans_count) = DMA_CHAN_WR_TRANS_COUNT(typeof(rx_data_blocks[0])); DMA_NONTRIGGER(self->dma_rx_ctrl, ctrl) = (DMA_CTRL_ENABLE | DMA_CHAN_WR_CTRL(typeof(rx_data_blocks[0])) | DMA_CTRL_INCR_READ | DMA_CTRL_CHAIN_TO(self->dma_rx_data) | DMA_CTRL_TREQ_SEL(DREQ_FORCE) | DMA_CTRL_IRQ_QUIET); /* Run. */ uint64_t now = LO_CALL(bootclock, get_time_ns); if (now < self->dead_until_ns) sleep_until_ns(self->dead_until_ns); bool saved = cr_save_and_disable_interrupts(); gpio_put(self->pin_cs, 0); dma_hw->multi_channel_trigger = (1u<dma_tx_ctrl) | (1u<dma_rx_ctrl); cr_restore_interrupts(saved); cr_sema_wait(&self->sema); self->dead_until_ns = LO_CALL(bootclock, get_time_ns) + self->min_delay_ns; }