summaryrefslogtreecommitdiff
path: root/libhw_cr/rp2040_hwspi.c
blob: 646d8ba01ba3c6084ab8446e0925b7c7380ae0a5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
/* libhw_cr/rp2040_hwspi.c - <libhw/generic/spi.h> implementation for the RP2040's ARM PrimeCell SSP (PL022)
 *
 * Copyright (C) 2024-2025  Luke T. Shumaker <lukeshu@lukeshu.com>
 * SPDX-License-Identifier: AGPL-3.0-or-later
 */

#include <alloca.h>
#include <inttypes.h> /* for PRIu{n} */

#include <hardware/clocks.h> /* for clock_get_hz() and clk_peri */
#include <hardware/gpio.h>
#include <hardware/spi.h>

#include <libcr/coroutine.h>
#include <libmisc/assert.h>

#define LOG_NAME RP2040_SPI
#include <libmisc/log.h>

#define IMPLEMENTATION_FOR_LIBHW_RP2040_HWSPI_H YES
#include <libhw/rp2040_hwspi.h>

#include <libhw/generic/alarmclock.h>

#include "rp2040_dma.h"

#include "config.h"

#ifndef CONFIG_RP2040_SPI_DEBUG
	#error config.h must define CONFIG_RP2040_SPI_DEBUG (bool)
#endif

/* Interface glue for `struct rp2040_hwspi`.
 *
 * NOTE(review): presumably each of these binds the static
 * `rp2040_hwspi_*` functions defined in this file to the named
 * interface (`io_duplex_readwriter`, `spi`) -- confirm against the
 * definition of LO_IMPLEMENTATION_C.  */
LO_IMPLEMENTATION_C(io_duplex_readwriter, struct rp2040_hwspi, rp2040_hwspi, static);
LO_IMPLEMENTATION_C(spi, struct rp2040_hwspi, rp2040_hwspi, static);

/* DMA interrupt handler; _rp2040_hwspi_init() installs it on the rx
 * data channel, passing the `struct rp2040_hwspi` as `_self`.  The
 * `irq` and `channel` arguments are not used.  */
static void rp2040_hwspi_intrhandler(void *_self, enum dmairq LM_UNUSED(irq), uint LM_UNUSED(channel)) {
	struct rp2040_hwspi *const hwspi = _self;

	/* Drive the chip-select line back to its idle (high) level...  */
	gpio_put(hwspi->pin_cs, 1);

	/* ...and only then wake whoever is waiting on the semaphore
	 * (see the cr_sema_wait() in rp2040_hwspi_readwritev()).  */
	cr_sema_signal_from_intrhandler(&hwspi->sema);
}

/* Assert that the four values a, b, c, and d are pairwise distinct.
 *
 * Expands to exactly one statement, so it is safe to use as the body
 * of an unbraced `if`/`else`; invoke it with a trailing semicolon.
 *
 * The arguments are parenthesized, so expressions containing
 * operators that bind more loosely than `!=` (such as `&` or `|`)
 * are compared as the caller intends.
 *
 * Each argument appears in three comparisons, and so may be
 * evaluated up to three times; do not pass expressions with side
 * effects.  */
#define assert_4distinct(a, b, c, d) \
	do {                         \
		assert((a) != (b));  \
		assert((a) != (c));  \
		assert((a) != (d));  \
		assert((b) != (c));  \
		assert((b) != (d));  \
		assert((c) != (d));  \
	} while (0)

/* Initialize `self` to drive one of the RP2040's hardware SPI blocks,
 * with the chip-select line controlled in software and with four DMA
 * channels reserved for moving the data.
 *
 * Every precondition is checked with assert(); there is no error
 * return.
 *
 *  - self:         the object to initialize; must be non-NULL.
 *  - inst_num:     which hardware block to use (RP2040_HWSPI_0 or
 *                  RP2040_HWSPI_1).
 *  - mode:         SPI mode; bit 1 selects the clock polarity and
 *                  bit 0 selects the clock phase.
 *  - baudrate_hz:  bit clock; must be non-zero, must be at most half
 *                  of clk_peri, and must be exactly achievable (the
 *                  rate reported by spi_init() must equal it).
 *  - min_delay_ns: stored in `self`; rp2040_hwspi_readwritev() uses
 *                  it as the minimum gap between the end of one
 *                  transaction and the start of the next.
 *  - bogus_data:   stored in `self`; the byte that is transmitted for
 *                  segments that have no data to write.
 *  - pin_miso, pin_mosi, pin_clk:
 *                  GPIO numbers; must be ones that the chosen
 *                  instance can be routed to.
 *  - pin_cs:       GPIO number of the chip-select line; any GPIO,
 *                  since it is driven by software (idle high, low
 *                  during a transaction).
 *  - dma1..dma4:   DMA channel numbers, used as (in order) tx-ctrl,
 *                  rx-ctrl, tx-data, and rx-data.
 *
 * The four pins must be pairwise distinct, as must the four DMA
 * channels.
 */
void _rp2040_hwspi_init(struct rp2040_hwspi *self,
                        enum rp2040_hwspi_instance inst_num,
                        enum spi_mode mode,
                        uint baudrate_hz,
                        uint64_t min_delay_ns,
                        uint8_t bogus_data,
                        uint pin_miso,
                        uint pin_mosi,
                        uint pin_clk,
                        uint pin_cs,
                        uint dma1,
                        uint dma2,
                        uint dma3,
                        uint dma4)
{
	/* Be not weary: This is but a dozen or so lines of actual
	 * code; and many lines of comments and assert()s.  */
	spi_inst_t *inst;
	uint actual_baudrate_hz;

	assert(self);
	assert(baudrate_hz);
	uint32_t clk_peri_hz = clock_get_hz(clk_peri);
	debugf("clk_peri = %"PRIu32"Hz", clk_peri_hz);
	/* Equivalent to `baudrate_hz*2 <= clk_peri_hz`, but phrased as
	 * a division so that an absurdly large baudrate_hz cannot wrap
	 * around and slip past the check.  */
	assert(baudrate_hz <= clk_peri_hz/2);
	assert_4distinct(pin_miso, pin_mosi, pin_clk, pin_cs);
	assert_4distinct(dma1, dma2, dma3, dma4);

	/* Regarding the constraints on pin assignments: see the
	 * RP2040 datasheet, table 2, in §1.4.3 "GPIO Functions".  */
	switch (inst_num) {
	case RP2040_HWSPI_0:
		inst = spi0;
		assert(pin_miso ==  0 || pin_miso ==  4 || pin_miso == 16 || pin_miso == 20);
		/*assert(pin_cs   ==  1 || pin_cs   ==  5 || pin_cs   == 17 || pin_cs   == 21);*/
		assert(pin_clk  ==  2 || pin_clk  ==  6 || pin_clk  == 18 || pin_clk  == 22);
		assert(pin_mosi ==  3 || pin_mosi ==  7 || pin_mosi == 19 || pin_mosi == 23);
		break;
	case RP2040_HWSPI_1:
		inst = spi1;
		assert(pin_miso ==  8 || pin_miso == 12 || pin_miso == 24 || pin_miso == 28);
		/*assert(pin_cs   ==  9 || pin_cs   == 13 || pin_cs   == 25 || pin_cs   == 29);*/
		assert(pin_clk  == 10 || pin_clk  == 14 || pin_clk  == 26);
		assert(pin_mosi == 11 || pin_mosi == 15 || pin_mosi == 27);
		break;
	default:
		assert_notreached("invalid hwspi instance number");
	}

	/* Configure the peripheral: 8-bit frames, MSB first, with the
	 * clock polarity/phase taken from the two bits of `mode`.  We
	 * insist on getting exactly the baudrate that was asked for.  */
	actual_baudrate_hz = spi_init(inst, baudrate_hz);
	debugf("baudrate = %uHz", actual_baudrate_hz);
	assert(actual_baudrate_hz == baudrate_hz);
	spi_set_format(inst, 8,
	               (mode & 0b10) ? SPI_CPOL_1 : SPI_CPOL_0,
	               (mode & 0b01) ? SPI_CPHA_1 : SPI_CPHA_0,
	               SPI_MSB_FIRST);

	/* Connect the pins to the PL022; set them each to "function
	 * 1" (again, see the RP2040 datasheet, table 2, in §1.4.3
	 * "GPIO Functions").
	 *
	 * ("GPIO_FUNC_SPI" is how the pico-sdk spells "function 1",
	 * since on the RP2040 all of the "function 1" functions are
	 * some part of SPI.)  */
	gpio_set_function(pin_clk,  GPIO_FUNC_SPI);
	gpio_set_function(pin_mosi, GPIO_FUNC_SPI);
	gpio_set_function(pin_miso, GPIO_FUNC_SPI);

	/* Initialize the CS pin for software control.
	 *
	 * gpio_init() leaves the pin as an input with its output
	 * value cleared, so set the idle (high) level *before*
	 * switching the pin to an output; doing it the other way
	 * around would briefly drive the active-low CS line low.  */
	gpio_init(pin_cs);
	gpio_put(pin_cs, 1);
	gpio_set_dir(pin_cs, GPIO_OUT);

	/* Initialize self.  */
	self->inst = inst;
	self->min_delay_ns = min_delay_ns;
	self->bogus_data = bogus_data;
	self->pin_cs = pin_cs;
	self->dma_tx_ctrl = dma1;
	self->dma_rx_ctrl = dma2;
	self->dma_tx_data = dma3;
	self->dma_rx_data = dma4;
	self->dead_until_ns = 0;
	self->sema = (cr_sema_t){0};

	/* Initialize the interrupt handler.  */
	/* We do this on (just) the rx channel, because the way the
	 * SSP works reads necessarily complete *after* writes.  */
	/* This is done last, so that `self` is fully initialized by
	 * the time the handler could possibly see it.  */
	dmairq_set_and_enable_exclusive_handler(DMAIRQ_0, self->dma_rx_data, rp2040_hwspi_intrhandler, self);
}

/* Perform one SPI transaction: drive chip-select low, transfer every
 * non-empty segment of `iov` (writing and reading at the same time)
 * using DMA, and block the calling coroutine on `self->sema` until
 * rp2040_hwspi_intrhandler() signals it (that handler is also what
 * drives chip-select high again).
 *
 * For each segment:
 *
 *  - `iov_write_from == IOVEC_DISCARD` means that `self->bogus_data`
 *    is transmitted for each of the `iov_len` bytes.
 *
 *  - `iov_read_to == IOVEC_DISCARD` means that the received bytes
 *    are thrown away.
 *
 * Zero-length segments are skipped; if every segment is zero-length
 * then this returns without touching the bus.
 *
 * The start of the transaction is delayed, if necessary, until
 * `self->min_delay_ns` has passed since the previous call finished.
 *
 * NOTE(review): nothing in this function serializes concurrent
 * callers using the same `self`; presumably callers guarantee that
 * -- confirm.
 */
static void rp2040_hwspi_readwritev(struct rp2040_hwspi *self, const struct duplex_iovec *iov, int iovcnt) {
	assert(self);
	assert(self->inst);
	assert(iov);
	assert(iovcnt > 0);

	/* At this time, I have no intention to run SPI faster than
	 * 80MHz (= 80Mb/s = 10MB/s).  If we ran the CPU at just
	 * 100MHz (we'll be running it faster than that, maybe even
	 * 200MHz), that means we'd have 10 clock cycles to send each
	 * byte.
	 *
	 * This affords us substantial simplifications, like being
	 * able to afford 4-cycle changeovers between DMA blocks, and
	 * not having to worry about alignment because we can just use
	 * DMA_SIZE_8.
	 */

	/* Dummy destination for segments whose received bytes are
	 * to be discarded.  */
	uint8_t bogus_rx_dst;

	/* Zero-length segments get no DMA control block; count the
	 * segments that remain.  */
	int pruned_iovcnt = 0;
	for (int i = 0; i < iovcnt; i++)
		if (iov[i].iov_len)
			pruned_iovcnt++;
	if (!pruned_iovcnt)
		return;

	/* It doesn't *really* matter which aliases we choose:
	 *
	 *  - None of our fields can be NULL (so no
	 *    false-termination).
	 *
	 *  - Moving const fields first so they don't have to be
	 *    re-programmed each time isn't possible for us; there
	 *    need to be at least 2 const fields, and we only have 1
	 *    (read_addr for rx_data_blocks, and write_addr for
	 *    tx_data_blocks).
	 *
	 * The code following this initial declaration is generic to
	 * the alias, so changing which alias is used is easy.
	 *
	 * Since we have no hard requirements, here are some mild
	 * preferences:
	 *
	 *  - I like the aliases being different for each channel,
	 *    because it helps prevent alias-specific code from
	 *    sneaking in.
	 *
	 *  - I like the rx channel (the channel the interrupt handler
	 *    is wired to) having ctrl be the trigger, so that we
	 *    don't have to worry about DMA_CTRL_IRQ_QUIET being
	 *    cleared before the trigger, and at the end the control
	 *    block is clean and zeroed-out.
	 *
	 *  - Conversely, I like the tx channel (the non-interrupt
	 *    channel) having ctrl *not* be the trigger, so that
	 *    DMA_CTRL_IRQ_QUIET is cleared by the time the trigger
	 *    happens, so the IRQ machinery doesn't need to be engaged
	 *    at all.
	 */
	/* One control block per non-empty segment, plus one trailing
	 * terminator block.  These live in this function's stack
	 * frame, which is why we must not return until the transfer
	 * has finished.  */
	struct dma_alias1 *tx_data_blocks = alloca(sizeof(struct dma_alias1)*(pruned_iovcnt+1));
	struct dma_alias0 *rx_data_blocks = alloca(sizeof(struct dma_alias0)*(pruned_iovcnt+1));
	static_assert(!DMA_IS_TRIGGER(typeof(tx_data_blocks[0]), ctrl));
	static_assert(DMA_IS_TRIGGER(typeof(rx_data_blocks[0]), ctrl));

	/* Fill in the control blocks; `i` walks `iov`, `j` walks the
	 * (pruned) block arrays.  */
	for (int i = 0, j = 0; i < iovcnt; i++) {
		if (!iov[i].iov_len)
			continue;
		/* tx: memory (or the fixed bogus_data byte, in which
		 * case the read address is not incremented) -> the
		 * SPI data register; then chain back to the tx ctrl
		 * channel.  */
		tx_data_blocks[j]    = (typeof(tx_data_blocks[0])){
			.read_addr   = (iov[i].iov_write_from != IOVEC_DISCARD) ? iov[i].iov_write_from : &self->bogus_data,
			.write_addr  = &spi_get_hw(self->inst)->dr,
			.trans_count = iov[i].iov_len,
			.ctrl        = (DMA_CTRL_ENABLE
			                | DMA_CTRL_DATA_SIZE(DMA_SIZE_8)
			                | ((iov[i].iov_write_from != IOVEC_DISCARD) ? DMA_CTRL_INCR_READ : 0)
			                | DMA_CTRL_CHAIN_TO(self->dma_tx_ctrl)
			                | DMA_CTRL_TREQ_SEL(SPI_DREQ_NUM(self->inst, true))
			                | DMA_CTRL_IRQ_QUIET),
		};
		/* rx: the SPI data register -> memory (or the fixed
		 * bogus_rx_dst byte, in which case the write address
		 * is not incremented); then chain back to the rx ctrl
		 * channel.  */
		rx_data_blocks[j]    = (typeof(rx_data_blocks[0])){
			.read_addr   = &spi_get_hw(self->inst)->dr,
			.write_addr  = (iov[i].iov_read_to != IOVEC_DISCARD) ? iov[i].iov_read_to : &bogus_rx_dst,
			.trans_count = iov[i].iov_len,
			.ctrl        = (DMA_CTRL_ENABLE
			                | DMA_CTRL_DATA_SIZE(DMA_SIZE_8)
			                | ((iov[i].iov_read_to != IOVEC_DISCARD) ? DMA_CTRL_INCR_WRITE : 0)
			                | DMA_CTRL_CHAIN_TO(self->dma_rx_ctrl)
			                | DMA_CTRL_TREQ_SEL(SPI_DREQ_NUM(self->inst, false))
			                | DMA_CTRL_IRQ_QUIET),
		};
		j++;
	}
	/* Terminate each list with an all-zero block.  */
	tx_data_blocks[pruned_iovcnt] = (typeof(tx_data_blocks[0])){0};
	rx_data_blocks[pruned_iovcnt] = (typeof(rx_data_blocks[0])){0};
	/* If ctrl isn't the trigger then we need to make sure that
	 * DMA_CTRL_IRQ_QUIET isn't cleared before the trigger
	 * happens.  */
	if (!DMA_IS_TRIGGER(typeof(rx_data_blocks[0]), ctrl))
		rx_data_blocks[pruned_iovcnt].ctrl = DMA_CTRL_IRQ_QUIET;

	/* Set up ctrl.  */
	/* Each ctrl channel reads the next control block out of the
	 * corresponding array, writes it to the registers of the
	 * matching data channel, and then chains to that data
	 * channel.  */
	DMA_NONTRIGGER(self->dma_tx_ctrl, read_addr) = tx_data_blocks;
	DMA_NONTRIGGER(self->dma_tx_ctrl, write_addr) = DMA_CHAN_ADDR(self->dma_tx_data, typeof(tx_data_blocks[0]));
	DMA_NONTRIGGER(self->dma_tx_ctrl, trans_count) = DMA_CHAN_WR_TRANS_COUNT(typeof(tx_data_blocks[0]));
	DMA_NONTRIGGER(self->dma_tx_ctrl, ctrl) = (DMA_CTRL_ENABLE
	                                           | DMA_CHAN_WR_CTRL(typeof(tx_data_blocks[0]))
	                                           | DMA_CTRL_INCR_READ
	                                           | DMA_CTRL_CHAIN_TO(self->dma_tx_data)
	                                           | DMA_CTRL_TREQ_SEL(DREQ_FORCE)
	                                           | DMA_CTRL_IRQ_QUIET);
	DMA_NONTRIGGER(self->dma_rx_ctrl, read_addr) = rx_data_blocks;
	DMA_NONTRIGGER(self->dma_rx_ctrl, write_addr) = DMA_CHAN_ADDR(self->dma_rx_data, typeof(rx_data_blocks[0]));
	DMA_NONTRIGGER(self->dma_rx_ctrl, trans_count) = DMA_CHAN_WR_TRANS_COUNT(typeof(rx_data_blocks[0]));
	DMA_NONTRIGGER(self->dma_rx_ctrl, ctrl) = (DMA_CTRL_ENABLE
	                                           | DMA_CHAN_WR_CTRL(typeof(rx_data_blocks[0]))
	                                           | DMA_CTRL_INCR_READ
	                                           | DMA_CTRL_CHAIN_TO(self->dma_rx_data)
	                                           | DMA_CTRL_TREQ_SEL(DREQ_FORCE)
	                                           | DMA_CTRL_IRQ_QUIET);

	/* Run.  */
	/* First, wait out whatever remains of the mandatory gap since
	 * the previous transaction.  */
	uint64_t now = LO_CALL(bootclock, get_time_ns);
	if (now < self->dead_until_ns)
		sleep_until_ns(self->dead_until_ns);
	/* Drive CS low and start both ctrl channels with a single
	 * register write.  NOTE(review): presumably interrupts are
	 * masked so that nothing can run in between those two steps
	 * -- confirm.  */
	bool saved = cr_save_and_disable_interrupts();
	gpio_put(self->pin_cs, 0);
	dma_hw->multi_channel_trigger = (1u<<self->dma_tx_ctrl) | (1u<<self->dma_rx_ctrl);
	cr_restore_interrupts(saved);
	/* Block until rp2040_hwspi_intrhandler() signals us; it has
	 * driven CS high again by then.  */
	cr_sema_wait(&self->sema);
	/* Record the earliest time at which the next transaction may
	 * start.  */
	self->dead_until_ns = LO_CALL(bootclock, get_time_ns) + self->min_delay_ns;
}