summaryrefslogtreecommitdiff
path: root/libhw/rp2040_hwspi.c
blob: 1c4e096284da676ed90b5705b407c1adea0575f3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
/* libhw/rp2040_hwspi.c - <libhw/generic/spi.h> implementation for the RP2040's ARM Primecell SSP (PL022)
 *
 * Copyright (C) 2024-2025  Luke T. Shumaker <lukeshu@lukeshu.com>
 * SPDX-License-Identifier: AGPL-3.0-or-later
 */

#include <alloca.h>
#include <inttypes.h> /* for PRIu{n} */

#include <hardware/clocks.h> /* for clock_get_hz() and clk_peri */
#include <hardware/gpio.h>
#include <hardware/spi.h>

#include <libcr/coroutine.h>
#include <libmisc/assert.h>

#define LOG_NAME RP2040_SPI
#include <libmisc/log.h>

#define IMPLEMENTATION_FOR_LIBHW_RP2040_HWSPI_H YES
#include <libhw/rp2040_hwspi.h>

#include <libhw/generic/alarmclock.h>

#include "rp2040_dma.h"

#include "config.h"

#ifndef CONFIG_RP2040_SPI_DEBUG
	#error config.h must define CONFIG_RP2040_SPI_DEBUG (bool)
#endif

/* Declare struct rp2040_hwspi as an implementation of both the
 * io_duplex_readwriter interface and the spi interface.
 *
 * NOTE(review): LO_IMPLEMENTATION_C is defined outside this file;
 * presumably it binds the `rp2040_hwspi_`-prefixed functions in this
 * file (e.g. rp2040_hwspi_readwritev) as the interface methods, with
 * the final `static` argument giving them internal linkage -- confirm
 * against the macro's definition.  */
LO_IMPLEMENTATION_C(io_duplex_readwriter, struct rp2040_hwspi, rp2040_hwspi, static)
LO_IMPLEMENTATION_C(spi, struct rp2040_hwspi, rp2040_hwspi, static)

/* Assert that the four values a, b, c, and d are pairwise distinct.
 *
 * The expansion is wrapped in `do { ... } while (0)` so that it is
 * exactly one statement (and so behaves correctly as the body of an
 * unbraced if/else), and each argument is parenthesized so that
 * arbitrary expressions may be passed.  Each argument is evaluated
 * three times, so arguments must be free of side effects.  */
#define assert_4distinct(a, b, c, d) \
	do {                         \
		assert((a) != (b));  \
		assert((a) != (c));  \
		assert((a) != (d));  \
		assert((b) != (c));  \
		assert((b) != (d));  \
		assert((c) != (d));  \
	} while (0)

/* Initialize `self` to drive one of the RP2040's two PL022 SPI
 * peripherals with 8-bit, MSB-first frames and a software-controlled
 * chip-select line.
 *
 * Arguments:
 *  - self:         The object to initialize; must be non-NULL.
 *  - inst_num:     Which peripheral to use: RP2040_HWSPI_0 or
 *                  RP2040_HWSPI_1.
 *  - mode:         The SPI mode; bit 1 selects the clock polarity
 *                  (CPOL) and bit 0 selects the clock phase (CPHA).
 *  - baudrate_hz:  The bus clock rate; must be nonzero, must be at
 *                  most half of clk_peri, and must be a rate that
 *                  spi_init() can produce exactly.
 *  - min_delay_ns: Stored for rp2040_hwspi_readwritev(), which waits
 *                  at least this long after one transaction ends
 *                  before starting the next.
 *  - bogus_data:   Stored for rp2040_hwspi_readwritev(), which sends
 *                  this byte for segments that have no write source.
 *  - pin_miso, pin_mosi, pin_clk:
 *                  The GPIOs to hand over to the peripheral; each
 *                  must be one of the pins that can be routed to
 *                  that signal of the chosen peripheral.
 *  - pin_cs:       The GPIO to use as chip-select; it is driven from
 *                  software (not by the peripheral), and idles high.
 *  - dma1, dma2, dma3, dma4:
 *                  Four DMA channels for this object to use, as the
 *                  TX control, RX control, TX data, and RX data
 *                  channels, respectively.
 *
 * The four pins must all be distinct from each other, as must the
 * four DMA channels.  All of these requirements are checked with
 * assert().  */
void _rp2040_hwspi_init(struct rp2040_hwspi *self,
                        enum rp2040_hwspi_instance inst_num,
                        enum spi_mode mode,
                        uint baudrate_hz,
                        uint64_t min_delay_ns,
                        uint8_t bogus_data,
                        uint pin_miso,
                        uint pin_mosi,
                        uint pin_clk,
                        uint pin_cs,
                        uint dma1,
                        uint dma2,
                        uint dma3,
                        uint dma4)
{
	/* Be not weary: This is but 12 lines of actual code; and many
	 * lines of comments and assert()s.  */
	spi_inst_t *inst;
	uint actual_baudrate_hz;

	assert(self);
	assert(baudrate_hz);
	uint32_t clk_peri_hz = clock_get_hz(clk_peri);
	debugf("clk_peri = %"PRIu32"Hz", clk_peri_hz);
	/* The baudrate may be at most half of clk_peri.  This is
	 * written as a division rather than as `baudrate_hz*2` so
	 * that a huge baudrate_hz cannot wrap around and slip past
	 * the check.  */
	assert(baudrate_hz <= clk_peri_hz/2);
	assert_4distinct(pin_miso, pin_mosi, pin_clk, pin_cs);
	assert_4distinct(dma1, dma2, dma3, dma4);

	/* Regarding the constraints on pin assignments: see the
	 * RP2040 datasheet, table 2, in §1.4.3 "GPIO Functions".
	 *
	 * The CS pin is not constrained, because it is driven from
	 * software rather than by the PL022.  */
	switch (inst_num) {
	case RP2040_HWSPI_0:
		inst = spi0;
		assert(pin_miso ==  0 || pin_miso ==  4 || pin_miso == 16 || pin_miso == 20);
		/*assert(pin_cs   ==  1 || pin_cs   ==  5 || pin_cs   == 17 || pin_cs   == 21);*/
		assert(pin_clk  ==  2 || pin_clk  ==  6 || pin_clk  == 18 || pin_clk  == 22);
		assert(pin_mosi ==  3 || pin_mosi ==  7 || pin_mosi == 19 || pin_mosi == 23);
		break;
	case RP2040_HWSPI_1:
		inst = spi1;
		assert(pin_miso ==  8 || pin_miso == 12 || pin_miso == 24 || pin_miso == 28);
		/*assert(pin_cs   ==  9 || pin_cs   == 13 || pin_cs   == 25 || pin_cs   == 29);*/
		assert(pin_clk  == 10 || pin_clk  == 14 || pin_clk  == 26);
		assert(pin_mosi == 11 || pin_mosi == 15 || pin_mosi == 27);
		break;
	default:
		assert_notreached("invalid hwspi instance number");
	}

	/* Insist on getting exactly the requested baudrate, rather
	 * than silently running at whatever nearby rate spi_init()
	 * settled on.  */
	actual_baudrate_hz = spi_init(inst, baudrate_hz);
	debugf("baudrate = %uHz", actual_baudrate_hz);
	assert(actual_baudrate_hz == baudrate_hz);
	spi_set_format(inst, 8,
	               (mode & 0b10) ? SPI_CPOL_1 : SPI_CPOL_0,
	               (mode & 0b01) ? SPI_CPHA_1 : SPI_CPHA_0,
	               SPI_MSB_FIRST);

	/* Connect the pins to the PL022; set them each to "function
	 * 1" (again, see the RP2040 datasheet, table 2, in §1.4.3
	 * "GPIO Functions").
	 *
	 * ("GPIO_FUNC_SPI" is how the pico-sdk spells "function 1",
	 * since on the RP2040 all of the "function 1" functions are
	 * some part of SPI.)  */
	gpio_set_function(pin_clk,  GPIO_FUNC_SPI);
	gpio_set_function(pin_mosi, GPIO_FUNC_SPI);
	gpio_set_function(pin_miso, GPIO_FUNC_SPI);

	/* Initialize the CS pin for software control.
	 *
	 * gpio_init() leaves the pin as an input with its output
	 * value set low, so set the output value high *before*
	 * switching the pin to be an output; doing it in the other
	 * order would briefly drive CS low (asserted).  */
	gpio_init(pin_cs);
	gpio_put(pin_cs, 1);
	gpio_set_dir(pin_cs, GPIO_OUT);

	/* Initialize self.  */
	self->inst = inst;
	self->min_delay_ns = min_delay_ns;
	self->bogus_data = bogus_data;
	self->pin_cs = pin_cs;
	self->dma_tx_ctrl = dma1;
	self->dma_rx_ctrl = dma2;
	self->dma_tx_data = dma3;
	self->dma_rx_data = dma4;
	self->dead_until_ns = 0;
}

/* Perform one full-duplex SPI transaction consisting of the `iovcnt`
 * segments in `iov`, busy-polling until the transaction is complete.
 *
 * For each segment with a nonzero iov_len, iov_len bytes are
 * transferred: the bytes sent are read from iov_write_src (or, if
 * iov_write_src is NULL, self->bogus_data is sent repeatedly), and
 * the bytes received are stored to iov_read_dst (or, if iov_read_dst
 * is NULL, are dumped into a scratch byte that is never read).
 * Segments with a zero iov_len are skipped; if every segment has a
 * zero iov_len then this returns without touching the bus or CS.
 *
 * CS is held low for the duration of the transaction, and the
 * transaction is not started until self->min_delay_ns has passed
 * since the end of the previous transaction.
 *
 * `self`, `self->inst`, and `iov` must be non-NULL, and `iovcnt` must
 * be positive.  */
static void rp2040_hwspi_readwritev(struct rp2040_hwspi *self, const struct duplex_iovec *iov, int iovcnt) {
	assert(self);
	assert(self->inst);
	assert(iov);
	assert(iovcnt > 0);

	/* At this time, I have no intention to run SPI faster than
	 * 80MHz (= 80Mb/s = 10MB/s).  If we ran the CPU at just
	 * 100MHz (we'll be running it faster than that, maybe even
	 * 200MHz), that means we'd have 10 clock cycles to send each
	 * byte.
	 *
	 * This affords us substantial simplifications, like being
	 * able to afford 4-cycle changeovers between DMA blocks, and
	 * not having to worry about alignment because we can just use
	 * DMA_SIZE_8.
	 */

	/* Scratch destination for received bytes that the caller
	 * doesn't want (segments with a NULL iov_read_dst).  */
	uint8_t bogus_rx_dst;

	/* Count the segments that actually transfer data; only those
	 * get a DMA control block.  */
	int pruned_iovcnt = 0;
	for (int i = 0; i < iovcnt; i++)
		if (iov[i].iov_len)
			pruned_iovcnt++;
	if (!pruned_iovcnt)
		return;

	/* For tx_data_blocks, it doesn't really matter which aliases
	 * we choose:
	 *  - None of our fields can be NULL (so no
	 *    false-termination).
	 *  - Moving const fields first so they don't have to be
	 *    re-programmed each time isn't possible for us: there
	 *    need to be at least 2 const fields, and we only have 1
	 *    (read_addr for rx_data_blocks, and write_addr for
	 *    tx_data_blocks).
	 *
	 * But for rx_data_blocks, we need ctrl to be the trigger
	 * register so that the DMA_CTRL_IRQ_QUIET flag isn't cleared
	 * before we get to the trigger; and while for tx_data_blocks
	 * it doesn't really matter, the inverse would be nice.
	 */
	/* One control block per non-empty segment, plus one extra
	 * all-zero block at the end of each array.  These live on the
	 * stack, which is OK because we don't return until the DMA
	 * channels are idle.
	 *
	 * NOTE(review): the all-zero block presumably serves as the
	 * terminator that stops the ctrl<->data chaining (compare the
	 * "false-termination" remark above) -- confirm against
	 * rp2040_dma.h and the RP2040 datasheet.  */
	struct dma_alias1 *tx_data_blocks = alloca(sizeof(struct dma_alias1)*(pruned_iovcnt+1));
	struct dma_alias0 *rx_data_blocks = alloca(sizeof(struct dma_alias0)*(pruned_iovcnt+1));

	/* Fill in the control blocks; `i` indexes the caller's
	 * segments, `j` indexes the (pruned) control blocks.  */
	for (int i = 0, j = 0; i < iovcnt; i++) {
		if (!iov[i].iov_len)
			continue;
		/* TX: memory -> SPI data register.  With no write
		 * source, leave INCR_READ off so that the single
		 * bogus_data byte is sent over and over.  When done,
		 * chain back to the TX ctrl channel.  */
		tx_data_blocks[j]    = (typeof(tx_data_blocks[0])){
			.read_addr   = iov[i].iov_write_src ?: &self->bogus_data,
			.write_addr  = &spi_get_hw(self->inst)->dr,
			.trans_count = iov[i].iov_len,
			.ctrl        = (DMA_CTRL_ENABLE
			                | DMA_CTRL_DATA_SIZE(DMA_SIZE_8)
			                | (iov[i].iov_write_src ? DMA_CTRL_INCR_READ : 0)
			                | DMA_CTRL_CHAIN_TO(self->dma_tx_ctrl)
			                | DMA_CTRL_TREQ_SEL(SPI_DREQ_NUM(self->inst, true))
			                | DMA_CTRL_IRQ_QUIET),
		};
		/* RX: SPI data register -> memory.  With no read
		 * destination, leave INCR_WRITE off so that every
		 * byte lands in the one scratch byte.  When done,
		 * chain back to the RX ctrl channel.  */
		rx_data_blocks[j]    = (typeof(rx_data_blocks[0])){
			.read_addr   = &spi_get_hw(self->inst)->dr,
			.write_addr  = iov[i].iov_read_dst ?: &bogus_rx_dst,
			.trans_count = iov[i].iov_len,
			.ctrl        = (DMA_CTRL_ENABLE
			                | DMA_CTRL_DATA_SIZE(DMA_SIZE_8)
			                | (iov[i].iov_read_dst ? DMA_CTRL_INCR_WRITE : 0)
			                | DMA_CTRL_CHAIN_TO(self->dma_rx_ctrl)
			                | DMA_CTRL_TREQ_SEL(SPI_DREQ_NUM(self->inst, false))
			                | DMA_CTRL_IRQ_QUIET),
		};
		j++;
	}
	tx_data_blocks[pruned_iovcnt] = (typeof(tx_data_blocks[0])){0};
	rx_data_blocks[pruned_iovcnt] = (typeof(rx_data_blocks[0])){0};

	/* Set up ctrl.
	 *
	 * Each ctrl channel reads from the array of control blocks
	 * (INCR_READ, so it advances through the array) and writes
	 * into the registers of the corresponding data channel, then
	 * chains to that data channel.  Since each data block chains
	 * back to its ctrl channel, the two channels of a pair hand
	 * off to each other once per block.  DREQ_FORCE is used
	 * because these are memory-to-register copies, which needn't
	 * be paced by the SPI peripheral.
	 *
	 * NOTE(review): DMA_CHAN_ADDR, DMA_CHAN_WR_TRANS_COUNT, and
	 * DMA_CHAN_WR_CTRL are defined in rp2040_dma.h; presumably
	 * they pick the register alias, per-block word count, and
	 * write-side ctrl flags appropriate to the given block type
	 * -- confirm there.  */
	DMA_NONTRIGGER(self->dma_tx_ctrl, read_addr) = tx_data_blocks;
	DMA_NONTRIGGER(self->dma_tx_ctrl, write_addr) = DMA_CHAN_ADDR(self->dma_tx_data, typeof(tx_data_blocks[0]));
	DMA_NONTRIGGER(self->dma_tx_ctrl, trans_count) = DMA_CHAN_WR_TRANS_COUNT(typeof(tx_data_blocks[0]));
	DMA_NONTRIGGER(self->dma_tx_ctrl, ctrl) = (DMA_CTRL_ENABLE
	                                           | DMA_CHAN_WR_CTRL(typeof(tx_data_blocks[0]))
	                                           | DMA_CTRL_INCR_READ
	                                           | DMA_CTRL_CHAIN_TO(self->dma_tx_data)
	                                           | DMA_CTRL_TREQ_SEL(DREQ_FORCE)
	                                           | DMA_CTRL_IRQ_QUIET);
	DMA_NONTRIGGER(self->dma_rx_ctrl, read_addr) = rx_data_blocks;
	DMA_NONTRIGGER(self->dma_rx_ctrl, write_addr) = DMA_CHAN_ADDR(self->dma_rx_data, typeof(rx_data_blocks[0]));
	DMA_NONTRIGGER(self->dma_rx_ctrl, trans_count) = DMA_CHAN_WR_TRANS_COUNT(typeof(rx_data_blocks[0]));
	DMA_NONTRIGGER(self->dma_rx_ctrl, ctrl) = (DMA_CTRL_ENABLE
	                                           | DMA_CHAN_WR_CTRL(typeof(rx_data_blocks[0]))
	                                           | DMA_CTRL_INCR_READ
	                                           | DMA_CTRL_CHAIN_TO(self->dma_rx_data)
	                                           | DMA_CTRL_TREQ_SEL(DREQ_FORCE)
	                                           | DMA_CTRL_IRQ_QUIET);

	/* Run.  */
	/* Wait out whatever remains of the mandatory gap since the
	 * end of the previous transaction.  */
	uint64_t now = LO_CALL(bootclock, get_time_ns);
	if (now < self->dead_until_ns)
		sleep_until_ns(self->dead_until_ns);
	/* TODO: Use interrupts instead of busy-polling.  */
	/* Assert CS (active-low), then start both ctrl channels with
	 * a single register write.  */
	gpio_put(self->pin_cs, 0);
	dma_hw->multi_channel_trigger = (1u<<self->dma_tx_ctrl) | (1u<<self->dma_rx_ctrl);
	/* Spin until all four channels are idle.  */
	while (dma_channel_is_busy(self->dma_tx_ctrl)
	       || dma_channel_is_busy(self->dma_tx_data)
	       || dma_channel_is_busy(self->dma_rx_ctrl)
	       || dma_channel_is_busy(self->dma_rx_data))
		tight_loop_contents();
	/* NOTE(review): presumably this barrier is here to keep the
	 * compiler from moving accesses to the DMA-written receive
	 * buffers to before the wait loop -- confirm.  */
	__compiler_memory_barrier();
	/* De-assert CS, and record the earliest time at which the
	 * next transaction is allowed to start.  */
	gpio_put(self->pin_cs, 1);
	self->dead_until_ns = LO_CALL(bootclock, get_time_ns) + self->min_delay_ns;
}