Diffstat (limited to 'libcr/coroutine.c')
-rw-r--r--  libcr/coroutine.c  398
1 file changed, 398 insertions, 0 deletions
diff --git a/libcr/coroutine.c b/libcr/coroutine.c
new file mode 100644
index 0000000..12d4a25
--- /dev/null
+++ b/libcr/coroutine.c
@@ -0,0 +1,398 @@
+/* coroutine.c - Simple embeddable coroutine implementation
+ *
+ * Copyright (C) 2024 Luke T. Shumaker <lukeshu@lukeshu.com>
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+#include <stdbool.h> /* for bool, true, false */
+#include <stdint.h> /* for uint8_t */
+#include <stdio.h> /* for printf(), fprintf(), stderr */
+#include <stdlib.h> /* for malloc(), free() */
+#include <assert.h>
+#include <setjmp.h>
+
+#include "coroutine.h"
+
+/* Configuration **************************************************************/
+
+#define USE_CONFIG_COROUTINE
+#include "config.h"
+
+#ifndef CONFIG_COROUTINE_DEFAULT_STACK_SIZE
+# error config.h must define CONFIG_COROUTINE_DEFAULT_STACK_SIZE
+#endif
+#ifndef CONFIG_COROUTINE_NUM
+# error config.h must define CONFIG_COROUTINE_NUM
+#endif
+#ifndef CONFIG_COROUTINE_MEASURE_STACK
+# error config.h must define CONFIG_COROUTINE_MEASURE_STACK
+#endif
+#ifndef CONFIG_COROUTINE_PROTECT_STACK
+# error config.h must define CONFIG_COROUTINE_PROTECT_STACK
+#endif
+#ifndef CONFIG_COROUTINE_DEBUG
+# error config.h must define CONFIG_COROUTINE_DEBUG
+#endif
+
+/* Implementation *************************************************************/
+
+/*
+ * Portability notes:
+ *
+ * - It uses GCC `__attribute__`s, though these can likely be easily
+ * swapped for other compilers.
+ *
+ * - It has a small bit of CPU-specific code (assembly, and definition
+ * of a STACK_GROWS_DOWNWARD={0,1} macro) in
+ * coroutine.c:call_with_stack(). Other than this, it should be
+ * portable to other CPUs. It currently contains implementations
+ * for __x86_64__ and __arm__, and should be fairly easy to add
+ * implementations for other CPUs.
+ *
+ * - It uses setjmp()/longjmp() in "unsafe" ways. POSIX-2017
+ * longjmp(3) says
+ *
+ * > If the most recent invocation of setjmp() with the
+ * > corresponding jmp_buf ... or if the function containing the
+ * > invocation of setjmp() has terminated execution in the interim,
+ * > or if the invocation of setjmp() was within the scope of an
+ * > identifier with variably modified type and execution has left
+ * > that scope in the interim, the behavior is undefined.
+ *
+ * We use longjmp() in both of these scenarios, but make it OK by using
+ * call_with_stack() to manage the stack ourselves, assuming the
+ * sole reason that longjmp() behavior is undefined in such cases is
+ * because the stack that its saved stack-pointer points to is no
+ * longer around. It seems absurd that an implementation would
+ * choose to do something else, but I'm calling it out here because
+ * you never know.
+ *
+ * Note that setjmp()/longjmp() are defined in 3 places: in the libc
+ * (glibc/newlib), as GCC intrinsics, and the lower-level GCC
+ * __builtin_{setjmp,longjmp} which the libc and intrinsic versions
+ * likely use. Our assumptions seem to be valid for
+ * x86_64-pc-linux-gnu/gcc-14.2.1/glibc-2.40 and
+ * arm-none-eabi/gcc-14.1.0/newlib-4.4.0.
+ *
+ * Why not use <ucontext.h>, the now-deprecated (was in POSIX.1-2001,
+ * is gone in POSIX-2008) alternative to <setjmp.h>? It would let us
+ * do this without any assembly or unsafe assumptions. Simply:
+ * because newlib does not provide it.
+ */
+
+/*
+ * Design decisions and notes:
+ *
+ * - Coroutines are launched with a stack that is filled with
+ * known values (zero or a pre-determined pattern, depending on the
+ * config above). Because all stack variables should be initialized
+ * before they are read, this "shouldn't" make a difference, but:
+ * (1) Initializing it to a known value allows us to measure how
+ * much of the stack was written to, which is helpful to tune stack
+ * sizes. (2) Leaving it uninitialized just gives me the willies.
+ *
+ * - Because embedded programs should be averse to using the heap,
+ * CONFIG_COROUTINE_NUM is fixed, instead of having coroutine_add()
+ * dynamically grow the coroutine_table as-needed.
+ *
+ * - On the flip-side, coroutine stacks are allocated on the heap
+ * instead of having them be statically-allocated along with
+ *   coroutine_table. (1) This reduces the blast-area of a
+ *   stack-overflow; and indeed, if the end of the stack aligns with a
+ * page-boundary then memory-protection can even detect the overflow
+ * for us. (2) Having different-looking addresses for stack-area vs
+ * static-area is handy for making things jump out at you when
+ *   debugging. (3) It can likely also improve things by making it
+ *   easier to get page-aligned stacks.
+ *
+ * - Coroutines must use cr_exit() instead of returning because if
+ * they return then they will return to call_with_stack() in
+ * coroutine_add() (not to after the longjmp() call in
+ * coroutine_main()), and besides being
+ * wrong-for-our-desired-flow-control, that's a stack location that
+ * no longer exists.
+ *
+ * Things to consider changing:
+ *
+ * - Consider having _cr_transition() go ahead and find the next
+ *   coroutine to run and longjmp() directly to it, instead of first
+ * jumping back to coroutine_main(). This could save a few cycles
+ * and a few bytes.
+ */
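+
+/*
+ * For orientation, a minimal usage sketch.  This is illustrative only:
+ * `counter_task` is a hypothetical task function, and cr_fn_t is
+ * assumed to be `void (*)(void *)`, matching how call_with_stack()
+ * passes `args` as the first argument:
+ *
+ *     static void counter_task(void *args) {
+ *         (void)args;
+ *         cr_begin();       // hand control back to coroutine_add_with_stack_size()
+ *         for (int i = 0; i < 3; i++) {
+ *             printf("cid=%zu: i=%d\n", cr_getcid(), i);
+ *             cr_yield();   // let other coroutines run
+ *         }
+ *         cr_exit();        // must not plain-`return` (see above)
+ *     }
+ *
+ *     int main(void) {
+ *         coroutine_add_with_stack_size(0, counter_task, NULL); // 0 => default stack size
+ *         coroutine_main(); // returns once nothing is runnable
+ *         return 0;
+ *     }
+ */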
+
+#if CONFIG_COROUTINE_DEBUG
+# define debugf(...) printf("dbg: " __VA_ARGS__)
+#else
+# define debugf(...)
+#endif
+
+static jmp_buf coroutine_add_env;
+static jmp_buf coroutine_main_env;
+
+enum coroutine_state {
+ CR_NONE = 0, /* this slot in the table is empty */
+ CR_INITIALIZING, /* running, before cr_begin() */
+ CR_RUNNING, /* running, after cr_begin() */
+ CR_RUNNABLE, /* not running, but runnable */
+ CR_PAUSED, /* not running, and not runnable */
+};
+
+/*
+ * Invariants (and non-invariants):
+ *
+ * - exactly 0 or 1 coroutines have state CR_INITIALIZING
+ * - exactly 0 or 1 coroutines have state CR_RUNNING
+ * - if coroutine_running is not zero, then
+ * coroutine_table[coroutine_running-1] is the currently-running
+ * coroutine
+ * - the coroutine_running coroutine either has state CR_RUNNING or
+ *   CR_INITIALIZING
+ * - a coroutine having state CR_RUNNING does *not* imply that
+ * coroutine_running points at that coroutine; if that coroutine is
+ *   in the middle of coroutine_add(), then coroutine_running points at
+ * the CR_INITIALIZING child coroutine, while leaving the parent
+ * coroutine as CR_RUNNING.
+ */
+
+struct coroutine {
+ volatile enum coroutine_state state;
+ volatile bool sig_unpause;
+ jmp_buf env;
+ size_t stack_size;
+ void *stack;
+};
+
+static struct coroutine coroutine_table[CONFIG_COROUTINE_NUM] = {0};
+static cid_t coroutine_running = 0;
+
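+/* call_with_stack(stack, fn, args): switch the stack-pointer to `stack`,
+ * call fn(args) on that stack, and restore the original stack-pointer
+ * once fn() returns.  In this file fn() never actually returns here;
+ * instead, cr_begin() longjmp()s back into coroutine_add_with_stack_size()
+ * (hence the assert(false) after the call_with_stack() call below). */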
+static void call_with_stack(void *stack, cr_fn_t fn, void *args) {
+ static void *saved_sp = NULL;
+
+ /* As part of sbc-harness, this only really needs to support
+ * ARM-32, but being able to run it on x86-64 is useful for
+ * debugging. */
+#if __x86_64__
+#define STACK_GROWS_DOWNWARD 1
+ asm volatile ("movq %%rsp , %0\n\t" /* saved_sp = sp */
+ "movq %1 , %%rsp\n\t" /* sp = stack */
+ "movq %3 , %%rdi\n\t" /* arg0 = args */
+ "call *%2\n\t" /* fn() */
+ "movq %0 , %%rsp" /* sp = saved_sp */
+ :
+ : /* %0 */"m"(saved_sp),
+ /* %1 */"r"(stack),
+ /* %2 */"r"(fn),
+ /* %3 */"r"(args)
+ : "rdi"
+ );
+#elif __arm__
+#define STACK_GROWS_DOWNWARD 1
+ /* str/ldr can only work with a "lo" register, which sp is
+ * not, so we use r0 as an intermediate because we're going to
+ * clobber it with args anyway. */
+ asm volatile ("mov r0, sp\n\t" /* [saved_sp = sp */
+ "str r0, %0\n\t" /* ] */
+ "mov sp, %1\n\t" /* [sp = stack] */
+ "mov r0, %3\n\t" /* [arg0 = args] */
+ "blx %2\n\t" /* [fn()] */
+	              "ldr r0, %0\n\t" /* [sp = saved_sp */
+ "mov sp, r0" /* ] */
+ :
+ : /* %0 */"m"(saved_sp),
+ /* %1 */"r"(stack),
+ /* %2 */"r"(fn),
+ /* %3 */"r"(args)
+ : "r0"
+ );
+#else
+# error unsupported architecture
+#endif
+}
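+
+/* As an illustration of what porting call_with_stack() to another CPU
+ * could look like, a hypothetical (untested, unsupported) RV64 branch
+ * of the #if chain above might follow the same
+ * save-sp/switch-sp/call/restore-sp pattern:
+ *
+ *     #elif __riscv && (__riscv_xlen == 64)
+ *     #define STACK_GROWS_DOWNWARD 1
+ *         asm volatile ("sd sp, %0\n\t"  // saved_sp = sp
+ *                       "mv sp, %1\n\t"  // sp = stack
+ *                       "mv a0, %3\n\t"  // arg0 = args
+ *                       "jalr %2\n\t"    // fn()
+ *                       "ld sp, %0"      // sp = saved_sp
+ *                       :
+ *                       : "m"(saved_sp), "r"(stack), "r"(fn), "r"(args)
+ *                       : "ra", "a0"
+ *                       );
+ */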
+
+#if CONFIG_COROUTINE_MEASURE_STACK || CONFIG_COROUTINE_PROTECT_STACK
+/* We just need a pattern that is unlikely to occur naturally; this is
+ * just a few bytes that I read from /dev/random. */
+static const uint8_t stack_pattern[] = {0x1e, 0x15, 0x16, 0x0a, 0xcc, 0x52, 0x7e, 0xb7};
+#endif
+
+static inline void assert_cid(cid_t cid) {
+ assert(cid > 0);
+ assert(cid <= CONFIG_COROUTINE_NUM);
+#if CONFIG_COROUTINE_PROTECT_STACK
+ assert(coroutine_table[cid-1].stack_size);
+ assert(coroutine_table[cid-1].stack);
+ for (size_t i = 0; i < sizeof(stack_pattern); i++) {
+ size_t j = coroutine_table[cid-1].stack_size - (i+1);
+ assert(((uint8_t*)coroutine_table[cid-1].stack)[i] == stack_pattern[i]);
+ assert(((uint8_t*)coroutine_table[cid-1].stack)[j] == stack_pattern[j%sizeof(stack_pattern)]);
+ }
+#endif
+}
+
+#define assert_cid_state(cid, opstate) do { \
+ assert_cid(cid); \
+ assert(coroutine_table[(cid)-1].state opstate); \
+ } while (0)
+
+cid_t coroutine_add_with_stack_size(size_t stack_size, cr_fn_t fn, void *args) {
+ static cid_t last_created = 0;
+ cid_t parent = coroutine_running;
+
+ if (!stack_size)
+ stack_size = CONFIG_COROUTINE_DEFAULT_STACK_SIZE;
+
+ if (parent)
+ assert_cid_state(parent, == CR_RUNNING);
+ assert(stack_size);
+ assert(fn);
+ debugf("coroutine_add_with_stack_size(%zu, %#p, %#p)...\n", stack_size, fn, args);
+
+ cid_t child;
+ {
+ size_t idx_base = last_created;
+ for (size_t idx_shift = 0; idx_shift < CONFIG_COROUTINE_NUM; idx_shift++) {
+ child = ((idx_base + idx_shift) % CONFIG_COROUTINE_NUM) + 1;
+ if (coroutine_table[child-1].state == CR_NONE)
+ goto found;
+ }
+ return 0;
+	found:;
+ }
+ debugf("...child=%zu\n", child);
+
+ last_created = child;
+
+ coroutine_table[child-1].stack_size = stack_size;
+	coroutine_table[child-1].stack = malloc(stack_size);
+	assert(coroutine_table[child-1].stack);
+#if CONFIG_COROUTINE_MEASURE_STACK || CONFIG_COROUTINE_PROTECT_STACK
+ for (size_t i = 0; i < stack_size; i++)
+ ((uint8_t*)coroutine_table[child-1].stack)[i] = stack_pattern[i%sizeof(stack_pattern)];
+#endif
+
+ coroutine_running = child;
+ coroutine_table[child-1].state = CR_INITIALIZING;
+ if (!setjmp(coroutine_add_env)) { /* point=a */
+ void *stack_base = coroutine_table[child-1].stack + (STACK_GROWS_DOWNWARD ? stack_size : 0);
+#if CONFIG_COROUTINE_PROTECT_STACK
+# if STACK_GROWS_DOWNWARD
+ stack_base -= sizeof(stack_pattern);
+# else
+ stack_base += sizeof(stack_pattern);
+# endif
+#endif
+ debugf("...stack =%#p\n", coroutine_table[child-1].stack);
+ debugf("...stack_base=%#p\n", stack_base);
+ /* run until cr_begin() */
+ call_with_stack(stack_base, fn, args);
+ assert(false); /* should cr_begin() instead of returning */
+ }
+ assert_cid_state(child, == CR_RUNNABLE);
+ if (parent)
+ assert_cid_state(parent, == CR_RUNNING);
+ coroutine_running = parent;
+
+ return child;
+}
+
+void coroutine_main(void) {
+ debugf("coroutine_main()\n");
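+	/* Simple round-robin scheduler: loop over the table forever;
+	 * `ran` records whether any coroutine was runnable during the
+	 * current pass, so that a full pass in which nothing ran can be
+	 * reported as "no runnable coroutines" below. */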
+ bool ran;
+ for (coroutine_running = 1;; coroutine_running = (coroutine_running%CONFIG_COROUTINE_NUM)+1) {
+ if (coroutine_running == 1)
+ ran = false;
+ struct coroutine *cr = &coroutine_table[coroutine_running-1];
+ if (cr->state == CR_RUNNABLE) {
+ debugf("running cid=%zu...\n", coroutine_running);
+ ran = true;
+ cr->state = CR_RUNNING;
+ if (!setjmp(coroutine_main_env)) { /* point=b */
+ longjmp(cr->env, 1); /* jump to point=c */
+ assert(false); /* should cr_exit() instead of returning */
+ }
+ assert_cid_state(coroutine_running, != CR_RUNNING);
+ if (cr->state == CR_NONE) {
+#if CONFIG_COROUTINE_MEASURE_STACK
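+				/* The stack was pre-filled with stack_pattern, so
+				 * count how many bytes at the grow-away-from end of
+				 * the usable region still hold the pattern; what is
+				 * left is the coroutine's stack high-water mark. */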
+ size_t stack_size = cr->stack_size - (CONFIG_COROUTINE_PROTECT_STACK ? 2*sizeof(stack_pattern) : 0);
+ size_t stack_used = stack_size;
+ for (;;) {
+ size_t i = STACK_GROWS_DOWNWARD
+ ? (CONFIG_COROUTINE_PROTECT_STACK ? sizeof(stack_pattern) : 0) + stack_size - stack_used
+						: (CONFIG_COROUTINE_PROTECT_STACK ? sizeof(stack_pattern) : 0) + stack_used - 1;
+ if (stack_used == 0 || ((uint8_t*)cr->stack)[i] != stack_pattern[i%sizeof(stack_pattern)])
+ break;
+ stack_used--;
+ }
+ printf("info: cid=%zu: exited having used %zu B stack space\n", coroutine_running, stack_used);
+#endif
+ free(cr->stack);
+ coroutine_table[coroutine_running-1] = (struct coroutine){0};
+ }
+ }
+ if (coroutine_running == CONFIG_COROUTINE_NUM && !ran) {
+ fprintf(stderr, "error: no runnable coroutines\n");
+ return;
+ }
+ }
+}
+
+void cr_begin(void) {
+ assert_cid_state(coroutine_running, == CR_INITIALIZING);
+
+ coroutine_table[coroutine_running-1].state = CR_RUNNABLE;
+ if (!setjmp(coroutine_table[coroutine_running-1].env)) /* point=c1 */
+ longjmp(coroutine_add_env, 1); /* jump to point=a */
+}
+
+static inline void _cr_transition(enum coroutine_state state) {
+ debugf("cid=%zu: transition %i->%i\n", coroutine_running, coroutine_table[coroutine_running-1].state, state);
+
+ coroutine_table[coroutine_running-1].state = state;
+ if (!setjmp(coroutine_table[coroutine_running-1].env)) /* point=c2 */
+ longjmp(coroutine_main_env, 1); /* jump to point=b */
+}
+
+void cr_yield(void) {
+ assert_cid_state(coroutine_running, == CR_RUNNING);
+ _cr_transition(CR_RUNNABLE);
+}
+
+void cr_pause_and_yield(void) {
+ assert_cid_state(coroutine_running, == CR_RUNNING);
+	if (coroutine_table[coroutine_running-1].sig_unpause)
+		_cr_transition(CR_RUNNABLE);
+	else
+		_cr_transition(CR_PAUSED);
+	coroutine_table[coroutine_running-1].sig_unpause = false;
+}
+
+void cr_exit(void) {
+ assert_cid_state(coroutine_running, == CR_RUNNING);
+ debugf("cid=%zu: exit\n", coroutine_running);
+
+ coroutine_table[coroutine_running-1].state = CR_NONE;
+ longjmp(coroutine_main_env, 1); /* jump to point=b */
+}
+
+void cr_unpause(cid_t cid) {
+ assert_cid_state(cid, == CR_PAUSED);
+ debugf("cr_unpause(%zu)\n", cid);
+
+ coroutine_table[cid-1].sig_unpause = false;
+ coroutine_table[cid-1].state = CR_RUNNABLE;
+}
+
+void cr_unpause_from_sighandler(cid_t cid) {
+ assert_cid(cid);
+
+	switch (coroutine_table[cid-1].state) {
+	case CR_RUNNING:
+		coroutine_table[cid-1].sig_unpause = true;
+		break;
+	case CR_PAUSED:
+		coroutine_table[cid-1].state = CR_RUNNABLE;
+		break;
+	default:
+		assert(false);
+	}
+}
+
+cid_t cr_getcid(void) {
+ return coroutine_running;
+}