summary | refs | log | tree | commit | diff
path: root/libmisc/fmt.c
diff options
context:
space:
mode:
Diffstat (limited to 'libmisc/fmt.c')
-rw-r--r-- libmisc/fmt.c 225
1 files changed, 225 insertions, 0 deletions
diff --git a/libmisc/fmt.c b/libmisc/fmt.c
new file mode 100644
index 0000000..33788b6
--- /dev/null
+++ b/libmisc/fmt.c
@@ -0,0 +1,225 @@
+/* libmisc/fmt.c - Write formatted text
+ *
+ * Copyright (C) 2025 Luke T. Shumaker <lukeshu@lukeshu.com>
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+#include <string.h> /* for strnlen() */
+
+#include <libmisc/fmt.h>
+
+static const char *const hexdig = "0123456789ABCDEF";
+
+/* small/trivial formatters ***************************************************/
+
+void fmt_print_byte(lo_interface fmt_dest w, uint8_t b) { /* Write the single
+ * byte `b` to the destination `w`, unmodified (no quoting or escaping
+ * happens here), via LO_CALL(w, putb, ...). */
+ LO_CALL(w, putb, b);
+}
+
+/**
+ * Write a boolean to `w` as the bare word "true" or "false".
+ */
+void fmt_print_bool(lo_interface fmt_dest w, bool b) {
+	if (b)
+		fmt_print_str(w, "true");
+	else
+		fmt_print_str(w, "false");
+}
+
+void fmt_print_base16_u8_(lo_interface fmt_dest w, uint8_t x) { /* Write `x` to `w`
+ * as "0x" followed by its base-16 form.  NOTE(review): the
+ * (rjust, 2, '0', ...) directive presumably right-justifies the digits
+ * to a width of 2, padding with '0'; the directive syntax belongs to
+ * the fmt_print macro, which is defined outside this file -- confirm
+ * there. */
+ fmt_print(w, "0x", (rjust, 2, '0', (base16, x)));
+}
+void fmt_print_base16_u16_(lo_interface fmt_dest w, uint16_t x) { /* Same as the u8 variant, but with a width of 4. */
+ fmt_print(w, "0x", (rjust, 4, '0', (base16, x)));
+}
+void fmt_print_base16_u32_(lo_interface fmt_dest w, uint32_t x) { /* Same as the u8 variant, but with a width of 8. */
+ fmt_print(w, "0x", (rjust, 8, '0', (base16, x)));
+}
+void fmt_print_base16_u64_(lo_interface fmt_dest w, uint64_t x) { /* Same as the u8 variant, but with a width of 16. */
+ fmt_print(w, "0x", (rjust, 16, '0', (base16, x)));
+}
+
+void fmt_print_ptr(lo_interface fmt_dest w, void *ptr) { /* Write the address held
+ * in `ptr` by converting it to uintptr_t and passing it to one of the
+ * fmt_print_base16_u<N>_() functions.  NOTE(review): LM_CAT3_ presumably
+ * token-pastes its three arguments, so N would be the compiler-provided
+ * __INTPTR_WIDTH__ -- confirm against the macro's definition. */
+ LM_CAT3_(fmt_print_base16_u, __INTPTR_WIDTH__, _)(w, (uintptr_t)ptr);
+}
+
+/* quote **********************************************************************/
+
+/**
+ * Write a single byte to `w` as an ASCII-only C character literal,
+ * including the surrounding single quotes.
+ *
+ * Printable ASCII (' ' through '~') is written as-is, except that a
+ * single quote or a backslash is preceded by a backslash.  Every other
+ * byte is written as a two-digit uppercase hex escape, \xAB.
+ */
+void fmt_print_qbyte(lo_interface fmt_dest w, uint8_t b) {
+	const bool printable = (b >= ' ' && b <= '~');
+	/* Both the hex escape and the quote/backslash escapes begin
+	 * with a backslash. */
+	const bool backslash = !printable || b == '\'' || b == '\\';
+
+	fmt_print_byte(w, '\'');
+	if (backslash)
+		fmt_print_byte(w, '\\');
+	if (printable) {
+		fmt_print_byte(w, b);
+	} else {
+		fmt_print_byte(w, 'x');
+		fmt_print_byte(w, hexdig[b >> 4]);
+		fmt_print_byte(w, hexdig[b & 0xF]);
+	}
+	fmt_print_byte(w, '\'');
+}
+
+/**
+ * Quote a region of memory to ASCII-only C string syntax, including
+ * the surrounding double quotes.
+ *
+ * The `size` bytes at `_str` are decoded as UTF-8.  Each well-formed
+ * sequence is written as one of:
+ *  - a short C escape (\0 \a \b \f \n \r \t \v \\ \' \" \?),
+ *  - itself, if it is printable ASCII (' ' through '~'),
+ *  - \uABCD, if the codepoint is below U+10000, or
+ *  - \UABCDABCD otherwise.
+ * A byte that does not begin a well-formed sequence (bad lead or
+ * continuation byte, truncated sequence, overlong encoding, UTF-16
+ * surrogate, or codepoint above U+10FFFF) is written as \xAB, and
+ * decoding resumes at the byte after it.
+ *
+ * Note that the output is not guaranteed to parse back to the same
+ * bytes: a C compiler reads hex or octal digits that directly follow
+ * \xAB or \0 as part of that escape.
+ */
+void fmt_print_qmem(lo_interface fmt_dest w, const void *_str, size_t size) {
+	const uint8_t *str = _str;
+	fmt_print_byte(w, '"');
+	for (size_t pos = 0; pos < size;) {
+		uint32_t ch;     /* codepoint being decoded */
+		uint32_t chmin;  /* smallest codepoint that needs `chlen` bytes */
+		uint8_t  chlen;  /* length of the sequence, in bytes */
+		uint8_t  esc = 0; /* letter of the short C escape, or 0 for none */
+
+		/* Classify the lead byte. */
+		if      ((str[pos] & 0b10000000) == 0b00000000) { ch = str[pos] & 0b01111111; chlen = 1; chmin = 0x00000; }
+		else if ((str[pos] & 0b11100000) == 0b11000000) { ch = str[pos] & 0b00011111; chlen = 2; chmin = 0x00080; }
+		else if ((str[pos] & 0b11110000) == 0b11100000) { ch = str[pos] & 0b00001111; chlen = 3; chmin = 0x00800; }
+		else if ((str[pos] & 0b11111000) == 0b11110000) { ch = str[pos] & 0b00000111; chlen = 4; chmin = 0x10000; }
+		else goto invalid_utf8;
+
+		/* Truncated sequence?  (pos < size, so this cannot wrap.) */
+		if (chlen > size - pos) goto invalid_utf8;
+
+		/* Merge in the continuation bytes. */
+		for (uint8_t i = 1; i < chlen; i++) {
+			if ((str[pos+i] & 0b11000000) != 0b10000000) goto invalid_utf8;
+			ch = (ch << 6) | (str[pos+i] & 0b00111111);
+		}
+
+		/* An overlong encoding can only be recognized once the
+		 * whole codepoint is assembled; looking at the lead
+		 * byte's payload bits alone would wrongly reject e.g.
+		 * U+0800 (E0 A0 80) and U+1F600 (F0 9F 98 80). */
+		if (ch < chmin) goto invalid_utf8;
+		/* UTF-16 surrogates are not valid in UTF-8. */
+		if (0xD800 <= ch && ch <= 0xDFFF) goto invalid_utf8;
+		if (ch > 0x10FFFF) goto invalid_utf8;
+
+		switch (ch) {
+		case '\0': esc = '0';  break;
+		case '\a': esc = 'a';  break;
+		case '\b': esc = 'b';  break;
+		case '\f': esc = 'f';  break;
+		case '\n': esc = 'n';  break;
+		case '\r': esc = 'r';  break;
+		case '\t': esc = 't';  break;
+		case '\v': esc = 'v';  break;
+		case '\\': esc = '\\'; break;
+		case '\'': esc = '\''; break;
+		case '"':  esc = '"';  break;
+		case '?':  esc = '?';  break;
+		default:               break;
+		}
+
+		if (esc) {
+			/* short C-escape */
+			fmt_print_byte(w, '\\');
+			fmt_print_byte(w, esc);
+		} else if (' ' <= ch && ch <= '~') {
+			/* no escaping */
+			fmt_print_byte(w, (uint8_t)ch);
+		} else if (ch < 0x10000) {
+			/* \uABCD */
+			fmt_print_byte(w, '\\');
+			fmt_print_byte(w, 'u');
+			fmt_print_byte(w, hexdig[(ch >> 12) & 0xF]);
+			fmt_print_byte(w, hexdig[(ch >>  8) & 0xF]);
+			fmt_print_byte(w, hexdig[(ch >>  4) & 0xF]);
+			fmt_print_byte(w, hexdig[(ch >>  0) & 0xF]);
+		} else {
+			/* \UABCDABCD */
+			fmt_print_byte(w, '\\');
+			fmt_print_byte(w, 'U');
+			fmt_print_byte(w, hexdig[(ch >> 28) & 0xF]);
+			fmt_print_byte(w, hexdig[(ch >> 24) & 0xF]);
+			fmt_print_byte(w, hexdig[(ch >> 20) & 0xF]);
+			fmt_print_byte(w, hexdig[(ch >> 16) & 0xF]);
+			fmt_print_byte(w, hexdig[(ch >> 12) & 0xF]);
+			fmt_print_byte(w, hexdig[(ch >>  8) & 0xF]);
+			fmt_print_byte(w, hexdig[(ch >>  4) & 0xF]);
+			fmt_print_byte(w, hexdig[(ch >>  0) & 0xF]);
+		}
+		pos += chlen;
+		continue;
+
+	invalid_utf8:
+		/* \xAB: emit only the offending byte, then resynchronize
+		 * on the next one. */
+		fmt_print_byte(w, '\\');
+		fmt_print_byte(w, 'x');
+		fmt_print_byte(w, hexdig[(str[pos] >> 4) & 0xF]);
+		fmt_print_byte(w, hexdig[(str[pos] >> 0) & 0xF]);
+		pos++;
+	}
+	fmt_print_byte(w, '"');
+}
+
+/**
+ * Quote a NUL-terminated string to ASCII-only C string syntax; the
+ * terminating NUL is not part of the quoted output.  See
+ * fmt_print_qmem() for the escaping rules.
+ */
+void fmt_print_qstr(lo_interface fmt_dest w, const char *str) {
+	const size_t len = strlen(str);
+
+	fmt_print_qmem(w, str, len);
+}
+
+/**
+ * Quote a possibly-unterminated string to ASCII-only C string syntax:
+ * at most `n` bytes of `str` are examined, and quoting stops early at
+ * the first NUL byte.  See fmt_print_qmem() for the escaping rules.
+ */
+void fmt_print_qstrn(lo_interface fmt_dest w, const char *str, size_t n) {
+	const size_t len = strnlen(str, n);
+
+	fmt_print_qmem(w, str, len);
+}
+
+/* int ************************************************************************/
+
+/**
+ * declare(BASE, BITS) defines two integer formatters:
+ *
+ *  - _fmt_print_base<BASE>_s<BITS>(w, val): writes a '-' if `val` is
+ *    negative, then the magnitude of `val`.
+ *  - _fmt_print_base<BASE>_u<BITS>(w, absval): writes `absval` in
+ *    base BASE using the digits in `hexdig`, most-significant digit
+ *    first, with no prefix and no leading zeros (0 is written as "0").
+ */
+#define declare(BASE, BITS) \
+	void _fmt_print_base##BASE##_s##BITS(lo_interface fmt_dest w, \
+	                                     int##BITS##_t val) { \
+		/* Take the magnitude in the unsigned type: negating \
+		 * the most-negative value as a signed integer would \
+		 * overflow, whereas unsigned subtraction wraps to \
+		 * exactly the magnitude we want. */ \
+		uint##BITS##_t absval = (uint##BITS##_t)val; \
+		if (val < 0) { \
+			fmt_print_byte(w, '-'); \
+			absval = (uint##BITS##_t)(0 - absval); \
+		} \
+		_fmt_print_base##BASE##_u##BITS(w, absval); \
+	} \
+	\
+	void _fmt_print_base##BASE##_u##BITS(lo_interface fmt_dest w, \
+	                                     uint##BITS##_t absval) { \
+		/* This digit-counting is O(log(absval)); there are \
+		 * `__builtin_clz`-based O(1) ways to do this, but when I \
+		 * tried them they bloated the code-size too much.  And this \
+		 * function as a whole is already O(log(absval)) anyway \
+		 * because of actually printing the digits. */ \
+		unsigned ndigits = 1; \
+		uint##BITS##_t div = 1; \
+		/* Grow `div` to the place value of the leading digit; \
+		 * it cannot overflow because div*BASE <= absval \
+		 * whenever the loop body runs. */ \
+		while (absval / div >= BASE) { \
+			div *= BASE; \
+			ndigits++; \
+		} \
+		\
+		/* Peel off digits, most-significant first. */ \
+		for (unsigned i = 0; i < ndigits; i++) { \
+			unsigned digit = (unsigned) (absval / div); \
+			absval %= div; \
+			div /= BASE; \
+			fmt_print_byte(w, hexdig[digit]); \
+		} \
+	} \
+	LM_FORCE_SEMICOLON
+
+declare(2, 8);
+declare(2, 16);
+declare(2, 32);
+declare(2, 64);
+
+declare(8, 8);
+declare(8, 16);
+declare(8, 32);
+declare(8, 64);
+
+declare(10, 8);
+declare(10, 16);
+declare(10, 32);
+declare(10, 64);
+
+declare(16, 8);
+declare(16, 16);
+declare(16, 32);
+declare(16, 64);
+
+#undef declare
+
+/* fmt_buf ********************************************************************/
+
+/* NOTE(review): presumably registers `struct fmt_buf` as a (static)
+ * implementation of the `fmt_dest` interface, backed by the
+ * fmt_buf_*() functions in this file -- confirm against the
+ * LO_IMPLEMENTATION_C definition. */
+LO_IMPLEMENTATION_C(fmt_dest, struct fmt_buf, fmt_buf, static);
+
+/**
+ * Append one byte to `buf`.
+ *
+ * The byte is stored only while there is room (`len < cap`), but
+ * `len` is incremented regardless, so `len` may end up greater than
+ * `cap`: it counts the bytes offered, not the bytes stored.
+ */
+static void fmt_buf_putb(struct fmt_buf *buf, uint8_t b) {
+	uint8_t *const bytes = (uint8_t *)(buf->dat);
+	const bool fits = buf->len < buf->cap;
+
+	if (fits)
+		bytes[buf->len] = b;
+	buf->len += 1;
+}
+
+/**
+ * Report the current value of `buf->len`.  Because fmt_buf_putb()
+ * increments `len` even when the byte did not fit, this may exceed
+ * `buf->cap`.
+ */
+static size_t fmt_buf_tell(struct fmt_buf *buf) {
+	const size_t nbytes = buf->len;
+
+	return nbytes;
+}