diff options
Diffstat (limited to 'libmisc/fmt.c')
-rw-r--r-- | libmisc/fmt.c | 225 |
1 files changed, 225 insertions, 0 deletions
diff --git a/libmisc/fmt.c b/libmisc/fmt.c new file mode 100644 index 0000000..33788b6 --- /dev/null +++ b/libmisc/fmt.c @@ -0,0 +1,225 @@ +/* libmisc/fmt.c - Write formatted text + * + * Copyright (C) 2025 Luke T. Shumaker <lukeshu@lukeshu.com> + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +#include <string.h> /* for strnlen() */ + +#include <libmisc/fmt.h> + +static const char *const hexdig = "0123456789ABCDEF"; + +/* small/trivial formatters ***************************************************/ + +void fmt_print_byte(lo_interface fmt_dest w, uint8_t b) { + LO_CALL(w, putb, b); +} + +void fmt_print_bool(lo_interface fmt_dest w, bool b) { + fmt_print_str(w, b ? "true" : "false"); +} + +void fmt_print_base16_u8_(lo_interface fmt_dest w, uint8_t x) { + fmt_print(w, "0x", (rjust, 2, '0', (base16, x))); +} +void fmt_print_base16_u16_(lo_interface fmt_dest w, uint16_t x) { + fmt_print(w, "0x", (rjust, 4, '0', (base16, x))); +} +void fmt_print_base16_u32_(lo_interface fmt_dest w, uint32_t x) { + fmt_print(w, "0x", (rjust, 8, '0', (base16, x))); +} +void fmt_print_base16_u64_(lo_interface fmt_dest w, uint64_t x) { + fmt_print(w, "0x", (rjust, 16, '0', (base16, x))); +} + +void fmt_print_ptr(lo_interface fmt_dest w, void *ptr) { + LM_CAT3_(fmt_print_base16_u, __INTPTR_WIDTH__, _)(w, (uintptr_t)ptr); +} + +/* quote **********************************************************************/ + +/** + * Quote a byte to ASCII-only C syntax. + */ +void fmt_print_qbyte(lo_interface fmt_dest w, uint8_t b) { + fmt_print_byte(w, '\''); + if (' ' <= b && b <= '~') { + if (b == '\'' || b == '\\') + fmt_print_byte(w, '\\'); + fmt_print_byte(w, b); + } else { + fmt_print_byte(w, '\\'); + fmt_print_byte(w, 'x'); + fmt_print_byte(w, hexdig[(b >> 4) & 0xF]); + fmt_print_byte(w, hexdig[(b >> 0) & 0xF]); + } + fmt_print_byte(w, '\''); +} + +/** + * Quote a region of memory to ASCII-only C string syntax. Valid + * UTF-8 is quoted as short C-escape characters, \uABCD, or + * \UABCDABCD; invalid UTF-8 is quoted as \xAB. + */ +void fmt_print_qmem(lo_interface fmt_dest w, const void *_str, size_t size) { + const uint8_t *str = _str; + fmt_print_byte(w, '"'); + for (size_t pos = 0; pos < size;) { + uint32_t ch; + uint8_t chlen; + if ((str[pos] & 0b10000000) == 0b00000000) { ch = str[pos] & 0b01111111; chlen = 1; } + else if ((str[pos] & 0b11100000) == 0b11000000) { ch = str[pos] & 0b00011111; chlen = 2; } + else if ((str[pos] & 0b11110000) == 0b11100000) { ch = str[pos] & 0b00001111; chlen = 3; } + else if ((str[pos] & 0b11111000) == 0b11110000) { ch = str[pos] & 0b00000111; chlen = 4; } + else goto invalid_utf8; + if ((ch == 0 && chlen != 1) || pos + chlen > size) goto invalid_utf8; + for (uint8_t i = 1; i < chlen; i++) { + if ((str[pos+i] & 0b11000000) != 0b10000000) goto invalid_utf8; + ch = (ch << 6) | (str[pos+i] & 0b00111111); + } + if (ch > 0x10FFFF) goto invalid_utf8; + + if (ch == '\0' || + ch == '\b' || + ch == '\f' || + ch == '\n' || + ch == '\r' || + ch == '\t' || + ch == '\v' || + ch == '\\' || + ch == '\'' || + ch == '"' || + ch == '?') { + /* short C-escape */ + fmt_print_byte(w, '\\'); + switch (ch) { + case '\0': fmt_print_byte(w, '0'); break; + case '\a': fmt_print_byte(w, 'a'); break; + case '\b': fmt_print_byte(w, 'b'); break; + case '\f': fmt_print_byte(w, 'f'); break; + case '\n': fmt_print_byte(w, 'n'); break; + case '\r': fmt_print_byte(w, 'r'); break; + case '\t': fmt_print_byte(w, 't'); break; + case '\v': fmt_print_byte(w, 'v'); break; + case '\\': fmt_print_byte(w, '\\'); break; + case '\'': fmt_print_byte(w, '\''); break; + case '"': fmt_print_byte(w, '"'); break; + case '?': fmt_print_byte(w, '?'); break; + } + } else if (' ' <= ch && ch <= '~') { + /* no escaping */ + fmt_print_byte(w, ch); + } else if (ch < 0x10000) { + /* \uABCD */ + fmt_print_byte(w, '\\'); + fmt_print_byte(w, 'u'); + fmt_print_byte(w, hexdig[(ch >> 12) & 0xF]); + fmt_print_byte(w, hexdig[(ch >> 8) & 0xF]); + fmt_print_byte(w, hexdig[(ch >> 4) & 0xF]); + fmt_print_byte(w, hexdig[(ch >> 0) & 0xF]); + } else { + /* \UABCDABCD */ + fmt_print_byte(w, '\\'); + fmt_print_byte(w, 'U'); + fmt_print_byte(w, hexdig[(ch >> 28) & 0xF]); + fmt_print_byte(w, hexdig[(ch >> 24) & 0xF]); + fmt_print_byte(w, hexdig[(ch >> 20) & 0xF]); + fmt_print_byte(w, hexdig[(ch >> 16) & 0xF]); + fmt_print_byte(w, hexdig[(ch >> 12) & 0xF]); + fmt_print_byte(w, hexdig[(ch >> 8) & 0xF]); + fmt_print_byte(w, hexdig[(ch >> 4) & 0xF]); + fmt_print_byte(w, hexdig[(ch >> 0) & 0xF]); + } + pos += chlen; + continue; + + invalid_utf8: + /* \xAB */ + fmt_print_byte(w, '\\'); + fmt_print_byte(w, 'x'); + fmt_print_byte(w, hexdig[(str[pos] >> 4) & 0xF]); + fmt_print_byte(w, hexdig[(str[pos] >> 0) & 0xF]); + pos++; + } + fmt_print_byte(w, '"'); +} + +void fmt_print_qstr(lo_interface fmt_dest w, const char *str) { + fmt_print_qmem(w, str, strlen(str)); +} + +void fmt_print_qstrn(lo_interface fmt_dest w, const char *str, size_t n) { + fmt_print_qmem(w, str, strnlen(str, n)); +} + +/* int ************************************************************************/ + +#define declare(BASE, BITS) \ + void _fmt_print_base##BASE##_s##BITS(lo_interface fmt_dest w, \ + int##BITS##_t val) { \ + if (val < 0) { \ + fmt_print_byte(w, '-'); \ + val = -val; \ + } \ + _fmt_print_base##BASE##_u##BITS(w, (uint##BITS##_t)val); \ + } \ + \ + void _fmt_print_base##BASE##_u##BITS(lo_interface fmt_dest w, \ + uint##BITS##_t absval) { \ + /* This digit-counting is O(log(absval)); there are \ + * `__builtin_clz`-based O(1) ways to do this, but when I \ + * tried them they bloated the code-size too much. And this \ + * function as a whole is already O(log(absval)) anyway \ + * because of actually printing the digits. */ \ + unsigned ndigits = 1; \ + uint##BITS##_t div = 1; \ + while (absval / div >= BASE) { \ + div *= BASE; \ + ndigits++; \ + } \ + \ + for (unsigned i = 0; i < ndigits; i++) { \ + unsigned digit = (unsigned) (absval / div); \ + absval %= div; \ + div /= BASE; \ + fmt_print_byte(w, hexdig[digit]); \ + } \ + } \ + LM_FORCE_SEMICOLON + +declare(2, 8); +declare(2, 16); +declare(2, 32); +declare(2, 64); + +declare(8, 8); +declare(8, 16); +declare(8, 32); +declare(8, 64); + +declare(10, 8); +declare(10, 16); +declare(10, 32); +declare(10, 64); + +declare(16, 8); +declare(16, 16); +declare(16, 32); +declare(16, 64); + +#undef declare + +/* fmt_buf ********************************************************************/ + +LO_IMPLEMENTATION_C(fmt_dest, struct fmt_buf, fmt_buf, static); + +static void fmt_buf_putb(struct fmt_buf *buf, uint8_t b) { + if (buf->len < buf->cap) + ((uint8_t *)(buf->dat))[buf->len] = b; + buf->len++; +} + +static size_t fmt_buf_tell(struct fmt_buf *buf) { + return buf->len; +} |