diff options
Diffstat (limited to 'libmisc/fmt.c')
-rw-r--r-- | libmisc/fmt.c | 266 |
1 files changed, 266 insertions, 0 deletions
diff --git a/libmisc/fmt.c b/libmisc/fmt.c new file mode 100644 index 0000000..7c18ef5 --- /dev/null +++ b/libmisc/fmt.c @@ -0,0 +1,266 @@ +/* libmisc/fmt.c - Write formatted text + * + * Copyright (C) 2025 Luke T. Shumaker <lukeshu@lukeshu.com> + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +#include <string.h> /* for strnlen() */ + +#include <libmisc/utf8.h> + +#include <libmisc/fmt.h> + +static const char *const hexdig = "0123456789ABCDEF"; + +/* small/trivial formatters ***************************************************/ + +void fmt_print_mem(lo_interface fmt_dest w, const void *_str, size_t size) { + const uint8_t *str = _str; + while (size--) + fmt_print_byte(w, *(str++)); +} +void fmt_print_str(lo_interface fmt_dest w, const char *str) { + while (*str) + fmt_print_byte(w, *(str++)); +} +void fmt_print_strn(lo_interface fmt_dest w, const char *str, size_t size) { + while (size-- && *str) + fmt_print_byte(w, *(str++)); +} + +void fmt_print_hmem(lo_interface fmt_dest w, const void *_str, size_t size) { + const uint8_t *str = _str; + fmt_print_byte(w, '{'); + for (size_t i = 0; i < size; i++) { + if (i) + fmt_print_byte(w, ','); + fmt_print_hbyte(w, str[i]); + } + fmt_print_byte(w, '}'); +} + +void fmt_print_byte(lo_interface fmt_dest w, uint8_t b) { + LO_CALL(w, putb, b); +} + +void fmt_print_bool(lo_interface fmt_dest w, bool b) { + fmt_print_str(w, b ? "true" : "false"); +} + +void fmt_print_base16_u8_(lo_interface fmt_dest w, uint8_t x) { + fmt_print(w, "0x", (rjust, 2, '0', (base16, x))); +} +void fmt_print_base16_u16_(lo_interface fmt_dest w, uint16_t x) { + fmt_print(w, "0x", (rjust, 4, '0', (base16, x))); +} +void fmt_print_base16_u32_(lo_interface fmt_dest w, uint32_t x) { + fmt_print(w, "0x", (rjust, 8, '0', (base16, x))); +} +void fmt_print_base16_u64_(lo_interface fmt_dest w, uint64_t x) { + fmt_print(w, "0x", (rjust, 16, '0', (base16, x))); +} + +void fmt_print_ptr(lo_interface fmt_dest w, void *ptr) { + LM_CAT3_(fmt_print_base16_u, __INTPTR_WIDTH__, _)(w, (uintptr_t)ptr); +} + +/* quote **********************************************************************/ + +/** + * Quote a byte to ASCII-only C syntax. + */ +void fmt_print_qbyte(lo_interface fmt_dest w, uint8_t b) { + fmt_print_byte(w, '\''); + if (b == '\0' || + b == '\b' || + b == '\f' || + b == '\n' || + b == '\r' || + b == '\t' || + b == '\v' || + b == '\\' || + b == '\'' || + b == '"' || + b == '?') { + fmt_print_byte(w, '\\'); + switch (b) { + case '\0': fmt_print_byte(w, '0'); break; + case '\a': fmt_print_byte(w, 'a'); break; + case '\b': fmt_print_byte(w, 'b'); break; + case '\f': fmt_print_byte(w, 'f'); break; + case '\n': fmt_print_byte(w, 'n'); break; + case '\r': fmt_print_byte(w, 'r'); break; + case '\t': fmt_print_byte(w, 't'); break; + case '\v': fmt_print_byte(w, 'v'); break; + case '\\': fmt_print_byte(w, '\\'); break; + case '\'': fmt_print_byte(w, '\''); break; + case '"': fmt_print_byte(w, '"'); break; + case '?': fmt_print_byte(w, '?'); break; + } + } else if (' ' <= b && b <= '~') { + fmt_print_byte(w, b); + } else { + fmt_print_byte(w, '\\'); + fmt_print_byte(w, 'x'); + fmt_print_byte(w, hexdig[(b >> 4) & 0xF]); + fmt_print_byte(w, hexdig[(b >> 0) & 0xF]); + } + fmt_print_byte(w, '\''); +} + +/** + * Quote a region of memory to ASCII-only C string syntax. Valid + * UTF-8 is quoted as short C-escape characters, \uABCD, or + * \UABCDABCD; invalid UTF-8 is quoted as \xAB. + */ +void fmt_print_qmem(lo_interface fmt_dest w, const void *_str, size_t size) { + const uint8_t *str = _str; + fmt_print_byte(w, '"'); + for (size_t pos = 0; pos < size;) { + uint32_t ch; + uint8_t chlen; + utf8_decode_codepoint(&str[pos], size-pos, &ch, &chlen); + if (!chlen) { + /* invalid UTF-8 */ + /* \xAB */ + fmt_print_byte(w, '\\'); + fmt_print_byte(w, 'x'); + fmt_print_byte(w, hexdig[(str[pos] >> 4) & 0xF]); + fmt_print_byte(w, hexdig[(str[pos] >> 0) & 0xF]); + pos++; + continue; + } + if (ch == '\0' || + ch == '\b' || + ch == '\f' || + ch == '\n' || + ch == '\r' || + ch == '\t' || + ch == '\v' || + ch == '\\' || + ch == '\'' || + ch == '"' || + ch == '?') { + /* short C-escape */ + fmt_print_byte(w, '\\'); + switch (ch) { + case '\0': fmt_print_byte(w, '0'); break; + case '\a': fmt_print_byte(w, 'a'); break; + case '\b': fmt_print_byte(w, 'b'); break; + case '\f': fmt_print_byte(w, 'f'); break; + case '\n': fmt_print_byte(w, 'n'); break; + case '\r': fmt_print_byte(w, 'r'); break; + case '\t': fmt_print_byte(w, 't'); break; + case '\v': fmt_print_byte(w, 'v'); break; + case '\\': fmt_print_byte(w, '\\'); break; + case '\'': fmt_print_byte(w, '\''); break; + case '"': fmt_print_byte(w, '"'); break; + case '?': fmt_print_byte(w, '?'); break; + } + } else if (' ' <= ch && ch <= '~') { + /* no escaping */ + fmt_print_byte(w, ch); + } else if (ch < 0x10000) { + /* \uABCD */ + fmt_print_byte(w, '\\'); + fmt_print_byte(w, 'u'); + fmt_print_byte(w, hexdig[(ch >> 12) & 0xF]); + fmt_print_byte(w, hexdig[(ch >> 8) & 0xF]); + fmt_print_byte(w, hexdig[(ch >> 4) & 0xF]); + fmt_print_byte(w, hexdig[(ch >> 0) & 0xF]); + } else { + /* \UABCDABCD */ + fmt_print_byte(w, '\\'); + fmt_print_byte(w, 'U'); + fmt_print_byte(w, hexdig[(ch >> 28) & 0xF]); + fmt_print_byte(w, hexdig[(ch >> 24) & 0xF]); + fmt_print_byte(w, hexdig[(ch >> 20) & 0xF]); + fmt_print_byte(w, hexdig[(ch >> 16) & 0xF]); + fmt_print_byte(w, hexdig[(ch >> 12) & 0xF]); + fmt_print_byte(w, hexdig[(ch >> 8) & 0xF]); + fmt_print_byte(w, hexdig[(ch >> 4) & 0xF]); + fmt_print_byte(w, hexdig[(ch >> 0) & 0xF]); + } + pos += chlen; + } + fmt_print_byte(w, '"'); +} + +void fmt_print_qstr(lo_interface fmt_dest w, const char *str) { + fmt_print_qmem(w, str, strlen(str)); +} + +void fmt_print_qstrn(lo_interface fmt_dest w, const char *str, size_t n) { + fmt_print_qmem(w, str, strnlen(str, n)); +} + +/* int ************************************************************************/ + +#define declare(BASE, BITS) \ + void _fmt_print_base##BASE##_s##BITS(lo_interface fmt_dest w, \ + int##BITS##_t val) { \ + if (val < 0) { \ + fmt_print_byte(w, '-'); \ + val = -val; \ + } \ + _fmt_print_base##BASE##_u##BITS(w, (uint##BITS##_t)val); \ + } \ + \ + void _fmt_print_base##BASE##_u##BITS(lo_interface fmt_dest w, \ + uint##BITS##_t absval) { \ + /* This digit-counting is O(log(absval)); there are \ + * `__builtin_clz`-based O(1) ways to do this, but when I \ + * tried them they bloated the code-size too much. And this \ + * function as a whole is already O(log(absval)) anyway \ + * because of actually printing the digits. */ \ + unsigned ndigits = 1; \ + uint##BITS##_t div = 1; \ + while (absval / div >= BASE) { \ + div *= BASE; \ + ndigits++; \ + } \ + \ + for (unsigned i = 0; i < ndigits; i++) { \ + unsigned digit = (unsigned) (absval / div); \ + absval %= div; \ + div /= BASE; \ + fmt_print_byte(w, hexdig[digit]); \ + } \ + } \ + LM_FORCE_SEMICOLON + +declare(2, 8); +declare(2, 16); +declare(2, 32); +declare(2, 64); + +declare(8, 8); +declare(8, 16); +declare(8, 32); +declare(8, 64); + +declare(10, 8); +declare(10, 16); +declare(10, 32); +declare(10, 64); + +declare(16, 8); +declare(16, 16); +declare(16, 32); +declare(16, 64); + +#undef declare + +/* fmt_buf ********************************************************************/ + +LO_IMPLEMENTATION_C(fmt_dest, struct fmt_buf, fmt_buf, static); + +static void fmt_buf_putb(struct fmt_buf *buf, uint8_t b) { + if (buf->len < buf->cap) + ((uint8_t *)(buf->dat))[buf->len] = b; + buf->len++; +} + +static size_t fmt_buf_tell(struct fmt_buf *buf) { + return buf->len; +} |