diff options
Diffstat (limited to 'libfmt/quote.c')
-rw-r--r-- | libfmt/quote.c | 159 |
1 files changed, 0 insertions, 159 deletions
diff --git a/libfmt/quote.c b/libfmt/quote.c deleted file mode 100644 index c91e0b0..0000000 --- a/libfmt/quote.c +++ /dev/null @@ -1,159 +0,0 @@ -/* libfmt/quote.c - C-string quoting for pico-fmt - * - * Copyright (C) 2025 Luke T. Shumaker <lukeshu@lukeshu.com> - * SPDX-License-Identifier: AGPL-3.0-or-later - */ - -#include <string.h> /* for strnlen() */ -#include <stdint.h> /* for uint{n}_t() */ - -#include <libfmt/fmt.h> - -enum quote { - QUOTE_NONE, /* c */ - QUOTE_SIMPLE, /* \c */ - QUOTE_U4, /* \uABCD */ - QUOTE_U8, /* \UABCDABCD */ -}; - -static inline enum quote needs_quote(uint32_t ch) { - if (ch == '\a' || - ch == '\b' || - ch == '\f' || - ch == '\n' || - ch == '\r' || - ch == '\t' || - ch == '\v' || - ch == '\\' || - ch == '\'' || - ch == '"' || - ch == '?') - return QUOTE_SIMPLE; - else if (' ' <= ch && ch <= '~') - return QUOTE_NONE; - else if (ch < 0x10000) - return QUOTE_U4; - else - return QUOTE_U8; -} - -/** - * Quote a string to ASCII-only C syntax. Valid UTF-8 is quoted as - * short C-escape characters, \uABCD or \UABCDABCD; invalid UTF-8 is - * quoted as \xAB. - */ -static void libfmt_conv_quote(struct fmt_state *state) { - uint32_t ch; - uint8_t chlen; - - const char *in = va_arg(*state->args, char*); - size_t in_len = strnlen(in, (state->flags & FMT_FLAG_PRECISION) ? state->precision : (size_t)-1); - - size_t out_len = 2; - for (size_t pos = 0; pos < in_len;) { - if ((in[pos] & 0b10000000) == 0b00000000) { ch = in[pos] & 0b01111111; chlen = 1; } - else if ((in[pos] & 0b11100000) == 0b11000000) { ch = in[pos] & 0b00011111; chlen = 2; } - else if ((in[pos] & 0b11110000) == 0b11100000) { ch = in[pos] & 0b00001111; chlen = 3; } - else if ((in[pos] & 0b11111000) == 0b11110000) { ch = in[pos] & 0b00000111; chlen = 4; } - else goto measure_invalid_utf8; - if ((ch == 0 && chlen != 1) || pos + chlen > in_len) goto measure_invalid_utf8; - for (uint8_t i = 1; i < chlen; i++) { - if ((in[pos+i] & 0b11000000) != 0b10000000) goto measure_invalid_utf8; - ch = (ch << 6) | (in[pos+i] & 0b00111111); - } - if (ch > 0x10FFFF) goto measure_invalid_utf8; - pos += chlen; - - switch (needs_quote(ch)) { - case QUOTE_NONE : out_len += 1; break; - case QUOTE_SIMPLE : out_len += 2; break; - case QUOTE_U4 : out_len += 6; break; - case QUOTE_U8 : out_len += 10; break; - } - continue; - measure_invalid_utf8: - pos++; - out_len += 4; /* \xAB */ - } - - if (!(state->flags & FMT_FLAG_LEFT)) { - for (size_t i = 0; i + out_len < state->width; i++) { - fmt_state_putchar(state, ' '); - } - } - - fmt_state_putchar(state, '"'); - for (size_t pos = 0; pos < in_len;) { - if ((in[pos] & 0b10000000) == 0b00000000) { ch = in[pos] & 0b01111111; chlen = 1; } - else if ((in[pos] & 0b11100000) == 0b11000000) { ch = in[pos] & 0b00011111; chlen = 2; } - else if ((in[pos] & 0b11110000) == 0b11100000) { ch = in[pos] & 0b00001111; chlen = 3; } - else if ((in[pos] & 0b11111000) == 0b11110000) { ch = in[pos] & 0b00000111; chlen = 4; } - else goto output_invalid_utf8; - if ((ch == 0 && chlen != 1) || pos + chlen > in_len) goto output_invalid_utf8; - for (uint8_t i = 1; i < chlen; i++) { - if ((in[pos+i] & 0b11000000) != 0b10000000) goto output_invalid_utf8; - ch = (ch << 6) | (in[pos+i] & 0b00111111); - } - if (ch > 0x10FFFF) goto output_invalid_utf8; - pos += chlen; - - switch (needs_quote(ch)) { - case QUOTE_NONE: - fmt_state_putchar(state, ch); - break; - case QUOTE_SIMPLE: - fmt_state_putchar(state, '\\'); - switch (ch) { - case '\a': fmt_state_putchar(state, 'a'); break; - case '\b': fmt_state_putchar(state, 'b'); break; - case '\f': fmt_state_putchar(state, 'f'); break; - case '\n': fmt_state_putchar(state, 'n'); break; - case '\r': fmt_state_putchar(state, 'r'); break; - case '\t': fmt_state_putchar(state, 't'); break; - case '\v': fmt_state_putchar(state, 'v'); break; - case '\\': fmt_state_putchar(state, '\\'); break; - case '\'': fmt_state_putchar(state, '\''); break; - case '"': fmt_state_putchar(state, '"'); break; - case '?': fmt_state_putchar(state, '?'); break; - } - break; - case QUOTE_U4: - fmt_state_putchar(state, '\\'); - fmt_state_putchar(state, 'u'); - fmt_state_putchar(state, (ch >> 12) & 0xF); - fmt_state_putchar(state, (ch >> 8) & 0xF); - fmt_state_putchar(state, (ch >> 4) & 0xF); - fmt_state_putchar(state, (ch >> 0) & 0xF); - break; - case QUOTE_U8: - fmt_state_putchar(state, '\\'); - fmt_state_putchar(state, 'U'); - fmt_state_putchar(state, (ch >> 28) & 0xF); - fmt_state_putchar(state, (ch >> 24) & 0xF); - fmt_state_putchar(state, (ch >> 20) & 0xF); - fmt_state_putchar(state, (ch >> 16) & 0xF); - fmt_state_putchar(state, (ch >> 12) & 0xF); - fmt_state_putchar(state, (ch >> 8) & 0xF); - fmt_state_putchar(state, (ch >> 4) & 0xF); - fmt_state_putchar(state, (ch >> 0) & 0xF); - break; - } - continue; - output_invalid_utf8: - fmt_state_putchar(state, '\\'); - fmt_state_putchar(state, 'x'); - fmt_state_putchar(state, (in[pos] >> 4) & 0xF); - fmt_state_putchar(state, (in[pos] >> 0) & 0xF); - pos++; - } - fmt_state_putchar(state, '"'); - - for (size_t i = 0; i + out_len < state->width; i++) { - fmt_state_putchar(state, ' '); - } -} - -[[gnu::constructor]] -static void libfmt_install_quote(void) { - fmt_install('q', libfmt_conv_quote); -} |