/* libfmt/quote.c - C-string quoting for pico-fmt * * Copyright (C) 2025 Luke T. Shumaker * SPDX-License-Identifier: AGPL-3.0-or-later */ #include /* for strnlen() */ #include /* for uint{n}_t() */ #include enum quote { QUOTE_NONE, /* c */ QUOTE_SIMPLE, /* \c */ QUOTE_U4, /* \uABCD */ QUOTE_U8, /* \UABCDABCD */ }; static inline enum quote needs_quote(uint32_t ch) { if (ch == '\a' || ch == '\b' || ch == '\f' || ch == '\n' || ch == '\r' || ch == '\t' || ch == '\v' || ch == '\\' || ch == '\'' || ch == '"' || ch == '?') return QUOTE_SIMPLE; else if (' ' <= ch && ch <= '~') return QUOTE_NONE; else if (ch < 0x10000) return QUOTE_U4; else return QUOTE_U8; } /** * Quote a string to ASCII-only C syntax. Valid UTF-8 is quoted as * short C-escape characters, \uABCD or \UABCDABCD; invalid UTF-8 is * quoted as \xAB. */ static void libfmt_conv_quote(struct fmt_state *state) { uint32_t ch; uint8_t chlen; const char *in = va_arg(*state->args, char*); size_t in_len = strnlen(in, (state->flags & FMT_FLAG_PRECISION) ? state->precision : (size_t)-1); size_t out_len = 2; for (size_t pos = 0; pos < in_len;) { if ((in[pos] & 0b10000000) == 0b00000000) { ch = in[pos] & 0b01111111; chlen = 1; } else if ((in[pos] & 0b11100000) == 0b11000000) { ch = in[pos] & 0b00011111; chlen = 2; } else if ((in[pos] & 0b11110000) == 0b11100000) { ch = in[pos] & 0b00001111; chlen = 3; } else if ((in[pos] & 0b11111000) == 0b11110000) { ch = in[pos] & 0b00000111; chlen = 4; } else goto measure_invalid_utf8; if ((ch == 0 && chlen != 1) || pos + chlen > in_len) goto measure_invalid_utf8; for (uint8_t i = 1; i < chlen; i++) { if ((in[pos+i] & 0b11000000) != 0b10000000) goto measure_invalid_utf8; ch = (ch << 6) | (in[pos+i] & 0b00111111); } if (ch > 0x10FFFF) goto measure_invalid_utf8; pos += chlen; switch (needs_quote(ch)) { case QUOTE_NONE : out_len += 1; break; case QUOTE_SIMPLE : out_len += 2; break; case QUOTE_U4 : out_len += 6; break; case QUOTE_U8 : out_len += 10; break; } continue; measure_invalid_utf8: pos++; out_len += 4; /* \xAB */ } if (!(state->flags & FMT_FLAG_LEFT)) { for (size_t i = 0; i + out_len < state->width; i++) { fmt_state_putchar(state, ' '); } } fmt_state_putchar(state, '"'); for (size_t pos = 0; pos < in_len;) { if ((in[pos] & 0b10000000) == 0b00000000) { ch = in[pos] & 0b01111111; chlen = 1; } else if ((in[pos] & 0b11100000) == 0b11000000) { ch = in[pos] & 0b00011111; chlen = 2; } else if ((in[pos] & 0b11110000) == 0b11100000) { ch = in[pos] & 0b00001111; chlen = 3; } else if ((in[pos] & 0b11111000) == 0b11110000) { ch = in[pos] & 0b00000111; chlen = 4; } else goto output_invalid_utf8; if ((ch == 0 && chlen != 1) || pos + chlen > in_len) goto output_invalid_utf8; for (uint8_t i = 1; i < chlen; i++) { if ((in[pos+i] & 0b11000000) != 0b10000000) goto output_invalid_utf8; ch = (ch << 6) | (in[pos+i] & 0b00111111); } if (ch > 0x10FFFF) goto output_invalid_utf8; pos += chlen; switch (needs_quote(ch)) { case QUOTE_NONE: fmt_state_putchar(state, ch); break; case QUOTE_SIMPLE: fmt_state_putchar(state, '\\'); switch (ch) { case '\a': fmt_state_putchar(state, 'a'); break; case '\b': fmt_state_putchar(state, 'b'); break; case '\f': fmt_state_putchar(state, 'f'); break; case '\n': fmt_state_putchar(state, 'n'); break; case '\r': fmt_state_putchar(state, 'r'); break; case '\t': fmt_state_putchar(state, 't'); break; case '\v': fmt_state_putchar(state, 'v'); break; case '\\': fmt_state_putchar(state, '\\'); break; case '\'': fmt_state_putchar(state, '\''); break; case '"': fmt_state_putchar(state, '"'); break; case '?': fmt_state_putchar(state, '?'); break; } break; case QUOTE_U4: fmt_state_putchar(state, '\\'); fmt_state_putchar(state, 'u'); fmt_state_putchar(state, (ch >> 12) & 0xF); fmt_state_putchar(state, (ch >> 8) & 0xF); fmt_state_putchar(state, (ch >> 4) & 0xF); fmt_state_putchar(state, (ch >> 0) & 0xF); break; case QUOTE_U8: fmt_state_putchar(state, '\\'); fmt_state_putchar(state, 'U'); fmt_state_putchar(state, (ch >> 28) & 0xF); fmt_state_putchar(state, (ch >> 24) & 0xF); fmt_state_putchar(state, (ch >> 20) & 0xF); fmt_state_putchar(state, (ch >> 16) & 0xF); fmt_state_putchar(state, (ch >> 12) & 0xF); fmt_state_putchar(state, (ch >> 8) & 0xF); fmt_state_putchar(state, (ch >> 4) & 0xF); fmt_state_putchar(state, (ch >> 0) & 0xF); break; } continue; output_invalid_utf8: fmt_state_putchar(state, '\\'); fmt_state_putchar(state, 'x'); fmt_state_putchar(state, (in[pos] >> 4) & 0xF); fmt_state_putchar(state, (in[pos] >> 0) & 0xF); pos++; } fmt_state_putchar(state, '"'); for (size_t i = 0; i + out_len < state->width; i++) { fmt_state_putchar(state, ' '); } } [[gnu::constructor]] static void libfmt_install_quote(void) { fmt_install('q', libfmt_conv_quote); }