summaryrefslogtreecommitdiff
path: root/libfmt/quote.c
diff options
context:
space:
mode:
Diffstat (limited to 'libfmt/quote.c')
-rw-r--r--libfmt/quote.c159
1 files changed, 0 insertions, 159 deletions
diff --git a/libfmt/quote.c b/libfmt/quote.c
deleted file mode 100644
index c91e0b0..0000000
--- a/libfmt/quote.c
+++ /dev/null
@@ -1,159 +0,0 @@
-/* libfmt/quote.c - C-string quoting for pico-fmt
- *
- * Copyright (C) 2025 Luke T. Shumaker <lukeshu@lukeshu.com>
- * SPDX-License-Identifier: AGPL-3.0-or-later
- */
-
-#include <string.h> /* for strnlen() */
-#include <stdint.h> /* for uint{n}_t() */
-
-#include <libfmt/fmt.h>
-
-/* How a decoded code point has to be written in the quoted output. */
-enum quote {
-    QUOTE_NONE   = 0, /* emitted unchanged:              c          */
-    QUOTE_SIMPLE = 1, /* backslash + one character:      \c         */
-    QUOTE_U4     = 2, /* 4-digit universal char name:    \uABCD     */
-    QUOTE_U8     = 3, /* 8-digit universal char name:    \UABCDABCD */
-};
-
-/**
- * Classify the code point `ch` by the kind of escaping it needs.
- *
- * Characters that have a dedicated backslash escape yield QUOTE_SIMPLE,
- * the remaining printable ASCII range (' ' through '~') yields
- * QUOTE_NONE, and everything else yields QUOTE_U4 when it is below
- * 0x10000 or QUOTE_U8 otherwise.
- */
-static inline enum quote needs_quote(uint32_t ch) {
-    switch (ch) {
-    case '\a': case '\b': case '\f': case '\n':
-    case '\r': case '\t': case '\v':
-    case '\\': case '\'': case '"':  case '?':
-        return QUOTE_SIMPLE;
-    default:
-        break;
-    }
-
-    if (ch >= ' ' && ch <= '~') {
-        return QUOTE_NONE;
-    }
-    return (ch < 0x10000) ? QUOTE_U4 : QUOTE_U8;
-}
-
-/**
- * Decode the UTF-8 sequence at the start of `in`, reading at most
- * `avail` bytes (`avail` must be at least 1).
- *
- * On success the code point is stored in `*out` and the length of the
- * sequence (1-4) is returned.  Returns 0, leaving `*out` untouched,
- * when the bytes are not well-formed UTF-8: a stray continuation byte,
- * a truncated sequence, an overlong encoding, a UTF-16 surrogate, or a
- * value above U+10FFFF.
- */
-static uint8_t libfmt_quote_decode(const char *in, size_t avail, uint32_t *out) {
-    /* Smallest code point that genuinely needs a sequence of each
-     * length; a smaller decoded value means the encoding is overlong. */
-    static const uint32_t min_for_len[5] = { 0, 0x0, 0x80, 0x800, 0x10000 };
-
-    /* Work on unsigned bytes so that plain-char signedness never
-     * leaks into the masks and shifts. */
-    const unsigned char lead = (unsigned char)in[0];
-    uint32_t ch;
-    uint8_t len;
-
-    if      ((lead & 0x80) == 0x00) { ch = lead;        len = 1; }
-    else if ((lead & 0xE0) == 0xC0) { ch = lead & 0x1F; len = 2; }
-    else if ((lead & 0xF0) == 0xE0) { ch = lead & 0x0F; len = 3; }
-    else if ((lead & 0xF8) == 0xF0) { ch = lead & 0x07; len = 4; }
-    else {
-        return 0;
-    }
-
-    if (len > avail) {
-        return 0;
-    }
-    for (uint8_t i = 1; i < len; i++) {
-        const unsigned char cont = (unsigned char)in[i];
-        if ((cont & 0xC0) != 0x80) {
-            return 0;
-        }
-        ch = (ch << 6) | (cont & 0x3F);
-    }
-
-    /* Range checks must look at the fully assembled value: lead bytes
-     * 0xE0 and 0xF0 carry all-zero payload bits in perfectly valid
-     * sequences (U+0800.., U+10000..), so testing the lead byte alone
-     * would reject them. */
-    if (ch < min_for_len[len] || ch > 0x10FFFF || (ch >= 0xD800 && ch <= 0xDFFF)) {
-        return 0;
-    }
-
-    *out = ch;
-    return len;
-}
-
-/**
- * Write the low `ndigits` nibbles of `val`, most significant first, as
- * upper-case ASCII hex digits.
- */
-static void libfmt_quote_puthex(struct fmt_state *state, uint32_t val, unsigned ndigits) {
-    while (ndigits > 0) {
-        ndigits--;
-        fmt_state_putchar(state, "0123456789ABCDEF"[(val >> (4 * ndigits)) & 0xF]);
-    }
-}
-
-/**
- * Write the spaces needed to widen a field of `out_len` characters to
- * `state->width`; writes nothing when the field is already wide enough.
- */
-static void libfmt_quote_pad(struct fmt_state *state, size_t out_len) {
-    for (size_t i = 0; i + out_len < state->width; i++) {
-        fmt_state_putchar(state, ' ');
-    }
-}
-
-/**
- * Quote a string to ASCII-only C syntax, surrounded by double quotes.
- *
- * Consumes one `char *` argument from `state->args`.  When
- * FMT_FLAG_PRECISION is set, at most `state->precision` input bytes
- * are read.  Printable ASCII is copied through; characters that have
- * a short C escape are written as \c; other valid UTF-8 is written as
- * \uABCD or \UABCDABCD; each byte that is not part of well-formed
- * UTF-8 is written as \xAB.
- *
- * The result is padded with spaces to `state->width`: on the right
- * when FMT_FLAG_LEFT is set, otherwise on the left.
- *
- * NOTE(review): a C hex escape has no length limit, so "\xAB" directly
- * followed by a literal hex-digit character would be read back by a C
- * compiler as one longer escape.  The output format is left as it was;
- * confirm whether consumers re-parse it as C source.
- */
-static void libfmt_conv_quote(struct fmt_state *state) {
-    const char *in = va_arg(*state->args, char*);
-    size_t in_len = strnlen(in, (state->flags & FMT_FLAG_PRECISION) ? state->precision : (size_t)-1);
-
-    /* Pass 1: measure the quoted form so the padding can be computed
-     * before anything is written.  Starts at 2 for the two quotes. */
-    size_t out_len = 2;
-    for (size_t pos = 0; pos < in_len;) {
-        uint32_t ch = 0;
-        uint8_t chlen = libfmt_quote_decode(&in[pos], in_len - pos, &ch);
-        if (chlen == 0) {
-            pos++;
-            out_len += 4; /* \xAB */
-            continue;
-        }
-        pos += chlen;
-
-        switch (needs_quote(ch)) {
-        case QUOTE_NONE:   out_len += 1;  break;
-        case QUOTE_SIMPLE: out_len += 2;  break;
-        case QUOTE_U4:     out_len += 6;  break;
-        case QUOTE_U8:     out_len += 10; break;
-        }
-    }
-
-    if (!(state->flags & FMT_FLAG_LEFT)) {
-        libfmt_quote_pad(state, out_len);
-    }
-
-    /* Pass 2: emit.  Uses the same decoder as pass 1, so the measured
-     * and written lengths always agree. */
-    fmt_state_putchar(state, '"');
-    for (size_t pos = 0; pos < in_len;) {
-        uint32_t ch = 0;
-        uint8_t chlen = libfmt_quote_decode(&in[pos], in_len - pos, &ch);
-        if (chlen == 0) {
-            /* Not UTF-8: escape this single byte and resynchronise on
-             * the next one. */
-            fmt_state_putchar(state, '\\');
-            fmt_state_putchar(state, 'x');
-            libfmt_quote_puthex(state, (unsigned char)in[pos], 2);
-            pos++;
-            continue;
-        }
-        pos += chlen;
-
-        switch (needs_quote(ch)) {
-        case QUOTE_NONE:
-            fmt_state_putchar(state, (char)ch);
-            break;
-        case QUOTE_SIMPLE:
-            fmt_state_putchar(state, '\\');
-            switch (ch) {
-            case '\a': fmt_state_putchar(state, 'a');  break;
-            case '\b': fmt_state_putchar(state, 'b');  break;
-            case '\f': fmt_state_putchar(state, 'f');  break;
-            case '\n': fmt_state_putchar(state, 'n');  break;
-            case '\r': fmt_state_putchar(state, 'r');  break;
-            case '\t': fmt_state_putchar(state, 't');  break;
-            case '\v': fmt_state_putchar(state, 'v');  break;
-            case '\\': fmt_state_putchar(state, '\\'); break;
-            case '\'': fmt_state_putchar(state, '\''); break;
-            case '"':  fmt_state_putchar(state, '"');  break;
-            case '?':  fmt_state_putchar(state, '?');  break;
-            }
-            break;
-        case QUOTE_U4:
-            fmt_state_putchar(state, '\\');
-            fmt_state_putchar(state, 'u');
-            libfmt_quote_puthex(state, ch, 4);
-            break;
-        case QUOTE_U8:
-            fmt_state_putchar(state, '\\');
-            fmt_state_putchar(state, 'U');
-            libfmt_quote_puthex(state, ch, 8);
-            break;
-        }
-    }
-    fmt_state_putchar(state, '"');
-
-    /* Trailing padding belongs to left-justified fields only; a
-     * right-justified field was already padded before the quote. */
-    if (state->flags & FMT_FLAG_LEFT) {
-        libfmt_quote_pad(state, out_len);
-    }
-}
-
-/**
- * Constructor hook: registers libfmt_conv_quote() with fmt_install()
- * as the handler for the 'q' conversion character.
- */
-[[gnu::constructor]]
-static void libfmt_install_quote(void)
-{
-    fmt_install('q', libfmt_conv_quote);
-}