summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuke T. Shumaker <lukeshu@lukeshu.com>2025-03-08 20:54:55 -0700
committerLuke T. Shumaker <lukeshu@lukeshu.com>2025-04-02 20:44:53 -0600
commit8b7f4ae67bca75e1d2e9429805011f3044941cac (patch)
treecb9a91881263f2eb309a333b6f4f4a5f3b7205ba
parentbb33c30ad89e28d5ff9f4d8073d4f9ee068f484d (diff)
libfmt: Add %q to quote strings
-rw-r--r--build-aux/measurestack/app_plugins.py2
-rw-r--r--libfmt/CMakeLists.txt1
-rw-r--r--libfmt/quote.c159
3 files changed, 162 insertions, 0 deletions
diff --git a/build-aux/measurestack/app_plugins.py b/build-aux/measurestack/app_plugins.py
index 1155098..3a09272 100644
--- a/build-aux/measurestack/app_plugins.py
+++ b/build-aux/measurestack/app_plugins.py
@@ -517,6 +517,7 @@ class PicoFmtPlugin:
QName("conv_pct"),
# libfmt
QName("libfmt_conv_formatter"),
+ QName("libfmt_conv_quote"),
], False
return None
@@ -957,6 +958,7 @@ class LibGCCPlugin:
def init_array(self) -> typing.Collection[QName]:
return [
QName("libfmt_install_formatter"),
+ QName("libfmt_install_quote"),
]
def extra_includes(self) -> typing.Collection[BaseName]:
diff --git a/libfmt/CMakeLists.txt b/libfmt/CMakeLists.txt
index 2d44c03..1b3a80f 100644
--- a/libfmt/CMakeLists.txt
+++ b/libfmt/CMakeLists.txt
@@ -8,6 +8,7 @@ target_include_directories(libfmt SYSTEM INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/i
target_sources(libfmt INTERFACE
libmisc.c
libobj.c
+ quote.c
)
target_link_libraries(libfmt INTERFACE
pico_fmt
diff --git a/libfmt/quote.c b/libfmt/quote.c
new file mode 100644
index 0000000..c91e0b0
--- /dev/null
+++ b/libfmt/quote.c
@@ -0,0 +1,159 @@
+/* libfmt/quote.c - C-string quoting for pico-fmt
+ *
+ * Copyright (C) 2025 Luke T. Shumaker <lukeshu@lukeshu.com>
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+#include <string.h> /* for strnlen() */
+#include <stdint.h> /* for uint{n}_t() */
+
+#include <libfmt/fmt.h>
+
+/* How one decoded code point is rendered in the quoted output.  The
+ * comment on each member shows the shape of the text emitted for it. */
+enum quote {
+    QUOTE_NONE   = 0, /* c          : copied through unchanged */
+    QUOTE_SIMPLE = 1, /* \c         : backslash followed by one character */
+    QUOTE_U4     = 2, /* \uABCD     : backslash-u and 4 hex digits */
+    QUOTE_U8     = 3, /* \UABCDABCD : backslash-U and 8 hex digits */
+};
+
+/* Classify code point `ch` by the kind of escaping it needs:
+ *  - characters that have a dedicated backslash escape -> QUOTE_SIMPLE
+ *  - any other character in the range ' ' .. '~'       -> QUOTE_NONE
+ *  - everything else below 0x10000                     -> QUOTE_U4
+ *  - everything else                                   -> QUOTE_U8
+ */
+static inline enum quote needs_quote(uint32_t ch) {
+    switch (ch) {
+    case '\a':
+    case '\b':
+    case '\f':
+    case '\n':
+    case '\r':
+    case '\t':
+    case '\v':
+    case '\\':
+    case '\'':
+    case '"':
+    case '?':
+        return QUOTE_SIMPLE;
+    default:
+        break;
+    }
+
+    /* The escapable printable characters were already handled above,
+     * so whatever is left in this range can be emitted verbatim. */
+    if (ch >= ' ' && ch <= '~') {
+        return QUOTE_NONE;
+    }
+
+    return (ch < 0x10000) ? QUOTE_U4 : QUOTE_U8;
+}
+
+/* Upper-case hexadecimal digits, indexed by nibble value. */
+static const char quote_hexdigits[] = "0123456789ABCDEF";
+
+/**
+ * Emit the low `ndigits` nibbles of `val` as upper-case hexadecimal
+ * digit characters, most-significant nibble first.
+ */
+static inline void quote_put_hex(struct fmt_state *state, uint32_t val, unsigned int ndigits) {
+    while (ndigits > 0) {
+        ndigits--;
+        fmt_state_putchar(state, quote_hexdigits[(val >> (4 * ndigits)) & 0xF]);
+    }
+}
+
+/**
+ * Decode the UTF-8 sequence that starts at in[pos]; the caller must
+ * guarantee pos < in_len.
+ *
+ * On success, stores the code point in *ch and returns the number of
+ * bytes consumed (1-4).  Returns 0, leaving *ch untouched, if the
+ * bytes at `pos` are not well-formed UTF-8: an invalid lead byte or
+ * stray continuation byte, a sequence cut short by `in_len`, a bad
+ * continuation byte, an overlong encoding, a UTF-16 surrogate, or a
+ * value above U+10FFFF.
+ */
+static inline uint8_t quote_decode_utf8(const char *in, size_t in_len, size_t pos, uint32_t *ch) {
+    /* Work on unsigned bytes so the result does not depend on the
+     * signedness of plain `char`. */
+    const uint8_t lead = (uint8_t)in[pos];
+    uint32_t cp;     /* code point accumulated so far */
+    uint32_t cp_min; /* smallest code point that requires `chlen` bytes */
+    uint8_t chlen;
+
+    if      ((lead & 0b10000000) == 0b00000000) { cp = lead;              cp_min = 0x00000; chlen = 1; }
+    else if ((lead & 0b11100000) == 0b11000000) { cp = lead & 0b00011111; cp_min = 0x00080; chlen = 2; }
+    else if ((lead & 0b11110000) == 0b11100000) { cp = lead & 0b00001111; cp_min = 0x00800; chlen = 3; }
+    else if ((lead & 0b11111000) == 0b11110000) { cp = lead & 0b00000111; cp_min = 0x10000; chlen = 4; }
+    else return 0;
+
+    /* Do not read continuation bytes past the end of the input. */
+    if (in_len - pos < chlen) {
+        return 0;
+    }
+    for (uint8_t i = 1; i < chlen; i++) {
+        const uint8_t cont = (uint8_t)in[pos+i];
+        if ((cont & 0b11000000) != 0b10000000) {
+            return 0;
+        }
+        cp = (cp << 6) | (cont & 0b00111111);
+    }
+
+    /* Overlong forms must be judged on the fully decoded value: the
+     * lead byte's payload bits are legitimately all-zero for e.g.
+     * U+0800 (E0 A0 80) and U+10000 (F0 90 80 80). */
+    if (cp < cp_min) {
+        return 0;
+    }
+    if ((0xD800 <= cp && cp <= 0xDFFF) || cp > 0x10FFFF) {
+        return 0;
+    }
+
+    *ch = cp;
+    return chlen;
+}
+
+/**
+ * Quote a string to ASCII-only C syntax.  Valid UTF-8 is quoted as
+ * short C-escape characters, \uABCD or \UABCDABCD; invalid UTF-8 is
+ * quoted byte-by-byte as \xAB.  The result is wrapped in double
+ * quotes.
+ *
+ * Consumes one `char *` argument from state->args; it is passed
+ * straight to strnlen(), so it must not be NULL.  If
+ * FMT_FLAG_PRECISION is set, at most state->precision input bytes are
+ * examined.  If the quoted text is shorter than state->width it is
+ * padded with spaces: on the right when FMT_FLAG_LEFT is set,
+ * otherwise on the left.
+ */
+static void libfmt_conv_quote(struct fmt_state *state) {
+    uint32_t ch;
+    uint8_t chlen;
+
+    const char *in = va_arg(*state->args, char*);
+    size_t in_len = strnlen(in, (state->flags & FMT_FLAG_PRECISION) ? state->precision : (size_t)-1);
+
+    /* Pass 1: measure the quoted text, so that left-padding can be
+     * written before any of it is emitted. */
+    size_t out_len = 2; /* opening and closing '"' */
+    for (size_t pos = 0; pos < in_len;) {
+        chlen = quote_decode_utf8(in, in_len, pos, &ch);
+        if (chlen == 0) {
+            pos++;
+            out_len += 4; /* \xAB */
+            continue;
+        }
+        pos += chlen;
+
+        switch (needs_quote(ch)) {
+        case QUOTE_NONE   : out_len +=  1; break;
+        case QUOTE_SIMPLE : out_len +=  2; break;
+        case QUOTE_U4     : out_len +=  6; break;
+        case QUOTE_U8     : out_len += 10; break;
+        }
+    }
+
+    if (!(state->flags & FMT_FLAG_LEFT)) {
+        for (size_t i = 0; i + out_len < state->width; i++) {
+            fmt_state_putchar(state, ' ');
+        }
+    }
+
+    /* Pass 2: emit.  This must classify every byte exactly as pass 1
+     * did, which is why both passes share quote_decode_utf8(). */
+    fmt_state_putchar(state, '"');
+    for (size_t pos = 0; pos < in_len;) {
+        chlen = quote_decode_utf8(in, in_len, pos, &ch);
+        if (chlen == 0) {
+            /* Invalid UTF-8: escape this one byte and resynchronize
+             * on the next. */
+            fmt_state_putchar(state, '\\');
+            fmt_state_putchar(state, 'x');
+            quote_put_hex(state, (uint8_t)in[pos], 2);
+            pos++;
+            continue;
+        }
+        pos += chlen;
+
+        switch (needs_quote(ch)) {
+        case QUOTE_NONE:
+            fmt_state_putchar(state, ch);
+            break;
+        case QUOTE_SIMPLE:
+            fmt_state_putchar(state, '\\');
+            switch (ch) {
+            case '\a': fmt_state_putchar(state, 'a');  break;
+            case '\b': fmt_state_putchar(state, 'b');  break;
+            case '\f': fmt_state_putchar(state, 'f');  break;
+            case '\n': fmt_state_putchar(state, 'n');  break;
+            case '\r': fmt_state_putchar(state, 'r');  break;
+            case '\t': fmt_state_putchar(state, 't');  break;
+            case '\v': fmt_state_putchar(state, 'v');  break;
+            case '\\': fmt_state_putchar(state, '\\'); break;
+            case '\'': fmt_state_putchar(state, '\''); break;
+            case '"':  fmt_state_putchar(state, '"');  break;
+            case '?':  fmt_state_putchar(state, '?');  break;
+            }
+            break;
+        case QUOTE_U4:
+            fmt_state_putchar(state, '\\');
+            fmt_state_putchar(state, 'u');
+            quote_put_hex(state, ch, 4);
+            break;
+        case QUOTE_U8:
+            fmt_state_putchar(state, '\\');
+            fmt_state_putchar(state, 'U');
+            quote_put_hex(state, ch, 8);
+            break;
+        }
+    }
+    fmt_state_putchar(state, '"');
+
+    /* Right-padding applies only to left-justified output; otherwise
+     * the padding was already written before the opening quote. */
+    if (state->flags & FMT_FLAG_LEFT) {
+        for (size_t i = 0; i + out_len < state->width; i++) {
+            fmt_state_putchar(state, ' ');
+        }
+    }
+}
+
+/* Load-time hook (GNU constructor attribute): hands libfmt_conv_quote
+ * to fmt_install() under the conversion character 'q'. */
+[[gnu::constructor]]
+static void
+libfmt_install_quote(void)
+{
+    fmt_install('q', libfmt_conv_quote);
+}