summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--libmisc/fmt.c30
-rw-r--r--libmisc/tests/test_fmt.c64
-rw-r--r--libmisc/utf8.c18
3 files changed, 102 insertions, 10 deletions
diff --git a/libmisc/fmt.c b/libmisc/fmt.c
index a8baa84..7c18ef5 100644
--- a/libmisc/fmt.c
+++ b/libmisc/fmt.c
@@ -71,9 +71,33 @@ void fmt_print_ptr(lo_interface fmt_dest w, void *ptr) {
*/
void fmt_print_qbyte(lo_interface fmt_dest w, uint8_t b) {
fmt_print_byte(w, '\'');
- if (' ' <= b && b <= '~') {
- if (b == '\'' || b == '\\')
- fmt_print_byte(w, '\\');
+ if (b == '\0' ||
+ b == '\b' ||
+ b == '\f' ||
+ b == '\n' ||
+ b == '\r' ||
+ b == '\t' ||
+ b == '\v' ||
+ b == '\\' ||
+ b == '\'' ||
+ b == '"' ||
+ b == '?') {
+ fmt_print_byte(w, '\\');
+ switch (b) {
+ case '\0': fmt_print_byte(w, '0'); break;
+ case '\a': fmt_print_byte(w, 'a'); break;
+ case '\b': fmt_print_byte(w, 'b'); break;
+ case '\f': fmt_print_byte(w, 'f'); break;
+ case '\n': fmt_print_byte(w, 'n'); break;
+ case '\r': fmt_print_byte(w, 'r'); break;
+ case '\t': fmt_print_byte(w, 't'); break;
+ case '\v': fmt_print_byte(w, 'v'); break;
+ case '\\': fmt_print_byte(w, '\\'); break;
+ case '\'': fmt_print_byte(w, '\''); break;
+ case '"': fmt_print_byte(w, '"'); break;
+ case '?': fmt_print_byte(w, '?'); break;
+ }
+ } else if (' ' <= b && b <= '~') {
fmt_print_byte(w, b);
} else {
fmt_print_byte(w, '\\');
diff --git a/libmisc/tests/test_fmt.c b/libmisc/tests/test_fmt.c
index 6a6eb7c..a9157d6 100644
--- a/libmisc/tests/test_fmt.c
+++ b/libmisc/tests/test_fmt.c
@@ -62,6 +62,26 @@ int main() {
test_assert(strcmp(str, "\"hell\"") == 0);
memset(str, 0, sizeof(str));
+ do_print((qstr, "hello\xFFworld🚧"));
+ test_assert(strcmp(str, "\"hello\\xFFworld\\U0001F6A7\"") == 0);
+ memset(str, 0, sizeof(str));
+
+ do_print((qstr, "¡hello world!"));
+ test_assert(strcmp(str, "\"\\u00A1hello world!\"") == 0);
+ memset(str, 0, sizeof(str));
+
+ do_print((qmem, "🚧", 3)); /* truncated UTF-8 */
+ test_assert(strcmp(str, "\"\\xF0\\x9F\\x9A\"") == 0);
+ memset(str, 0, sizeof(str));
+
+ do_print((qmem, "\xF7\xBF\xBF\xBF", 4)); /* over unicode_max */
+ test_assert(strcmp(str, "\"\\xF7\\xBF\\xBF\\xBF\"") == 0);
+ memset(str, 0, sizeof(str));
+
+ do_print((qmem, "\xE0\xA0", 2)); /* non-optimal encoding (of ' ') */
+ test_assert(strcmp(str, "\"\\xE0\\xA0\"") == 0);
+ memset(str, 0, sizeof(str));
+
do_print((byte, 'h'), (byte, 'w'));
test_assert(strcmp(str, "hw") == 0);
memset(str, 0, sizeof(str));
@@ -70,6 +90,26 @@ int main() {
test_assert(strcmp(str, "'h''w'") == 0);
memset(str, 0, sizeof(str));
+ do_print((qbyte, 0));
+ test_assert(strcmp(str, "'\\0'") == 0);
+ memset(str, 0, sizeof(str));
+
+ do_print((qbyte, '\\'));
+ test_assert(strcmp(str, "'\\\\'") == 0);
+ memset(str, 0, sizeof(str));
+
+ do_print((qbyte, '\''));
+ test_assert(strcmp(str, "'\\''") == 0);
+ memset(str, 0, sizeof(str));
+
+ do_print((qbyte, '\n'));
+ test_assert(strcmp(str, "'\\n'") == 0);
+ memset(str, 0, sizeof(str));
+
+ do_print((qbyte, 0xff));
+ test_assert(strcmp(str, "'\\xFF'") == 0);
+ memset(str, 0, sizeof(str));
+
do_print("zero ", 0);
test_assert(strcmp(str, "zero 0") == 0);
memset(str, 0, sizeof(str));
@@ -166,5 +206,29 @@ int main() {
test_assert(strcmp(str, " 1x") == 0);
memset(str, 0, sizeof(str));
+ do_print((base16_u8_, 1));
+ test_assert(strcmp(str, "0x01") == 0);
+ memset(str, 0, sizeof(str));
+
+ do_print((base16_u16_, 1));
+ test_assert(strcmp(str, "0x0001") == 0);
+ memset(str, 0, sizeof(str));
+
+ do_print((base16_u32_, 1));
+ test_assert(strcmp(str, "0x00000001") == 0);
+ memset(str, 0, sizeof(str));
+
+ do_print((base16_u64_, 1));
+ test_assert(strcmp(str, "0x0000000000000001") == 0);
+ memset(str, 0, sizeof(str));
+
+ do_print((hbyte, 1));
+ test_assert(strcmp(str, "0x01") == 0);
+ memset(str, 0, sizeof(str));
+
+ do_print((hmem, "hello", 6));
+ test_assert(strcmp(str, "{0x68,0x65,0x6C,0x6C,0x6F,0x00}") == 0);
+ memset(str, 0, sizeof(str));
+
return 0;
}
diff --git a/libmisc/utf8.c b/libmisc/utf8.c
index 5f91021..28357f0 100644
--- a/libmisc/utf8.c
+++ b/libmisc/utf8.c
@@ -9,17 +9,21 @@
void utf8_decode_codepoint(const uint8_t *str, size_t len, uint32_t *ret_ch, uint8_t *ret_chlen) {
uint32_t ch;
uint8_t chlen;
- if ((str[0] & 0b10000000) == 0b00000000) { ch = str[0] & 0b01111111; chlen = 1; }
- else if ((str[0] & 0b11100000) == 0b11000000) { ch = str[0] & 0b00011111; chlen = 2; }
- else if ((str[0] & 0b11110000) == 0b11100000) { ch = str[0] & 0b00001111; chlen = 3; }
- else if ((str[0] & 0b11111000) == 0b11110000) { ch = str[0] & 0b00000111; chlen = 4; }
+ uint32_t chmin;
+ if ((str[0] & 0b10000000) == 0b00000000) { ch = str[0] & 0b01111111; chlen = 1; chmin = 0; } /* bits=7+(0*6)= 7 */
+ else if ((str[0] & 0b11100000) == 0b11000000) { ch = str[0] & 0b00011111; chlen = 2; chmin = UINT32_C(1)<< 7; } /* bits=5+(1*6)=11 */
+ else if ((str[0] & 0b11110000) == 0b11100000) { ch = str[0] & 0b00001111; chlen = 3; chmin = UINT32_C(1)<<11; } /* bits=4+(2*6)=16 */
+ else if ((str[0] & 0b11111000) == 0b11110000) { ch = str[0] & 0b00000111; chlen = 4; chmin = UINT32_C(1)<<16; } /* bits=3+(3*6)=21 */
else goto invalid;
- if ((ch == 0 && chlen != 1) || chlen > len) goto invalid;
+ if (chlen > len)
+ goto invalid;
for (uint8_t i = 1; i < chlen; i++) {
- if ((str[i] & 0b11000000) != 0b10000000) goto invalid;
+ if ((str[i] & 0b11000000) != 0b10000000)
+ goto invalid;
ch = (ch << 6) | (str[i] & 0b00111111);
}
- if (ch > 0x10FFFF) goto invalid;
+ if (ch > 0x10FFFF || ch < chmin)
+ goto invalid;
*ret_ch = ch;
*ret_chlen = chlen;
return;