summaryrefslogtreecommitdiff
path: root/lib9p/utf8.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib9p/utf8.h')
-rw-r--r--lib9p/utf8.h34
1 files changed, 34 insertions, 0 deletions
diff --git a/lib9p/utf8.h b/lib9p/utf8.h
new file mode 100644
index 0000000..5ffd674
--- /dev/null
+++ b/lib9p/utf8.h
@@ -0,0 +1,34 @@
+/* lib9p/utf8.h - Internal UTF-8 validation
+ *
+ * Copyright (C) 2024-2025 Luke T. Shumaker <lukeshu@lukeshu.com>
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+#ifndef _LIB9P_UTF8_H_
+#define _LIB9P_UTF8_H_
+
+static inline bool _is_valid_utf8(uint8_t *str, size_t len, bool forbid_nul) {
+ uint32_t ch;
+ uint8_t chlen;
+ assert(str);
+ for (size_t pos = 0; pos < len;) {
+ if ((str[pos] & 0b10000000) == 0b00000000) { ch = str[pos] & 0b01111111; chlen = 1; }
+ else if ((str[pos] & 0b11100000) == 0b11000000) { ch = str[pos] & 0b00011111; chlen = 2; }
+ else if ((str[pos] & 0b11110000) == 0b11100000) { ch = str[pos] & 0b00001111; chlen = 3; }
+ else if ((str[pos] & 0b11111000) == 0b11110000) { ch = str[pos] & 0b00000111; chlen = 4; }
+ else return false;
+ if ((ch == 0 && (chlen != 1 || forbid_nul)) || pos + chlen > len) return false;
+ for (uint8_t i = 1; i < chlen; i++) {
+ if ((str[pos+i] & 0b11000000) != 0b10000000) return false;
+ ch = (ch << 6) | (str[pos+i] & 0b00111111);
+ }
+ if (ch > 0x10FFFF) return false;
+ pos += chlen;
+ }
+ return true;
+}
+
+#define is_valid_utf8(str, len) _is_valid_utf8(str, len, false)
+#define is_valid_utf8_without_nul(str, len) _is_valid_utf8(str, len, true)
+
+#endif /* _LIB9P_UTF8_H_ */