diff options
author | Luke T. Shumaker <lukeshu@lukeshu.com> | 2025-05-17 03:20:11 -0600 |
---|---|---|
committer | Luke T. Shumaker <lukeshu@lukeshu.com> | 2025-05-17 12:57:08 -0600 |
commit | ab9103440ade87509a1a3bd1eaad0b5396a89d1e (patch) | |
tree | 423673909d0af66d4ef5e260ce58b4b554bf2024 /lib9p/core_utf8.h | |
parent | d505a998aafe5af8b02a2b2c2acf7e708812c3fc (diff) |
Pull UTF-8 decoding into libmisc/utf8.c
Diffstat (limited to 'lib9p/core_utf8.h')
-rw-r--r-- | lib9p/core_utf8.h | 36 |
1 files changed, 0 insertions, 36 deletions
/* lib9p/core_utf8.h - Internal UTF-8 validation
 *
 * Copyright (C) 2024-2025 Luke T. Shumaker <lukeshu@lukeshu.com>
 * SPDX-License-Identifier: AGPL-3.0-or-later
 */

#ifndef _LIB9P_CORE_UTF8_H_
#define _LIB9P_CORE_UTF8_H_

#include <stdbool.h> /* for bool */
#include <stddef.h>  /* for size_t */
#include <stdint.h>  /* for uint{n}_t */

/**
 * Check whether the `len` bytes at `str` form a well-formed UTF-8
 * string.
 *
 * A sequence is rejected if any of the following hold:
 *  - a lead byte is not one of the 1-, 2-, 3-, or 4-byte lead patterns;
 *  - the sequence is truncated by the end of the buffer;
 *  - a continuation byte does not have the form 10xxxxxx;
 *  - the encoding is overlong (the codepoint would fit in a shorter
 *    sequence; this includes the 2-byte "modified UTF-8" NUL, C0 80);
 *  - the codepoint is a UTF-16 surrogate (U+D800..U+DFFF);
 *  - the codepoint is greater than U+10FFFF;
 *  - the codepoint is U+0000 and `forbid_nul` is true.
 *
 * @param str        bytes to validate; not accessed when `len` is 0
 * @param len        number of bytes at `str`
 * @param forbid_nul whether an embedded NUL byte makes the string invalid
 * @return true if the whole buffer is valid (an empty buffer is valid)
 */
static inline bool _is_valid_utf8(const uint8_t *str, size_t len, bool forbid_nul) {
	for (size_t pos = 0; pos < len;) {
		const uint8_t lead = str[pos];
		uint32_t ch;     /* codepoint being decoded */
		uint32_t min_ch; /* smallest codepoint that needs `chlen` bytes */
		uint8_t  chlen;  /* total length of the sequence, in bytes */

		if      ((lead & 0x80) == 0x00) { ch = lead;        chlen = 1; min_ch = 0x0;     } /* 0xxxxxxx */
		else if ((lead & 0xE0) == 0xC0) { ch = lead & 0x1F; chlen = 2; min_ch = 0x80;    } /* 110xxxxx */
		else if ((lead & 0xF0) == 0xE0) { ch = lead & 0x0F; chlen = 3; min_ch = 0x800;   } /* 1110xxxx */
		else if ((lead & 0xF8) == 0xF0) { ch = lead & 0x07; chlen = 4; min_ch = 0x10000; } /* 11110xxx */
		else {
			return false;
		}

		/* `pos < len` here, so `len - pos` cannot wrap (unlike
		 * `pos + chlen`).  */
		if (chlen > len - pos) {
			return false;
		}

		for (uint8_t i = 1; i < chlen; i++) {
			const uint8_t cont = str[pos + i];
			if ((cont & 0xC0) != 0x80) { /* must be 10xxxxxx */
				return false;
			}
			ch = (ch << 6) | (cont & 0x3F);
		}

		/* These checks must look at the fully-decoded codepoint;
		 * the payload bits of the lead byte alone are
		 * legitimately zero for codepoints such as U+0800 (E0
		 * A0 80) or U+1F600 (F0 9F 98 80).  */
		if (ch < min_ch) { /* overlong encoding */
			return false;
		}
		if (ch >= 0xD800 && ch <= 0xDFFF) { /* UTF-16 surrogate */
			return false;
		}
		if (ch > 0x10FFFF) { /* beyond the Unicode codespace */
			return false;
		}
		if (ch == 0 && forbid_nul) {
			return false;
		}

		pos += chlen;
	}
	return true;
}

#define is_valid_utf8(str, len)             _is_valid_utf8(str, len, false)
#define is_valid_utf8_without_nul(str, len) _is_valid_utf8(str, len, true)

#endif /* _LIB9P_CORE_UTF8_H_ */