/* Provenance: taken from commit 1e4ad99311611a7fbb5cdfe4b1b79202381dc717
 * by "Luke T. Shumaker", dated Sun, 13 Apr 2025 22:13:43 -0600,
 * "Split lib9p into lib9p_core and lib9p_srv" (creates lib9p/core_utf8.h).
 */

/* lib9p/core_utf8.h - Internal UTF-8 validation
 *
 * Copyright (C) 2024-2025 Luke T. Shumaker
 * SPDX-License-Identifier: AGPL-3.0-or-later
 */

#ifndef _LIB9P_CORE_UTF8_H_
#define _LIB9P_CORE_UTF8_H_

/* This header includes nothing itself: `bool`, `uint8_t`, `uint32_t`,
 * `size_t`, and `assert()` must already be in scope where it is
 * included.
 */

/**
 * Check whether a byte buffer is well-formed UTF-8.
 *
 * A buffer is accepted only if it consists entirely of complete
 * sequences that each:
 *  - start with a valid 1-, 2-, 3-, or 4-byte lead byte,
 *  - are followed by the right number of 10xxxxxx continuation bytes,
 *  - use the shortest possible encoding (no overlong forms, so the
 *    2-byte "modified UTF-8" NUL `C0 80` is rejected),
 *  - do not encode a UTF-16 surrogate (U+D800..U+DFFF), and
 *  - do not encode a value above U+10FFFF.
 *
 * @param str        the bytes to check; must not be NULL (asserted)
 * @param len        number of bytes in `str`; 0 is valid (empty string)
 * @param forbid_nul if true, additionally reject any U+0000
 * @return true if the whole buffer is valid, false otherwise
 */
static inline bool _is_valid_utf8(const uint8_t *str, size_t len, bool forbid_nul) {
	assert(str);
	for (size_t pos = 0; pos < len;) {
		const uint8_t lead = str[pos];
		uint32_t ch;      /* code point being decoded */
		uint32_t min_cp;  /* smallest code point that needs `chlen` bytes */
		uint8_t  chlen;   /* total bytes in this sequence */

		if      ((lead & 0x80) == 0x00) { ch = lead & 0x7F; chlen = 1; min_cp = 0x00000; } /* 0xxxxxxx */
		else if ((lead & 0xE0) == 0xC0) { ch = lead & 0x1F; chlen = 2; min_cp = 0x00080; } /* 110xxxxx */
		else if ((lead & 0xF0) == 0xE0) { ch = lead & 0x0F; chlen = 3; min_cp = 0x00800; } /* 1110xxxx */
		else if ((lead & 0xF8) == 0xF0) { ch = lead & 0x07; chlen = 4; min_cp = 0x10000; } /* 11110xxx */
		else return false; /* stray continuation byte or invalid lead byte */

		/* Truncated sequence?  `pos < len` holds here, so the
		 * subtraction cannot wrap (unlike `pos + chlen`). */
		if (chlen > len - pos)
			return false;

		for (uint8_t i = 1; i < chlen; i++) {
			const uint8_t cont = str[pos+i];
			if ((cont & 0xC0) != 0x80) /* must be 10xxxxxx */
				return false;
			ch = (ch << 6) | (cont & 0x3F);
		}

		/* These checks need the fully decoded value; looking only at
		 * the lead byte's payload bits would wrongly reject valid
		 * sequences such as E0 A0 80 (U+0800) or F0 9F 98 80
		 * (U+1F600). */
		if (ch < min_cp)
			return false; /* overlong encoding */
		if (ch >= 0xD800 && ch <= 0xDFFF)
			return false; /* UTF-16 surrogate */
		if (ch > 0x10FFFF)
			return false; /* beyond the Unicode range */
		if (ch == 0 && forbid_nul)
			return false;

		pos += chlen;
	}
	return true;
}

/* Validate UTF-8, permitting embedded U+0000. */
#define is_valid_utf8(str, len)             _is_valid_utf8(str, len, false)
/* Validate UTF-8, rejecting any U+0000. */
#define is_valid_utf8_without_nul(str, len) _is_valid_utf8(str, len, true)

#endif /* _LIB9P_CORE_UTF8_H_ */