blob: 5ffd674e82ab3dbd1720f1bd6aaf2b0fde69d26f (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
|
/* lib9p/utf8.h - Internal UTF-8 validation
*
* Copyright (C) 2024-2025 Luke T. Shumaker <lukeshu@lukeshu.com>
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
#ifndef _LIB9P_UTF8_H_
#define _LIB9P_UTF8_H_
/**
 * Check whether a byte buffer is well-formed UTF-8 (per RFC 3629).
 *
 * A buffer is rejected if it contains any of:
 *  - a byte that cannot start a sequence (stray continuation byte, or
 *    a lead byte announcing more than 4 bytes);
 *  - a sequence that is truncated by the end of the buffer;
 *  - a continuation byte that is not of the form 10xxxxxx;
 *  - an overlong encoding (a code point encoded in more bytes than
 *    necessary, e.g. C0 80 for U+0000);
 *  - a UTF-16 surrogate (U+D800..U+DFFF);
 *  - a code point above U+10FFFF;
 *  - U+0000, if `forbid_nul` is set.
 *
 * @param str        bytes to validate; must not be NULL (asserted),
 *                   even when `len` is 0
 * @param len        number of bytes at `str`
 * @param forbid_nul whether to treat an embedded NUL byte as invalid
 * @return true if all `len` bytes form valid UTF-8 (an empty buffer
 *         is valid), false otherwise
 */
static inline bool _is_valid_utf8(const uint8_t *str, size_t len, bool forbid_nul) {
	assert(str);
	for (size_t pos = 0; pos < len;) {
		const uint8_t lead = str[pos];
		uint32_t ch;     /* code point being decoded */
		uint32_t min_ch; /* smallest code point that needs `chlen` bytes */
		uint8_t chlen;   /* total bytes in this sequence */

		if ((lead & 0x80) == 0x00) {
			ch = lead & 0x7F; chlen = 1; min_ch = 0x0;
		} else if ((lead & 0xE0) == 0xC0) {
			ch = lead & 0x1F; chlen = 2; min_ch = 0x80;
		} else if ((lead & 0xF0) == 0xE0) {
			ch = lead & 0x0F; chlen = 3; min_ch = 0x800;
		} else if ((lead & 0xF8) == 0xF0) {
			ch = lead & 0x07; chlen = 4; min_ch = 0x10000;
		} else {
			return false;
		}

		/* Written as a subtraction (pos < len here) so it cannot wrap. */
		if (chlen > len - pos) {
			return false;
		}

		for (uint8_t i = 1; i < chlen; i++) {
			if ((str[pos + i] & 0xC0) != 0x80) {
				return false;
			}
			ch = (ch << 6) | (str[pos + i] & 0x3F);
		}

		/* Overlong check must use the fully decoded value: lead
		 * bytes E0 and F0 carry all-zero payload bits yet begin
		 * valid sequences (U+0800..U+0FFF, U+10000..U+3FFFF). */
		if (ch < min_ch) {
			return false;
		}
		if (ch >= 0xD800 && ch <= 0xDFFF) {
			return false;
		}
		if (ch > 0x10FFFF) {
			return false;
		}
		if (ch == 0 && forbid_nul) {
			return false;
		}

		pos += chlen;
	}
	return true;
}
/* Validate `len` bytes at `str` as UTF-8; calls _is_valid_utf8() with
 * forbid_nul=false, so a single 0x00 byte (U+0000) is accepted. */
#define is_valid_utf8(str, len) _is_valid_utf8(str, len, false)
/* Same as is_valid_utf8(), but calls _is_valid_utf8() with
 * forbid_nul=true, so a buffer containing a 0x00 byte is rejected. */
#define is_valid_utf8_without_nul(str, len) _is_valid_utf8(str, len, true)
#endif /* _LIB9P_UTF8_H_ */
|