summary refs log tree commit diff
path: root/libmisc/utf8.c
diff options
context:
space:
mode:
Diffstat (limited to 'libmisc/utf8.c')
-rw-r--r-- libmisc/utf8.c 18
1 files changed, 11 insertions, 7 deletions
diff --git a/libmisc/utf8.c b/libmisc/utf8.c
index 5f91021..28357f0 100644
--- a/libmisc/utf8.c
+++ b/libmisc/utf8.c
@@ -9,17 +9,21 @@
void utf8_decode_codepoint(const uint8_t *str, size_t len, uint32_t *ret_ch, uint8_t *ret_chlen) {
uint32_t ch;
uint8_t chlen;
- if ((str[0] & 0b10000000) == 0b00000000) { ch = str[0] & 0b01111111; chlen = 1; }
- else if ((str[0] & 0b11100000) == 0b11000000) { ch = str[0] & 0b00011111; chlen = 2; }
- else if ((str[0] & 0b11110000) == 0b11100000) { ch = str[0] & 0b00001111; chlen = 3; }
- else if ((str[0] & 0b11111000) == 0b11110000) { ch = str[0] & 0b00000111; chlen = 4; }
+ uint32_t chmin;
+ if ((str[0] & 0b10000000) == 0b00000000) { ch = str[0] & 0b01111111; chlen = 1; chmin = 0; } /* bits=7+(0*6)= 7 */
+ else if ((str[0] & 0b11100000) == 0b11000000) { ch = str[0] & 0b00011111; chlen = 2; chmin = UINT32_C(1)<< 7; } /* bits=5+(1*6)=11 */
+ else if ((str[0] & 0b11110000) == 0b11100000) { ch = str[0] & 0b00001111; chlen = 3; chmin = UINT32_C(1)<<11; } /* bits=4+(2*6)=16 */
+ else if ((str[0] & 0b11111000) == 0b11110000) { ch = str[0] & 0b00000111; chlen = 4; chmin = UINT32_C(1)<<16; } /* bits=3+(3*6)=21 */
else goto invalid;
- if ((ch == 0 && chlen != 1) || chlen > len) goto invalid;
+ if (chlen > len)
+ goto invalid;
for (uint8_t i = 1; i < chlen; i++) {
- if ((str[i] & 0b11000000) != 0b10000000) goto invalid;
+ if ((str[i] & 0b11000000) != 0b10000000)
+ goto invalid;
ch = (ch << 6) | (str[i] & 0b00111111);
}
- if (ch > 0x10FFFF) goto invalid;
+ if (ch > 0x10FFFF || ch < chmin)
+ goto invalid;
*ret_ch = ch;
*ret_chlen = chlen;
return;