diff options
author | Luke T. Shumaker <lukeshu@lukeshu.com> | 2025-05-27 19:47:25 -0400 |
---|---|---|
committer | Luke T. Shumaker <lukeshu@lukeshu.com> | 2025-05-27 19:47:25 -0400 |
commit | bf3667b8b76eefd95e33e32b4f5abbf2de0e2065 (patch) | |
tree | 241b909f0d68f3c0f355ad924375c2fa5b9a19ae /libmisc | |
parent | 42fb27570262b52e2ca889030c621b5f4af76fe1 (diff) | |
parent | 968f7710458f44d5e62d2624461f4e8459c04168 (diff) |
Diffstat (limited to 'libmisc')
-rw-r--r-- | libmisc/CMakeLists.txt | 4 | ||||
-rw-r--r-- | libmisc/endian.c | 136 | ||||
-rw-r--r-- | libmisc/fmt.c | 25 | ||||
-rw-r--r-- | libmisc/hash.c | 24 | ||||
-rw-r--r-- | libmisc/include/libmisc/endian.h | 219 | ||||
-rw-r--r-- | libmisc/include/libmisc/fmt.h | 22 | ||||
-rw-r--r-- | libmisc/include/libmisc/hash.h | 20 | ||||
-rw-r--r-- | libmisc/include/libmisc/rand.h | 32 | ||||
-rw-r--r-- | libmisc/include/libmisc/utf8.h | 33 | ||||
-rw-r--r-- | libmisc/rand.c | 38 | ||||
-rw-r--r-- | libmisc/utf8.c | 40 |
11 files changed, 303 insertions, 290 deletions
diff --git a/libmisc/CMakeLists.txt b/libmisc/CMakeLists.txt index 48407bd..c6405ad 100644 --- a/libmisc/CMakeLists.txt +++ b/libmisc/CMakeLists.txt @@ -7,11 +7,15 @@ add_library(libmisc INTERFACE) target_include_directories(libmisc PUBLIC INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/include) target_sources(libmisc INTERFACE assert.c + endian.c fmt.c + hash.c intercept.c linkedlist.c log.c map.c + rand.c + utf8.c ) add_lib_test(libmisc test_assert) diff --git a/libmisc/endian.c b/libmisc/endian.c new file mode 100644 index 0000000..2528f48 --- /dev/null +++ b/libmisc/endian.c @@ -0,0 +1,136 @@ +/* libmisc/endian.c - Endian-conversion helpers + * + * Copyright (C) 2024-2025 Luke T. Shumaker <lukeshu@lukeshu.com> + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +#include <libmisc/macro.h> /* for LM_FORCE_SEMICOLON */ + +#include <libmisc/endian.h> + +#define endian_declare_wrappers(NBIT, ENDIAN) \ + uint##NBIT##ENDIAN##_t uint##NBIT##ENDIAN##_marshal(uint##NBIT##_t in) { \ + uint##NBIT##ENDIAN##_t out; \ + uint##NBIT##ENDIAN##_encode(out.octets, in); \ + return out; \ + } \ + uint##NBIT##_t uint##NBIT##ENDIAN##_unmarshal(uint##NBIT##ENDIAN##_t in) { \ + return uint##NBIT##ENDIAN##_decode(in.octets); \ + } \ + LM_FORCE_SEMICOLON + +/* Big endian *****************************************************************/ + +size_t uint16be_encode(uint8_t *out, uint16_t in) { + out[0] = (uint8_t)((in >> 8) & 0xFF); + out[1] = (uint8_t)((in >> 0) & 0xFF); + return 2; +} + +uint16_t uint16be_decode(uint8_t *in) { + return (((uint16_t)(in[0])) << 8) + | (((uint16_t)(in[1])) << 0) + ; +} + +size_t uint32be_encode(uint8_t *out, uint32_t in) { + out[0] = (uint8_t)((in >> 24) & 0xFF); + out[1] = (uint8_t)((in >> 16) & 0xFF); + out[2] = (uint8_t)((in >> 8) & 0xFF); + out[3] = (uint8_t)((in >> 0) & 0xFF); + return 4; +} + +uint32_t uint32be_decode(uint8_t *in) { + return (((uint32_t)(in[0])) << 24) + | (((uint32_t)(in[1])) << 16) + | (((uint32_t)(in[2])) << 8) + | (((uint32_t)(in[3])) << 0) + ; +} + +size_t uint64be_encode(uint8_t *out, uint64_t in) { + out[0] = (uint8_t)((in >> 56) & 0xFF); + out[1] = (uint8_t)((in >> 48) & 0xFF); + out[2] = (uint8_t)((in >> 40) & 0xFF); + out[3] = (uint8_t)((in >> 32) & 0xFF); + out[4] = (uint8_t)((in >> 24) & 0xFF); + out[5] = (uint8_t)((in >> 16) & 0xFF); + out[6] = (uint8_t)((in >> 8) & 0xFF); + out[7] = (uint8_t)((in >> 0) & 0xFF); + return 8; +} + +uint64_t uint64be_decode(uint8_t *in) { + return (((uint64_t)(in[0])) << 56) + | (((uint64_t)(in[1])) << 48) + | (((uint64_t)(in[2])) << 40) + | (((uint64_t)(in[3])) << 32) + | (((uint64_t)(in[4])) << 24) + | (((uint64_t)(in[5])) << 16) + | (((uint64_t)(in[6])) << 8) + | (((uint64_t)(in[7])) << 0) + ; +} + +endian_declare_wrappers(16, be); +endian_declare_wrappers(32, be); +endian_declare_wrappers(64, be); + +/* Little endian **************************************************************/ + +size_t uint16le_encode(uint8_t *out, uint16_t in) { + out[0] = (uint8_t)((in >> 0) & 0xFF); + out[1] = (uint8_t)((in >> 8) & 0xFF); + return 2; +} + +uint16_t uint16le_decode(uint8_t *in) { + return (((uint16_t)(in[0])) << 0) + | (((uint16_t)(in[1])) << 8) + ; +} + +size_t uint32le_encode(uint8_t *out, uint32_t in) { + out[0] = (uint8_t)((in >> 0) & 0xFF); + out[1] = (uint8_t)((in >> 8) & 0xFF); + out[2] = (uint8_t)((in >> 16) & 0xFF); + out[3] = (uint8_t)((in >> 24) & 0xFF); + return 4; +} + +uint32_t uint32le_decode(uint8_t *in) { + return (((uint32_t)(in[0])) << 0) + | (((uint32_t)(in[1])) << 8) + | (((uint32_t)(in[2])) << 16) + | (((uint32_t)(in[3])) << 24) + ; +} + +size_t uint64le_encode(uint8_t *out, uint64_t in) { + out[0] = (uint8_t)((in >> 0) & 0xFF); + out[1] = (uint8_t)((in >> 8) & 0xFF); + out[2] = (uint8_t)((in >> 16) & 0xFF); + out[3] = (uint8_t)((in >> 24) & 0xFF); + out[4] = (uint8_t)((in >> 32) & 0xFF); + out[5] = (uint8_t)((in >> 40) & 0xFF); + out[6] = (uint8_t)((in >> 48) & 0xFF); + out[7] = (uint8_t)((in >> 56) & 0xFF); + return 8; +} + +uint64_t uint64le_decode(uint8_t *in) { + return (((uint64_t)(in[0])) << 0) + | (((uint64_t)(in[1])) << 8) + | (((uint64_t)(in[2])) << 16) + | (((uint64_t)(in[3])) << 24) + | (((uint64_t)(in[4])) << 32) + | (((uint64_t)(in[5])) << 40) + | (((uint64_t)(in[6])) << 48) + | (((uint64_t)(in[7])) << 56) + ; +} + +endian_declare_wrappers(16, le); +endian_declare_wrappers(32, le); +endian_declare_wrappers(64, le); diff --git a/libmisc/fmt.c b/libmisc/fmt.c index 6cf1d8d..a8baa84 100644 --- a/libmisc/fmt.c +++ b/libmisc/fmt.c @@ -14,6 +14,31 @@ static const char *const hexdig = "0123456789ABCDEF"; /* small/trivial formatters ***************************************************/ +void fmt_print_mem(lo_interface fmt_dest w, const void *_str, size_t size) { + const uint8_t *str = _str; + while (size--) + fmt_print_byte(w, *(str++)); +} +void fmt_print_str(lo_interface fmt_dest w, const char *str) { + while (*str) + fmt_print_byte(w, *(str++)); +} +void fmt_print_strn(lo_interface fmt_dest w, const char *str, size_t size) { + while (size-- && *str) + fmt_print_byte(w, *(str++)); +} + +void fmt_print_hmem(lo_interface fmt_dest w, const void *_str, size_t size) { + const uint8_t *str = _str; + fmt_print_byte(w, '{'); + for (size_t i = 0; i < size; i++) { + if (i) + fmt_print_byte(w, ','); + fmt_print_hbyte(w, str[i]); + } + fmt_print_byte(w, '}'); +} + void fmt_print_byte(lo_interface fmt_dest w, uint8_t b) { LO_CALL(w, putb, b); } diff --git a/libmisc/hash.c b/libmisc/hash.c new file mode 100644 index 0000000..3814cec --- /dev/null +++ b/libmisc/hash.c @@ -0,0 +1,24 @@ +/* libmisc/hash.c - General-purpose hash utilities + * + * Copyright (C) 2024-2025 Luke T. Shumaker <lukeshu@lukeshu.com> + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +#include <libmisc/hash.h> + +/* djb2 hash */ +void hash_init(hash_t *hash) { + *hash = 5381; +} +void hash_write(hash_t *hash, void *dat, size_t len) { + for (size_t i = 0; i < len; i++) + *hash = (*hash * 33) + (hash_t)(((unsigned char *)dat)[i]); +} + +/* utilities */ +hash_t hash(void *dat, size_t len) { + hash_t h; + hash_init(&h); + hash_write(&h, dat, len); + return h; +} diff --git a/libmisc/include/libmisc/endian.h b/libmisc/include/libmisc/endian.h index 75240fe..966c3bc 100644 --- a/libmisc/include/libmisc/endian.h +++ b/libmisc/include/libmisc/endian.h @@ -10,204 +10,25 @@ #include <stddef.h> /* for size_t */ #include <stdint.h> /* for uint{n}_t */ -#include <libmisc/assert.h> - -/* Big endian *****************************************************************/ - -typedef struct { - uint8_t octets[2]; -} uint16be_t; -static_assert(sizeof(uint16be_t) == 2); - -static inline size_t uint16be_encode(uint8_t *out, uint16_t in) { - out[0] = (uint8_t)((in >> 8) & 0xFF); - out[1] = (uint8_t)((in >> 0) & 0xFF); - return 2; -} - -static inline uint16_t uint16be_decode(uint8_t *in) { - return (((uint16_t)(in[0])) << 8) - | (((uint16_t)(in[1])) << 0) - ; -} - -static inline uint16be_t uint16be_marshal(uint16_t in) { - uint16be_t out; - uint16be_encode(out.octets, in); - return out; -} - -static inline uint16_t uint16be_unmarshal(uint16be_t in) { - return uint16be_decode(in.octets); -} - -typedef struct { - uint8_t octets[4]; -} uint32be_t; -static_assert(sizeof(uint32be_t) == 4); - -static inline size_t uint32be_encode(uint8_t *out, uint32_t in) { - out[0] = (uint8_t)((in >> 24) & 0xFF); - out[1] = (uint8_t)((in >> 16) & 0xFF); - out[2] = (uint8_t)((in >> 8) & 0xFF); - out[3] = (uint8_t)((in >> 0) & 0xFF); - return 4; -} - -static inline uint32_t uint32be_decode(uint8_t *in) { - return (((uint32_t)(in[0])) << 24) - | (((uint32_t)(in[1])) << 16) - | (((uint32_t)(in[2])) << 8) - | (((uint32_t)(in[3])) << 0) - ; -} - -static inline uint32be_t uint32be_marshal(uint32_t in) { - uint32be_t out; - uint32be_encode(out.octets, in); - return out; -} - -static inline uint32_t uint32be_unmarshal(uint32be_t in) { - return uint32be_decode(in.octets); -} - -typedef struct { - uint8_t octets[8]; -} uint64be_t; -static_assert(sizeof(uint64be_t) == 8); - -static inline size_t uint64be_encode(uint8_t *out, uint64_t in) { - out[0] = (uint8_t)((in >> 56) & 0xFF); - out[1] = (uint8_t)((in >> 48) & 0xFF); - out[2] = (uint8_t)((in >> 40) & 0xFF); - out[3] = (uint8_t)((in >> 32) & 0xFF); - out[4] = (uint8_t)((in >> 24) & 0xFF); - out[5] = (uint8_t)((in >> 16) & 0xFF); - out[6] = (uint8_t)((in >> 8) & 0xFF); - out[7] = (uint8_t)((in >> 0) & 0xFF); - return 8; -} - -static inline uint64_t uint64be_decode(uint8_t *in) { - return (((uint64_t)(in[0])) << 56) - | (((uint64_t)(in[1])) << 48) - | (((uint64_t)(in[2])) << 40) - | (((uint64_t)(in[3])) << 32) - | (((uint64_t)(in[4])) << 24) - | (((uint64_t)(in[5])) << 16) - | (((uint64_t)(in[6])) << 8) - | (((uint64_t)(in[7])) << 0) - ; -} - -static inline uint64be_t uint64be_marshal(uint64_t in) { - uint64be_t out; - uint64be_encode(out.octets, in); - return out; -} - -static inline uint64_t uint64be_unmarshal(uint64be_t in) { - return uint64be_decode(in.octets); -} - -/* Little endian **************************************************************/ - -typedef struct { - uint8_t octets[2]; -} uint16le_t; -static_assert(sizeof(uint16le_t) == 2); - -static inline size_t uint16le_encode(uint8_t *out, uint16_t in) { - out[0] = (uint8_t)((in >> 0) & 0xFF); - out[1] = (uint8_t)((in >> 8) & 0xFF); - return 2; -} - -static inline uint16_t uint16le_decode(uint8_t *in) { - return (((uint16_t)(in[0])) << 0) - | (((uint16_t)(in[1])) << 8) - ; -} - -static inline uint16le_t uint16le_marshal(uint16_t in) { - uint16le_t out; - uint16le_encode(out.octets, in); - return out; -} - -static inline uint16_t uint16le_unmarshal(uint16le_t in) { - return uint16le_decode(in.octets); -} - -typedef struct { - uint8_t octets[4]; -} uint32le_t; -static_assert(sizeof(uint32le_t) == 4); - -static inline size_t uint32le_encode(uint8_t *out, uint32_t in) { - out[0] = (uint8_t)((in >> 0) & 0xFF); - out[1] = (uint8_t)((in >> 8) & 0xFF); - out[2] = (uint8_t)((in >> 16) & 0xFF); - out[3] = (uint8_t)((in >> 24) & 0xFF); - return 4; -} - -static inline uint32_t uint32le_decode(uint8_t *in) { - return (((uint32_t)(in[0])) << 0) - | (((uint32_t)(in[1])) << 8) - | (((uint32_t)(in[2])) << 16) - | (((uint32_t)(in[3])) << 24) - ; -} - -static inline uint32le_t uint32le_marshal(uint32_t in) { - uint32le_t out; - uint32le_encode(out.octets, in); - return out; -} - -static inline uint32_t uint32le_unmarshal(uint32le_t in) { - return uint32le_decode(in.octets); -} - -typedef struct { - uint8_t octets[8]; -} uint64le_t; -static_assert(sizeof(uint64le_t) == 8); - -static inline size_t uint64le_encode(uint8_t *out, uint64_t in) { - out[0] = (uint8_t)((in >> 0) & 0xFF); - out[1] = (uint8_t)((in >> 8) & 0xFF); - out[2] = (uint8_t)((in >> 16) & 0xFF); - out[3] = (uint8_t)((in >> 24) & 0xFF); - out[4] = (uint8_t)((in >> 32) & 0xFF); - out[5] = (uint8_t)((in >> 40) & 0xFF); - out[6] = (uint8_t)((in >> 48) & 0xFF); - out[7] = (uint8_t)((in >> 56) & 0xFF); - return 8; -} - -static inline uint64_t uint64le_decode(uint8_t *in) { - return (((uint64_t)(in[0])) << 0) - | (((uint64_t)(in[1])) << 8) - | (((uint64_t)(in[2])) << 16) - | (((uint64_t)(in[3])) << 24) - | (((uint64_t)(in[4])) << 32) - | (((uint64_t)(in[5])) << 40) - | (((uint64_t)(in[6])) << 48) - | (((uint64_t)(in[7])) << 56) - ; -} - -static inline uint64le_t uint64le_marshal(uint64_t in) { - uint64le_t out; - uint64le_encode(out.octets, in); - return out; -} - -static inline uint64_t uint64le_unmarshal(uint64le_t in) { - return uint64le_decode(in.octets); -} +#define _endian_declare_conv(NBIT, ENDIAN) \ + /* byte array encode/decode */ \ + size_t uint##NBIT##ENDIAN##_encode(uint8_t *out, uint##NBIT##_t in); \ + uint##NBIT##_t uint##NBIT##ENDIAN##_decode(uint8_t *in); \ + /* struct marshal/unmarshal */ \ + typedef struct { \ + uint8_t octets[NBIT/8]; \ + } uint##NBIT##ENDIAN##_t; \ + uint##NBIT##ENDIAN##_t uint##NBIT##ENDIAN##_marshal(uint##NBIT##_t in); \ + uint##NBIT##_t uint##NBIT##ENDIAN##_unmarshal(uint##NBIT##ENDIAN##_t in) + +_endian_declare_conv(16, be); +_endian_declare_conv(32, be); +_endian_declare_conv(64, be); + +_endian_declare_conv(16, le); +_endian_declare_conv(32, le); +_endian_declare_conv(64, le); + +#undef _endian_declare_conv #endif /* _LIBMISC_ENDIAN_H_ */ diff --git a/libmisc/include/libmisc/fmt.h b/libmisc/include/libmisc/fmt.h index c29c085..c0743ff 100644 --- a/libmisc/include/libmisc/fmt.h +++ b/libmisc/include/libmisc/fmt.h @@ -24,21 +24,9 @@ LO_INTERFACE(fmt_dest); /* Simple bytes. */ void fmt_print_byte(lo_interface fmt_dest w, uint8_t b); - -/* These are `static inline` so that the compiler can unroll the loops. */ -static inline void fmt_print_mem(lo_interface fmt_dest w, const void *_str, size_t size) { - const uint8_t *str = _str; - while (size--) - fmt_print_byte(w, *(str++)); -} -static inline void fmt_print_str(lo_interface fmt_dest w, const char *str) { - while (*str) - fmt_print_byte(w, *(str++)); -} -static inline void fmt_print_strn(lo_interface fmt_dest w, const char *str, size_t size) { - while (size-- && *str) - fmt_print_byte(w, *(str++)); -} +void fmt_print_mem(lo_interface fmt_dest w, const void *str, size_t size); +void fmt_print_str(lo_interface fmt_dest w, const char *str); +void fmt_print_strn(lo_interface fmt_dest w, const char *str, size_t size); /* Quoted bytes. */ void fmt_print_qbyte(lo_interface fmt_dest w, uint8_t b); @@ -46,6 +34,10 @@ void fmt_print_qmem(lo_interface fmt_dest w, const void *str, size_t size); void fmt_print_qstr(lo_interface fmt_dest w, const char *str); void fmt_print_qstrn(lo_interface fmt_dest w, const char *str, size_t size); +/* Hex bytes. */ +#define fmt_print_hbyte fmt_print_base16_u8_ +void fmt_print_hmem(lo_interface fmt_dest w, const void *str, size_t size); + /* Integers. */ #define _fmt_declare_base(base) \ void _fmt_print_base##base##_u8(lo_interface fmt_dest w, uint8_t val); \ diff --git a/libmisc/include/libmisc/hash.h b/libmisc/include/libmisc/hash.h index 58a895f..029bd3b 100644 --- a/libmisc/include/libmisc/hash.h +++ b/libmisc/include/libmisc/hash.h @@ -10,22 +10,12 @@ #include <stddef.h> /* for size_t */ #include <stdint.h> /* for uint{n}_t */ -/* djb2 hash */ -typedef uint32_t hash_t; -static inline void hash_init(hash_t *hash) { - *hash = 5381; -} -static inline void hash_write(hash_t *hash, void *dat, size_t len) { - for (size_t i = 0; i < len; i++) - *hash = (*hash * 33) + (hash_t)(((unsigned char *)dat)[i]); -} +/* base */ +typedef uint32_t hash_t; /* size subject to change */ +void hash_init(hash_t *hash); +void hash_write(hash_t *hash, void *dat, size_t len); /* utilities */ -static inline hash_t hash(void *dat, size_t len) { - hash_t h; - hash_init(&h); - hash_write(&h, dat, len); - return h; -} +hash_t hash(void *dat, size_t len); #endif /* _LIBMISC_HASH_H_ */ diff --git a/libmisc/include/libmisc/rand.h b/libmisc/include/libmisc/rand.h index 7ef238b..ca16f42 100644 --- a/libmisc/include/libmisc/rand.h +++ b/libmisc/include/libmisc/rand.h @@ -7,40 +7,12 @@ #ifndef _LIBMISC_RAND_H_ #define _LIBMISC_RAND_H_ -#include <stdint.h> /* for uint{n}_t, UINT{n}_C() */ -#include <stdlib.h> /* for random() */ - -#include <libmisc/assert.h> +#include <stdint.h> /* for uint{n}_t */ /** * Return a psuedo-random number in the half-open interval [0,cnt). * `cnt` must not be greater than 1<<63. */ -static inline uint64_t rand_uint63n(uint64_t cnt) { - assert(cnt != 0 && ((cnt-1) & 0x8000000000000000) == 0); - if (cnt <= UINT64_C(1)<<31) { - uint32_t fair_cnt = ((UINT32_C(1)<<31) / cnt) * cnt; - uint32_t rnd; - do { - rnd = random(); - } while (rnd >= fair_cnt); - return rnd % cnt; - } else if (cnt <= UINT64_C(1)<<62) { - uint64_t fair_cnt = ((UINT64_C(1)<<62) / cnt) * cnt; - uint64_t rnd; - do { - rnd = (((uint64_t)random()) << 31) | random(); - } while (rnd >= fair_cnt); - return rnd % cnt; - } else if (cnt <= UINT64_C(1)<<63) { - uint64_t fair_cnt = ((UINT64_C(1)<<63) / cnt) * cnt; - uint64_t rnd; - do { - rnd = (((uint64_t)random()) << 62) | (((uint64_t)random()) << 31) | random(); - } while (rnd >= fair_cnt); - return rnd % cnt; - } - assert_notreached("cnt is out of bounds"); -} +uint64_t rand_uint63n(uint64_t cnt); #endif /* _LIBMISC_RAND_H_ */ diff --git a/libmisc/include/libmisc/utf8.h b/libmisc/include/libmisc/utf8.h index b5e1b0b..54fcc92 100644 --- a/libmisc/include/libmisc/utf8.h +++ b/libmisc/include/libmisc/utf8.h @@ -15,38 +15,9 @@ * bytes. Invalid UTF-8 is indicated with chlen=0. For valid UTF-8, * chlen is always in the range [1, 4]. */ -static inline void utf8_decode_codepoint(const uint8_t *str, size_t len, uint32_t *ret_ch, uint8_t *ret_chlen) { - uint32_t ch; - uint8_t chlen; - if ((str[0] & 0b10000000) == 0b00000000) { ch = str[0] & 0b01111111; chlen = 1; } - else if ((str[0] & 0b11100000) == 0b11000000) { ch = str[0] & 0b00011111; chlen = 2; } - else if ((str[0] & 0b11110000) == 0b11100000) { ch = str[0] & 0b00001111; chlen = 3; } - else if ((str[0] & 0b11111000) == 0b11110000) { ch = str[0] & 0b00000111; chlen = 4; } - else goto invalid; - if ((ch == 0 && chlen != 1) || chlen > len) goto invalid; - for (uint8_t i = 1; i < chlen; i++) { - if ((str[i] & 0b11000000) != 0b10000000) goto invalid; - ch = (ch << 6) | (str[i] & 0b00111111); - } - if (ch > 0x10FFFF) goto invalid; - *ret_ch = ch; - *ret_chlen = chlen; - return; - invalid: - *ret_chlen = 0; -} +void utf8_decode_codepoint(const uint8_t *str, size_t len, uint32_t *ret_ch, uint8_t *ret_chlen); -static inline bool _utf8_is_valid(const uint8_t *str, size_t len, bool forbid_nul) { - for (size_t pos = 0; pos < len;) { - uint32_t ch; - uint8_t chlen; - utf8_decode_codepoint(&str[pos], len-pos, &ch, &chlen); - if (chlen == 0 || (forbid_nul && ch == 0)) - return false; - pos += chlen; - } - return true; -} +bool _utf8_is_valid(const uint8_t *str, size_t len, bool forbid_nul); #define utf8_is_valid(str, len) _utf8_is_valid(str, len, false) #define utf8_is_valid_without_nul(str, len) _utf8_is_valid(str, len, true) diff --git a/libmisc/rand.c b/libmisc/rand.c new file mode 100644 index 0000000..d1643ee --- /dev/null +++ b/libmisc/rand.c @@ -0,0 +1,38 @@ +/* libmisc/rand.c - Non-crytpographic random-number utilities + * + * Copyright (C) 2024-2025 Luke T. Shumaker <lukeshu@lukeshu.com> + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +#include <stdlib.h> /* for random() */ + +#include <libmisc/assert.h> + +#include <libmisc/rand.h> + +uint64_t rand_uint63n(uint64_t cnt) { + assert(cnt != 0 && ((cnt-1) & 0x8000000000000000) == 0); + if (cnt <= UINT64_C(1)<<31) { + uint32_t fair_cnt = ((UINT32_C(1)<<31) / cnt) * cnt; + uint32_t rnd; + do { + rnd = random(); + } while (rnd >= fair_cnt); + return rnd % cnt; + } else if (cnt <= UINT64_C(1)<<62) { + uint64_t fair_cnt = ((UINT64_C(1)<<62) / cnt) * cnt; + uint64_t rnd; + do { + rnd = (((uint64_t)random()) << 31) | random(); + } while (rnd >= fair_cnt); + return rnd % cnt; + } else if (cnt <= UINT64_C(1)<<63) { + uint64_t fair_cnt = ((UINT64_C(1)<<63) / cnt) * cnt; + uint64_t rnd; + do { + rnd = (((uint64_t)random()) << 62) | (((uint64_t)random()) << 31) | random(); + } while (rnd >= fair_cnt); + return rnd % cnt; + } + assert_notreached("cnt is out of bounds"); +} diff --git a/libmisc/utf8.c b/libmisc/utf8.c new file mode 100644 index 0000000..5f91021 --- /dev/null +++ b/libmisc/utf8.c @@ -0,0 +1,40 @@ +/* libmisc/utf8.c - UTF-8 routines + * + * Copyright (C) 2024-2025 Luke T. Shumaker <lukeshu@lukeshu.com> + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +#include <libmisc/utf8.h> + +void utf8_decode_codepoint(const uint8_t *str, size_t len, uint32_t *ret_ch, uint8_t *ret_chlen) { + uint32_t ch; + uint8_t chlen; + if ((str[0] & 0b10000000) == 0b00000000) { ch = str[0] & 0b01111111; chlen = 1; } + else if ((str[0] & 0b11100000) == 0b11000000) { ch = str[0] & 0b00011111; chlen = 2; } + else if ((str[0] & 0b11110000) == 0b11100000) { ch = str[0] & 0b00001111; chlen = 3; } + else if ((str[0] & 0b11111000) == 0b11110000) { ch = str[0] & 0b00000111; chlen = 4; } + else goto invalid; + if ((ch == 0 && chlen != 1) || chlen > len) goto invalid; + for (uint8_t i = 1; i < chlen; i++) { + if ((str[i] & 0b11000000) != 0b10000000) goto invalid; + ch = (ch << 6) | (str[i] & 0b00111111); + } + if (ch > 0x10FFFF) goto invalid; + *ret_ch = ch; + *ret_chlen = chlen; + return; + invalid: + *ret_chlen = 0; +} + +bool _utf8_is_valid(const uint8_t *str, size_t len, bool forbid_nul) { + for (size_t pos = 0; pos < len;) { + uint32_t ch; + uint8_t chlen; + utf8_decode_codepoint(&str[pos], len-pos, &ch, &chlen); + if (chlen == 0 || (forbid_nul && ch == 0)) + return false; + pos += chlen; + } + return true; +} |