summaryrefslogtreecommitdiff
path: root/libmisc
diff options
context:
space:
mode:
authorLuke T. Shumaker <lukeshu@lukeshu.com>2025-05-27 19:47:25 -0400
committerLuke T. Shumaker <lukeshu@lukeshu.com>2025-05-27 19:47:25 -0400
commitbf3667b8b76eefd95e33e32b4f5abbf2de0e2065 (patch)
tree241b909f0d68f3c0f355ad924375c2fa5b9a19ae /libmisc
parent42fb27570262b52e2ca889030c621b5f4af76fe1 (diff)
parent968f7710458f44d5e62d2624461f4e8459c04168 (diff)
Merge branch 'lukeshu/cover'HEADmain
Diffstat (limited to 'libmisc')
-rw-r--r--libmisc/CMakeLists.txt4
-rw-r--r--libmisc/endian.c136
-rw-r--r--libmisc/fmt.c25
-rw-r--r--libmisc/hash.c24
-rw-r--r--libmisc/include/libmisc/endian.h219
-rw-r--r--libmisc/include/libmisc/fmt.h22
-rw-r--r--libmisc/include/libmisc/hash.h20
-rw-r--r--libmisc/include/libmisc/rand.h32
-rw-r--r--libmisc/include/libmisc/utf8.h33
-rw-r--r--libmisc/rand.c38
-rw-r--r--libmisc/utf8.c40
11 files changed, 303 insertions, 290 deletions
diff --git a/libmisc/CMakeLists.txt b/libmisc/CMakeLists.txt
index 48407bd..c6405ad 100644
--- a/libmisc/CMakeLists.txt
+++ b/libmisc/CMakeLists.txt
@@ -7,11 +7,15 @@ add_library(libmisc INTERFACE)
target_include_directories(libmisc PUBLIC INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/include)
target_sources(libmisc INTERFACE
assert.c
+ endian.c
fmt.c
+ hash.c
intercept.c
linkedlist.c
log.c
map.c
+ rand.c
+ utf8.c
)
add_lib_test(libmisc test_assert)
diff --git a/libmisc/endian.c b/libmisc/endian.c
new file mode 100644
index 0000000..2528f48
--- /dev/null
+++ b/libmisc/endian.c
@@ -0,0 +1,136 @@
+/* libmisc/endian.c - Endian-conversion helpers
+ *
+ * Copyright (C) 2024-2025 Luke T. Shumaker <lukeshu@lukeshu.com>
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+#include <libmisc/macro.h> /* for LM_FORCE_SEMICOLON */
+
+#include <libmisc/endian.h>
+
+#define endian_declare_wrappers(NBIT, ENDIAN) \
+ uint##NBIT##ENDIAN##_t uint##NBIT##ENDIAN##_marshal(uint##NBIT##_t in) { \
+ uint##NBIT##ENDIAN##_t out; \
+ uint##NBIT##ENDIAN##_encode(out.octets, in); \
+ return out; \
+ } \
+ uint##NBIT##_t uint##NBIT##ENDIAN##_unmarshal(uint##NBIT##ENDIAN##_t in) { \
+ return uint##NBIT##ENDIAN##_decode(in.octets); \
+ } \
+ LM_FORCE_SEMICOLON
+
+/* Big endian *****************************************************************/
+
+size_t uint16be_encode(uint8_t *out, uint16_t in) {
+ out[0] = (uint8_t)((in >> 8) & 0xFF);
+ out[1] = (uint8_t)((in >> 0) & 0xFF);
+ return 2;
+}
+
+uint16_t uint16be_decode(uint8_t *in) {
+ return (((uint16_t)(in[0])) << 8)
+ | (((uint16_t)(in[1])) << 0)
+ ;
+}
+
+size_t uint32be_encode(uint8_t *out, uint32_t in) {
+ out[0] = (uint8_t)((in >> 24) & 0xFF);
+ out[1] = (uint8_t)((in >> 16) & 0xFF);
+ out[2] = (uint8_t)((in >> 8) & 0xFF);
+ out[3] = (uint8_t)((in >> 0) & 0xFF);
+ return 4;
+}
+
+uint32_t uint32be_decode(uint8_t *in) {
+ return (((uint32_t)(in[0])) << 24)
+ | (((uint32_t)(in[1])) << 16)
+ | (((uint32_t)(in[2])) << 8)
+ | (((uint32_t)(in[3])) << 0)
+ ;
+}
+
+size_t uint64be_encode(uint8_t *out, uint64_t in) {
+ out[0] = (uint8_t)((in >> 56) & 0xFF);
+ out[1] = (uint8_t)((in >> 48) & 0xFF);
+ out[2] = (uint8_t)((in >> 40) & 0xFF);
+ out[3] = (uint8_t)((in >> 32) & 0xFF);
+ out[4] = (uint8_t)((in >> 24) & 0xFF);
+ out[5] = (uint8_t)((in >> 16) & 0xFF);
+ out[6] = (uint8_t)((in >> 8) & 0xFF);
+ out[7] = (uint8_t)((in >> 0) & 0xFF);
+ return 8;
+}
+
+uint64_t uint64be_decode(uint8_t *in) {
+ return (((uint64_t)(in[0])) << 56)
+ | (((uint64_t)(in[1])) << 48)
+ | (((uint64_t)(in[2])) << 40)
+ | (((uint64_t)(in[3])) << 32)
+ | (((uint64_t)(in[4])) << 24)
+ | (((uint64_t)(in[5])) << 16)
+ | (((uint64_t)(in[6])) << 8)
+ | (((uint64_t)(in[7])) << 0)
+ ;
+}
+
+endian_declare_wrappers(16, be);
+endian_declare_wrappers(32, be);
+endian_declare_wrappers(64, be);
+
+/* Little endian **************************************************************/
+
+size_t uint16le_encode(uint8_t *out, uint16_t in) {
+ out[0] = (uint8_t)((in >> 0) & 0xFF);
+ out[1] = (uint8_t)((in >> 8) & 0xFF);
+ return 2;
+}
+
+uint16_t uint16le_decode(uint8_t *in) {
+ return (((uint16_t)(in[0])) << 0)
+ | (((uint16_t)(in[1])) << 8)
+ ;
+}
+
+size_t uint32le_encode(uint8_t *out, uint32_t in) {
+ out[0] = (uint8_t)((in >> 0) & 0xFF);
+ out[1] = (uint8_t)((in >> 8) & 0xFF);
+ out[2] = (uint8_t)((in >> 16) & 0xFF);
+ out[3] = (uint8_t)((in >> 24) & 0xFF);
+ return 4;
+}
+
+uint32_t uint32le_decode(uint8_t *in) {
+ return (((uint32_t)(in[0])) << 0)
+ | (((uint32_t)(in[1])) << 8)
+ | (((uint32_t)(in[2])) << 16)
+ | (((uint32_t)(in[3])) << 24)
+ ;
+}
+
+size_t uint64le_encode(uint8_t *out, uint64_t in) {
+ out[0] = (uint8_t)((in >> 0) & 0xFF);
+ out[1] = (uint8_t)((in >> 8) & 0xFF);
+ out[2] = (uint8_t)((in >> 16) & 0xFF);
+ out[3] = (uint8_t)((in >> 24) & 0xFF);
+ out[4] = (uint8_t)((in >> 32) & 0xFF);
+ out[5] = (uint8_t)((in >> 40) & 0xFF);
+ out[6] = (uint8_t)((in >> 48) & 0xFF);
+ out[7] = (uint8_t)((in >> 56) & 0xFF);
+ return 8;
+}
+
+uint64_t uint64le_decode(uint8_t *in) {
+ return (((uint64_t)(in[0])) << 0)
+ | (((uint64_t)(in[1])) << 8)
+ | (((uint64_t)(in[2])) << 16)
+ | (((uint64_t)(in[3])) << 24)
+ | (((uint64_t)(in[4])) << 32)
+ | (((uint64_t)(in[5])) << 40)
+ | (((uint64_t)(in[6])) << 48)
+ | (((uint64_t)(in[7])) << 56)
+ ;
+}
+
+endian_declare_wrappers(16, le);
+endian_declare_wrappers(32, le);
+endian_declare_wrappers(64, le);
diff --git a/libmisc/fmt.c b/libmisc/fmt.c
index 6cf1d8d..a8baa84 100644
--- a/libmisc/fmt.c
+++ b/libmisc/fmt.c
@@ -14,6 +14,31 @@ static const char *const hexdig = "0123456789ABCDEF";
/* small/trivial formatters ***************************************************/
+void fmt_print_mem(lo_interface fmt_dest w, const void *_str, size_t size) {
+ const uint8_t *str = _str;
+ while (size--)
+ fmt_print_byte(w, *(str++));
+}
+void fmt_print_str(lo_interface fmt_dest w, const char *str) {
+ while (*str)
+ fmt_print_byte(w, *(str++));
+}
+void fmt_print_strn(lo_interface fmt_dest w, const char *str, size_t size) {
+ while (size-- && *str)
+ fmt_print_byte(w, *(str++));
+}
+
+void fmt_print_hmem(lo_interface fmt_dest w, const void *_str, size_t size) {
+ const uint8_t *str = _str;
+ fmt_print_byte(w, '{');
+ for (size_t i = 0; i < size; i++) {
+ if (i)
+ fmt_print_byte(w, ',');
+ fmt_print_hbyte(w, str[i]);
+ }
+ fmt_print_byte(w, '}');
+}
+
void fmt_print_byte(lo_interface fmt_dest w, uint8_t b) {
LO_CALL(w, putb, b);
}
diff --git a/libmisc/hash.c b/libmisc/hash.c
new file mode 100644
index 0000000..3814cec
--- /dev/null
+++ b/libmisc/hash.c
@@ -0,0 +1,24 @@
+/* libmisc/hash.c - General-purpose hash utilities
+ *
+ * Copyright (C) 2024-2025 Luke T. Shumaker <lukeshu@lukeshu.com>
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+#include <libmisc/hash.h>
+
+/* djb2 hash */
+void hash_init(hash_t *hash) {
+ *hash = 5381;
+}
+void hash_write(hash_t *hash, void *dat, size_t len) {
+ for (size_t i = 0; i < len; i++)
+ *hash = (*hash * 33) + (hash_t)(((unsigned char *)dat)[i]);
+}
+
+/* utilities */
+hash_t hash(void *dat, size_t len) {
+ hash_t h;
+ hash_init(&h);
+ hash_write(&h, dat, len);
+ return h;
+}
diff --git a/libmisc/include/libmisc/endian.h b/libmisc/include/libmisc/endian.h
index 75240fe..966c3bc 100644
--- a/libmisc/include/libmisc/endian.h
+++ b/libmisc/include/libmisc/endian.h
@@ -10,204 +10,25 @@
#include <stddef.h> /* for size_t */
#include <stdint.h> /* for uint{n}_t */
-#include <libmisc/assert.h>
-
-/* Big endian *****************************************************************/
-
-typedef struct {
- uint8_t octets[2];
-} uint16be_t;
-static_assert(sizeof(uint16be_t) == 2);
-
-static inline size_t uint16be_encode(uint8_t *out, uint16_t in) {
- out[0] = (uint8_t)((in >> 8) & 0xFF);
- out[1] = (uint8_t)((in >> 0) & 0xFF);
- return 2;
-}
-
-static inline uint16_t uint16be_decode(uint8_t *in) {
- return (((uint16_t)(in[0])) << 8)
- | (((uint16_t)(in[1])) << 0)
- ;
-}
-
-static inline uint16be_t uint16be_marshal(uint16_t in) {
- uint16be_t out;
- uint16be_encode(out.octets, in);
- return out;
-}
-
-static inline uint16_t uint16be_unmarshal(uint16be_t in) {
- return uint16be_decode(in.octets);
-}
-
-typedef struct {
- uint8_t octets[4];
-} uint32be_t;
-static_assert(sizeof(uint32be_t) == 4);
-
-static inline size_t uint32be_encode(uint8_t *out, uint32_t in) {
- out[0] = (uint8_t)((in >> 24) & 0xFF);
- out[1] = (uint8_t)((in >> 16) & 0xFF);
- out[2] = (uint8_t)((in >> 8) & 0xFF);
- out[3] = (uint8_t)((in >> 0) & 0xFF);
- return 4;
-}
-
-static inline uint32_t uint32be_decode(uint8_t *in) {
- return (((uint32_t)(in[0])) << 24)
- | (((uint32_t)(in[1])) << 16)
- | (((uint32_t)(in[2])) << 8)
- | (((uint32_t)(in[3])) << 0)
- ;
-}
-
-static inline uint32be_t uint32be_marshal(uint32_t in) {
- uint32be_t out;
- uint32be_encode(out.octets, in);
- return out;
-}
-
-static inline uint32_t uint32be_unmarshal(uint32be_t in) {
- return uint32be_decode(in.octets);
-}
-
-typedef struct {
- uint8_t octets[8];
-} uint64be_t;
-static_assert(sizeof(uint64be_t) == 8);
-
-static inline size_t uint64be_encode(uint8_t *out, uint64_t in) {
- out[0] = (uint8_t)((in >> 56) & 0xFF);
- out[1] = (uint8_t)((in >> 48) & 0xFF);
- out[2] = (uint8_t)((in >> 40) & 0xFF);
- out[3] = (uint8_t)((in >> 32) & 0xFF);
- out[4] = (uint8_t)((in >> 24) & 0xFF);
- out[5] = (uint8_t)((in >> 16) & 0xFF);
- out[6] = (uint8_t)((in >> 8) & 0xFF);
- out[7] = (uint8_t)((in >> 0) & 0xFF);
- return 8;
-}
-
-static inline uint64_t uint64be_decode(uint8_t *in) {
- return (((uint64_t)(in[0])) << 56)
- | (((uint64_t)(in[1])) << 48)
- | (((uint64_t)(in[2])) << 40)
- | (((uint64_t)(in[3])) << 32)
- | (((uint64_t)(in[4])) << 24)
- | (((uint64_t)(in[5])) << 16)
- | (((uint64_t)(in[6])) << 8)
- | (((uint64_t)(in[7])) << 0)
- ;
-}
-
-static inline uint64be_t uint64be_marshal(uint64_t in) {
- uint64be_t out;
- uint64be_encode(out.octets, in);
- return out;
-}
-
-static inline uint64_t uint64be_unmarshal(uint64be_t in) {
- return uint64be_decode(in.octets);
-}
-
-/* Little endian **************************************************************/
-
-typedef struct {
- uint8_t octets[2];
-} uint16le_t;
-static_assert(sizeof(uint16le_t) == 2);
-
-static inline size_t uint16le_encode(uint8_t *out, uint16_t in) {
- out[0] = (uint8_t)((in >> 0) & 0xFF);
- out[1] = (uint8_t)((in >> 8) & 0xFF);
- return 2;
-}
-
-static inline uint16_t uint16le_decode(uint8_t *in) {
- return (((uint16_t)(in[0])) << 0)
- | (((uint16_t)(in[1])) << 8)
- ;
-}
-
-static inline uint16le_t uint16le_marshal(uint16_t in) {
- uint16le_t out;
- uint16le_encode(out.octets, in);
- return out;
-}
-
-static inline uint16_t uint16le_unmarshal(uint16le_t in) {
- return uint16le_decode(in.octets);
-}
-
-typedef struct {
- uint8_t octets[4];
-} uint32le_t;
-static_assert(sizeof(uint32le_t) == 4);
-
-static inline size_t uint32le_encode(uint8_t *out, uint32_t in) {
- out[0] = (uint8_t)((in >> 0) & 0xFF);
- out[1] = (uint8_t)((in >> 8) & 0xFF);
- out[2] = (uint8_t)((in >> 16) & 0xFF);
- out[3] = (uint8_t)((in >> 24) & 0xFF);
- return 4;
-}
-
-static inline uint32_t uint32le_decode(uint8_t *in) {
- return (((uint32_t)(in[0])) << 0)
- | (((uint32_t)(in[1])) << 8)
- | (((uint32_t)(in[2])) << 16)
- | (((uint32_t)(in[3])) << 24)
- ;
-}
-
-static inline uint32le_t uint32le_marshal(uint32_t in) {
- uint32le_t out;
- uint32le_encode(out.octets, in);
- return out;
-}
-
-static inline uint32_t uint32le_unmarshal(uint32le_t in) {
- return uint32le_decode(in.octets);
-}
-
-typedef struct {
- uint8_t octets[8];
-} uint64le_t;
-static_assert(sizeof(uint64le_t) == 8);
-
-static inline size_t uint64le_encode(uint8_t *out, uint64_t in) {
- out[0] = (uint8_t)((in >> 0) & 0xFF);
- out[1] = (uint8_t)((in >> 8) & 0xFF);
- out[2] = (uint8_t)((in >> 16) & 0xFF);
- out[3] = (uint8_t)((in >> 24) & 0xFF);
- out[4] = (uint8_t)((in >> 32) & 0xFF);
- out[5] = (uint8_t)((in >> 40) & 0xFF);
- out[6] = (uint8_t)((in >> 48) & 0xFF);
- out[7] = (uint8_t)((in >> 56) & 0xFF);
- return 8;
-}
-
-static inline uint64_t uint64le_decode(uint8_t *in) {
- return (((uint64_t)(in[0])) << 0)
- | (((uint64_t)(in[1])) << 8)
- | (((uint64_t)(in[2])) << 16)
- | (((uint64_t)(in[3])) << 24)
- | (((uint64_t)(in[4])) << 32)
- | (((uint64_t)(in[5])) << 40)
- | (((uint64_t)(in[6])) << 48)
- | (((uint64_t)(in[7])) << 56)
- ;
-}
-
-static inline uint64le_t uint64le_marshal(uint64_t in) {
- uint64le_t out;
- uint64le_encode(out.octets, in);
- return out;
-}
-
-static inline uint64_t uint64le_unmarshal(uint64le_t in) {
- return uint64le_decode(in.octets);
-}
+#define _endian_declare_conv(NBIT, ENDIAN) \
+ /* byte array encode/decode */ \
+ size_t uint##NBIT##ENDIAN##_encode(uint8_t *out, uint##NBIT##_t in); \
+ uint##NBIT##_t uint##NBIT##ENDIAN##_decode(uint8_t *in); \
+ /* struct marshal/unmarshal */ \
+ typedef struct { \
+ uint8_t octets[NBIT/8]; \
+ } uint##NBIT##ENDIAN##_t; \
+ uint##NBIT##ENDIAN##_t uint##NBIT##ENDIAN##_marshal(uint##NBIT##_t in); \
+ uint##NBIT##_t uint##NBIT##ENDIAN##_unmarshal(uint##NBIT##ENDIAN##_t in)
+
+_endian_declare_conv(16, be);
+_endian_declare_conv(32, be);
+_endian_declare_conv(64, be);
+
+_endian_declare_conv(16, le);
+_endian_declare_conv(32, le);
+_endian_declare_conv(64, le);
+
+#undef _endian_declare_conv
#endif /* _LIBMISC_ENDIAN_H_ */
diff --git a/libmisc/include/libmisc/fmt.h b/libmisc/include/libmisc/fmt.h
index c29c085..c0743ff 100644
--- a/libmisc/include/libmisc/fmt.h
+++ b/libmisc/include/libmisc/fmt.h
@@ -24,21 +24,9 @@ LO_INTERFACE(fmt_dest);
/* Simple bytes. */
void fmt_print_byte(lo_interface fmt_dest w, uint8_t b);
-
-/* These are `static inline` so that the compiler can unroll the loops. */
-static inline void fmt_print_mem(lo_interface fmt_dest w, const void *_str, size_t size) {
- const uint8_t *str = _str;
- while (size--)
- fmt_print_byte(w, *(str++));
-}
-static inline void fmt_print_str(lo_interface fmt_dest w, const char *str) {
- while (*str)
- fmt_print_byte(w, *(str++));
-}
-static inline void fmt_print_strn(lo_interface fmt_dest w, const char *str, size_t size) {
- while (size-- && *str)
- fmt_print_byte(w, *(str++));
-}
+void fmt_print_mem(lo_interface fmt_dest w, const void *str, size_t size);
+void fmt_print_str(lo_interface fmt_dest w, const char *str);
+void fmt_print_strn(lo_interface fmt_dest w, const char *str, size_t size);
/* Quoted bytes. */
void fmt_print_qbyte(lo_interface fmt_dest w, uint8_t b);
@@ -46,6 +34,10 @@ void fmt_print_qmem(lo_interface fmt_dest w, const void *str, size_t size);
void fmt_print_qstr(lo_interface fmt_dest w, const char *str);
void fmt_print_qstrn(lo_interface fmt_dest w, const char *str, size_t size);
+/* Hex bytes. */
+#define fmt_print_hbyte fmt_print_base16_u8_
+void fmt_print_hmem(lo_interface fmt_dest w, const void *str, size_t size);
+
/* Integers. */
#define _fmt_declare_base(base) \
void _fmt_print_base##base##_u8(lo_interface fmt_dest w, uint8_t val); \
diff --git a/libmisc/include/libmisc/hash.h b/libmisc/include/libmisc/hash.h
index 58a895f..029bd3b 100644
--- a/libmisc/include/libmisc/hash.h
+++ b/libmisc/include/libmisc/hash.h
@@ -10,22 +10,12 @@
#include <stddef.h> /* for size_t */
#include <stdint.h> /* for uint{n}_t */
-/* djb2 hash */
-typedef uint32_t hash_t;
-static inline void hash_init(hash_t *hash) {
- *hash = 5381;
-}
-static inline void hash_write(hash_t *hash, void *dat, size_t len) {
- for (size_t i = 0; i < len; i++)
- *hash = (*hash * 33) + (hash_t)(((unsigned char *)dat)[i]);
-}
+/* base */
+typedef uint32_t hash_t; /* size subject to change */
+void hash_init(hash_t *hash);
+void hash_write(hash_t *hash, void *dat, size_t len);
/* utilities */
-static inline hash_t hash(void *dat, size_t len) {
- hash_t h;
- hash_init(&h);
- hash_write(&h, dat, len);
- return h;
-}
+hash_t hash(void *dat, size_t len);
#endif /* _LIBMISC_HASH_H_ */
diff --git a/libmisc/include/libmisc/rand.h b/libmisc/include/libmisc/rand.h
index 7ef238b..ca16f42 100644
--- a/libmisc/include/libmisc/rand.h
+++ b/libmisc/include/libmisc/rand.h
@@ -7,40 +7,12 @@
#ifndef _LIBMISC_RAND_H_
#define _LIBMISC_RAND_H_
-#include <stdint.h> /* for uint{n}_t, UINT{n}_C() */
-#include <stdlib.h> /* for random() */
-
-#include <libmisc/assert.h>
+#include <stdint.h> /* for uint{n}_t */
/**
* Return a psuedo-random number in the half-open interval [0,cnt).
* `cnt` must not be greater than 1<<63.
*/
-static inline uint64_t rand_uint63n(uint64_t cnt) {
- assert(cnt != 0 && ((cnt-1) & 0x8000000000000000) == 0);
- if (cnt <= UINT64_C(1)<<31) {
- uint32_t fair_cnt = ((UINT32_C(1)<<31) / cnt) * cnt;
- uint32_t rnd;
- do {
- rnd = random();
- } while (rnd >= fair_cnt);
- return rnd % cnt;
- } else if (cnt <= UINT64_C(1)<<62) {
- uint64_t fair_cnt = ((UINT64_C(1)<<62) / cnt) * cnt;
- uint64_t rnd;
- do {
- rnd = (((uint64_t)random()) << 31) | random();
- } while (rnd >= fair_cnt);
- return rnd % cnt;
- } else if (cnt <= UINT64_C(1)<<63) {
- uint64_t fair_cnt = ((UINT64_C(1)<<63) / cnt) * cnt;
- uint64_t rnd;
- do {
- rnd = (((uint64_t)random()) << 62) | (((uint64_t)random()) << 31) | random();
- } while (rnd >= fair_cnt);
- return rnd % cnt;
- }
- assert_notreached("cnt is out of bounds");
-}
+uint64_t rand_uint63n(uint64_t cnt);
#endif /* _LIBMISC_RAND_H_ */
diff --git a/libmisc/include/libmisc/utf8.h b/libmisc/include/libmisc/utf8.h
index b5e1b0b..54fcc92 100644
--- a/libmisc/include/libmisc/utf8.h
+++ b/libmisc/include/libmisc/utf8.h
@@ -15,38 +15,9 @@
* bytes. Invalid UTF-8 is indicated with chlen=0. For valid UTF-8,
* chlen is always in the range [1, 4].
*/
-static inline void utf8_decode_codepoint(const uint8_t *str, size_t len, uint32_t *ret_ch, uint8_t *ret_chlen) {
- uint32_t ch;
- uint8_t chlen;
- if ((str[0] & 0b10000000) == 0b00000000) { ch = str[0] & 0b01111111; chlen = 1; }
- else if ((str[0] & 0b11100000) == 0b11000000) { ch = str[0] & 0b00011111; chlen = 2; }
- else if ((str[0] & 0b11110000) == 0b11100000) { ch = str[0] & 0b00001111; chlen = 3; }
- else if ((str[0] & 0b11111000) == 0b11110000) { ch = str[0] & 0b00000111; chlen = 4; }
- else goto invalid;
- if ((ch == 0 && chlen != 1) || chlen > len) goto invalid;
- for (uint8_t i = 1; i < chlen; i++) {
- if ((str[i] & 0b11000000) != 0b10000000) goto invalid;
- ch = (ch << 6) | (str[i] & 0b00111111);
- }
- if (ch > 0x10FFFF) goto invalid;
- *ret_ch = ch;
- *ret_chlen = chlen;
- return;
- invalid:
- *ret_chlen = 0;
-}
+void utf8_decode_codepoint(const uint8_t *str, size_t len, uint32_t *ret_ch, uint8_t *ret_chlen);
-static inline bool _utf8_is_valid(const uint8_t *str, size_t len, bool forbid_nul) {
- for (size_t pos = 0; pos < len;) {
- uint32_t ch;
- uint8_t chlen;
- utf8_decode_codepoint(&str[pos], len-pos, &ch, &chlen);
- if (chlen == 0 || (forbid_nul && ch == 0))
- return false;
- pos += chlen;
- }
- return true;
-}
+bool _utf8_is_valid(const uint8_t *str, size_t len, bool forbid_nul);
#define utf8_is_valid(str, len) _utf8_is_valid(str, len, false)
#define utf8_is_valid_without_nul(str, len) _utf8_is_valid(str, len, true)
diff --git a/libmisc/rand.c b/libmisc/rand.c
new file mode 100644
index 0000000..d1643ee
--- /dev/null
+++ b/libmisc/rand.c
@@ -0,0 +1,38 @@
+/* libmisc/rand.c - Non-crytpographic random-number utilities
+ *
+ * Copyright (C) 2024-2025 Luke T. Shumaker <lukeshu@lukeshu.com>
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+#include <stdlib.h> /* for random() */
+
+#include <libmisc/assert.h>
+
+#include <libmisc/rand.h>
+
+uint64_t rand_uint63n(uint64_t cnt) {
+ assert(cnt != 0 && ((cnt-1) & 0x8000000000000000) == 0);
+ if (cnt <= UINT64_C(1)<<31) {
+ uint32_t fair_cnt = ((UINT32_C(1)<<31) / cnt) * cnt;
+ uint32_t rnd;
+ do {
+ rnd = random();
+ } while (rnd >= fair_cnt);
+ return rnd % cnt;
+ } else if (cnt <= UINT64_C(1)<<62) {
+ uint64_t fair_cnt = ((UINT64_C(1)<<62) / cnt) * cnt;
+ uint64_t rnd;
+ do {
+ rnd = (((uint64_t)random()) << 31) | random();
+ } while (rnd >= fair_cnt);
+ return rnd % cnt;
+ } else if (cnt <= UINT64_C(1)<<63) {
+ uint64_t fair_cnt = ((UINT64_C(1)<<63) / cnt) * cnt;
+ uint64_t rnd;
+ do {
+ rnd = (((uint64_t)random()) << 62) | (((uint64_t)random()) << 31) | random();
+ } while (rnd >= fair_cnt);
+ return rnd % cnt;
+ }
+ assert_notreached("cnt is out of bounds");
+}
diff --git a/libmisc/utf8.c b/libmisc/utf8.c
new file mode 100644
index 0000000..5f91021
--- /dev/null
+++ b/libmisc/utf8.c
@@ -0,0 +1,40 @@
+/* libmisc/utf8.c - UTF-8 routines
+ *
+ * Copyright (C) 2024-2025 Luke T. Shumaker <lukeshu@lukeshu.com>
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+#include <libmisc/utf8.h>
+
+void utf8_decode_codepoint(const uint8_t *str, size_t len, uint32_t *ret_ch, uint8_t *ret_chlen) {
+ uint32_t ch;
+ uint8_t chlen;
+ if ((str[0] & 0b10000000) == 0b00000000) { ch = str[0] & 0b01111111; chlen = 1; }
+ else if ((str[0] & 0b11100000) == 0b11000000) { ch = str[0] & 0b00011111; chlen = 2; }
+ else if ((str[0] & 0b11110000) == 0b11100000) { ch = str[0] & 0b00001111; chlen = 3; }
+ else if ((str[0] & 0b11111000) == 0b11110000) { ch = str[0] & 0b00000111; chlen = 4; }
+ else goto invalid;
+ if ((ch == 0 && chlen != 1) || chlen > len) goto invalid;
+ for (uint8_t i = 1; i < chlen; i++) {
+ if ((str[i] & 0b11000000) != 0b10000000) goto invalid;
+ ch = (ch << 6) | (str[i] & 0b00111111);
+ }
+ if (ch > 0x10FFFF) goto invalid;
+ *ret_ch = ch;
+ *ret_chlen = chlen;
+ return;
+ invalid:
+ *ret_chlen = 0;
+}
+
+bool _utf8_is_valid(const uint8_t *str, size_t len, bool forbid_nul) {
+ for (size_t pos = 0; pos < len;) {
+ uint32_t ch;
+ uint8_t chlen;
+ utf8_decode_codepoint(&str[pos], len-pos, &ch, &chlen);
+ if (chlen == 0 || (forbid_nul && ch == 0))
+ return false;
+ pos += chlen;
+ }
+ return true;
+}