10 files changed, 154 insertions, 36 deletions
diff --git a/GNUmakefile b/GNUmakefile
index 146f725..758c5aa 100644
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -107,6 +107,7 @@ check:
 # `gcovr` is a Python script that calls `gcov` and merges and post-processes the .gcov files to other formats
 gcovr_flags  = --txt=$(@D)/coverage.txt
 gcovr_flags += --html=$(@D)/coverage.html --html-details --html-single-page=js-enabled
+gcovr_flags += --sort uncovered-number --sort-reverse
 $(foreach t,$(build_types),$(foreach p,$(platforms),build/$p-$t/check)): build/%/check: build/%/build
 	./build-aux/gcov-prune $(@D)
 	+cd $(@D) && ctest --output-on-failure $(if $(filter --jobserver-auth=%,$(MAKEFLAGS)),--parallel)
diff --git a/lib9p/core_gen/c_validate.py b/lib9p/core_gen/c_validate.py
index e7a4017..8997237 100644
--- a/lib9p/core_gen/c_validate.py
+++ b/lib9p/core_gen/c_validate.py
@@ -67,7 +67,7 @@ def gen_c_validate(versions: set[str], typs: list[idl.UserType]) -> str:
         "\t\tsize_t len = n;\n"
         "\t\tVALIDATE_NET_BYTES(len);\n"
         "\t\tif (!utf8_is_valid_without_nul(&net_bytes[net_offset-len], len))\n"
-        f'\t\t\treturn lib9p_error(ctx, {c9util.IDENT("ERRNO_L_EBADMSG")}, "message contains invalid UTF-8");\n'
+        f'\t\t\treturn lib9p_error(ctx, {c9util.IDENT("ERRNO_L_EILSEQ")}, "message contains invalid UTF-8");\n'
         "\t}\n"
     )
     ret += cutil.macro(
diff --git a/lib9p/core_generated.c b/lib9p/core_generated.c
index 6e3633f..ad7b210 100644
--- a/lib9p/core_generated.c
+++ b/lib9p/core_generated.c
@@ -229,12 +229,12 @@ static const lib9p_lock_flags_t lock_flags_masks[LIB9P_VER_NUM] = {
 		return lib9p_error(ctx, LIB9P_ERRNO_L_EBADMSG, "message is too short for content"); \
 	if (net_offset > net_size)                                                                  \
 		return lib9p_error(ctx, LIB9P_ERRNO_L_EBADMSG, "message is too short for content (", net_offset, " > ", net_size, ")");
-#define VALIDATE_NET_UTF8(n)                                                                              \
-	{                                                                                                 \
-		size_t len = n;                                                                           \
-		VALIDATE_NET_BYTES(len);                                                                  \
-		if (!utf8_is_valid_without_nul(&net_bytes[net_offset-len], len))                          \
-			return lib9p_error(ctx, LIB9P_ERRNO_L_EBADMSG, "message contains invalid UTF-8"); \
+#define VALIDATE_NET_UTF8(n)                                                                             \
+	{                                                                                                \
+		size_t len = n;                                                                          \
+		VALIDATE_NET_BYTES(len);                                                                 \
+		if (!utf8_is_valid_without_nul(&net_bytes[net_offset-len], len))                         \
+			return lib9p_error(ctx, LIB9P_ERRNO_L_EILSEQ, "message contains invalid UTF-8"); \
 	}
 #define RESERVE_HOST_BYTES(n)                                           \
 	if (__builtin_add_overflow(host_size, n, &host_size))           \
diff --git a/lib9p/core_include/lib9p/core.h b/lib9p/core_include/lib9p/core.h
index 4941220..afefa2b 100644
--- a/lib9p/core_include/lib9p/core.h
+++ b/lib9p/core_include/lib9p/core.h
@@ -98,11 +98,7 @@ void fmt_print_lib9p_msg(lo_interface fmt_dest w, struct lib9p_ctx *ctx, enum li
  * number may be larger than net_bytes due to (1) struct padding, (2)
  * array pointers.
  *
- * Emits an error (return -1, set ctx->err_num and ctx->err_msg) if
- * either the message type is unknown, or if net_bytes is too short
- * for that message type, or if an invalid string (invalid UTF-8,
- * contains a nul-byte) is encountered.
- *
+ * @param ctx : negotiated protocol parameters, where to record errors
  * @param net_bytes : the complete request, starting with the "size[4]"
  *
  * @return required size, or -1 on error
@@ -110,7 +106,7 @@ void fmt_print_lib9p_msg(lo_interface fmt_dest w, struct lib9p_ctx *ctx, enum li
  * @errno L_EOPNOTSUPP: message is an R-message
  * @errno L_EOPNOTSUPP: message has unknown type
  * @errno L_EBADMSG: message is wrong size for content
- * @errno L_EBADMSG: message contains invalid UTF-8
+ * @errno L_EILSEQ: message contains invalid UTF-8, or the UTF-8 contains a nul-byte
  * @errno L_EBADMSG: message contains a bitfield with unknown bits
  * @errno L_EMSGSIZE: would-be return value overflows SSIZE_MAX
  */
@@ -142,9 +138,9 @@ void lib9p_Tmsg_unmarshal(struct lib9p_ctx *ctx, uint8_t *net_bytes,
  *
  * @param ctx : negotiated protocol parameters, where to record errors
  * @param typ : the message type
- * @param msg : the message to encode
+ * @param msg : the message to encode (`struct lib9p_msg_XXXX` according to `typ`)
  *
- * @return ret_bytes : the buffer to encode to, must be at be at least ctx->max_msg_size bytes
+ * @return ret : the buffer to encode to
  * @return whether there was an error (false=success, true=error)
  *
  * @errno L_ERANGE: reply does not fit in ctx->max_msg_size
diff --git a/lib9p/srv_include/lib9p/srv.h b/lib9p/srv_include/lib9p/srv.h
index eb87d6f..89dc986 100644
--- a/lib9p/srv_include/lib9p/srv.h
+++ b/lib9p/srv_include/lib9p/srv.h
@@ -185,7 +185,7 @@ LO_INTERFACE(lib9p_srv_fio); /*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>*/
 	LO_FUNC(void                       , iofree )                          \
 	/**                                                                    \
 	 * Return the idx-th dirent.  idx will always be either 0 or           \
-	 * prev_idx+1.  A dirrent with an empty name signals EOF.  The string  \
+	 * prev_idx+1.  A dirent with an empty name signals EOF.  The string   \
 	 * must remain valid until the next dread() call or iofree().          \
 	 */                                                                    \
 	LO_FUNC(struct lib9p_srv_dirent    , dread  , struct lib9p_srv_ctx *,  \
diff --git a/libdhcp/tests/test_client.c b/libdhcp/tests/test_client.c
index 90efd01..24b3af6 100644
--- a/libdhcp/tests/test_client.c
+++ b/libdhcp/tests/test_client.c
@@ -24,6 +24,9 @@ LO_IMPLEMENTATION_H(net_packet_conn, struct test_udp, test_udp);
 LO_IMPLEMENTATION_C(net_packet_conn, struct test_udp, test_udp, static);
 
 static ssize_t test_udp_sendto(struct test_udp *LM_UNUSED(self), void *LM_UNUSED(buf), size_t len, struct net_ip4_addr LM_UNUSED(node), uint16_t LM_UNUSED(port)) {
+	static unsigned cnt = 0;    /* number of earlier calls; keeps its value between calls */
+	if (cnt++ % 2 == 0)         /* every other call, starting with the very first one, ... */
+		return -NET_EOTHER; /* ...fails; presumably to exercise the client's send-error handling -- TODO confirm */
 	return len;
 }
 
@@ -35,8 +38,10 @@ static ssize_t test_udp_recvfrom(struct test_udp *LM_UNUSED(self), void *buf, si
 	const void *resp;
 	size_t resp_len;
 	switch (cnt++) {
-	case 0: resp = resp_offer; resp_len = sizeof(resp_offer); break;
-	case 1: resp = resp_ack; resp_len = sizeof(resp_ack); break;
+	case 0: return -NET_EOTHER;
+	case 1: resp = resp_offer; resp_len = sizeof(resp_offer); break;
+	case 2: return -NET_EOTHER;
+	case 3: resp = resp_ack; resp_len = sizeof(resp_ack); break;
 	default: return -NET_ERECV_TIMEOUT;
 	}
 	test_assert(len >= resp_len);
diff --git a/libmisc/fmt.c b/libmisc/fmt.c
index a8baa84..7c18ef5 100644
--- a/libmisc/fmt.c
+++ b/libmisc/fmt.c
@@ -71,9 +71,33 @@ void fmt_print_ptr(lo_interface fmt_dest w, void *ptr) {
  */
 void fmt_print_qbyte(lo_interface fmt_dest w, uint8_t b) {
 	fmt_print_byte(w, '\'');
-	if (' ' <= b && b <= '~') {
-		if (b == '\'' || b == '\\')
-			fmt_print_byte(w, '\\');
+	/*
+	 * Bytes that have a dedicated single-character C escape sequence
+	 * are printed as a backslash followed by that character.  Every
+	 * byte handled by a `case` in the switch below must also be listed
+	 * in this condition, otherwise its `case` is unreachable.
+	 */
+	if (b == '\0' || b == '\a' || b == '\b' ||
+	    b == '\f' || b == '\n' || b == '\r' ||
+	    b == '\t' || b == '\v' || b == '\\' ||
+	    b == '\'' || b == '"'  || b == '?') {
+		fmt_print_byte(w, '\\');
+		switch (b) {
+		case '\0': fmt_print_byte(w, '0'); break;
+		case '\a': fmt_print_byte(w, 'a'); break;
+		case '\b': fmt_print_byte(w, 'b'); break;
+		case '\f': fmt_print_byte(w, 'f'); break;
+		case '\n': fmt_print_byte(w, 'n'); break;
+		case '\r': fmt_print_byte(w, 'r'); break;
+		case '\t': fmt_print_byte(w, 't'); break;
+		case '\v': fmt_print_byte(w, 'v'); break;
+		case '\\': fmt_print_byte(w, '\\'); break;
+		case '\'': fmt_print_byte(w, '\''); break;
+		case '"': fmt_print_byte(w, '"'); break;
+		case '?': fmt_print_byte(w, '?'); break;
+		}
+	} else if (' ' <= b && b <= '~') {
+		/* Any other printable ASCII byte is printed verbatim.  */
 		fmt_print_byte(w, b);
 	} else {
 		fmt_print_byte(w, '\\');
diff --git a/libmisc/include/libmisc/fmt.h b/libmisc/include/libmisc/fmt.h
index c0743ff..6c04d99 100644
--- a/libmisc/include/libmisc/fmt.h
+++ b/libmisc/include/libmisc/fmt.h
@@ -9,6 +9,7 @@
 
 #include <stddef.h> /* for size_t */
 #include <stdint.h> /* for (u)int{n}_t */
+#include <stdlib.h> /* for realloc() */
 
 #include <libmisc/macro.h>
 #include <libmisc/obj.h>
@@ -99,6 +100,11 @@ void fmt_print_bool(lo_interface fmt_dest w, bool b);
 	const char *       : fmt_print_str                                      , \
 	bool               : fmt_print_bool                                     )(w, val)
 
+/** Same as fmt_print(), but usable from inside of fmt_print().  */
+#define fmt_print2(w, ...) do { LM_FOREACH_PARAM2_(_fmt_param2, (w), __VA_ARGS__) } while (0)
+#define _fmt_param2(...) _LM_DEFER2(_fmt_param_indirect)()(__VA_ARGS__)
+#define _fmt_param_indirect() _fmt_param
+
 /* print-to-memory ************************************************************/
 
 struct fmt_buf {
@@ -116,16 +122,25 @@ LO_IMPLEMENTATION_H(fmt_dest, struct fmt_buf, fmt_buf);
 	_w.len;                                                    \
 })
 
-/* justify ********************************************************************/
+#define fmt_asprint(...) ({ /* yields a realloc()ed NUL-terminated string for the caller to free(), or NULL if out of memory */ \
+	struct fmt_buf _w = {};                                                          \
+	lo_interface fmt_dest w = lo_box_fmt_buf_as_fmt_dest(&_w);                       \
+	fmt_print(w, __VA_ARGS__); /* 1st pass into an empty buffer: measures length */  \
+	while (_w.cap <= _w.len) { /* NB: each pass evaluates the arguments again */     \
+		_w.cap = _w.len + 1; _w.len = 0; /* +1 leaves room for the final NUL */  \
+		void *_new = realloc(_w.dat, _w.cap); /* keep old ptr until we know */   \
+		if (!_new) { free(_w.dat); _w.dat = NULL; break; } /* OOM: no leak */    \
+		_w.dat = _new; fmt_print(w, __VA_ARGS__);                                \
+	}                                                                                \
+	if (_w.dat) ((char *)_w.dat)[_w.len] = '\0';                                     \
+	_w.dat;                                                                          \
+})
 
-/* *grubles about not being allowed to nest things* */
-#define _fmt_param_indirect() _fmt_param
-#define _fmt_print2(w, ...) do { LM_FOREACH_PARAM2_(_fmt_param2, (w), __VA_ARGS__) } while (0)
-#define _fmt_param2(...) _LM_DEFER2(_fmt_param_indirect)()(__VA_ARGS__)
+/* justify ********************************************************************/
 
 #define fmt_print_ljust(w, width, fillchar, ...) do { \
 	size_t beg = LO_CALL(w, tell);                \
-	_fmt_print2(w, __VA_ARGS__);                  \
+	fmt_print2(w, __VA_ARGS__);                   \
 	while ((LO_CALL(w, tell) - beg) < width)      \
 		fmt_print_byte(w, fillchar);          \
 } while (0)
@@ -133,10 +148,10 @@ LO_IMPLEMENTATION_H(fmt_dest, struct fmt_buf, fmt_buf);
 #define fmt_print_rjust(w, width, fillchar, ...) do {                          \
 	struct fmt_buf _discard = {};                                          \
 	lo_interface fmt_dest discard = lo_box_fmt_buf_as_fmt_dest(&_discard); \
-	_fmt_print2(discard, __VA_ARGS__);                                     \
+	fmt_print2(discard, __VA_ARGS__);                                      \
 	while (_discard.len++ < width)                                         \
 		fmt_print_byte(w, fillchar);                                   \
-	_fmt_print2(w, __VA_ARGS__);                                           \
+	fmt_print2(w, __VA_ARGS__);                                            \
 } while (0)
 
 void fmt_print_base16_u8_(lo_interface fmt_dest w, uint8_t x);
diff --git a/libmisc/tests/test_fmt.c b/libmisc/tests/test_fmt.c
index 6a6eb7c..64b3b8a 100644
--- a/libmisc/tests/test_fmt.c
+++ b/libmisc/tests/test_fmt.c
@@ -4,6 +4,7 @@
  * SPDX-License-Identifier: AGPL-3.0-or-later
  */
 
+#include <stdlib.h> /* for free() */
 #include <string.h> /* for strcmp(), memcmp(), memset() */
 
 #include <libmisc/fmt.h>
@@ -62,6 +63,26 @@ int main() {
 	test_assert(strcmp(str, "\"hell\"") == 0);
 	memset(str, 0, sizeof(str));
 
+	do_print((qstr, "hello\xFFworld🚧"));
+	test_assert(strcmp(str, "\"hello\\xFFworld\\U0001F6A7\"") == 0);
+	memset(str, 0, sizeof(str));
+
+	do_print((qstr, "¡hello world!"));
+	test_assert(strcmp(str, "\"\\u00A1hello world!\"") == 0);
+	memset(str, 0, sizeof(str));
+
+	do_print((qmem, "🚧", 3)); /* truncated UTF-8 */
+	test_assert(strcmp(str, "\"\\xF0\\x9F\\x9A\"") == 0);
+	memset(str, 0, sizeof(str));
+
+	do_print((qmem, "\xF7\xBF\xBF\xBF", 4)); /* over unicode_max */
+	test_assert(strcmp(str, "\"\\xF7\\xBF\\xBF\\xBF\"") == 0);
+	memset(str, 0, sizeof(str));
+
+	do_print((qmem, "\xC0\xA0", 2)); /* non-optimal (overlong 2-byte) encoding of ' ' */
+	test_assert(strcmp(str, "\"\\xC0\\xA0\"") == 0);
+	memset(str, 0, sizeof(str));
+
 	do_print((byte, 'h'), (byte, 'w'));
 	test_assert(strcmp(str, "hw") == 0);
 	memset(str, 0, sizeof(str));
@@ -70,6 +91,26 @@ int main() {
 	test_assert(strcmp(str, "'h''w'") == 0);
 	memset(str, 0, sizeof(str));
 
+	do_print((qbyte, 0));
+	test_assert(strcmp(str, "'\\0'") == 0);
+	memset(str, 0, sizeof(str));
+
+	do_print((qbyte, '\\'));
+	test_assert(strcmp(str, "'\\\\'") == 0);
+	memset(str, 0, sizeof(str));
+
+	do_print((qbyte, '\''));
+	test_assert(strcmp(str, "'\\''") == 0);
+	memset(str, 0, sizeof(str));
+
+	do_print((qbyte, '\n'));
+	test_assert(strcmp(str, "'\\n'") == 0);
+	memset(str, 0, sizeof(str));
+
+	do_print((qbyte, 0xff));
+	test_assert(strcmp(str, "'\\xFF'") == 0);
+	memset(str, 0, sizeof(str));
+
 	do_print("zero ", 0);
 	test_assert(strcmp(str, "zero 0") == 0);
 	memset(str, 0, sizeof(str));
@@ -166,5 +207,37 @@ int main() {
 	test_assert(strcmp(str, "        1x") == 0);
 	memset(str, 0, sizeof(str));
 
+	do_print((base16_u8_, 1));
+	test_assert(strcmp(str, "0x01") == 0);
+	memset(str, 0, sizeof(str));
+
+	do_print((base16_u16_, 1));
+	test_assert(strcmp(str, "0x0001") == 0);
+	memset(str, 0, sizeof(str));
+
+	do_print((base16_u32_, 1));
+	test_assert(strcmp(str, "0x00000001") == 0);
+	memset(str, 0, sizeof(str));
+
+	do_print((base16_u64_, 1));
+	test_assert(strcmp(str, "0x0000000000000001") == 0);
+	memset(str, 0, sizeof(str));
+
+	do_print((hbyte, 1));
+	test_assert(strcmp(str, "0x01") == 0);
+	memset(str, 0, sizeof(str));
+
+	do_print((hmem, "hello", 6));
+	test_assert(strcmp(str, "{0x68,0x65,0x6C,0x6C,0x6F,0x00}") == 0);
+	memset(str, 0, sizeof(str));
+
+	char *astr = fmt_asprint("");
+	test_assert(astr != NULL && astr[0] == '\0');
+	free(astr);
+
+	astr = fmt_asprint("hello ", (base2, 9), (qstr, " world!\n"));
+	test_assert(strcmp(astr, "hello 1001\" world!\\n\"") == 0);
+	free(astr);
+
 	return 0;
 }
diff --git a/libmisc/utf8.c b/libmisc/utf8.c
index 5f91021..28357f0 100644
--- a/libmisc/utf8.c
+++ b/libmisc/utf8.c
@@ -9,17 +9,21 @@
 void utf8_decode_codepoint(const uint8_t *str, size_t len, uint32_t *ret_ch, uint8_t *ret_chlen) {
 	uint32_t ch;
 	uint8_t  chlen;
-	if      ((str[0] & 0b10000000) == 0b00000000) { ch = str[0] & 0b01111111; chlen = 1; }
-	else if ((str[0] & 0b11100000) == 0b11000000) { ch = str[0] & 0b00011111; chlen = 2; }
-	else if ((str[0] & 0b11110000) == 0b11100000) { ch = str[0] & 0b00001111; chlen = 3; }
-	else if ((str[0] & 0b11111000) == 0b11110000) { ch = str[0] & 0b00000111; chlen = 4; }
+	uint32_t chmin;
+	if      ((str[0] & 0b10000000) == 0b00000000) { ch = str[0] & 0b01111111; chlen = 1; chmin = 0;               } /* bits=7+(0*6)= 7 */
+	else if ((str[0] & 0b11100000) == 0b11000000) { ch = str[0] & 0b00011111; chlen = 2; chmin = UINT32_C(1)<< 7; } /* bits=5+(1*6)=11 */
+	else if ((str[0] & 0b11110000) == 0b11100000) { ch = str[0] & 0b00001111; chlen = 3; chmin = UINT32_C(1)<<11; } /* bits=4+(2*6)=16 */
+	else if ((str[0] & 0b11111000) == 0b11110000) { ch = str[0] & 0b00000111; chlen = 4; chmin = UINT32_C(1)<<16; } /* bits=3+(3*6)=21 */
 	else goto invalid;
-	if ((ch == 0 && chlen != 1) || chlen > len) goto invalid;
+	if (chlen > len)
+		goto invalid;
 	for (uint8_t i = 1; i < chlen; i++) {
-		if ((str[i] & 0b11000000) != 0b10000000) goto invalid;
+		if ((str[i] & 0b11000000) != 0b10000000)
+			goto invalid;
 		ch = (ch << 6) | (str[i] & 0b00111111);
 	}
-	if (ch > 0x10FFFF) goto invalid;
+	if (ch > 0x10FFFF || ch < chmin)
+		goto invalid;
 	*ret_ch    = ch;
 	*ret_chlen = chlen;
 	return;