diff options
-rw-r--r-- | GNUmakefile | 1 | ||||
-rw-r--r-- | lib9p/core_gen/c_validate.py | 2 | ||||
-rw-r--r-- | lib9p/core_generated.c | 12 | ||||
-rw-r--r-- | lib9p/core_include/lib9p/core.h | 12 | ||||
-rw-r--r-- | lib9p/srv_include/lib9p/srv.h | 2 | ||||
-rw-r--r-- | libdhcp/tests/test_client.c | 9 | ||||
-rw-r--r-- | libmisc/fmt.c | 30 | ||||
-rw-r--r-- | libmisc/include/libmisc/fmt.h | 31 | ||||
-rw-r--r-- | libmisc/tests/test_fmt.c | 73 | ||||
-rw-r--r-- | libmisc/utf8.c | 18 |
10 files changed, 154 insertions, 36 deletions
diff --git a/GNUmakefile b/GNUmakefile index 146f725..758c5aa 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -107,6 +107,7 @@ check: # `gcovr` is a Python script that calls `gcov` and merges and post-processes the .gcov files to other formats gcovr_flags = --txt=$(@D)/coverage.txt gcovr_flags += --html=$(@D)/coverage.html --html-details --html-single-page=js-enabled +gcovr_flags += --sort uncovered-number --sort-reverse $(foreach t,$(build_types),$(foreach p,$(platforms),build/$p-$t/check)): build/%/check: build/%/build ./build-aux/gcov-prune $(@D) +cd $(@D) && ctest --output-on-failure $(if $(filter --jobserver-auth=%,$(MAKEFLAGS)),--parallel) diff --git a/lib9p/core_gen/c_validate.py b/lib9p/core_gen/c_validate.py index e7a4017..8997237 100644 --- a/lib9p/core_gen/c_validate.py +++ b/lib9p/core_gen/c_validate.py @@ -67,7 +67,7 @@ def gen_c_validate(versions: set[str], typs: list[idl.UserType]) -> str: "\t\tsize_t len = n;\n" "\t\tVALIDATE_NET_BYTES(len);\n" "\t\tif (!utf8_is_valid_without_nul(&net_bytes[net_offset-len], len))\n" - f'\t\t\treturn lib9p_error(ctx, {c9util.IDENT("ERRNO_L_EBADMSG")}, "message contains invalid UTF-8");\n' + f'\t\t\treturn lib9p_error(ctx, {c9util.IDENT("ERRNO_L_EILSEQ")}, "message contains invalid UTF-8");\n' "\t}\n" ) ret += cutil.macro( diff --git a/lib9p/core_generated.c b/lib9p/core_generated.c index 6e3633f..ad7b210 100644 --- a/lib9p/core_generated.c +++ b/lib9p/core_generated.c @@ -229,12 +229,12 @@ static const lib9p_lock_flags_t lock_flags_masks[LIB9P_VER_NUM] = { return lib9p_error(ctx, LIB9P_ERRNO_L_EBADMSG, "message is too short for content"); \ if (net_offset > net_size) \ return lib9p_error(ctx, LIB9P_ERRNO_L_EBADMSG, "message is too short for content (", net_offset, " > ", net_size, ")"); -#define VALIDATE_NET_UTF8(n) \ - { \ - size_t len = n; \ - VALIDATE_NET_BYTES(len); \ - if (!utf8_is_valid_without_nul(&net_bytes[net_offset-len], len)) \ - return lib9p_error(ctx, LIB9P_ERRNO_L_EBADMSG, "message contains invalid UTF-8"); \ +#define VALIDATE_NET_UTF8(n) \ + { \ + size_t len = n; \ + VALIDATE_NET_BYTES(len); \ + if (!utf8_is_valid_without_nul(&net_bytes[net_offset-len], len)) \ + return lib9p_error(ctx, LIB9P_ERRNO_L_EILSEQ, "message contains invalid UTF-8"); \ } #define RESERVE_HOST_BYTES(n) \ if (__builtin_add_overflow(host_size, n, &host_size)) \ diff --git a/lib9p/core_include/lib9p/core.h b/lib9p/core_include/lib9p/core.h index 4941220..afefa2b 100644 --- a/lib9p/core_include/lib9p/core.h +++ b/lib9p/core_include/lib9p/core.h @@ -98,11 +98,7 @@ void fmt_print_lib9p_msg(lo_interface fmt_dest w, struct lib9p_ctx *ctx, enum li * number may be larger than net_bytes due to (1) struct padding, (2) * array pointers. * - * Emits an error (return -1, set ctx->err_num and ctx->err_msg) if - * either the message type is unknown, or if net_bytes is too short - * for that message type, or if an invalid string (invalid UTF-8, - * contains a nul-byte) is encountered. - * + * @param ctx : negotiated protocol parameters, where to record errors * @param net_bytes : the complete request, starting with the "size[4]" * * @return required size, or -1 on error @@ -110,7 +106,7 @@ void fmt_print_lib9p_msg(lo_interface fmt_dest w, struct lib9p_ctx *ctx, enum li * @errno L_EOPNOTSUPP: message is an R-message * @errno L_EOPNOTSUPP: message has unknown type * @errno L_EBADMSG: message is wrong size for content - * @errno L_EBADMSG: message contains invalid UTF-8 + * @errno L_EILSEQ: message contains invalid UTF-8, or the UTF-8 contains a nul-byte * @errno L_EBADMSG: message contains a bitfield with unknown bits * @errno L_EMSGSIZE: would-be return value overflows SSIZE_MAX */ @@ -142,9 +138,9 @@ void lib9p_Tmsg_unmarshal(struct lib9p_ctx *ctx, uint8_t *net_bytes, * * @param ctx : negotiated protocol parameters, where to record errors * @param typ : the message type - * @param msg : the message to encode + * @param msg : the message to encode (`struct lib9p_msg_XXXX` according to `typ`) * - * @return ret_bytes : the buffer to encode to, must be at be at least ctx->max_msg_size bytes + * @return ret : the buffer to encode to * @return whether there was an error (false=success, true=error) * * @errno L_ERANGE: reply does not fit in ctx->max_msg_size diff --git a/lib9p/srv_include/lib9p/srv.h b/lib9p/srv_include/lib9p/srv.h index eb87d6f..89dc986 100644 --- a/lib9p/srv_include/lib9p/srv.h +++ b/lib9p/srv_include/lib9p/srv.h @@ -185,7 +185,7 @@ LO_INTERFACE(lib9p_srv_fio); /*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>*/ LO_FUNC(void , iofree ) \ /** \ * Return the idx-th dirent. idx will always be either 0 or \ - * prev_idx+1. A dirrent with an empty name signals EOF. The string \ + * prev_idx+1. A dirent with an empty name signals EOF. The string \ * must remain valid until the next dread() call or iofree(). \ */ \ LO_FUNC(struct lib9p_srv_dirent , dread , struct lib9p_srv_ctx *, \ diff --git a/libdhcp/tests/test_client.c b/libdhcp/tests/test_client.c index 90efd01..24b3af6 100644 --- a/libdhcp/tests/test_client.c +++ b/libdhcp/tests/test_client.c @@ -24,6 +24,9 @@ LO_IMPLEMENTATION_H(net_packet_conn, struct test_udp, test_udp); LO_IMPLEMENTATION_C(net_packet_conn, struct test_udp, test_udp, static); static ssize_t test_udp_sendto(struct test_udp *LM_UNUSED(self), void *LM_UNUSED(buf), size_t len, struct net_ip4_addr LM_UNUSED(node), uint16_t LM_UNUSED(port)) { + static unsigned cnt = 0; + if (cnt++ % 2 == 0) + return -NET_EOTHER; return len; } @@ -35,8 +38,10 @@ static ssize_t test_udp_recvfrom(struct test_udp *LM_UNUSED(self), void *buf, si const void *resp; size_t resp_len; switch (cnt++) { - case 0: resp = resp_offer; resp_len = sizeof(resp_offer); break; - case 1: resp = resp_ack; resp_len = sizeof(resp_ack); break; + case 0: return -NET_EOTHER; + case 1: resp = resp_offer; resp_len = sizeof(resp_offer); break; + case 2: return -NET_EOTHER; + case 3: resp = resp_ack; resp_len = sizeof(resp_ack); break; default: return -NET_ERECV_TIMEOUT; } test_assert(len >= resp_len); diff --git a/libmisc/fmt.c b/libmisc/fmt.c index a8baa84..7c18ef5 100644 --- a/libmisc/fmt.c +++ b/libmisc/fmt.c @@ -71,9 +71,33 @@ void fmt_print_ptr(lo_interface fmt_dest w, void *ptr) { */ void fmt_print_qbyte(lo_interface fmt_dest w, uint8_t b) { fmt_print_byte(w, '\''); - if (' ' <= b && b <= '~') { - if (b == '\'' || b == '\\') - fmt_print_byte(w, '\\'); + if (b == '\0' || + b == '\b' || + b == '\f' || + b == '\n' || + b == '\r' || + b == '\t' || + b == '\v' || + b == '\\' || + b == '\'' || + b == '"' || + b == '?') { + fmt_print_byte(w, '\\'); + switch (b) { + case '\0': fmt_print_byte(w, '0'); break; + case '\a': fmt_print_byte(w, 'a'); break; + case '\b': fmt_print_byte(w, 'b'); break; + case '\f': fmt_print_byte(w, 'f'); break; + case '\n': fmt_print_byte(w, 'n'); break; + case '\r': fmt_print_byte(w, 'r'); break; + case '\t': fmt_print_byte(w, 't'); break; + case '\v': fmt_print_byte(w, 'v'); break; + case '\\': fmt_print_byte(w, '\\'); break; + case '\'': fmt_print_byte(w, '\''); break; + case '"': fmt_print_byte(w, '"'); break; + case '?': fmt_print_byte(w, '?'); break; + } + } else if (' ' <= b && b <= '~') { fmt_print_byte(w, b); } else { fmt_print_byte(w, '\\'); diff --git a/libmisc/include/libmisc/fmt.h b/libmisc/include/libmisc/fmt.h index c0743ff..6c04d99 100644 --- a/libmisc/include/libmisc/fmt.h +++ b/libmisc/include/libmisc/fmt.h @@ -9,6 +9,7 @@ #include <stddef.h> /* for size_t */ #include <stdint.h> /* for (u)int{n}_t */ +#include <stdlib.h> /* for realloc() */ #include <libmisc/macro.h> #include <libmisc/obj.h> @@ -99,6 +100,11 @@ void fmt_print_bool(lo_interface fmt_dest w, bool b); const char * : fmt_print_str , \ bool : fmt_print_bool )(w, val) +/** Same as fmt_print(), but usable from inside of fmt_print(). */ +#define fmt_print2(w, ...) do { LM_FOREACH_PARAM2_(_fmt_param2, (w), __VA_ARGS__) } while (0) +#define _fmt_param2(...) _LM_DEFER2(_fmt_param_indirect)()(__VA_ARGS__) +#define _fmt_param_indirect() _fmt_param + /* print-to-memory ************************************************************/ struct fmt_buf { @@ -116,16 +122,25 @@ LO_IMPLEMENTATION_H(fmt_dest, struct fmt_buf, fmt_buf); _w.len; \ }) -/* justify ********************************************************************/ +#define fmt_asprint(...) ({ \ + struct fmt_buf _w = {}; \ + lo_interface fmt_dest w = lo_box_fmt_buf_as_fmt_dest(&_w); \ + fmt_print(w, __VA_ARGS__); \ + while (_w.cap <= _w.len) { \ + _w.cap = _w.len + 1; \ + _w.len = 0; \ + _w.dat = realloc(_w.dat, _w.cap); \ + fmt_print(w, __VA_ARGS__); \ + } \ + ((char *)_w.dat)[_w.len] = '\0'; \ + _w.dat; \ +}) -/* *grubles about not being allowed to nest things* */ -#define _fmt_param_indirect() _fmt_param -#define _fmt_print2(w, ...) do { LM_FOREACH_PARAM2_(_fmt_param2, (w), __VA_ARGS__) } while (0) -#define _fmt_param2(...) _LM_DEFER2(_fmt_param_indirect)()(__VA_ARGS__) +/* justify ********************************************************************/ #define fmt_print_ljust(w, width, fillchar, ...) do { \ size_t beg = LO_CALL(w, tell); \ - _fmt_print2(w, __VA_ARGS__); \ + fmt_print2(w, __VA_ARGS__); \ while ((LO_CALL(w, tell) - beg) < width) \ fmt_print_byte(w, fillchar); \ } while (0) @@ -133,10 +148,10 @@ LO_IMPLEMENTATION_H(fmt_dest, struct fmt_buf, fmt_buf); #define fmt_print_rjust(w, width, fillchar, ...) do { \ struct fmt_buf _discard = {}; \ lo_interface fmt_dest discard = lo_box_fmt_buf_as_fmt_dest(&_discard); \ - _fmt_print2(discard, __VA_ARGS__); \ + fmt_print2(discard, __VA_ARGS__); \ while (_discard.len++ < width) \ fmt_print_byte(w, fillchar); \ - _fmt_print2(w, __VA_ARGS__); \ + fmt_print2(w, __VA_ARGS__); \ } while (0) void fmt_print_base16_u8_(lo_interface fmt_dest w, uint8_t x); diff --git a/libmisc/tests/test_fmt.c b/libmisc/tests/test_fmt.c index 6a6eb7c..64b3b8a 100644 --- a/libmisc/tests/test_fmt.c +++ b/libmisc/tests/test_fmt.c @@ -4,6 +4,7 @@ * SPDX-License-Identifier: AGPL-3.0-or-later */ +#include <stdlib.h> /* for free() */ #include <string.h> /* for strcmp(), memcmp(), memset() */ #include <libmisc/fmt.h> @@ -62,6 +63,26 @@ int main() { test_assert(strcmp(str, "\"hell\"") == 0); memset(str, 0, sizeof(str)); + do_print((qstr, "hello\xFFworld🚧")); + test_assert(strcmp(str, "\"hello\\xFFworld\\U0001F6A7\"") == 0); + memset(str, 0, sizeof(str)); + + do_print((qstr, "¡hello world!")); + test_assert(strcmp(str, "\"\\u00A1hello world!\"") == 0); + memset(str, 0, sizeof(str)); + + do_print((qmem, "🚧", 3)); /* truncated UTF-8 */ + test_assert(strcmp(str, "\"\\xF0\\x9F\\x9A\"") == 0); + memset(str, 0, sizeof(str)); + + do_print((qmem, "\xF7\xBF\xBF\xBF", 4)); /* over unicode_max */ + test_assert(strcmp(str, "\"\\xF7\\xBF\\xBF\\xBF\"") == 0); + memset(str, 0, sizeof(str)); + + do_print((qmem, "\xE0\xA0", 2)); /* non-optimal encoding (of ' ') */ + test_assert(strcmp(str, "\"\\xE0\\xA0\"") == 0); + memset(str, 0, sizeof(str)); + do_print((byte, 'h'), (byte, 'w')); test_assert(strcmp(str, "hw") == 0); memset(str, 0, sizeof(str)); @@ -70,6 +91,26 @@ int main() { test_assert(strcmp(str, "'h''w'") == 0); memset(str, 0, sizeof(str)); + do_print((qbyte, 0)); + test_assert(strcmp(str, "'\\0'") == 0); + memset(str, 0, sizeof(str)); + + do_print((qbyte, '\\')); + test_assert(strcmp(str, "'\\\\'") == 0); + memset(str, 0, sizeof(str)); + + do_print((qbyte, '\'')); + test_assert(strcmp(str, "'\\''") == 0); + memset(str, 0, sizeof(str)); + + do_print((qbyte, '\n')); + test_assert(strcmp(str, "'\\n'") == 0); + memset(str, 0, sizeof(str)); + + do_print((qbyte, 0xff)); + test_assert(strcmp(str, "'\\xFF'") == 0); + memset(str, 0, sizeof(str)); + do_print("zero ", 0); test_assert(strcmp(str, "zero 0") == 0); memset(str, 0, sizeof(str)); @@ -166,5 +207,37 @@ int main() { test_assert(strcmp(str, " 1x") == 0); memset(str, 0, sizeof(str)); + do_print((base16_u8_, 1)); + test_assert(strcmp(str, "0x01") == 0); + memset(str, 0, sizeof(str)); + + do_print((base16_u16_, 1)); + test_assert(strcmp(str, "0x0001") == 0); + memset(str, 0, sizeof(str)); + + do_print((base16_u32_, 1)); + test_assert(strcmp(str, "0x00000001") == 0); + memset(str, 0, sizeof(str)); + + do_print((base16_u64_, 1)); + test_assert(strcmp(str, "0x0000000000000001") == 0); + memset(str, 0, sizeof(str)); + + do_print((hbyte, 1)); + test_assert(strcmp(str, "0x01") == 0); + memset(str, 0, sizeof(str)); + + do_print((hmem, "hello", 6)); + test_assert(strcmp(str, "{0x68,0x65,0x6C,0x6C,0x6F,0x00}") == 0); + memset(str, 0, sizeof(str)); + + char *astr = fmt_asprint(""); + test_assert(astr != NULL && astr[0] == '\0'); + free(astr); + + astr = fmt_asprint("hello ", (base2, 9), (qstr, " world!\n")); + test_assert(strcmp(astr, "hello 1001\" world!\\n\"") == 0); + free(astr); + return 0; } diff --git a/libmisc/utf8.c b/libmisc/utf8.c index 5f91021..28357f0 100644 --- a/libmisc/utf8.c +++ b/libmisc/utf8.c @@ -9,17 +9,21 @@ void utf8_decode_codepoint(const uint8_t *str, size_t len, uint32_t *ret_ch, uint8_t *ret_chlen) { uint32_t ch; uint8_t chlen; - if ((str[0] & 0b10000000) == 0b00000000) { ch = str[0] & 0b01111111; chlen = 1; } - else if ((str[0] & 0b11100000) == 0b11000000) { ch = str[0] & 0b00011111; chlen = 2; } - else if ((str[0] & 0b11110000) == 0b11100000) { ch = str[0] & 0b00001111; chlen = 3; } - else if ((str[0] & 0b11111000) == 0b11110000) { ch = str[0] & 0b00000111; chlen = 4; } + uint32_t chmin; + if ((str[0] & 0b10000000) == 0b00000000) { ch = str[0] & 0b01111111; chlen = 1; chmin = 0; } /* bits=7+(0*6)= 7 */ + else if ((str[0] & 0b11100000) == 0b11000000) { ch = str[0] & 0b00011111; chlen = 2; chmin = UINT32_C(1)<< 7; } /* bits=5+(1*6)=11 */ + else if ((str[0] & 0b11110000) == 0b11100000) { ch = str[0] & 0b00001111; chlen = 3; chmin = UINT32_C(1)<<11; } /* bits=4+(2*6)=16 */ + else if ((str[0] & 0b11111000) == 0b11110000) { ch = str[0] & 0b00000111; chlen = 4; chmin = UINT32_C(1)<<16; } /* bits=3+(3*6)=21 */ else goto invalid; - if ((ch == 0 && chlen != 1) || chlen > len) goto invalid; + if (chlen > len) + goto invalid; for (uint8_t i = 1; i < chlen; i++) { - if ((str[i] & 0b11000000) != 0b10000000) goto invalid; + if ((str[i] & 0b11000000) != 0b10000000) + goto invalid; ch = (ch << 6) | (str[i] & 0b00111111); } - if (ch > 0x10FFFF) goto invalid; + if (ch > 0x10FFFF || ch < chmin) + goto invalid; *ret_ch = ch; *ret_chlen = chlen; return; |