From d559c50a98e65ce889411b46ab108b392907e0f0 Mon Sep 17 00:00:00 2001 From: "Luke T. Shumaker" Date: Tue, 24 Sep 2024 09:51:37 -0600 Subject: wip 9p --- .gitignore | 1 + 3rd-party/linux-errno.txt | 264 ++++++++++----------- 3rd-party/linux-errno.txt.gen | 14 ++ 9p/.editorconfig | 2 +- 9p/.gitignore | 5 +- 9p/9P2000.txt | 2 +- 9p/9p.h | 15 ++ 9p/defs.c | 89 +++++++ 9p/defs.gen | 456 +++++++++++++++++++++++++++++++++++ 9p/defs.h | 77 ++++++ 9p/generate | 541 ------------------------------------------ 9p/internal.h | 114 +++++++++ 9p/linux-errno.h.gen | 15 +- 9p/srv.h | 19 ++ Makefile | 12 +- README.md | 49 ++++ srv9p.c | 2 +- 17 files changed, 982 insertions(+), 695 deletions(-) create mode 100755 3rd-party/linux-errno.txt.gen create mode 100644 9p/9p.h create mode 100644 9p/defs.c create mode 100755 9p/defs.gen create mode 100644 9p/defs.h delete mode 100755 9p/generate create mode 100644 9p/internal.h diff --git a/.gitignore b/.gitignore index 1891c8c..fa00101 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ *.o *.log +.mypy_cache/ /build/ diff --git a/3rd-party/linux-errno.txt b/3rd-party/linux-errno.txt index 34b297d..b44da96 100644 --- a/3rd-party/linux-errno.txt +++ b/3rd-party/linux-errno.txt @@ -1,132 +1,132 @@ -# Generated from linux.git v6.7. DO NOT EDIT! 
-1 EPERM -2 ENOENT -3 ESRCH -4 EINTR -5 EIO -6 ENXIO -7 E2BIG -8 ENOEXEC -9 EBADF -10 ECHILD -11 EAGAIN -12 ENOMEM -13 EACCES -14 EFAULT -15 ENOTBLK -16 EBUSY -17 EEXIST -18 EXDEV -19 ENODEV -20 ENOTDIR -21 EISDIR -22 EINVAL -23 ENFILE -24 EMFILE -25 ENOTTY -26 ETXTBSY -27 EFBIG -28 ENOSPC -29 ESPIPE -30 EROFS -31 EMLINK -32 EPIPE -33 EDOM -34 ERANGE -35 EDEADLK -36 ENAMETOOLONG -37 ENOLCK -38 ENOSYS -39 ENOTEMPTY -40 ELOOP -42 ENOMSG -43 EIDRM -44 ECHRNG -45 EL2NSYNC -46 EL3HLT -47 EL3RST -48 ELNRNG -49 EUNATCH -50 ENOCSI -51 EL2HLT -52 EBADE -53 EBADR -54 EXFULL -55 ENOANO -56 EBADRQC -57 EBADSLT -59 EBFONT -60 ENOSTR -61 ENODATA -62 ETIME -63 ENOSR -64 ENONET -65 ENOPKG -66 EREMOTE -67 ENOLINK -68 EADV -69 ESRMNT -70 ECOMM -71 EPROTO -72 EMULTIHOP -73 EDOTDOT -74 EBADMSG -75 EOVERFLOW -76 ENOTUNIQ -77 EBADFD -78 EREMCHG -79 ELIBACC -80 ELIBBAD -81 ELIBSCN -82 ELIBMAX -83 ELIBEXEC -84 EILSEQ -85 ERESTART -86 ESTRPIPE -87 EUSERS -88 ENOTSOCK -89 EDESTADDRREQ -90 EMSGSIZE -91 EPROTOTYPE -92 ENOPROTOOPT -93 EPROTONOSUPPORT -94 ESOCKTNOSUPPORT -95 EOPNOTSUPP -96 EPFNOSUPPORT -97 EAFNOSUPPORT -98 EADDRINUSE -99 EADDRNOTAVAIL -100 ENETDOWN -101 ENETUNREACH -102 ENETRESET -103 ECONNABORTED -104 ECONNRESET -105 ENOBUFS -106 EISCONN -107 ENOTCONN -108 ESHUTDOWN -109 ETOOMANYREFS -110 ETIMEDOUT -111 ECONNREFUSED -112 EHOSTDOWN -113 EHOSTUNREACH -114 EALREADY -115 EINPROGRESS -116 ESTALE -117 EUCLEAN -118 ENOTNAM -119 ENAVAIL -120 EISNAM -121 EREMOTEIO -122 EDQUOT -123 ENOMEDIUM -124 EMEDIUMTYPE -125 ECANCELED -126 ENOKEY -127 EKEYEXPIRED -128 EKEYREVOKED -129 EKEYREJECTED -130 EOWNERDEAD -131 ENOTRECOVERABLE -132 ERFKILL -133 EHWPOISON +# 3rd-party/linux-errno.txt - Generated from linux.git v6.7. DO NOT EDIT! 
+1 EPERM Operation not permitted +2 ENOENT No such file or directory +3 ESRCH No such process +4 EINTR Interrupted system call +5 EIO I/O error +6 ENXIO No such device or address +7 E2BIG Argument list too long +8 ENOEXEC Exec format error +9 EBADF Bad file number +10 ECHILD No child processes +11 EAGAIN Try again +12 ENOMEM Out of memory +13 EACCES Permission denied +14 EFAULT Bad address +15 ENOTBLK Block device required +16 EBUSY Device or resource busy +17 EEXIST File exists +18 EXDEV Cross-device link +19 ENODEV No such device +20 ENOTDIR Not a directory +21 EISDIR Is a directory +22 EINVAL Invalid argument +23 ENFILE File table overflow +24 EMFILE Too many open files +25 ENOTTY Not a typewriter +26 ETXTBSY Text file busy +27 EFBIG File too large +28 ENOSPC No space left on device +29 ESPIPE Illegal seek +30 EROFS Read-only file system +31 EMLINK Too many links +32 EPIPE Broken pipe +33 EDOM Math argument out of domain of func +34 ERANGE Math result not representable +35 EDEADLK Resource deadlock would occur +36 ENAMETOOLONG File name too long +37 ENOLCK No record locks available +38 ENOSYS Invalid system call number +39 ENOTEMPTY Directory not empty +40 ELOOP Too many symbolic links encountered +42 ENOMSG No message of desired type +43 EIDRM Identifier removed +44 ECHRNG Channel number out of range +45 EL2NSYNC Level 2 not synchronized +46 EL3HLT Level 3 halted +47 EL3RST Level 3 reset +48 ELNRNG Link number out of range +49 EUNATCH Protocol driver not attached +50 ENOCSI No CSI structure available +51 EL2HLT Level 2 halted +52 EBADE Invalid exchange +53 EBADR Invalid request descriptor +54 EXFULL Exchange full +55 ENOANO No anode +56 EBADRQC Invalid request code +57 EBADSLT Invalid slot +59 EBFONT Bad font file format +60 ENOSTR Device not a stream +61 ENODATA No data available +62 ETIME Timer expired +63 ENOSR Out of streams resources +64 ENONET Machine is not on the network +65 ENOPKG Package not installed +66 EREMOTE Object is remote +67 ENOLINK Link has 
been severed +68 EADV Advertise error +69 ESRMNT Srmount error +70 ECOMM Communication error on send +71 EPROTO Protocol error +72 EMULTIHOP Multihop attempted +73 EDOTDOT RFS specific error +74 EBADMSG Not a data message +75 EOVERFLOW Value too large for defined data type +76 ENOTUNIQ Name not unique on network +77 EBADFD File descriptor in bad state +78 EREMCHG Remote address changed +79 ELIBACC Can not access a needed shared library +80 ELIBBAD Accessing a corrupted shared library +81 ELIBSCN .lib section in a.out corrupted +82 ELIBMAX Attempting to link in too many shared libraries +83 ELIBEXEC Cannot exec a shared library directly +84 EILSEQ Illegal byte sequence +85 ERESTART Interrupted system call should be restarted +86 ESTRPIPE Streams pipe error +87 EUSERS Too many users +88 ENOTSOCK Socket operation on non-socket +89 EDESTADDRREQ Destination address required +90 EMSGSIZE Message too long +91 EPROTOTYPE Protocol wrong type for socket +92 ENOPROTOOPT Protocol not available +93 EPROTONOSUPPORT Protocol not supported +94 ESOCKTNOSUPPORT Socket type not supported +95 EOPNOTSUPP Operation not supported on transport endpoint +96 EPFNOSUPPORT Protocol family not supported +97 EAFNOSUPPORT Address family not supported by protocol +98 EADDRINUSE Address already in use +99 EADDRNOTAVAIL Cannot assign requested address +100 ENETDOWN Network is down +101 ENETUNREACH Network is unreachable +102 ENETRESET Network dropped connection because of reset +103 ECONNABORTED Software caused connection abort +104 ECONNRESET Connection reset by peer +105 ENOBUFS No buffer space available +106 EISCONN Transport endpoint is already connected +107 ENOTCONN Transport endpoint is not connected +108 ESHUTDOWN Cannot send after transport endpoint shutdown +109 ETOOMANYREFS Too many references: cannot splice +110 ETIMEDOUT Connection timed out +111 ECONNREFUSED Connection refused +112 EHOSTDOWN Host is down +113 EHOSTUNREACH No route to host +114 EALREADY Operation already in progress 
+115 EINPROGRESS Operation now in progress +116 ESTALE Stale file handle +117 EUCLEAN Structure needs cleaning +118 ENOTNAM Not a XENIX named type file +119 ENAVAIL No XENIX semaphores available +120 EISNAM Is a named type file +121 EREMOTEIO Remote I/O error +122 EDQUOT Quota exceeded +123 ENOMEDIUM No medium found +124 EMEDIUMTYPE Wrong medium type +125 ECANCELED Operation Canceled +126 ENOKEY Required key not available +127 EKEYEXPIRED Key has expired +128 EKEYREVOKED Key has been revoked +129 EKEYREJECTED Key was rejected by service +130 EOWNERDEAD Owner died +131 ENOTRECOVERABLE State not recoverable +132 ERFKILL Operation not possible due to RF-kill +133 EHWPOISON Memory page has hardware error diff --git a/3rd-party/linux-errno.txt.gen b/3rd-party/linux-errno.txt.gen new file mode 100755 index 0000000..b03a67e --- /dev/null +++ b/3rd-party/linux-errno.txt.gen @@ -0,0 +1,14 @@ +#!/bin/sh +# 3rd-party/linux-errno.txt.gen - Generate a listing of Linux kernel errnos +# +# Copyright (C) 2024 Luke T. Shumaker +# SPDX-Licence-Identifier: AGPL-3.0-or-later + +set -e +( + cd "$1" + echo "# ${0%.gen} - Generated from linux.git $(git describe). DO NOT EDIT!" 
+ git ls-files include/uapi/ | grep errno | + xargs sed -nE 's,#\s*define\s+(E[A-Z0-9]+)\s+([0-9]+)\s+/\* (.*) \*/,\2 \1 \3,p' | + sort --numeric-sort +) >"${0%.gen}" diff --git a/9p/.editorconfig b/9p/.editorconfig index 3de59d7..3f04564 100644 --- a/9p/.editorconfig +++ b/9p/.editorconfig @@ -1,3 +1,3 @@ -[{generate,linux-errno.h.gen}] +[{defs.gen,linux-errno.h.gen}] indent_style = space indent_size = 4 diff --git a/9p/.gitignore b/9p/.gitignore index c8e8c58..667e81c 100644 --- a/9p/.gitignore +++ b/9p/.gitignore @@ -1,5 +1,2 @@ -/9P2000.c -/9P2000.h -/9P2000.*.c -/9P2000.*.h +/defs-* /linux-errno.h diff --git a/9p/9P2000.txt b/9p/9P2000.txt index 778673f..5f93cdf 100644 --- a/9p/9P2000.txt +++ b/9p/9P2000.txt @@ -25,7 +25,7 @@ version "9P2000" # data (u32le `n`, then `n` bytes of data) -d = "len[8] len*(dat[1])" +d = "len[4] len*(dat[1])" # string (u16le `n`, then `n` bytes of UTF-8) s = "len[2] len*(utf8[1])" diff --git a/9p/9p.h b/9p/9p.h new file mode 100644 index 0000000..63fbd04 --- /dev/null +++ b/9p/9p.h @@ -0,0 +1,15 @@ +/* 9p/9p.h - TODO + * + * Copyright (C) 2024 Luke T. Shumaker + * SPDX-Licence-Identifier: AGPL-3.0-or-later + */ + +#include "9p/linux-errno.h" +#include "9p/defs.h" +#include "9p/defs-9P2000.h" +/*#include "9p/defs-9P2000.u.h"*/ + +#define P9_TYPECODE_FOR_CTYPE(msg) _Generic((in_msg) \ + _P9_TYPECODE_FOR_CTYPE_9P2000(msg) \ + /* _P9_TYPECODE_FOR_CTYPE_9P2000u(msg) */ \ + ) diff --git a/9p/defs.c b/9p/defs.c new file mode 100644 index 0000000..886a0c1 --- /dev/null +++ b/9p/defs.c @@ -0,0 +1,89 @@ +/* 9p/defs.c - TODO + * + * Copyright (C) 2024 Luke T. 
Shumaker + * SPDX-Licence-Identifier: AGPL-3.0-or-later + */ + +#include /* for PRIu{n} */ +#include /* for va_* */ +#include /* for vsnprintf() */ +#include /* for strncpy() */ + +#include "9p/defs.h" +#include "9p/linux-errno.h" +#include "9p/internal.h" + +static struct version *versions[_P9_VER_CNT] = { + [P9_VER_9P2000] = &version_9P2000, + /* [P9_VER_9P2000u] = &version_9P2000u, */ +}; + +int p9_error(struct p9_ctx *ctx, uint32_t linux_errno, char const *msg) { + strncpy(ctx->err_msg, msg, sizeof(ctx->err_msg)); + ctx->err_msg[sizeof(ctx->err_msg)-1] = '\0'; + ctx->err_num = linux_errno; + return -1; +} + +int p9_errorf(struct p9_ctx *ctx, uint32_t linux_errno, char const *fmt, ...) { + int n; + va_list args; + + va_start(args, fmt); + n = vsnprintf(ctx->err_msg, sizeof(ctx->err_msg), fmt, args); + va_end(args); + if ((size_t)(n+1) < sizeof(ctx->err_msg)) + memset(&ctx->err_msg[n+1], 0, sizeof(ctx->err_msg)-(n+1)); + + ctx->err_num = linux_errno; + + return -1; +} + +size_t p9_unmarshal_size(struct p9_ctx *ctx, uint8_t *net_bytes) { + /* Header */ + uint32_t net_len = decode_u32le(net_bytes); + if (net_len < 7) + return p9_error(ctx, LINUX_EBADMSG, "message is too short"); + uint8_t typ = net_bytes[4]; + uint32_t net_offset = 7; + + /* Body */ + if (!versions[ctx->version]->msgs[typ].unmarshal_extrasize) + return p9_errorf(ctx, LINUX_EOPNOTSUPP, "unknown message type %"PRIu8, typ); + size_t host_size = versions[ctx->version]->msgs[typ].unmarshal_basesize; + if (versions[ctx->version]->msgs[typ].unmarshal_extrasize(net_len, net_bytes, &net_offset, &host_size)) + return p9_error(ctx, LINUX_EBADMSG, "message is too short for content"); + + return host_size; +} + +uint8_t p9_unmarshal(struct p9_ctx *ctx, uint8_t *net_bytes, uint16_t *out_tag, void *out_body) { + /* Header */ + uint8_t typ = net_bytes[4]; + *out_tag = decode_u16le(&net_bytes[5]); + uint32_t net_offset = 7; + + /* Body */ + void *host_extra = out_body + 
versions[ctx->version]->msgs[typ].unmarshal_basesize; + if (versions[ctx->version]->msgs[typ].unmarshal(net_bytes, &net_offset, &host_extra, out_body)) + return p9_error(ctx, LINUX_EBADMSG, "message contains invalid UTF-8"); + + return typ; +} + +uint32_t _p9_marshal(struct p9_ctx *ctx, uint8_t typ, uint16_t msgid, void *body, uint8_t *out_bytes) { + /* Header */ + out_bytes[4] = typ; + encode_u16le(msgid, &out_bytes[5]); + uint32_t net_offset = 7; + + /* Body */ + if (versions[ctx->version]->msgs[typ].marshal(ctx, body, out_bytes, &net_offset)) + return 0; + + /* Header, again */ + encode_u32le(net_offset, out_bytes); + + return net_offset; +} diff --git a/9p/defs.gen b/9p/defs.gen new file mode 100755 index 0000000..1a8dc69 --- /dev/null +++ b/9p/defs.gen @@ -0,0 +1,456 @@ +#!/usr/bin/env python +# 9p/defs.gen - Generate C marshalers/unmarshalers for a .txt file +# defining a 9P protocol variant. +# +# Copyright (C) 2024 Luke T. Shumaker +# SPDX-Licence-Identifier: AGPL-3.0-or-later + +import enum +import os.path +import re + +PROGRAM = "./9p/defs.gen" + +# Parse the *.txt ############################################################## + + +class Atom(enum.Enum): + u8 = 1 + u16 = 2 + u32 = 4 + u64 = 8 + + @property + def name(self) -> str: + return str(self.value) + + @property + def static_size(self) -> int: + return self.value + + +# `msgid/structname = "member1 member2..."` +# `structname = "member1 member2..."` +# `structname += "member1 member2..."` +class Struct: + msgid: int | None = None + name: str + members: list["Member"] + + @property + def static_size(self) -> int | None: + size = 0 + for member in self.members: + msize = member.static_size + if msize is None: + return None + size += msize + return size + + +# `cnt*(name[typ])` +# the `cnt*(...)` wrapper is optional +class Member: + cnt: str | None = None + name: str + typ: Atom | Struct + + @property + def static_size(self) -> int | None: + if self.cnt: + return None + return self.typ.static_size + + 
+re_membername = "(?:[a-zA-Z_][a-zA-Z_0-9]*)" +re_memberspec = ( + f"(?:(?P{re_membername})\\*\\()?(?P{re_membername})\\[(?P.*)\\]\\)?" +) + + +def parse_members( + env: dict[str, Atom | Struct], existing: list[Member], specs: str +) -> list[Member]: + ret = existing + for spec in specs.split(): + m = re.fullmatch(re_memberspec, spec) + if not m: + raise SyntaxError(f"invalid member spec {repr(spec)}") + + member = Member() + + member.name = m.group("name") + if any(x.name == member.name for x in ret): + raise ValueError(f"duplicate member name {repr(member.name)}") + + if m.group("typ") not in env: + raise NameError(f"Unknown type {repr(m.group(2))}") + member.typ = env[m.group("typ")] + + if cnt := m.group("cnt"): + if len(ret) == 0 or ret[-1].name != cnt: + raise ValueError(f"list count must be previous item: {repr(cnt)}") + if not isinstance(ret[-1].typ, Atom): + raise ValueError(f"list count must be an integer type: {repr(cnt)}") + member.cnt = cnt + + ret += [member] + return ret + + +re_version = r'version\s+"(?P[^"]+)"' +re_structspec = ( + r'(?:(?P[0-9]+)/)?(?P\S+)\s*(?P\+?=)\s*"(?P[^"]*)"' +) +re_structspec_cont = r'"(?P[^"]*)"' + + +def parse_file(filename: str) -> tuple[str, list[Struct]]: + version: str | None = None + env: dict[str, Atom | Struct] = { + "1": Atom.u8, + "2": Atom.u16, + "4": Atom.u32, + "8": Atom.u64, + } + with open(filename, "r") as fh: + prev: Struct | None = None + for line in fh: + line = line.split("#", 1)[0].strip() + if not line: + continue + if m := re.fullmatch(re_version, line): + if version: + raise SyntaxError("must have exactly 1 version line") + version = m.group("version") + elif m := re.fullmatch(re_structspec, line): + if m.group("op") == "+=" and m.group("msgid"): + raise SyntaxError("cannot += to a message that is not yet defined") + match m.group("op"): + case "=": + struct = Struct() + if m.group("msgid"): + struct.msgid = int(m.group("msgid")) + struct.name = m.group("name") + struct.members = parse_members(env, 
[], m.group("members")) + env[struct.name] = struct + prev = struct + case "+=": + if m.group("name") not in env: + raise NameError(f"Unknown type {repr(m.group('name'))}") + _struct = env[m.group("name")] + if not isinstance(_struct, Struct): + raise NameError( + f"Type {repr(m.group('name'))} is not a struct" + ) + struct = _struct + struct.members = parse_members( + env, struct.members, m.group("members") + ) + prev = struct + elif m := re.fullmatch(re_structspec_cont, line): + if not prev: + raise SyntaxError("continuation line must come after a struct line") + prev.members = parse_members(env, prev.members, m.group("members")) + else: + raise SyntaxError(f"invalid line {repr(line)}") + if not version: + raise SyntaxError("must have exactly 1 version line") + structs = [x for x in env.values() if isinstance(x, Struct)] + return version, structs + + +# Generate C ################################################################### + + +def c_typename(idprefix: str, typ: Atom | Struct) -> str: + match typ: + case Atom(): + return f"uint{typ.value*8}_t" + case Struct(): + if typ.msgid is not None: + return f"struct {idprefix}msg_{typ.name}" + return f"struct {idprefix}{typ.name}" + case _: + raise ValueError(f"not a type: {typ.__class__.__name__}") + + +def gen_h(txtname: str, idprefix: str, structs: list[Struct]) -> str: + guard = ( + f"_{txtname.replace('.txt', '.h').upper().replace('/', '_').replace('.', '_')}_" + ) + ret = f"""/* Generated by `{PROGRAM} {txtname}`. DO NOT EDIT! 
*/ + +#ifndef {guard} +#define {guard} + +#define {idprefix.upper()}MIN_MSGLEN 7 +""" + ret += """ +/* non-message structs ********************************************************/ + +""" + for struct in structs: + if struct.msgid is not None: + continue + ret += c_typename(idprefix, struct) + " {\n" + typewidth = max( + len(c_typename(idprefix, member.typ)) for member in struct.members + ) + for member in struct.members: + ret += f"\t{c_typename(idprefix, member.typ).ljust(typewidth)} {'*' if member.cnt else ' '}{member.name};\n" + ret += "};\n" + + ret += """ +/* message types **************************************************************/ + +""" + ret += f"enum {idprefix}msg_type {{ /* uint8_t */\n" + namewidth = max(len(msg.name) for msg in structs if msg.msgid is not None) + for msg in structs: + if msg.msgid is None: + continue + ret += f"\t{idprefix.upper()}TYP_{msg.name.ljust(namewidth)} = {msg.msgid},\n" + ret += "};\n" + + ret += """ +/* message structs ************************************************************/ + +""" + for msg in structs: + if msg.msgid is None: + continue + if not msg.members: + ret += c_typename(idprefix, msg) + " {};\n" + continue + ret += c_typename(idprefix, msg) + " {\n" + typewidth = max(len(c_typename(idprefix, member.typ)) for member in msg.members) + for member in msg.members: + ret += f"\t{c_typename(idprefix, member.typ).ljust(typewidth)} {'*' if member.cnt else ' '}{member.name};\n" + ret += "};\n" + + ret += f""" +/* tables *********************************************************************/ + +#define _P9_TYPECODE_FOR_CTYPE(msg) """ + for msg in structs: + if msg.msgid is None: + continue + ret += f", \\\n\t\t{c_typename(idprefix, msg)}: {idprefix.upper()}TYP_{msg.name}" + ret += "\n" + + ret += "\n" + ret += f"#endif /* {guard} */\n" + return ret + + +def gen_c(txtname: str, idprefix: str, structs: list[Struct]) -> str: + txtdir, txtbase = os.path.split(txtname) + header = os.path.join(txtdir, "defs-" + 
txtbase.replace(".txt", ".h")) + ret = f"""/* Generated by `{PROGRAM} {txtname}`. DO NOT EDIT! */ + +#include +#include +#include /* for malloc() */ + +#include "{header}" +#include "9p/internal.h" +""" + + def used(arg: str) -> str: + return arg + + def unused(arg: str) -> str: + return f"UNUSED({arg})" + + # checksize_* ############################################################## + ret += """ +/* checksize_* ****************************************************************/ + +static inline bool _checksize_list(size_t cnt, _checksize_fn_t fn, size_t host_size, + uint32_t net_len, uint8_t *net_bytes, uint32_t *mut_net_offset, size_t *mut_host_extra) { + for (size_t i = 0; i < cnt; i++) + if (__builtin_add_overflow(*mut_host_extra, host_size, mut_host_extra) + || fn(net_len, net_bytes, mut_net_offset, mut_host_extra)) + return true; + return false; +} + +static inline bool checksize_1(uint32_t net_len, uint8_t *UNUSED(net_bytes), uint32_t *mut_net_offset, size_t *UNUSED(mut_host_extra)) { + return __builtin_add_overflow(*mut_net_offset, 1, mut_net_offset) || net_len < *mut_net_offset; +} +static inline bool checksize_2(uint32_t net_len, uint8_t *UNUSED(net_bytes), uint32_t *mut_net_offset, size_t *UNUSED(mut_host_extra)) { + return __builtin_add_overflow(*mut_net_offset, 2, mut_net_offset) || net_len < *mut_net_offset; +} +static inline bool checksize_4(uint32_t net_len, uint8_t *UNUSED(net_bytes), uint32_t *mut_net_offset, size_t *UNUSED(mut_host_extra)) { + return __builtin_add_overflow(*mut_net_offset, 4, mut_net_offset) || net_len < *mut_net_offset; +} +static inline bool checksize_8(uint32_t net_len, uint8_t *UNUSED(net_bytes), uint32_t *mut_net_offset, size_t *UNUSED(mut_host_extra)) { + return __builtin_add_overflow(*mut_net_offset, 8, mut_net_offset) || net_len < *mut_net_offset; +} + +""" + for struct in structs: + inline = ' inline' if struct.msgid is None else '' + argfn = used if struct.members else unused + ret += f"static{inline} bool 
checksize_{struct.name}(uint32_t {argfn('net_len')}, uint8_t *{argfn('net_bytes')}, uint32_t *{argfn('mut_net_offset')}, size_t *{argfn('mut_host_extra')}) {{" + if len(struct.members) == 0: + ret += "\n\treturn false;\n" + ret += "}\n" + continue + prefix0 = "\treturn " + prefix1 = "\t || " + prefix2 = "\t " + prefix = prefix0 + prev_size: int | None = None + for member in struct.members: + if member.cnt is not None: + assert prev_size + ret += f"\n{prefix }_checksize_list(decode_u{prev_size*8}le(&net_bytes[(*mut_net_offset)-{prev_size}]), checksize_{member.typ.name}, sizeof({c_typename(idprefix, member.typ)})," + ret += f"\n{prefix2} net_len, net_bytes, mut_net_offset, mut_host_extra)" + else: + ret += f"\n{prefix}checksize_{member.typ.name}(net_len, net_bytes, mut_net_offset, mut_host_extra)" + prefix = prefix1 + prev_size = member.static_size + if struct.name == "s": + ret += ( + f"\n{prefix}__builtin_add_overflow(*mut_host_extra, 1, mut_host_extra)" + ) + ret += ";\n}\n" + + # unmarshal_* ############################################################## + ret += """ +/* unmarshal_* ****************************************************************/ +/* checksize_XXX() should be called before unmarshal_XXX(). 
*/ + +static inline bool unmarshal_1(uint8_t *net_bytes, uint32_t *mut_net_offset, void **UNUSED(mut_host_extra), uint8_t *out) { + *out = decode_u8le(&net_bytes[*mut_net_offset]); + *mut_net_offset += 1; + return false; +} +static inline bool unmarshal_2(uint8_t *net_bytes, uint32_t *mut_net_offset, void **UNUSED(mut_host_extra), uint16_t *out) { + *out = decode_u16le(&net_bytes[*mut_net_offset]); + *mut_net_offset += 2; + return false; +} +static inline bool unmarshal_4(uint8_t *net_bytes, uint32_t *mut_net_offset, void **UNUSED(mut_host_extra), uint32_t *out) { + *out = decode_u32le(&net_bytes[*mut_net_offset]); + *mut_net_offset += 4; + return false; +} +static inline bool unmarshal_8(uint8_t *net_bytes, uint32_t *mut_net_offset, void **UNUSED(mut_host_extra), uint64_t *out) { + *out = decode_u64le(&net_bytes[*mut_net_offset]); + *mut_net_offset += 8; + return false; +} + +""" + for struct in structs: + argfn = used if struct.members else unused + ret += f"static inline bool unmarshal_{struct.name}(uint8_t *{argfn('net_bytes')}, uint32_t *{argfn('mut_net_offset')}, void **{argfn('mut_host_extra')}, {c_typename(idprefix, struct)} *{argfn('out')}) {{\n" + for member in struct.members: + if member.cnt: + ret += f"\tout->{member.name} = *mut_host_extra;\n" + ret += f"\t*mut_host_extra += sizeof(*out->{member.name}) * out->{member.cnt};\n" + ret += f"\tfor (typeof(out->{member.cnt}) i = 0; i < out->{member.cnt}; i++)\n" + ret += f"\t\tif (unmarshal_{member.typ.name}(net_bytes, mut_net_offset, mut_host_extra, &(out->{member.name}[i])))\n" + ret += f"\t\t\treturn true;\n" + if struct.name == "s": + ret += f"\tif (!is_valid_utf8_without_nul(out->{member.name}, out->{member.cnt}))\n" + ret += f"\t\treturn true;\n" + ret += f"\tout->{member.name}[out->{member.cnt}] = '\\0';\n" + else: + ret += f"\tif (unmarshal_{member.typ.name}(net_bytes, mut_net_offset, mut_host_extra, &(out->{member.name})))\n" + ret += f"\t\treturn true;\n" + ret += "\treturn false;\n" + ret += "}\n" 
+ + # marshal_* ################################################################ + ret += """ +/* marshal_* ******************************************************************/ + +static inline bool marshal_1(struct p9_ctx *ctx, uint8_t val, uint8_t *out_net_bytes, uint32_t *mut_net_offset) { + if (*mut_net_offset + 1 > ctx->max_msg_size) + return true; + out_net_bytes[*mut_net_offset] = val; + *mut_net_offset += 1; + return false; +} +static inline bool marshal_2(struct p9_ctx *ctx, uint16_t val, uint8_t *out_net_bytes, uint32_t *mut_net_offset) { + if (*mut_net_offset + 2 > ctx->max_msg_size) + return true; + encode_u16le(val, &out_net_bytes[*mut_net_offset]); + *mut_net_offset += 2; + return false; +} +static inline bool marshal_4(struct p9_ctx *ctx, uint32_t val, uint8_t *out_net_bytes, uint32_t *mut_net_offset) { + if (*mut_net_offset + 4 > ctx->max_msg_size) + return true; + encode_u32le(val, &out_net_bytes[*mut_net_offset]); + *mut_net_offset += 4; + return false; +} +static inline bool marshal_8(struct p9_ctx *ctx, uint64_t val, uint8_t *out_net_bytes, uint32_t *mut_net_offset) { + if (*mut_net_offset + 8 > ctx->max_msg_size) + return true; + encode_u64le(val, &out_net_bytes[*mut_net_offset]); + *mut_net_offset += 8; + return false; +} +""" + for struct in structs: + argfn = used if struct.members else unused + ret += f"static inline bool marshal_{struct.name}(struct p9_ctx *{argfn('ctx')}, {c_typename(idprefix, struct)} {argfn('val')}, uint8_t *{argfn('out_net_bytes')}, uint32_t *{argfn('mut_net_offset')}) {{\n" + for member in struct.members: + if member.cnt: + ret += f"\tfor (typeof(val.{member.cnt}) i = 0; i < val.{member.cnt}; i++)\n" + ret += f"\t\tif (marshal_{member.typ.name}(ctx, val.{member.name}[i], out_net_bytes, mut_net_offset))\n" + ret += f"\t\t\treturn true;\n" + else: + ret += f"\tif (marshal_{member.typ.name}(ctx, val.{member.name}, out_net_bytes, mut_net_offset))\n" + ret += f"\t\treturn true;\n" + ret += "\treturn false;\n" + ret += "}\n" 
+ + # tables ################################################################### + ret += """ +/* tables *********************************************************************/ + +""" + for msg in structs: + if msg.msgid is None: + continue + ret += f"static bool _unmarshal_{msg.name}(uint8_t *net_bytes, uint32_t *mut_net_offset, void **mut_host_extra, void *out) {{ return unmarshal_{msg.name}(net_bytes, mut_net_offset, mut_host_extra, ({c_typename(idprefix, msg)} *)out); }}\n" + for msg in structs: + if msg.msgid is None: + continue + ret += f"static bool _marshal_{msg.name}(struct p9_ctx *ctx, void *val, uint8_t *out_net_bytes, uint32_t *mut_net_offset) {{ return marshal_{msg.name}(ctx, *(({c_typename(idprefix, msg)} *)val), out_net_bytes, mut_net_offset); }}\n" + ret += "struct version version_9P2000 = {\n" + ret += "\t.msgs = {\n" + for msg in structs: + if msg.msgid is None: + continue + ret += f"\t\t[{idprefix.upper()}TYP_{msg.name}] = {{ .unmarshal_basesize=sizeof({c_typename(idprefix, msg)}), .unmarshal_extrasize=checksize_{msg.name}, .unmarshal=_unmarshal_{msg.name}, .marshal=_marshal_{msg.name} }},\n" + ret += "\t},\n" + ret += "};\n" + + ############################################################################ + return ret + + +################################################################################ + +if __name__ == "__main__": + import sys + + for txtname in sys.argv[1:]: + txtdir, txtbase = os.path.split(txtname) + version, structs = parse_file(txtname) + with open( + os.path.join(txtdir, "defs-" + txtbase.replace(".txt", ".h")), "w" + ) as fh: + fh.write(gen_h(txtname, "p9_", structs)) + with open( + os.path.join(txtdir, "defs-" + txtbase.replace(".txt", ".c")), "w" + ) as fh: + fh.write(gen_c(txtname, "p9_", structs)) diff --git a/9p/defs.h b/9p/defs.h new file mode 100644 index 0000000..20a6411 --- /dev/null +++ b/9p/defs.h @@ -0,0 +1,77 @@ +/* 9p/defs.h - TODO + * + * Copyright (C) 2024 Luke T. 
Shumaker + * SPDX-Licence-Identifier: AGPL-3.0-or-later + */ + +#ifndef _9P_DEFS_H_ +#define _9P_DEFS_H_ + +#include + +#define P9_NOTAG ((uint16_t)~0U) +#define P9_NOFID ((uint32_t)~0U) + +enum p9_version { + /* P9_VER_9P1, */ + P9_VER_9P2000, + /*P9_VER_9P2000_u,*/ + /*P9_VER_9P2000_L,*/ + /*P9_VER_9P2000_e,*/ + _P9_VER_CNT, +}; + +struct p9_ctx { + enum p9_version version; + uint32_t max_msg_size; + + uint32_t err_num; + char err_msg[256]; /* I chose 256 arbitrarily. */ +}; + +/** Write a static error into ctx, return -1. */ +int p9_error(struct p9_ctx *ctx, uint32_t linux_errno, char const *msg); +/** Write a printf-style error into ctx, return -1. */ +int p9_errorf(struct p9_ctx *ctx, uint32_t linux_errno, char const *fmt, ...); + +/** + * Return how much space the message at net_bytes will take when + * unmarshaled. This number may be larger than net_bytes due to (1) + * struct padding, (2) nul-terminator bytes for strings. + * + * Emits an error (return -1, set ctx->err_num and ctx->err_msg) if + * either the message type is unknown or if net_bytes is too short for + * that message type. + * + * @param net_bytes : the complete request, starting with the "size[4]" + * @return required size, or -1 on error + */ +size_t p9_unmarshal_size(struct p9_ctx *ctx, uint8_t *net_bytes); + +/** + * Unmarshal the 9P message `net_bytes` into the C struct `out_body`. + * + * Emits an error (return -1, set ctx->err_num and ctx->err_msg) if a + * string contains invalid UTF-8 or a nul-byte. + * + * @param net_bytes : the complete message, starting with the "size[4]" + * @param out_tag : the message-ID tag + * @param out_body : the message body, must be at least p9_unmarshal_size() bytes + * @return the message type, or -1 on error + */ +uint8_t p9_unmarshal(struct p9_ctx *ctx, uint8_t *net_bytes, uint16_t *out_tag, void *out_body); + +/** + * Marshal a `struct p9_msg_{type}` structure into a byte-array. 
+ * + * @param struct p9_ctx *ctx : a request context + * @param uint16_t msgid : the message-ID tag + * @param struct p9_msg_{type} msg : the message to encode + * + * @param uint8_t *out_bytes : the buffer to encode to, must be at least ctx->max_msg_size bytes + * @return uint32_t : the encoded length, or 0 on error + */ +#define p9_marshal(ctx, msgid, msg, out_bytes) _p9_marshal(ctx, P9_TYPECODE_FOR_CTYPE(msg), msgid, &(msg), out_bytes) +uint32_t _p9_marshal(struct p9_ctx *ctx, uint8_t typ, uint16_t msgid, void *body, uint8_t *out_bytes); + +#endif /* _9P_DEFS_H_ */ diff --git a/9p/generate b/9p/generate deleted file mode 100755 index 6456609..0000000 --- a/9p/generate +++ /dev/null @@ -1,541 +0,0 @@ -#!/usr/bin/env python -# 9p/generate - Generate C marshalers/unmarshalers for a .txt file -# defining a 9P protocol variant. -# -# Copyright (C) 2024 Luke T. Shumaker -# SPDX-Licence-Identifier: AGPL-3.0-or-later - -import enum -import re - -PROGRAM = "./9p/generate" - -# Parse the *.txt ############################################################## - - -class Atom(enum.Enum): - u8 = 1 - u16 = 2 - u32 = 4 - u64 = 8 - - @property - def name(self) -> str: - return str(self.value) - - @property - def static_size(self) -> int: - return self.value - - -# `msgid/structname = "member1 member2..."` -# `structname = "member1 member2..."` -# `structname += "member1 member2..."` -class Struct: - msgid: int | None = None - name: str - members: list["Member"] - - @property - def static_size(self) -> int | None: - size = 0 - for member in self.members: - msize = member.static_size - if msize is None: - return None - size += msize - return size - - -# `cnt*(name[typ])` -# the `cnt*(...)` wrapper is optional -class Member: - cnt: str | None = None - name: str - typ: Atom | Struct - - @property - def static_size(self) -> int | None: - if self.cnt: - return None - return self.typ.static_size - - -re_membername = "(?:[a-zA-Z_][a-zA-Z_0-9]*)" -re_memberspec = ( - 
f"(?:(?P{re_membername})\\*\\()?(?P{re_membername})\\[(?P.*)\\]\\)?" -) - - -def parse_members( - env: dict[str, Atom | Struct], existing: list[Member], specs: str -) -> list[Member]: - ret = existing - for spec in specs.split(): - m = re.fullmatch(re_memberspec, spec) - if not m: - raise SyntaxError(f"invalid member spec {repr(spec)}") - - member = Member() - - member.name = m.group("name") - if any(x.name == member.name for x in ret): - raise ValueError(f"duplicate member name {repr(member.name)}") - - if m.group("typ") not in env: - raise NameError(f"Unknown type {repr(m.group(2))}") - member.typ = env[m.group("typ")] - - if cnt := m.group("cnt"): - if len(ret) == 0 or ret[-1].name != cnt: - raise ValueError(f"list count must be previous item: {repr(cnt)}") - if not isinstance(ret[-1].typ, Atom): - raise ValueError(f"list count must be an integer type: {repr(cnt)}") - member.cnt = cnt - - ret += [member] - return ret - - -re_version = r'version\s+"(?P[^"]+)"' -re_structspec = ( - r'(?:(?P[0-9]+)/)?(?P\S+)\s*(?P\+?=)\s*"(?P[^"]*)"' -) -re_structspec_cont = r'"(?P[^"]*)"' - - -def parse_file(filename: str) -> tuple[str, list[Struct]]: - version: str | None = None - env: dict[str, Atom | Struct] = { - "1": Atom.u8, - "2": Atom.u16, - "4": Atom.u32, - "8": Atom.u64, - } - with open(filename, "r") as fh: - prev: Struct | None = None - for line in fh: - line = line.split("#", 1)[0].strip() - if not line: - continue - if m := re.fullmatch(re_version, line): - if version: - raise SyntaxError("must have exactly 1 version line") - version = m.group("version") - elif m := re.fullmatch(re_structspec, line): - if m.group("op") == "+=" and m.group("msgid"): - raise SyntaxError("cannot += to a message that is not yet defined") - match m.group("op"): - case "=": - struct = Struct() - if m.group("msgid"): - struct.msgid = int(m.group("msgid")) - struct.name = m.group("name") - struct.members = parse_members(env, [], m.group("members")) - env[struct.name] = struct - prev = struct 
- case "+=": - if m.group("name") not in env: - raise NameError(f"Unknown type {repr(m.group('name'))}") - _struct = env[m.group("name")] - if not isinstance(_struct, Struct): - raise NameError( - f"Type {repr(m.group('name'))} is not a struct" - ) - struct = _struct - struct.members = parse_members( - env, struct.members, m.group("members") - ) - prev = struct - elif m := re.fullmatch(re_structspec_cont, line): - if not prev: - raise SyntaxError("continuation line must come after a struct line") - prev.members = parse_members(env, prev.members, m.group("members")) - else: - raise SyntaxError(f"invalid line {repr(line)}") - if not version: - raise SyntaxError("must have exactly 1 version line") - structs = [x for x in env.values() if isinstance(x, Struct)] - return version, structs - - -# Generate C ################################################################### - - -def c_typename(idprefix: str, typ: Atom | Struct) -> str: - match typ: - case Atom(): - return f"uint{typ.value*8}_t" - case Struct(): - if typ.msgid is not None: - return f"struct {idprefix}msg_{typ.name}" - return f"struct {idprefix}{typ.name}" - case _: - raise ValueError(f"not a type: {typ.__class__.__name__}") - - -def gen_h(txtname: str, idprefix: str, structs: list[Struct]) -> str: - guard = ( - "_" - + txtname.replace(".txt", ".h").upper().replace("/", "_").replace(".", "_") - + "_" - ) - ret = f"""/* Generated by `{PROGRAM} {txtname}`. DO NOT EDIT! 
*/ - -#ifndef {guard} -#define {guard} - -#define {idprefix.upper()}MIN_MSGLEN 7 -""" - ret += """ -/* non-message structs ********************************************************/ - -""" - for struct in structs: - if struct.msgid is not None: - continue - ret += c_typename(idprefix, struct) + " {\n" - typewidth = max( - len(c_typename(idprefix, member.typ)) for member in struct.members - ) - for member in struct.members: - ret += f"\t{c_typename(idprefix, member.typ).ljust(typewidth)} {'*' if member.cnt else ' '}{member.name};\n" - ret += "};\n" - - ret += """ -/* message types **************************************************************/ - -""" - ret += f"enum {idprefix}msg_type {{ /* uint8_t */\n" - namewidth = max(len(msg.name) for msg in structs if msg.msgid is not None) - for msg in structs: - if msg.msgid is None: - continue - ret += f"\t{idprefix.upper()}TYP_{msg.name.ljust(namewidth)} = {msg.msgid},\n" - ret += "};\n" - - ret += """ -/* message structs ************************************************************/ - -""" - for msg in structs: - if msg.msgid is None: - continue - if not msg.members: - ret += c_typename(idprefix, msg) + " {};\n" - continue - ret += c_typename(idprefix, msg) + " {\n" - typewidth = max(len(c_typename(idprefix, member.typ)) for member in msg.members) - for member in msg.members: - ret += f"\t{c_typename(idprefix, member.typ).ljust(typewidth)} {'*' if member.cnt else ' '}{member.name};\n" - ret += "};\n" - - ret += f""" -/* functions ******************************************************************/ - -/** - * @param net_bytes : the complete request, starting with the "size[4]" - * @param out_tag : the message-ID tag - * @param out_body : a pointer that get set to the parsed body, whose - * type is known by the return value, will need to - * be free()d - * @return -{idprefix.upper()}E{{error}} on error, {idprefix.upper()}TYP_{{type}} on success - */ -int {idprefix}unmarshal_msg(uint8_t *net_bytes, uint16_t *out_tag, void 
**out_body); - -/** - * @param uint16_t in_msgid : the message-ID tag - * @param struct {idprefix}msg_{{type}} in_msg : the message to encode - * @param uint8_t *out_buf : the buffer to encode to - * @return uint32_t : the encoded length - */ -#define {idprefix}marshal_msg(in_msgid, in_msg, out_buf) _Generic((in_msg)""" - for msg in structs: - if msg.msgid is None: - continue - ret += f", \\\n\t\t{c_typename(idprefix, msg)}: _{idprefix}marshal_{msg.name}(in_msgid, in_msg, out_buf)" - ret += "\\\n\t)(in_msg)\n" - for msg in structs: - if msg.msgid is None: - continue - ret += f"uint32_t _{idprefix}marshal_{msg.name}(uint16_t in_msgid, {c_typename(idprefix, msg)} in_msg, uint8_t *out_buf);\n" - - ret += "\n" - ret += f"#endif /* {guard} */\n" - return ret - - -def gen_c(txtname: str, idprefix: str, structs: list[Struct]) -> str: - ret = f"""/* Generated by `{PROGRAM} {txtname}`. DO NOT EDIT! */ - -#include -#include -#include /* for malloc() */ - -#include "{txtname.replace('.txt', '.h')}" -""" - - # basic utilities ########################################################## - ret += """ -/* basic utilities ************************************************************/ - -#define UNUSED(name) /* name __attribute__ ((unused)) */ - -static inline uint8_t decode_u8le(uint8_t *in) { - return in[0]; -} -static inline uint16_t decode_u16le(uint8_t *in) { - return (((uint16_t)(in[0])) << 0) - | (((uint16_t)(in[1])) << 8) - ; -} -static inline uint32_t decode_u32le(uint8_t *in) { - return (((uint32_t)(in[0])) << 0) - | (((uint32_t)(in[1])) << 8) - | (((uint32_t)(in[2])) << 16) - | (((uint32_t)(in[3])) << 24) - ; -} -static inline uint64_t decode_u64le(uint8_t *in) { - return (((uint64_t)(in[0])) << 0) - | (((uint64_t)(in[1])) << 8) - | (((uint64_t)(in[2])) << 16) - | (((uint64_t)(in[3])) << 24) - | (((uint64_t)(in[4])) << 32) - | (((uint64_t)(in[5])) << 40) - | (((uint64_t)(in[6])) << 48) - | (((uint64_t)(in[7])) << 56) - ; -} - -static inline void encode_u8le(uint8_t in, 
uint8_t *out) { - out[0] = in; -} -static inline void encode_u16le(uint16_t in, uint8_t *out) { - out[0] = (uint8_t)((in >> 0) & 0xFF); - out[1] = (uint8_t)((in >> 8) & 0xFF); -} -static inline void encode_u32le(uint32_t in, uint8_t *out) { - out[0] = (uint8_t)((in >> 0) & 0xFF); - out[1] = (uint8_t)((in >> 8) & 0xFF); - out[2] = (uint8_t)((in >> 16) & 0xFF); - out[3] = (uint8_t)((in >> 24) & 0xFF); -} -static inline void encode_u64le(uint64_t in, uint8_t *out) { - out[0] = (uint8_t)((in >> 0) & 0xFF); - out[1] = (uint8_t)((in >> 8) & 0xFF); - out[2] = (uint8_t)((in >> 16) & 0xFF); - out[3] = (uint8_t)((in >> 24) & 0xFF); - out[4] = (uint8_t)((in >> 32) & 0xFF); - out[5] = (uint8_t)((in >> 40) & 0xFF); - out[6] = (uint8_t)((in >> 48) & 0xFF); - out[7] = (uint8_t)((in >> 56) & 0xFF); -} -""" - - def used(arg: str) -> str: - return arg - - def unused(arg: str) -> str: - return f"UNUSED({arg})" - - # checksize_* ############################################################## - ret += """ -/* checksize_* ****************************************************************/ - -typedef bool (*_checksize_fn_t)(uint32_t net_len, uint8_t *net_bytes, uint32_t *mut_net_offset, size_t *mut_host_extra); -static inline bool _checksize_list(size_t cnt, _checksize_fn_t fn, size_t host_size, - uint32_t net_len, uint8_t *net_bytes, uint32_t *mut_net_offset, size_t *mut_host_extra) { - for (size_t i = 0; i < cnt; i++) - if (__builtin_add_overflow(*mut_host_extra, host_size, mut_host_extra) - || fn(net_len, net_bytes, mut_net_offset, mut_host_extra)) - return true; - return false; -} -static inline bool checksize_1(uint32_t net_len, uint8_t *UNUSED(net_bytes), uint32_t *mut_net_offset, size_t *UNUSED(mut_host_extra)) { - return __builtin_add_overflow(*mut_net_offset, 1, mut_net_offset) || net_len < *mut_net_offset; -} -static inline bool checksize_2(uint32_t net_len, uint8_t *UNUSED(net_bytes), uint32_t *mut_net_offset, size_t *UNUSED(mut_host_extra)) { - return 
__builtin_add_overflow(*mut_net_offset, 2, mut_net_offset) || net_len < *mut_net_offset; -} -static inline bool checksize_4(uint32_t net_len, uint8_t *UNUSED(net_bytes), uint32_t *mut_net_offset, size_t *UNUSED(mut_host_extra)) { - return __builtin_add_overflow(*mut_net_offset, 4, mut_net_offset) || net_len < *mut_net_offset; -} -static inline bool checksize_8(uint32_t net_len, uint8_t *UNUSED(net_bytes), uint32_t *mut_net_offset, size_t *UNUSED(mut_host_extra)) { - return __builtin_add_overflow(*mut_net_offset, 8, mut_net_offset) || net_len < *mut_net_offset; -} -""" - for struct in structs: - argfn = used if struct.members else unused - ret += f"static inline bool checksize_{struct.name}(uint32_t {argfn('net_len')}, uint8_t *{argfn('net_bytes')}, uint32_t *{argfn('mut_net_offset')}, size_t *{argfn('mut_host_extra')}) {{\n" - if len(struct.members) == 0: - ret += "\treturn false;\n" - ret += "}\n" - continue - prefix0 = "\treturn " - prefix1 = "\t || " - prefix2 = "\t " - prefix = prefix0 - prev_size: int | None = None - for member in struct.members: - if member.cnt is not None: - assert prev_size - ret += f"\n{prefix }_checksize_list(decode_u{prev_size*8}le(&net_bytes[(*mut_net_offset)-{prev_size}]), checksize_{member.typ.name}, sizeof({c_typename(idprefix, member.typ)})," - ret += f"\n{prefix2} net_len, net_bytes, mut_net_offset, mut_host_extra)" - else: - ret += f"\n{prefix}checksize_{member.typ.name}(net_len, net_bytes, mut_net_offset, mut_host_extra)" - prefix = prefix1 - prev_size = member.static_size - if struct.name == "s": - ret += ( - f"\n{prefix}__builtin_add_overflow(*mut_host_extra, 1, mut_host_extra)" - ) - ret += ";\n}\n" - - # unmarshal_* ############################################################## - ret += """ -/* unmarshal_* ****************************************************************/ -/* checksize_XXX() should be called before unmarshal_XXX(). 
*/ - -static inline void unmarshal_1(uint8_t *net_bytes, uint32_t *mut_net_offset, void **UNUSED(mut_host_extra), uint8_t *out) { - *out = decode_u8le(&net_bytes[*mut_net_offset]); - *mut_net_offset += 1; -} -static inline void unmarshal_2(uint8_t *net_bytes, uint32_t *mut_net_offset, void **UNUSED(mut_host_extra), uint16_t *out) { - *out = decode_u16le(&net_bytes[*mut_net_offset]); - *mut_net_offset += 2; -} -static inline void unmarshal_4(uint8_t *net_bytes, uint32_t *mut_net_offset, void **UNUSED(mut_host_extra), uint32_t *out) { - *out = decode_u32le(&net_bytes[*mut_net_offset]); - *mut_net_offset += 4; -} -static inline void unmarshal_8(uint8_t *net_bytes, uint32_t *mut_net_offset, void **UNUSED(mut_host_extra), uint64_t *out) { - *out = decode_u64le(&net_bytes[*mut_net_offset]); - *mut_net_offset += 8; -} -""" - for struct in structs: - argfn = used if struct.members else unused - ret += f"static inline void unmarshal_{struct.name}(uint8_t *{argfn('net_bytes')}, uint32_t *{argfn('mut_net_offset')}, void **{argfn('mut_host_extra')}, {c_typename(idprefix, struct)} *{argfn('out')}) {{" - if len(struct.members) == 0: - ret += "}\n" - continue - ret += "\n" - for member in struct.members: - if member.cnt: - ret += f"\tout->{member.name} = *mut_host_extra;\n" - ret += f"\t*mut_host_extra += sizeof(*out->{member.name}) * out->{member.cnt};\n" - ret += f"\tfor (typeof(out->{member.cnt}) i = 0; i < out->{member.cnt}; i++)\n" - ret += f"\t\tunmarshal_{member.typ.name}(net_bytes, mut_net_offset, mut_host_extra, &(out->{member.name}[i]));\n" - else: - ret += f"\tunmarshal_{member.typ.name}(net_bytes, mut_net_offset, mut_host_extra, &(out->{member.name}));\n" - ret += "}\n" - - # unmarshal_msg ############################################################ - ret += f""" -/* unmarshal_msg **************************************************************/ - -int {idprefix}unmarshal_msg(uint8_t *net_bytes, uint16_t *out_tag, void **out_body) {{ - uint32_t net_len = 
decode_u32le(net_bytes); - if (net_len < 7) - return -LINUX_EWRONGSIZE; - uint8_t typ = net_bytes[4]; - *out_tag = decode_u16le(&net_bytes[5]); - - uint32_t net_offset = 7; - size_t host_size; - void *host_extra; - switch (typ) {{ -""" - for msg in structs: - if msg.msgid is None: - continue - ret += f"\tcase {idprefix.upper()}TYP_{msg.name}:\n" - ret += f"\t\thost_size = sizeof({c_typename(idprefix, msg)});\n" - ret += f"\t\tif (checksize_{msg.name}(net_len, net_bytes, &net_offset, &host_size))\n" - ret += "\t\t\treturn -LINUX_EWRONGSIZE;\n" - ret += "\n" - ret += "\t\t*out_body = malloc(host_size);" - ret += "\n" - ret += "\t\tnet_offset = 7;\n" - ret += f"\t\thost_extra = *out_body + sizeof({c_typename(idprefix, msg)});\n" - ret += f"\t\tunmarshal_{msg.name}(net_bytes, &net_offset, &host_extra, *out_body);\n" - ret += "\n" - ret += "\t\tbreak;\n" - ret += """ - default: - return -LINUX_EOPNOTSUPP; - } - return typ; -} -""" - - # marshal_* ################################################################ - ret += """ -/* marshal_* ******************************************************************/ - -static inline void marshal_1(uint8_t val, uint8_t *out_net_bytes, uint32_t *mut_net_offset) { - out_net_bytes[*mut_net_offset] = val; - *mut_net_offset += 1; -} -static inline void marshal_2(uint16_t val, uint8_t *out_net_bytes, uint32_t *mut_net_offset) { - encode_u16le(val, &out_net_bytes[*mut_net_offset]); - *mut_net_offset += 2; -} -static inline void marshal_4(uint32_t val, uint8_t *out_net_bytes, uint32_t *mut_net_offset) { - encode_u32le(val, &out_net_bytes[*mut_net_offset]); - *mut_net_offset += 4; -} -static inline void marshal_8(uint64_t val, uint8_t *out_net_bytes, uint32_t *mut_net_offset) { - encode_u64le(val, &out_net_bytes[*mut_net_offset]); - *mut_net_offset += 8; -} -""" - for struct in structs: - argfn = used if struct.members else unused - ret += f"static inline void marshal_{struct.name}({c_typename(idprefix, struct)} {argfn('val')}, uint8_t 
*{argfn('out_net_bytes')}, uint32_t *{argfn('mut_net_offset')}) {{" - if len(struct.members) == 0: - ret += "}\n" - continue - ret += "\n" - for member in struct.members: - if member.cnt: - ret += f"\tfor (typeof(val.{member.cnt}) i = 0; i < val.{member.cnt}; i++)\n" - ret += f"\t\tmarshal_{member.typ.name}(val.{member.name}[i], out_net_bytes, mut_net_offset);\n" - else: - ret += f"\tmarshal_{member.typ.name}(val.{member.name}, out_net_bytes, mut_net_offset);\n" - ret += "}\n" - - # _marshal_msg_* ########################################################### - ret += """ -/* _marshal_msg_* *************************************************************/ - -""" - for msg in structs: - if msg.msgid is None: - continue - ret += f"uint32_t _{idprefix}marshal_{msg.name}(uint16_t in_msgid, {c_typename(idprefix, msg)} in_msg, uint8_t *out_buf) {{\n" - ret += "\tuint32_t offset = 4;\n" - ret += f"\tmarshal_1({idprefix.upper()}TYP_{msg.name}, out_buf, &offset);\n" - ret += "\tmarshal_2(in_msgid, out_buf, &offset);\n" - ret += f"\tmarshal_{msg.name}(in_msg, out_buf, &offset);\n" - ret += "\tencode_u32le(offset, out_buf);\n" - ret += "\treturn offset;\n" - ret += "}\n" - ret += "\n" - - ############################################################################ - return ret - - -################################################################################ - -if __name__ == "__main__": - import sys - - for txtname in sys.argv[1:]: - version, structs = parse_file(txtname) - with open(txtname.replace(".txt", ".h"), "w") as fh: - fh.write(gen_h(txtname, "p9_", structs)) - with open(txtname.replace(".txt", ".c"), "w") as fh: - fh.write(gen_c(txtname, "p9_", structs)) diff --git a/9p/internal.h b/9p/internal.h new file mode 100644 index 0000000..1bc0e92 --- /dev/null +++ b/9p/internal.h @@ -0,0 +1,114 @@ +/* 9p/internal.h - TODO + * + * Copyright (C) 2024 Luke T. 
Shumaker + * SPDX-Licence-Identifier: AGPL-3.0-or-later + */ + +#ifndef _9P_INTERNAL_H_ +#define _9P_INTERNAL_H_ + +#include +#include + +#include "9p/defs.h" + +/* C language *****************************************************************/ + +#define UNUSED(name) /* name __attribute__ ((unused)) */ + +/* vtables ********************************************************************/ + +typedef bool (*_checksize_fn_t)(uint32_t net_len, uint8_t *net_bytes, uint32_t *mut_net_offset, size_t *mut_host_extra); +typedef bool (*_unmarshal_fn_t)(uint8_t *net_bytes, uint32_t *mut_net_offset, void **mut_host_extra, void *out); +typedef bool (*_marshal_fn_t)(struct p9_ctx *ctx, void *val, uint8_t *out_net_bytes, uint32_t *mut_net_offset); + +struct msg_vtable { + size_t unmarshal_basesize; + _checksize_fn_t unmarshal_extrasize; + _unmarshal_fn_t unmarshal; + _marshal_fn_t marshal; +}; + +struct version { + struct msg_vtable msgs[0xFF]; +}; + +extern struct version version_9P2000; +/*extern struct version version_9P2000u; */ + +/* unmarshal utilities ********************************************************/ + +static inline uint8_t decode_u8le(uint8_t *in) { + return in[0]; +} +static inline uint16_t decode_u16le(uint8_t *in) { + return (((uint16_t)(in[0])) << 0) + | (((uint16_t)(in[1])) << 8) + ; +} +static inline uint32_t decode_u32le(uint8_t *in) { + return (((uint32_t)(in[0])) << 0) + | (((uint32_t)(in[1])) << 8) + | (((uint32_t)(in[2])) << 16) + | (((uint32_t)(in[3])) << 24) + ; +} +static inline uint64_t decode_u64le(uint8_t *in) { + return (((uint64_t)(in[0])) << 0) + | (((uint64_t)(in[1])) << 8) + | (((uint64_t)(in[2])) << 16) + | (((uint64_t)(in[3])) << 24) + | (((uint64_t)(in[4])) << 32) + | (((uint64_t)(in[5])) << 40) + | (((uint64_t)(in[6])) << 48) + | (((uint64_t)(in[7])) << 56) + ; +} + +static inline bool is_valid_utf8_without_nul(uint8_t *str, size_t len) { + uint8_t mask; + uint8_t chlen; + for (size_t pos = 0; pos < len;) { + if ((str[pos] & 0b10000000) == 
0b00000000) { mask = 0b01111111; chlen = 1; } + else if ((str[pos] & 0b11100000) == 0b11000000) { mask = 0b00011111; chlen = 2; } + else if ((str[pos] & 0b11110000) == 0b11100000) { mask = 0b00001111; chlen = 3; } + else if ((str[pos] & 0b11111000) == 0b11110000) { mask = 0b00000111; chlen = 4; } + else return false; + if (pos + chlen > len || (str[pos] & mask) == 0) return false; + switch (chlen) { + case 4: if ((str[pos+3] & 0b11000000) != 0b10000000) return false; /* fallthrough */ + case 3: if ((str[pos+2] & 0b11000000) != 0b10000000) return false; /* fallthrough */ + case 2: if ((str[pos+1] & 0b11000000) != 0b10000000) return false; /* fallthrough */ + } + pos += chlen; + } + return true; +} + +/* marshal utilities **********************************************************/ + +static inline void encode_u8le(uint8_t in, uint8_t *out) { + out[0] = in; +} +static inline void encode_u16le(uint16_t in, uint8_t *out) { + out[0] = (uint8_t)((in >> 0) & 0xFF); + out[1] = (uint8_t)((in >> 8) & 0xFF); +} +static inline void encode_u32le(uint32_t in, uint8_t *out) { + out[0] = (uint8_t)((in >> 0) & 0xFF); + out[1] = (uint8_t)((in >> 8) & 0xFF); + out[2] = (uint8_t)((in >> 16) & 0xFF); + out[3] = (uint8_t)((in >> 24) & 0xFF); +} +static inline void encode_u64le(uint64_t in, uint8_t *out) { + out[0] = (uint8_t)((in >> 0) & 0xFF); + out[1] = (uint8_t)((in >> 8) & 0xFF); + out[2] = (uint8_t)((in >> 16) & 0xFF); + out[3] = (uint8_t)((in >> 24) & 0xFF); + out[4] = (uint8_t)((in >> 32) & 0xFF); + out[5] = (uint8_t)((in >> 40) & 0xFF); + out[6] = (uint8_t)((in >> 48) & 0xFF); + out[7] = (uint8_t)((in >> 56) & 0xFF); +} + +#endif /* _9P_INTERNAL_H_ */ diff --git a/9p/linux-errno.h.gen b/9p/linux-errno.h.gen index 749cd8e..b896384 100755 --- a/9p/linux-errno.h.gen +++ b/9p/linux-errno.h.gen @@ -3,26 +3,27 @@ def print_errnos(txtlists: list[str]) -> None: print( - f"/* Generated by `./9p/linux-errno.h.gen {' '.join(txtlists)}`. DO NOT EDIT! 
*/" + f"/* 9p/linux-errno.h - Generated by `./9p/linux-errno.h.gen {' '.join(txtlists)}`. DO NOT EDIT! */" ) - errnos: dict[str, int] = {} + errnos: dict[str, tuple[int, str]] = {} for txtlist in sys.argv[1:]: with open(txtlist, "r") as fh: for line in fh: if line.startswith("#"): print(f"/* {line[1:].strip()} */") continue - _num, name = line.split(maxsplit=1) + _num, name, desc = line.split(maxsplit=2) num = int(_num) - name = name.strip() - errnos[name] = int(num) - namelen = max(len(name) for name in errnos.keys()) + desc = desc.strip() + errnos[name] = (num, desc) print() print("#ifndef _9P_LINUX_ERRNO_H_") print("#define _9P_LINUX_ERRNO_H_") print() + namelen = max(len(name) for name in errnos.keys()) + numlen = max(len(str(num)) for (num, desc) in errnos.values()) for name in errnos: - print(f"#define LINUX_{name.ljust(namelen)} {errnos[name]}") + print(f"#define LINUX_{name.ljust(namelen)} {str(errnos[name][0]).rjust(numlen)} /* {errnos[name][1]} */") print() print("#endif /* _9P_LINUX_ERRNO_H_ */") diff --git a/9p/srv.h b/9p/srv.h index f28a525..32a2e30 100644 --- a/9p/srv.h +++ b/9p/srv.h @@ -3,6 +3,25 @@ #include "coroutine.h" +#define 9P_DEFAULT_PORT 564 + +/** + * The default here is the same as in Plan 9 4e's lib9p. It's sized + * so that a Twrite message can return 8KiB of data; it uses the + * default (1024*8)+24 with the comment that "24" is "ample room for + * Twrite/Rread header (iounit)". In fact, the Twrite header is only + * 23 bytes ("size[4] Twrite[1] tag[2] fid[4] offset[8] count[4]") and + * the Rread header is even shorter at 11 bytes ("size[4] Rread[1] + * tag[2] count[4]"), so "24" appears to be the size of the Twrite + * header rounded up to a nice round number. + * + * In older versions of 9P ("9P1"), the max message size was defined + * as part of the protocol specification rather than negotiated. In + * Plan 9 1e it was (1024*8)+128, and was bumped to (1024*8)+160 in 2e + * and 3e. 
+ */ +#define 9P_DEFAULT_MAX_MSG_SIZE ((1024*8)+24) + COROUTINE net9p_cr(void *); #endif /* _NET9P_H_ */ diff --git a/Makefile b/Makefile index fd80695..74aa20b 100644 --- a/Makefile +++ b/Makefile @@ -6,21 +6,17 @@ LDFLAGS += -static linux.git = $(HOME)/src/github.com/torvalds/linux -3rd-party/linux-errno.txt: - { \ - cd $(linux.git) && \ - echo "# Generated from linux.git $$(git describe). DO NOT EDIT!" && \ - git ls-files include/uapi/ | grep errno | xargs grep -E '#\s*define\s+E[A-Z0-9]+\s+[0-9]+' | awk '{print $$3, $$2}' | sort --numeric-sort && \ - :; } >$@ +3rd-party/linux-errno.txt: %: %.gen + $< $(linux.git) 9p/linux-errno.h: %: %.gen 3rd-party/linux-errno.txt $^ >$@ -9p/%.c 9p/%.h: 9p/generate 9p/%.txt +9p/defs-%.c 9p/defs-%.h: 9p/defs.gen 9p/%.txt $^ srv9p: srv9p.o coroutine.o net9p.o 9p/9P2000.o -sources_py = 9p/generate +sources_py = 9p/defs.gen sources_py += 9p/linux-errno.h.gen lint: diff --git a/README.md b/README.md index 8240584..a1441f4 100644 --- a/README.md +++ b/README.md @@ -26,3 +26,52 @@ UART: - pin2: gpio1: RX (so connect it to your FTDI's TX) - pin3: gnd (so connect it to your FTDI's GND) - picocom --baud=115200 /dev/ttyUSB0 + +# Usage + +The harness uses DHCP to acquire an IPv4 address, then serves the 9P +protocol over TCP: + + - TCP port: 564 (9P does not have a standard TCP port number, but + this is the default port number used by most 9P-over-TCP clients, + including the Linux kernel's v9fs driver). + - Supported protocol versions: + - `9P2000` (base protocol): Yes + - `9P2000.u` (Unix extension): Yes, with Linux kernel + architecture-"generic" errnos. This will match the Linux kernel + errnos on most architectures (but notably not on Alpha, MIPS, + PA-RISC, PowerPC, or SPARC; I am unsure whether on these + platforms the kernel's v9fs filesystem driver will map the + "generic" errnos to the architecture-specific errnos for you). 
+ - `9P2000.L` (Linux extension): No, it's an abomination and + unlikely to ever be supported + - `9P2000.e` (Erlang extension): No, but if you want it and ask + nicely I'd be happy to add it (I'm not sure why you'd want it + though). + - Authentication: None + +There are lots of 9P clients that you can use. 9P is a filesystem +protocol; and you can mount it directly with the Linux kernel's +v9fs filesystem driver, with plan9port's `9pfuse`; or interact with it +without mounting it using the shell commands `9p` (from plan9port), +`wmiir`, `ixpc`; or interact with it without mounting it by using a +library for your programming language of choice. + +Some notes on choosing a client: + - On x86-32, the Linux kernel v9fs driver is known to drop entries + from directory listings; I advise using 9pfuse instead if you want + to mount it on 32-bit systems. + - I generally like mounting it as a real filesystem, but this means + that you only get errno errors, and the more-helpful error strings + are discarded. + - The sbc-harness only supports 8 concurrent connections to the 9P + server, so while shell commands are handy for poking around, for + real use where you're doing things in parallel you'll likely want + to mount it or use a library that can reuse existing connections. + +# Bugs/Limitations + + - Only supports 8 concurrent TCP connections to the 9P server (due + to limitations in the W5500 TCP-offload chip) + - Only supports IPv4, not IPv6 (due to limitations in the W5500 + TCP-offload chip) diff --git a/srv9p.c b/srv9p.c index c7d1dde..822a9ea 100644 --- a/srv9p.c +++ b/srv9p.c @@ -10,7 +10,7 @@ int main() { error(1, -sock, "netio_listen"); for (int i = 0; i < 8; i++) - if (!coroutine_add(net9p_cr, NULL)) + if (!coroutine_add(net9p_cr, &sock)) error(1, 0, "coroutine_add(net9p_cr, NULL)"); coroutine_main(); -- cgit v1.2.3-2-g168b