diff options
author | Luke T. Shumaker <lukeshu@lukeshu.com> | 2024-09-20 11:30:28 -0600 |
---|---|---|
committer | Luke T. Shumaker <lukeshu@lukeshu.com> | 2024-09-20 11:30:28 -0600 |
commit | 0cc4f3ab82473ca09373a5b1d42223c69bf92fce (patch) | |
tree | 2c0594601e81b073c44c8c059064f9e1bf9aecb2 | |
parent | 978c935f535b9f05dd96b118380156b678ddedd9 (diff) |
wip generate 9p
-rw-r--r-- | .editorconfig | 11 | ||||
-rw-r--r-- | net9p.c | 174 | ||||
-rwxr-xr-x | net9p_defs.gen | 309 | ||||
-rw-r--r-- | net9p_defs.txt | 89 |
4 files changed, 572 insertions, 11 deletions
diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..574abd1 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,11 @@ +root = true + +[*] +end_of_line = lf +insert_final_newline = true +charset = utf-8 +indent_style = tab + +[net9p_defs.gen] +indent_style = space +indent_size = 4 @@ -9,13 +9,162 @@ #include <netinet/in.h> #include <arpa/inet.h> +#include <endian.h> + #include "net9p.h" +/* "T" messages are client->server requests, and "R" messages are + * server->client responses. I do not know what the Plan 9 designers + * intended "T" and "R" to stand for. + */ +enum v9fs_msg_type { + /* "9P2000" base protocol + * https://ericvh.github.io/9p-rfc/rfc9p2000.html + * https://github.com/ericvh/9p-rfc/blob/master/9p2000.xml + * + * But due to incompleteness, the Plan 9 manual section-5 and + * the Plan 9 headers (particularly fcall.h) are a better + * references. + */ + V9FS_Tversion = 100, /* "412" "4s" */ + V9FS_Rversion = 101, /* "412" "4s" */ + V9FS_Tauth = 102, /* "412" "4ss" */ + V9FS_Rauth = 103, /* "412" "TODO" */ + V9FS_Tattach = 104, /* "412" "TODO" */ + V9FS_Rattach = 105, /* "412" "TODO" */ + /*V9FS_Terror = 106, /* There is no Terror request, only Rerror responses */ + V9FS_Rerror = 107, /* "412" "TODO" */ + V9FS_Tflush = 108, /* "412" "TODO" */ + V9FS_Rflush = 109, /* "412" "TODO" */ + V9FS_Twalk = 110, /* "412" "TODO" */ + V9FS_Rwalk = 111, /* "412" "TODO" */ + V9FS_Topen = 112, /* "412" "TODO" */ + V9FS_Ropen = 113, /* "412" "TODO" */ + V9FS_Tcreate = 114, /* "412" "TODO" */ + V9FS_Rcreate = 115, /* "412" "TODO" */ + V9FS_Tread = 116, /* "412" "TODO" */ + V9FS_Rread = 117, /* "412" "TODO" */ + V9FS_Twrite = 118, /* "412" "TODO" */ + V9FS_Rwrite = 119, /* "412" "TODO" */ + V9FS_Tclunk = 120, /* "412" "TODO" */ + V9FS_Rclunk = 121, /* "412" "TODO" */ + V9FS_Tremove = 122, /* "412" "TODO" */ + V9FS_Rremove = 123, /* "412" "TODO" */ + V9FS_Tstat = 124, /* "412" "TODO" */ + V9FS_Rstat = 125, /* "412" "TODO" */ + V9FS_Twstat = 126, /* 
"412" "TODO" */ + V9FS_Rwstat = 127, /* "412" "TODO" */ + + /* "9P2000.u" Unix extension + * https://ericvh.github.io/9p-rfc/rfc9p2000.u.html + * https://github.com/ericvh/9p-rfc/blob/master/9p2000.u.xml + */ + + /* "9P2000.L" Linux extension + * https://github.com/ericvh/9p-rfc/blob/master/9p2000.L.xml + */ + V9FS_TLERROR = 6, + V9FS_RLERROR, + V9FS_TSTATFS = 8, + V9FS_RSTATFS, + V9FS_TLOPEN = 12, + V9FS_RLOPEN, + V9FS_TLCREATE = 14, + V9FS_RLCREATE, + V9FS_TSYMLINK = 16, + V9FS_RSYMLINK, + V9FS_TMKNOD = 18, + V9FS_RMKNOD, + V9FS_TRENAME = 20, + V9FS_RRENAME, + V9FS_TREADLINK = 22, + V9FS_RREADLINK, + V9FS_TGETATTR = 24, + V9FS_RGETATTR, + V9FS_TSETATTR = 26, + V9FS_RSETATTR, + V9FS_TXATTRWALK = 30, + V9FS_RXATTRWALK, + V9FS_TXATTRCREATE = 32, + V9FS_RXATTRCREATE, + V9FS_TREADDIR = 40, + V9FS_RREADDIR, + V9FS_TFSYNC = 50, + V9FS_RFSYNC, + V9FS_TLOCK = 52, + V9FS_RLOCK, + V9FS_TGETLOCK = 54, + V9FS_RGETLOCK, + V9FS_TLINK = 70, + V9FS_RLINK, + V9FS_TMKDIR = 72, + V9FS_RMKDIR, + V9FS_TRENAMEAT = 74, + V9FS_RRENAMEAT, + V9FS_TUNLINKAT = 76, + V9FS_RUNLINKAT, +}; + +/* 1 - u8 + * 2 - u16le + * 4 - u32le + * 8 - u16le + * d - data (u32le `n`, then `n` bytes of data) + * s - string (u16le `n`, then `n` bytes of UTF-8) + * q - qid (13 bytes, idk) + */ + +static const char const *msgfmt[255] = { + /* All messages start with a "size[4] type[1] tag[2]" + * prefix; that is not included in this table. 
*/ + + [V9FS_Tversion] = "max_msg_size[4] s", + [V9FS_Rversion] = "4s", + + [V9FS_Tauth] = "4ss", + [V9FS_Rauth] = "q", + + [V9FS_Rerror] = "s", + + [V9FS_Tflush] = "2", + [V9FS_Rflush] = "", + + [V9FS_Tattach] = "44ss", + [V9FS_Rattach] = "q", + + [V9FS_Twalk] = "TODO", + [V9FS_Rwalk] = "TODO", + + [V9FS_Topen] = "41", + [V9FS_Ropen] = "q4", + + [V9FS_Tcreate] = "4s41", + [V9FS_Rcreate] = "q4", + + [V9FS_Tread] = "484", + [V9FS_Rread] = "d", + + [V9FS_Twrite] = "48d", + [V9FS_Rwrite] = "4", + + [V9FS_Tclunk] = "4", + [V9FS_Rclunk] = "", + + [V9FS_Tremove] = "4", + [V9FS_Rremove] = "", + + [V9FS_Tstat] = "4", + [V9FS_Rstat] = "TODO", + + [V9FS_Twstat] = "TODO", + [V9FS_Rwstat] = "", +}; + + + void net9p_listen_cr(void *_arg) { (void)_arg; - printf("listen initializing...\n"); cr_begin(); - printf("listen running...\n"); union { struct sockaddr_in in; @@ -35,12 +184,17 @@ void net9p_listen_cr(void *_arg) { if (listen(fd, 5) < 0) error(1, errno, "listen"); - int conn = 9; - if (!coroutine_add(net9p_worker_cr, &conn)) - error(1, 0, "coroutine_add(net9p_worker_cr, &%d)", conn); - printf("im back...\n"); - for (int i = 0; i < 10; i++) { - cr_yield(); + for (;;) { + int conn = accept(fd, NULL, NULL); + if (conn < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + cr_yield(); + continue; + } + error(1, errno, "accept"); + } + if (!coroutine_add(net9p_worker_cr, &conn)) + error(1, 0, "coroutine_add(net9p_worker_cr, &%d)", conn); } cr_end(); @@ -48,11 +202,9 @@ void net9p_listen_cr(void *_arg) { void net9p_worker_cr(void *_arg) { int fd = *((int *)_arg); - printf("worker %zu initializing...\n", cr_getcid()); cr_begin(); - printf("worker %zu running...\n", cr_getcid()); - //close(fd); + close(fd); cr_end(); } diff --git a/net9p_defs.gen b/net9p_defs.gen new file mode 100755 index 0000000..0e75e42 --- /dev/null +++ b/net9p_defs.gen @@ -0,0 +1,309 @@ +#!/usr/bin/env python + +import enum +import re + + +class Atom(enum.Enum): + u8 = 1 + u16 = 2 + u32 = 3 + u64 = 4 + + 
+class Struct: + name: str + members: list["Member"] + + +class List: + cnt: str + typ: Atom | Struct + + def __init__(self, /, *, cnt: str, typ: Atom | Struct) -> None: + self.cnt = cnt + self.typ = typ + + +class Member: + name: str + typ: Atom | Struct | List + + def __init__(self, /, *, name: str, typ: Atom | Struct | List) -> None: + self.name = name + self.typ = typ + + +def parse_members( + env: dict[str, Atom | Struct], existing: list[Member], specs: str +) -> list[Member]: + ret = existing + for spec in specs.split(): + m = re.fullmatch(r"(.+)\[([^*]+)(?:\*([^*]+))?\]", spec) + if not m: + raise SyntaxError(f"invalid member spec {repr(spec)}") + if m.group(2) not in env: + raise NameError(f"Unknown type {repr(m.group(2))}") + name = m.group(1) + typ = env[m.group(2)] + if any(x.name == name for x in ret): + raise ValueError(f"duplicate member name {repr(name)}") + if cnt := m.group(3): + if len(ret) == 0 or ret[-1].name != cnt: + raise ValueError(f"list count must be previous item: {repr(cnt)}") + if not isinstance(ret[-1].typ, Atom): + raise ValueError(f"list count must be an integer type: {repr(cnt)}") + ret += [Member(name=name, typ=List(cnt=cnt, typ=typ))] + else: + ret += [Member(name=name, typ=typ)] + return ret + + +class Message: + id: int + name: str + members: list[Member] + + +def parse_file(filename: str) -> tuple[list[Struct], list[Message]]: + msgs: list[Message] = [] + env: dict[str, Atom | Struct] = { + "1": Atom.u8, + "2": Atom.u16, + "4": Atom.u32, + "8": Atom.u64, + } + with open(filename, "r") as fh: + prev: Struct | Message | None = None + for line in fh: + line = line.split("#", 1)[0].strip() + if not line: + continue + if m := re.fullmatch(r'([0-9]+)\s*=\s*(\S+)\s*"([^"]*)"', line): + msg = Message() + msg.id = int(m.group(1)) + msg.name = m.group(2) + msg.members = parse_members(env, [], m.group(3)) + msgs += [msg] + prev = msg + elif m := re.fullmatch(r'(\S+)\s*=\s*"([^"]*)"', line): + struct = Struct() + struct.name = m.group(1) + 
struct.members = parse_members(env, [], m.group(2)) + env[struct.name] = struct + prev = struct + elif m := re.fullmatch(r'"([^"]*)"', line): + if not prev: + raise SyntaxError( + "a continuation line must come after a struct line" + ) + prev.members = parse_members(env, prev.members, line.strip('"')) + else: + raise SyntaxError(f"invalid line {repr(line)}") + structs = [x for x in env.values() if isinstance(x, Struct)] + return structs, msgs + + +def c_typename(typ: Atom | Struct | List | Message) -> str: + match typ: + case Atom.u8: + return "uint8_t" + case Atom.u16: + return "uint16_t" + case Atom.u32: + return "uint32_t" + case Atom.u64: + return "uint64_t" + case Struct(): + return "struct v9fs_" + typ.name + case Message(): + return "struct v9fs_msg_" + typ.name + case List(): + return c_typename(typ.typ) + "*" + case _: + raise ValueError(f"not a type: {typ.__class__.__name__}") + + +def gen_h(structs: list[Struct], msgs: list[Message]) -> str: + ret = "" + ret += "/* Generated by ./net9p_defs.gen. DO NOT EDIT! 
*/\n" + ret += "\n" + ret += "#ifndef _NET9P_DEFS_H_\n" + ret += "#define _NET9P_DEFS_H_\n" + ret += "\n" + + for struct in structs: + ret += c_typename(struct) + " {\n" + typewidth = max(len(c_typename(member.typ)) for member in struct.members) + for member in struct.members: + ret += f"\t{c_typename(member.typ).ljust(typewidth)} {member.name};\n" + ret += "};\n" + ret += "\n" + + ret += "enum v9fs_msg_type {\n" + namewidth = max(len(msg.name) for msg in msgs) + for msg in msgs: + ret += f"\tV9FS_TYP_{msg.name.ljust(namewidth)} = {msg.id},\n" + ret += "};\n" + + for msg in msgs: + if not msg.members: + ret += c_typename(msg) + " {};\n" + else: + ret += c_typename(msg) + " {\n" + typewidth = max(len(c_typename(member.typ)) for member in msg.members) + for member in msg.members: + ret += f"\t{c_typename(member.typ).ljust(typewidth)} {member.name};\n" + ret += "};\n" + ret += "\n" + + ret += "#endif /* _NET9P_DEFS_H_ */\n" + return ret + + +c_atom_funcs = """ +static inline uint16_t unmarshal_u16le(uint8_t *bytes) { + return (((uint16_t)(bytes[0])) << 0) + | (((uint16_t)(bytes[1])) << 8) + ; +} +static inline uint32_t unmarshal_u32le(uint8_t *bytes) { + return (((uint16_t)(bytes[0])) << 0) + | (((uint16_t)(bytes[1])) << 8) + | (((uint16_t)(bytes[2])) << 16) + | (((uint16_t)(bytes[3])) << 24) + ; +} +static inline uint64_t unmarshal_u64le(uint8_t *bytes) { + return (((uint16_t)(bytes[0])) << 0) + | (((uint16_t)(bytes[1])) << 8) + | (((uint16_t)(bytes[2])) << 16) + | (((uint16_t)(bytes[3])) << 24) + | (((uint16_t)(bytes[4])) << 32) + | (((uint16_t)(bytes[5])) << 40) + | (((uint16_t)(bytes[6])) << 48) + | (((uint16_t)(bytes[7])) << 56) + ; +} + +static inline void marshal_u16le(val uint16_t, uint8_t *bytes) { + bytes[0] = (uint8_t)((val >> 0) & 0xFF); + bytes[1] = (uint8_t)((val >> 8) & 0xFF); +} +static inline void marshal_u32le(val uint32_t, uint8_t *bytes) { + bytes[0] = (uint8_t)((val >> 0) & 0xFF); + bytes[1] = (uint8_t)((val >> 8) & 0xFF); + bytes[2] = 
(uint8_t)((val >> 16) & 0xFF); + bytes[3] = (uint8_t)((val >> 24) & 0xFF); +} +static inline void marshal_u64le(val uint64_t, uint8_t *bytes) { + bytes[0] = (uint8_t)((val >> 0) & 0xFF); + bytes[1] = (uint8_t)((val >> 8) & 0xFF); + bytes[2] = (uint8_t)((val >> 16) & 0xFF); + bytes[3] = (uint8_t)((val >> 24) & 0xFF); + bytes[4] = (uint8_t)((val >> 32) & 0xFF); + bytes[5] = (uint8_t)((val >> 40) & 0xFF); + bytes[6] = (uint8_t)((val >> 48) & 0xFF); + bytes[7] = (uint8_t)((val >> 56) & 0xFF); +} +""" + + +def static_net_size(typ: Atom | Struct | List | Message) -> int | None: + match typ: + case Atom.u8: + return 1 + case Atom.u16: + return 2 + case Atom.u32: + return 4 + case Atom.u64: + return 8 + case Struct() | Message(): + size = 0 + for member in typ.members: + msize = static_net_size(member.typ) + if msize is None: + return None + size += msize + return size + case List(): + return None + case _: + raise ValueError(f"not a type: {typ.__class__.__name__}") + + +def gen_c_check_net_len(msg: Message) -> str: + ret: str = "" + if (sz := static_net_size(msg)) is not None: + ret += f"\tif (net_len != {sz})\n" + ret += "\t\treturn -EINVAL;\n" + return ret + + ret += f"\tuint64_t net_offset = 0;\n" + static_acc = 0 + def _gen_c_check_net_len(prefix: str, struct: Struct | Message) -> str: + nonlocal static_acc + ret: str = "" + + prev_size: int = 0 + for member in struct.members: + if (sz := static_net_size(member.typ)) is not None: + static_acc += sz + prev_size = sz + elif isinstance(member.typ, Struct): + ret += _gen_c_check_net_len(prefix, member.typ) + elif isinstance(member.typ, List): + if static_acc: + ret += f"{prefix}net_offset += {static_acc};\n" + static_acc = 0 + ret += f"{prefix}if ((uint64_t)net_len < net_offset)\n" + ret += f"{prefix}\treturn -EINVAL;\n" + if (sz := static_net_size(member.typ.typ)) is not None: + ret += f"{prefix}net_offset += unmarshal_u{prev_size*8}le(&net_bytes[net_offset-{prev_size}])*{sz};\n" + else: + assert 
isinstance(member.typ.typ, Struct) + ret += f"{prefix}for (uint{prev_size*8}_t i, cnt = 0, unmarshal_u{prev_size*8}le(&net_bytes[net_offset-{prev_size}]); i < cnt; i++) {{\n" + ret += _gen_c_check_net_len(prefix + "\t", member.typ.typ) + if static_acc: + ret += f"{prefix}net_offset += {static_acc};\n" + static_acc = 0 + ret += f"{prefix}\n" + else: + raise ValueError(f"not a type: {member.typ.__class__.__name__}") + + return ret + ret += _gen_c_check_net_len("\t", msg) + if static_acc: + ret += f"\tnet_offset += {static_acc};\n" + ret += "\tif ((uint64_t)net_len != net_offset)\n" + ret += "\t\treturn -EINVAL;\n" + return ret + + + + +def gen_c(structs: list[Struct], msgs: list[Message]) -> str: + ret = "" + ret += "/* Generated by ./net9p_defs.gen. DO NOT EDIT! */\n" + ret += "\n" + ret += "#include <stdint.h> /* for size_t, uint{n}_t */\n" + ret += "#include <stdlib.h> /* for malloc() */\n" + ret += "\n" + ret += c_atom_funcs + ret += "\n" + + for msg in msgs: + ret += f"int unmarshal_msg_{msg.name}(uint32_t net_len, uint8_t *net_bytes, {c_typename(msg)} **ret) {{\n" + ret += gen_c_check_net_len(msg) + ret += "\n" + ret += "\tTODO;\n" + ret += "}\n" + ret += "\n" + return ret + + +if __name__ == "__main__": + structs, msgs = parse_file("net9p_defs.txt") + print(gen_h(structs, msgs)) + print(gen_c(structs, msgs)) diff --git a/net9p_defs.txt b/net9p_defs.txt new file mode 100644 index 0000000..3eba255 --- /dev/null +++ b/net9p_defs.txt @@ -0,0 +1,89 @@ +# net9p.txt - Definitions of 9P messages +# +# Copyright (C) 2024 Luke T. Shumaker <lukeshu@lukeshu.com> +# SPDX-Licence-Identifier: AGPL-3.0-or-later + +# In the 9P protocol, each message has a type, and message types come +# in pairs (except "Rerror"); "T" and "R"; "T" messages are +# client->server requests, and "R" messages are server->client +# responses (I do not know what the Plan 9 designers intended "T" and +# "R" to stand for). The type of a message is represented by a u8 ID. 
+# +# This file defines the ID and format of each message type, +# and is used to generate implementation code. + +# The format of each message (excluding the "size[4] msg_type[1] +# tag[2]" header) is written here as a sequence of +# "member_name[member_type]" struct members. +# +# The primitive member types are the following single-character +# mnemonics: +# +# - 1 = u8 +# - 2 = u16le +# - 4 = u32le +# - 8 = u64le +# +# A type expression may also be "[type*count]" where "type" is the +# type, and then "count" is a previously-defined integer member. +# +# We also define a few reusable compound types: + +# data (u32le `n`, then `n` bytes of data) +d = "len[8] dat[1*len]" + +# string (u16le `n`, then `n` bytes of UTF-8) +s = "len[2] utf8[1*len]" + +# qid (TODO) +q = "type[1] vers[4] path[8]" + +# stat (TODO) +stat = "stat_size[2]" + "kern_type[2]" + "kern_dev[4]" + "file_qid[q]" + "file_mode[4]" + "file_atime[4]" + "file_mtime[4]" + "file_size[8]" + "file_name[s]" + "file_owner_uid[s]" + "file_owner_gid[s]" + "file_last_modified_uid[s]" + +# "9P2000" base protocol +# https://ericvh.github.io/9p-rfc/rfc9p2000.html +# https://github.com/ericvh/9p-rfc/blob/master/9p2000.xml +# +# But due to incompleteness of the draft RFC, the Plan 9 manual +# section-5 and the Plan 9 headers (particularly fcall.h) are better +# references. 
+100 = Tversion "max_msg_size[4] version[s]" +101 = Rversion "max_msg_size[4] version[s]" +102 = Tauth "afid[4] uname[s] aname[s]" +103 = Rauth "aqid[q]" +104 = Tattach "fid[4] afid[4] uname[s] aname[s]" +105 = Rattach "qid[q]" +#106 = Terror "illegal" +107 = Rerror "ename[s]" +108 = Tflush "oldtag[2]" +109 = Rflush "" +110 = Twalk "fid[4] newfid[4] nwname[2] wname[s*nwname]" +111 = Rwalk "nwqid[2] wqid[q*nwqid]" +112 = Topen "fid[4] mode[1]" +113 = Ropen "qid[q] iounit[4]" +114 = Tcreate "fid[4] name[s] perm[4] mode[1]" +115 = Rcreate "qid[q] iounit[4]" +116 = Tread "fid[4] offset[8] count[4]" +117 = Rread "data[d]" +118 = Twrite "fid[4] offset[8] data[d]" +119 = Rwrite "count[4]" +120 = Tclunk "fid[4]" +121 = Rclunk "" +122 = Tremove "fid[4]" +123 = Rremove "" +124 = Tstat "fid[4]" +125 = Rstat "stat[stat]" +126 = Twstat "fid[4] stat[stat]" +127 = Rwstat "" |