#!/usr/bin/env python import enum import re # Parse net9p_defs.txt ######################################################### class Atom(enum.Enum): u8 = 1 u16 = 2 u32 = 3 u64 = 4 class Struct: name: str members: list["Member"] class List: cnt: str typ: Atom | Struct def __init__(self, /, *, cnt: str, typ: Atom | Struct) -> None: self.cnt = cnt self.typ = typ class Member: name: str typ: Atom | Struct | List def __init__(self, /, *, name: str, typ: Atom | Struct | List) -> None: self.name = name self.typ = typ def parse_members( env: dict[str, Atom | Struct], existing: list[Member], specs: str ) -> list[Member]: ret = existing for spec in specs.split(): m = re.fullmatch(r"(.+)\[([^*]+)(?:\*([^*]+))?\]", spec) if not m: raise SyntaxError(f"invalid member spec {repr(spec)}") if m.group(2) not in env: raise NameError(f"Unknown type {repr(m.group(2))}") name = m.group(1) typ = env[m.group(2)] if any(x.name == name for x in ret): raise ValueError(f"duplicate member name {repr(name)}") if cnt := m.group(3): if len(ret) == 0 or ret[-1].name != cnt: raise ValueError(f"list count must be previous item: {repr(cnt)}") if not isinstance(ret[-1].typ, Atom): raise ValueError(f"list count must be an integer type: {repr(cnt)}") ret += [Member(name=name, typ=List(cnt=cnt, typ=typ))] else: ret += [Member(name=name, typ=typ)] return ret class Message: id: int name: str members: list[Member] def parse_file(filename: str) -> tuple[list[Struct], list[Message]]: msgs: list[Message] = [] env: dict[str, Atom | Struct] = { "1": Atom.u8, "2": Atom.u16, "4": Atom.u32, "8": Atom.u64, } with open(filename, "r") as fh: prev: Struct | Message | None = None for line in fh: line = line.split("#", 1)[0].strip() if not line: continue if m := re.fullmatch(r'([0-9]+)\s*=\s*(\S+)\s*"([^"]*)"', line): msg = Message() msg.id = int(m.group(1)) msg.name = m.group(2) msg.members = parse_members(env, [], m.group(3)) msgs += [msg] prev = msg elif m := re.fullmatch(r'(\S+)\s*=\s*"([^"]*)"', line): struct = Struct() struct.name = m.group(1) struct.members = parse_members(env, [], m.group(2)) env[struct.name] = struct prev = struct elif m := re.fullmatch(r'"([^"]*)"', line): if not prev: raise SyntaxError( "a continuation line must come after a struct line" ) prev.members = parse_members(env, prev.members, line.strip('"')) else: raise SyntaxError(f"invalid line {repr(line)}") structs = [x for x in env.values() if isinstance(x, Struct)] return structs, msgs # Generate C ################################################################### def shortname(typ: Atom | Struct | Message) -> str: match typ: case Atom.u8: return "1" case Atom.u16: return "2" case Atom.u32: return "4" case Atom.u64: return "8" case Struct(): return typ.name case Message(): return 'msg_'+typ.name case _: raise ValueError(f"not a type: {typ.__class__.__name__}") def c_typename(typ: Atom | Struct | List | Message) -> str: match typ: case Atom.u8: return "uint8_t" case Atom.u16: return "uint16_t" case Atom.u32: return "uint32_t" case Atom.u64: return "uint64_t" case Struct(): return "struct v9fs_" + typ.name case Message(): return "struct v9fs_msg_" + typ.name case List(): return c_typename(typ.typ) + "*" case _: raise ValueError(f"not a type: {typ.__class__.__name__}") def static_size(typ: Atom | Struct | List | Message) -> int | None: match typ: case Atom.u8: return 1 case Atom.u16: return 2 case Atom.u32: return 4 case Atom.u64: return 8 case Struct() | Message(): size = 0 for member in typ.members: msize = static_size(member.typ) if msize is None: return None size += msize return size case List(): return None case _: raise ValueError(f"not a type: {typ.__class__.__name__}") def gen_h(structs: list[Struct], msgs: list[Message]) -> str: ret = "" ret += "/* Generated by ./net9p_defs.gen. DO NOT EDIT! */\n" ret += "\n" ret += "#ifndef _NET9P_DEFS_H_\n" ret += "#define _NET9P_DEFS_H_\n" ret += "\n" for struct in structs: ret += c_typename(struct) + " {\n" typewidth = max(len(c_typename(member.typ)) for member in struct.members) for member in struct.members: ret += f"\t{c_typename(member.typ).ljust(typewidth)} {member.name};\n" ret += "};\n" ret += "\n" ret += "enum v9fs_msg_type {\n" namewidth = max(len(msg.name) for msg in msgs) for msg in msgs: ret += f"\tV9FS_TYP_{msg.name.ljust(namewidth)} = {msg.id},\n" ret += "};\n" for msg in msgs: if not msg.members: ret += c_typename(msg) + " {};\n" else: ret += c_typename(msg) + " {\n" typewidth = max(len(c_typename(member.typ)) for member in msg.members) for member in msg.members: ret += f"\t{c_typename(member.typ).ljust(typewidth)} {member.name};\n" ret += "};\n" ret += "\n" ret += "#endif /* _NET9P_DEFS_H_ */\n" return ret def gen_c(structs: list[Struct], msgs: list[Message]) -> str: ret = """ /* Generated by ./net9p_defs.gen. DO NOT EDIT! */ #include /* for size_t, uint{n}_t */ #include /* for malloc() */ #include "net9p_defs.h" """ # basic utilities ########################################################## ret += """ /* basic utilities ************************************************************/ #define UNUSED __attribute__ ((unused)) static inline uint16_t decode_u16le(uint8_t *bytes) { return (((uint16_t)(bytes[0])) << 0) | (((uint16_t)(bytes[1])) << 8) ; } static inline uint32_t decode_u32le(uint8_t *bytes) { return (((uint16_t)(bytes[0])) << 0) | (((uint16_t)(bytes[1])) << 8) | (((uint16_t)(bytes[2])) << 16) | (((uint16_t)(bytes[3])) << 24) ; } static inline uint64_t decode_u64le(uint8_t *bytes) { return (((uint16_t)(bytes[0])) << 0) | (((uint16_t)(bytes[1])) << 8) | (((uint16_t)(bytes[2])) << 16) | (((uint16_t)(bytes[3])) << 24) | (((uint16_t)(bytes[4])) << 32) | (((uint16_t)(bytes[5])) << 40) | (((uint16_t)(bytes[6])) << 48) | (((uint16_t)(bytes[7])) << 56) ; } static inline void decode_u16le(val uint16_t, uint8_t *bytes) { bytes[0] = (uint8_t)((val >> 0) & 0xFF); bytes[1] = (uint8_t)((val >> 8) & 0xFF); } static inline void decode_u32le(val uint32_t, uint8_t *bytes) { bytes[0] = (uint8_t)((val >> 0) & 0xFF); bytes[1] = (uint8_t)((val >> 8) & 0xFF); bytes[2] = (uint8_t)((val >> 16) & 0xFF); bytes[3] = (uint8_t)((val >> 24) & 0xFF); } static inline void decode_u64le(val uint64_t, uint8_t *bytes) { bytes[0] = (uint8_t)((val >> 0) & 0xFF); bytes[1] = (uint8_t)((val >> 8) & 0xFF); bytes[2] = (uint8_t)((val >> 16) & 0xFF); bytes[3] = (uint8_t)((val >> 24) & 0xFF); bytes[4] = (uint8_t)((val >> 32) & 0xFF); bytes[5] = (uint8_t)((val >> 40) & 0xFF); bytes[6] = (uint8_t)((val >> 48) & 0xFF); bytes[7] = (uint8_t)((val >> 56) & 0xFF); } """ # checksize ################################################################ ret += """ /* checksize ******************************************************************/ typedef bool (*_checksize_fn_t)(uint32_t net_len, uint8_t *net_bytes, uint32_t *mut_net_offset, size_t *mut_host_extra); static inline bool _checksize_list(size_t cnt, checksize_fn_t fn, size_t host_size, uint32_t net_len, uint8_t *net_bytes, uint32_t *mut_net_offset, size_t *mut_host_extra) { for (size_t i = 0; i < cnt; i++) if (__builtin_add_overflow(*mut_host_extra, host_size, mut_host_extra) || fn(net_len, net_bytes, mut_net_offset, mut_host_extra)) return true; return false; } static inline bool checksize_1(uint32_t net_len, uint8_t *net_bytes UNUSED, uint32_t *mut_net_offset, size_t *mut_host_extra UNUSED) { return __builtin_add_overflow(*mut_net_offset, 1, mut_net_offset) || net_len < *mut_net_offset; } static inline bool checksize_2(uint32_t net_len, uint8_t *net_bytes UNUSED, uint32_t *mut_net_offset, size_t *mut_host_extra UNUSED) { return __builtin_add_overflow(*mut_net_offset, 2, mut_net_offset) || net_len < *mut_net_offset; } static inline bool checksize_4(uint32_t net_len, uint8_t *net_bytes UNUSED, uint32_t *mut_net_offset, size_t *mut_host_extra UNUSED) { return __builtin_add_overflow(*mut_net_offset, 4, mut_net_offset) || net_len < *mut_net_offset; } static inline bool checksize_8(uint32_t net_len, uint8_t *net_bytes UNUSED, uint32_t *mut_net_offset, size_t *mut_host_extra UNUSED) { return __builtin_add_overflow(*mut_net_offset, 8, mut_net_offset) || net_len < *mut_net_offset; } """ for struct in structs + msgs: argattr = ' UNUSED' if len(struct.members) == 0 else '' ret += f"static inline bool checksize_{shortname(struct)}(uint32_t net_len{argattr}, uint8_t *net_bytes{argattr}, uint32_t *mut_net_offset{argattr}, size_t mut_host_extra{argattr}) {{" if len(struct.members) == 0: ret += "}\n" continue prefix0 = "\treturn " prefix1 = "\t || " prefix2 = "\t " prefix = prefix0 prev_size = 0 for member in struct.members: if isinstance(member.typ, List): ret += f"\n{prefix }_checksize_list(decode_u{prev_size*8}le(&net_bytes[(*mut_net_offset)-{prev_size}]), checksize_{shortname(member.typ.typ)}, sizeof({c_typename(member.typ.typ)})," ret += f"\n{prefix2} net_len, net_bytes, mut_net_offset, mut_host_extra)" else: ret += f"\n{prefix}checksize_{shortname(member.typ)}(net_len, net_bytes, mut_net_offset, mut_host_extra)" prefix = prefix1 if struct.name == 's': ret += f"\n{prefix}__builtin_add_overflow(*mut_host_extra, 1, mut_host_extra)" ret += ";\n}\n" # unmarshal ################################################################ ret += """ /* unmarshal ******************************************************************/ /* checksize_XXX() should be called before unmarshal_XXX(). */ static inline void unmarshal_1(uint32_t net_len, uint8_t *net_bytes UNUSED, uint32_t *mut_net_offset, void *mut_host_extra UNUSED, uint8_t *out) { *out = decode_u8le(&net_bytes[*mut_net_offset]); *mut_net_offset += 1; } static inline void unmarshal_2(uint32_t net_len, uint8_t *net_bytes UNUSED, uint32_t *mut_net_offset, void *mut_host_extra UNUSED, uint16_t *out) { *out = decode_u16le(&net_bytes[*mut_net_offset]); *mut_net_offset += 2; } static inline void unmarshal_4(uint32_t net_len, uint8_t *net_bytes UNUSED, uint32_t *mut_net_offset, void *mut_host_extra UNUSED, uint32_t *out) { *out = decode_u32le(&net_bytes[*mut_net_offset]); *mut_net_offset += 4; } static inline void unmarshal_8(uint32_t net_len, uint8_t *net_bytes UNUSED, uint32_t *mut_net_offset, void *mut_host_extra UNUSED, uint64_t *out) { *out = decode_u64le(&net_bytes[*mut_net_offset]); *mut_net_offset += 8; } """ for struct in structs + msgs: ret += f"static inline void unmarshal_{shortname(struct)}(uint32_t net_len, uint8_t *net_bytes, uint32_t *mut_net_offset, void *mut_host_extra, {c_typename(struct)} *out) {{" if len(struct.members) == 0: ret += "}\n" continue ret += "\n" for member in struct.members: if isinstance(member.typ, List): ret += f"\tout->{member.name} = mut_host_extra;\n" ret += f"\t*mut_host_extra += sizeof(out->{member.name}) * out->{member.typ.cnt};\n" ret += f"\tfor (typeof(out->{member.typ.cnt}) i = 0; i < out->{member.typ.cnt}; i++)\n" ret += f"\t\tunmarshal_{shortname(member.typ.typ)}(net_len, net_bytes, mut_net_offset, mut_host_extra, &(out->{member.name}[i]));\n" else: ret += f"\tunmarshal_{shortname(member.typ)}(net_len, net_bytes, mut_net_offset, mut_host_extra, &(out->{member.name}));\n" ret += "}\n" ############################################################################ return ret ################################################################################ if __name__ == "__main__": structs, msgs = parse_file("net9p_defs.txt") #print(gen_h(structs, msgs)) print(gen_c(structs, msgs))