diff options
Diffstat (limited to 'lib9p/defs.gen')
-rwxr-xr-x | lib9p/defs.gen | 456 |
1 files changed, 0 insertions, 456 deletions
diff --git a/lib9p/defs.gen b/lib9p/defs.gen deleted file mode 100755 index 1a8dc69..0000000 --- a/lib9p/defs.gen +++ /dev/null @@ -1,456 +0,0 @@ -#!/usr/bin/env python -# 9p/defs.gen - Generate C marshalers/unmarshalers for a .txt file -# defining a 9P protocol variant. -# -# Copyright (C) 2024 Luke T. Shumaker <lukeshu@lukeshu.com> -# SPDX-Licence-Identifier: AGPL-3.0-or-later - -import enum -import os.path -import re - -PROGRAM = "./9p/defs.gen" - -# Parse the *.txt ############################################################## - - -class Atom(enum.Enum): - u8 = 1 - u16 = 2 - u32 = 4 - u64 = 8 - - @property - def name(self) -> str: - return str(self.value) - - @property - def static_size(self) -> int: - return self.value - - -# `msgid/structname = "member1 member2..."` -# `structname = "member1 member2..."` -# `structname += "member1 member2..."` -class Struct: - msgid: int | None = None - name: str - members: list["Member"] - - @property - def static_size(self) -> int | None: - size = 0 - for member in self.members: - msize = member.static_size - if msize is None: - return None - size += msize - return size - - -# `cnt*(name[typ])` -# the `cnt*(...)` wrapper is optional -class Member: - cnt: str | None = None - name: str - typ: Atom | Struct - - @property - def static_size(self) -> int | None: - if self.cnt: - return None - return self.typ.static_size - - -re_membername = "(?:[a-zA-Z_][a-zA-Z_0-9]*)" -re_memberspec = ( - f"(?:(?P<cnt>{re_membername})\\*\\()?(?P<name>{re_membername})\\[(?P<typ>.*)\\]\\)?" -) - - -def parse_members( - env: dict[str, Atom | Struct], existing: list[Member], specs: str -) -> list[Member]: - ret = existing - for spec in specs.split(): - m = re.fullmatch(re_memberspec, spec) - if not m: - raise SyntaxError(f"invalid member spec {repr(spec)}") - - member = Member() - - member.name = m.group("name") - if any(x.name == member.name for x in ret): - raise ValueError(f"duplicate member name {repr(member.name)}") - - if m.group("typ") not in env: - raise NameError(f"Unknown type {repr(m.group(2))}") - member.typ = env[m.group("typ")] - - if cnt := m.group("cnt"): - if len(ret) == 0 or ret[-1].name != cnt: - raise ValueError(f"list count must be previous item: {repr(cnt)}") - if not isinstance(ret[-1].typ, Atom): - raise ValueError(f"list count must be an integer type: {repr(cnt)}") - member.cnt = cnt - - ret += [member] - return ret - - -re_version = r'version\s+"(?P<version>[^"]+)"' -re_structspec = ( - r'(?:(?P<msgid>[0-9]+)/)?(?P<name>\S+)\s*(?P<op>\+?=)\s*"(?P<members>[^"]*)"' -) -re_structspec_cont = r'"(?P<members>[^"]*)"' - - -def parse_file(filename: str) -> tuple[str, list[Struct]]: - version: str | None = None - env: dict[str, Atom | Struct] = { - "1": Atom.u8, - "2": Atom.u16, - "4": Atom.u32, - "8": Atom.u64, - } - with open(filename, "r") as fh: - prev: Struct | None = None - for line in fh: - line = line.split("#", 1)[0].strip() - if not line: - continue - if m := re.fullmatch(re_version, line): - if version: - raise SyntaxError("must have exactly 1 version line") - version = m.group("version") - elif m := re.fullmatch(re_structspec, line): - if m.group("op") == "+=" and m.group("msgid"): - raise SyntaxError("cannot += to a message that is not yet defined") - match m.group("op"): - case "=": - struct = Struct() - if m.group("msgid"): - struct.msgid = int(m.group("msgid")) - struct.name = m.group("name") - struct.members = parse_members(env, [], m.group("members")) - env[struct.name] = struct - prev = struct - case "+=": - if m.group("name") not in env: - raise NameError(f"Unknown type {repr(m.group('name'))}") - _struct = env[m.group("name")] - if not isinstance(_struct, Struct): - raise NameError( - f"Type {repr(m.group('name'))} is not a struct" - ) - struct = _struct - struct.members = parse_members( - env, struct.members, m.group("members") - ) - prev = struct - elif m := re.fullmatch(re_structspec_cont, line): - if not prev: - raise SyntaxError("continuation line must come after a struct line") - prev.members = parse_members(env, prev.members, m.group("members")) - else: - raise SyntaxError(f"invalid line {repr(line)}") - if not version: - raise SyntaxError("must have exactly 1 version line") - structs = [x for x in env.values() if isinstance(x, Struct)] - return version, structs - - -# Generate C ################################################################### - - -def c_typename(idprefix: str, typ: Atom | Struct) -> str: - match typ: - case Atom(): - return f"uint{typ.value*8}_t" - case Struct(): - if typ.msgid is not None: - return f"struct {idprefix}msg_{typ.name}" - return f"struct {idprefix}{typ.name}" - case _: - raise ValueError(f"not a type: {typ.__class__.__name__}") - - -def gen_h(txtname: str, idprefix: str, structs: list[Struct]) -> str: - guard = ( - f"_{txtname.replace('.txt', '.h').upper().replace('/', '_').replace('.', '_')}_" - ) - ret = f"""/* Generated by `{PROGRAM} {txtname}`. DO NOT EDIT! */ - -#ifndef {guard} -#define {guard} - -#define {idprefix.upper()}MIN_MSGLEN 7 -""" - ret += """ -/* non-message structs ********************************************************/ - -""" - for struct in structs: - if struct.msgid is not None: - continue - ret += c_typename(idprefix, struct) + " {\n" - typewidth = max( - len(c_typename(idprefix, member.typ)) for member in struct.members - ) - for member in struct.members: - ret += f"\t{c_typename(idprefix, member.typ).ljust(typewidth)} {'*' if member.cnt else ' '}{member.name};\n" - ret += "};\n" - - ret += """ -/* message types **************************************************************/ - -""" - ret += f"enum {idprefix}msg_type {{ /* uint8_t */\n" - namewidth = max(len(msg.name) for msg in structs if msg.msgid is not None) - for msg in structs: - if msg.msgid is None: - continue - ret += f"\t{idprefix.upper()}TYP_{msg.name.ljust(namewidth)} = {msg.msgid},\n" - ret += "};\n" - - ret += """ -/* message structs ************************************************************/ - -""" - for msg in structs: - if msg.msgid is None: - continue - if not msg.members: - ret += c_typename(idprefix, msg) + " {};\n" - continue - ret += c_typename(idprefix, msg) + " {\n" - typewidth = max(len(c_typename(idprefix, member.typ)) for member in msg.members) - for member in msg.members: - ret += f"\t{c_typename(idprefix, member.typ).ljust(typewidth)} {'*' if member.cnt else ' '}{member.name};\n" - ret += "};\n" - - ret += f""" -/* tables *********************************************************************/ - -#define _P9_TYPECODE_FOR_CTYPE(msg) """ - for msg in structs: - if msg.msgid is None: - continue - ret += f", \\\n\t\t{c_typename(idprefix, msg)}: {idprefix.upper()}TYP_{msg.name}" - ret += "\n" - - ret += "\n" - ret += f"#endif /* {guard} */\n" - return ret - - -def gen_c(txtname: str, idprefix: str, structs: list[Struct]) -> str: - txtdir, txtbase = os.path.split(txtname) - header = os.path.join(txtdir, "defs-" + txtbase.replace(".txt", ".h")) - ret = f"""/* Generated by `{PROGRAM} {txtname}`. DO NOT EDIT! */ - -#include <stdbool.h> -#include <stdint.h> -#include <stdlib.h> /* for malloc() */ - -#include "{header}" -#include "9p/internal.h" -""" - - def used(arg: str) -> str: - return arg - - def unused(arg: str) -> str: - return f"UNUSED({arg})" - - # checksize_* ############################################################## - ret += """ -/* checksize_* ****************************************************************/ - -static inline bool _checksize_list(size_t cnt, _checksize_fn_t fn, size_t host_size, - uint32_t net_len, uint8_t *net_bytes, uint32_t *mut_net_offset, size_t *mut_host_extra) { - for (size_t i = 0; i < cnt; i++) - if (__builtin_add_overflow(*mut_host_extra, host_size, mut_host_extra) - || fn(net_len, net_bytes, mut_net_offset, mut_host_extra)) - return true; - return false; -} - -static inline bool checksize_1(uint32_t net_len, uint8_t *UNUSED(net_bytes), uint32_t *mut_net_offset, size_t *UNUSED(mut_host_extra)) { - return __builtin_add_overflow(*mut_net_offset, 1, mut_net_offset) || net_len < *mut_net_offset; -} -static inline bool checksize_2(uint32_t net_len, uint8_t *UNUSED(net_bytes), uint32_t *mut_net_offset, size_t *UNUSED(mut_host_extra)) { - return __builtin_add_overflow(*mut_net_offset, 2, mut_net_offset) || net_len < *mut_net_offset; -} -static inline bool checksize_4(uint32_t net_len, uint8_t *UNUSED(net_bytes), uint32_t *mut_net_offset, size_t *UNUSED(mut_host_extra)) { - return __builtin_add_overflow(*mut_net_offset, 4, mut_net_offset) || net_len < *mut_net_offset; -} -static inline bool checksize_8(uint32_t net_len, uint8_t *UNUSED(net_bytes), uint32_t *mut_net_offset, size_t *UNUSED(mut_host_extra)) { - return __builtin_add_overflow(*mut_net_offset, 8, mut_net_offset) || net_len < *mut_net_offset; -} - -""" - for struct in structs: - inline = ' inline' if struct.msgid is None else '' - argfn = used if struct.members else unused - ret += f"static{inline} bool checksize_{struct.name}(uint32_t {argfn('net_len')}, uint8_t *{argfn('net_bytes')}, uint32_t *{argfn('mut_net_offset')}, size_t *{argfn('mut_host_extra')}) {{" - if len(struct.members) == 0: - ret += "\n\treturn false;\n" - ret += "}\n" - continue - prefix0 = "\treturn " - prefix1 = "\t || " - prefix2 = "\t " - prefix = prefix0 - prev_size: int | None = None - for member in struct.members: - if member.cnt is not None: - assert prev_size - ret += f"\n{prefix }_checksize_list(decode_u{prev_size*8}le(&net_bytes[(*mut_net_offset)-{prev_size}]), checksize_{member.typ.name}, sizeof({c_typename(idprefix, member.typ)})," - ret += f"\n{prefix2} net_len, net_bytes, mut_net_offset, mut_host_extra)" - else: - ret += f"\n{prefix}checksize_{member.typ.name}(net_len, net_bytes, mut_net_offset, mut_host_extra)" - prefix = prefix1 - prev_size = member.static_size - if struct.name == "s": - ret += ( - f"\n{prefix}__builtin_add_overflow(*mut_host_extra, 1, mut_host_extra)" - ) - ret += ";\n}\n" - - # unmarshal_* ############################################################## - ret += """ -/* unmarshal_* ****************************************************************/ -/* checksize_XXX() should be called before unmarshal_XXX(). */ - -static inline bool unmarshal_1(uint8_t *net_bytes, uint32_t *mut_net_offset, void **UNUSED(mut_host_extra), uint8_t *out) { - *out = decode_u8le(&net_bytes[*mut_net_offset]); - *mut_net_offset += 1; - return false; -} -static inline bool unmarshal_2(uint8_t *net_bytes, uint32_t *mut_net_offset, void **UNUSED(mut_host_extra), uint16_t *out) { - *out = decode_u16le(&net_bytes[*mut_net_offset]); - *mut_net_offset += 2; - return false; -} -static inline bool unmarshal_4(uint8_t *net_bytes, uint32_t *mut_net_offset, void **UNUSED(mut_host_extra), uint32_t *out) { - *out = decode_u32le(&net_bytes[*mut_net_offset]); - *mut_net_offset += 4; - return false; -} -static inline bool unmarshal_8(uint8_t *net_bytes, uint32_t *mut_net_offset, void **UNUSED(mut_host_extra), uint64_t *out) { - *out = decode_u64le(&net_bytes[*mut_net_offset]); - *mut_net_offset += 8; - return false; -} - -""" - for struct in structs: - argfn = used if struct.members else unused - ret += f"static inline bool unmarshal_{struct.name}(uint8_t *{argfn('net_bytes')}, uint32_t *{argfn('mut_net_offset')}, void **{argfn('mut_host_extra')}, {c_typename(idprefix, struct)} *{argfn('out')}) {{\n" - for member in struct.members: - if member.cnt: - ret += f"\tout->{member.name} = *mut_host_extra;\n" - ret += f"\t*mut_host_extra += sizeof(*out->{member.name}) * out->{member.cnt};\n" - ret += f"\tfor (typeof(out->{member.cnt}) i = 0; i < out->{member.cnt}; i++)\n" - ret += f"\t\tif (unmarshal_{member.typ.name}(net_bytes, mut_net_offset, mut_host_extra, &(out->{member.name}[i])))\n" - ret += f"\t\t\treturn true;\n" - if struct.name == "s": - ret += f"\tif (!is_valid_utf8_without_nul(out->{member.name}, out->{member.cnt}))\n" - ret += f"\t\treturn true;\n" - ret += f"\tout->{member.name}[out->{member.cnt}] = '\\0';\n" - else: - ret += f"\tif (unmarshal_{member.typ.name}(net_bytes, mut_net_offset, mut_host_extra, &(out->{member.name})))\n" - ret += f"\t\treturn true;\n" - ret += "\treturn false;\n" - ret += "}\n" - - # marshal_* ################################################################ - ret += """ -/* marshal_* ******************************************************************/ - -static inline bool marshal_1(struct p9_ctx *ctx, uint8_t val, uint8_t *out_net_bytes, uint32_t *mut_net_offset) { - if (*mut_net_offset + 1 > ctx->max_msg_size) - return true; - out_net_bytes[*mut_net_offset] = val; - *mut_net_offset += 1; - return false; -} -static inline bool marshal_2(struct p9_ctx *ctx, uint16_t val, uint8_t *out_net_bytes, uint32_t *mut_net_offset) { - if (*mut_net_offset + 2 > ctx->max_msg_size) - return true; - encode_u16le(val, &out_net_bytes[*mut_net_offset]); - *mut_net_offset += 2; - return false; -} -static inline bool marshal_4(struct p9_ctx *ctx, uint32_t val, uint8_t *out_net_bytes, uint32_t *mut_net_offset) { - if (*mut_net_offset + 4 > ctx->max_msg_size) - return true; - encode_u32le(val, &out_net_bytes[*mut_net_offset]); - *mut_net_offset += 4; - return false; -} -static inline bool marshal_8(struct p9_ctx *ctx, uint64_t val, uint8_t *out_net_bytes, uint32_t *mut_net_offset) { - if (*mut_net_offset + 8 > ctx->max_msg_size) - return true; - encode_u64le(val, &out_net_bytes[*mut_net_offset]); - *mut_net_offset += 8; - return false; -} -""" - for struct in structs: - argfn = used if struct.members else unused - ret += f"static inline bool marshal_{struct.name}(struct p9_ctx *{argfn('ctx')}, {c_typename(idprefix, struct)} {argfn('val')}, uint8_t *{argfn('out_net_bytes')}, uint32_t *{argfn('mut_net_offset')}) {{\n" - for member in struct.members: - if member.cnt: - ret += f"\tfor (typeof(val.{member.cnt}) i = 0; i < val.{member.cnt}; i++)\n" - ret += f"\t\tif (marshal_{member.typ.name}(ctx, val.{member.name}[i], out_net_bytes, mut_net_offset))\n" - ret += f"\t\t\treturn true;\n" - else: - ret += f"\tif (marshal_{member.typ.name}(ctx, val.{member.name}, out_net_bytes, mut_net_offset))\n" - ret += f"\t\treturn true;\n" - ret += "\treturn false;\n" - ret += "}\n" - - # tables ################################################################### - ret += """ -/* tables *********************************************************************/ - -""" - for msg in structs: - if msg.msgid is None: - continue - ret += f"static bool _unmarshal_{msg.name}(uint8_t *net_bytes, uint32_t *mut_net_offset, void **mut_host_extra, void *out) {{ return unmarshal_{msg.name}(net_bytes, mut_net_offset, mut_host_extra, ({c_typename(idprefix, msg)} *)out); }}\n" - for msg in structs: - if msg.msgid is None: - continue - ret += f"static bool _marshal_{msg.name}(struct p9_ctx *ctx, void *val, uint8_t *out_net_bytes, uint32_t *mut_net_offset) {{ return marshal_{msg.name}(ctx, *(({c_typename(idprefix, msg)} *)val), out_net_bytes, mut_net_offset); }}\n" - ret += "struct version version_9P2000 = {\n" - ret += "\t.msgs = {\n" - for msg in structs: - if msg.msgid is None: - continue - ret += f"\t\t[{idprefix.upper()}TYP_{msg.name}] = {{ .unmarshal_basesize=sizeof({c_typename(idprefix, msg)}), .unmarshal_extrasize=checksize_{msg.name}, .unmarshal=_unmarshal_{msg.name}, .marshal=_marshal_{msg.name} }},\n" - ret += "\t},\n" - ret += "};\n" - - ############################################################################ - return ret - - -################################################################################ - -if __name__ == "__main__": - import sys - - for txtname in sys.argv[1:]: - txtdir, txtbase = os.path.split(txtname) - version, structs = parse_file(txtname) - with open( - os.path.join(txtdir, "defs-" + txtbase.replace(".txt", ".h")), "w" - ) as fh: - fh.write(gen_h(txtname, "p9_", structs)) - with open( - os.path.join(txtdir, "defs-" + txtbase.replace(".txt", ".c")), "w" - ) as fh: - fh.write(gen_c(txtname, "p9_", structs)) |