diff options
Diffstat (limited to 'lib9p/idl')
-rw-r--r-- | lib9p/idl/0000-README.md | 63 | ||||
-rw-r--r-- | lib9p/idl/0000-TODO.md | 1 | ||||
-rw-r--r-- | lib9p/idl/1992-9P0.9p.wip | 17 | ||||
-rw-r--r-- | lib9p/idl/2002-9P2000.9p | 21 | ||||
-rw-r--r-- | lib9p/idl/2005-9P2000.u.9p | 2 | ||||
-rw-r--r-- | lib9p/idl/2010-9P2000.L.9p | 48 | ||||
-rw-r--r-- | lib9p/idl/__init__.py | 486 |
7 files changed, 428 insertions, 210 deletions
diff --git a/lib9p/idl/0000-README.md b/lib9p/idl/0000-README.md index e19a1e8..84cf865 100644 --- a/lib9p/idl/0000-README.md +++ b/lib9p/idl/0000-README.md @@ -17,35 +17,62 @@ client->server requests, and R-messages are server->client responses type of a message is represented by a u8 ID; T-messages are even and R-messages are odd. -Messages are made up of the primitives; unsigned little-endian -integers, identified with the following single-character mnemonics: +9P messages are exchanged over a reliable bidirectional in-order octet +stream. Messages are made up of the primitives; unsigned +little-endian integers, identified with the following single-character +mnemonics: - 1 = u8 - 2 = u16le - 4 = u32le - 8 = u16le -Out of these primitives, we can make other numeric types, +Out of these primitives, we can make more complex types: + +## User-defined types + +### Numeric types num NUMNAME = PRIMITIVE_TYPE + "NAME=VAL"... + +Besides just being an alias for a primitive type, a numeric type may +define 0 or more named constants of that type, each wrapped in +"quotes". + +### Bitfields -bitfields, + bitfield BFNAME = PRIMITIVE_TYPE + "bit NBIT=NAME"... + "bit NBIT=reserved(NAME)"... + "bit NBIT=num(NUMNAME)"... + "alias NAME=VAL"... + "mask NAME=VAL"... + "num(NUMNAME) NAME=VAL"... - bitfield BFNAME = PRIMITIVE_TYPE "NBIT=NAME... ALIAS=VAL..." +The same NBIT may not be defined multiple times. The same NAME may +not be defined multiple times. -structures, + - A `reserved(...)` bit indicates that the bit is named but is not + allowed to be used. + - `num(...)` bits embed a numeric/enumerated field within a set of + bits. Once several bits have been allocated to a numeric field + with `bit NBIT=num(NUMNAME)`, constant values for that field may be + declared with `num(NUMNAME) NAME=VAL`. For each numeric field, a + `mask NUMNAME=BITMASK` is automatically declared. + - A `mask` defines a bitmask that selects several bits. + - An `alias` defines a convenience alias for a bit or set of bits. + +### Structures struct STRUCTNAME = "FIELDNAME[FIELDTYPE]..." -and messages (which are a special-case of structures). +Or a special-case for structs that are messages; `msg` has the same +syntax as `struct`, but has restrictions on the STRUCTNAME and the +first 3 fields must all be declared in the same way: msg Tname = "size[4,val=end-&size] typ[1,val=TYP] tag[tag] REST..." -Bitfield bit names may be wrapped in `reserved(...)` or -`subfield(...)`; reserved indicates that the bit is named but is not -allowed to be used, and subfield indicates that the bit is part of a -num/enum that is handled by an alias. - Struct fields that have numeric types (either primitives or `num` types) can add to their type `,val=` and/or `,max=` to specify what the exact value must be and/or what the maximum (inclusive) value is. @@ -59,11 +86,13 @@ can be - `&fieldname` to refer to the offset of a field name in that struct, - the special value `end` to refer to the offset of the end of the struct, - - the special value `s32_max` to refer to the constant value - `(1<<31)-1`, or - - the special value `s64_max` to refer to the constant value - `(1<<63)-1` + - the special value `u{8,16,32,64}_max` to refer to the constant + value `(1<<{n})-1`, or + - the special value `s{8,16,32,64}_max` to refer to the constant value + `(1<<({n}-1))-1`. + +## Parser A parser for this syntax is given in `__init__.py`. However, `__init__.py` places the somewhat arbitrary undocumented restrictions -on fields referenced as the count for a repeated field. +on fields referenced as the count of a repeated field. diff --git a/lib9p/idl/0000-TODO.md b/lib9p/idl/0000-TODO.md index 5079a1d..e52902f 100644 --- a/lib9p/idl/0000-TODO.md +++ b/lib9p/idl/0000-TODO.md @@ -9,4 +9,3 @@ - Decide how to handle duplicate type names from different versions - Decide how to handle duplicate `enum lib9p_msg_type` names and values -- Clean up the iterate-over-all-msgids-in-a-version code diff --git a/lib9p/idl/1992-9P0.9p.wip b/lib9p/idl/1992-9P0.9p.wip index 360f320..a434ba2 100644 --- a/lib9p/idl/1992-9P0.9p.wip +++ b/lib9p/idl/1992-9P0.9p.wip @@ -37,8 +37,8 @@ struct errstr = "64*(txt[1])" # "O"pen flags (flags to pass to Topen and Tcreate) # Unused bits are *ignored*. bitfield o = 1 - "bit 0=subfield(mode)" # low bit of the 2-bit READ/WRITE/RDWR/EXEC enum - "bit 1=subfield(mode)" # high bit of the 2-bit READ/WRITE/RDWR/EXEC enum + "bit 0=num(MODE)" # low bit of the 2-bit READ/WRITE/RDWR/EXEC enum + "bit 1=num(MODE)" # high bit of the 2-bit READ/WRITE/RDWR/EXEC enum #"bit 2=unused" #"bit 3=unused" "bit 4=TRUNC" @@ -46,13 +46,12 @@ bitfield o = 1 "bit 6=RCLOSE" # remove-on-close #"bit 7=unused" - "alias READ = 0" # make available for this FID: Tread() - "alias WRITE = 1" # make available for this FID: Twrite() - "alias RDWR = 2" # make available for this FID: Tread() and Twrite() - "alias EXEC = 3" # make available for this FID: Tread() + "num(MODE) READ = 0" # make available for this FID: Tread() + "num(MODE) WRITE = 1" # make available for this FID: Twrite() + "num(MODE) RDWR = 2" # make available for this FID: Tread() and Twrite() + "num(MODE) EXEC = 3" # make available for this FID: Tread() - "alias MODE_MASK = 0b00000011" - "alias FLAG_MASK = 0b11111100" + "mask FLAG = 0b11111100" # "CH"annel flags - file permissions and attributes (a "channel" is # what a file handle is called inside of the Plan 9 kernel). @@ -71,7 +70,7 @@ bitfield ch = 4 "bit 1=OTHER_W" "bit 0=OTHER_X" - "alias PERM_MASK=0777" # {OWNER,GROUP,OTHER}_{R,W,X} + "mask PERM=0777" # {OWNER,GROUP,OTHER}_{R,W,X} struct stat = "file_name[name]" "file_owner[name]" diff --git a/lib9p/idl/2002-9P2000.9p b/lib9p/idl/2002-9P2000.9p index 204b352..36a2427 100644 --- a/lib9p/idl/2002-9P2000.9p +++ b/lib9p/idl/2002-9P2000.9p @@ -22,11 +22,11 @@ version "9P2000" # tag - identify a request/response pair num tag = 2 - "NOTAG = ~0" + "NOTAG = u16_max" # file identifier - like a UNIX file-descriptor num fid = 4 - "NOFID = ~0" + "NOFID = u32_max" # string - u16le `n`, then `n` bytes of UTF-8, without any nul-bytes struct s = "len[2] len*(utf8[1])" @@ -56,7 +56,7 @@ bitfield dm = 4 "bit 1=OTHER_W" "bit 0=OTHER_X" - "alias PERM_MASK=0777" # {OWNER,GROUP,OTHER}_{R,W,X} + "mask PERM=0777" # {OWNER,GROUP,OTHER}_{R,W,X} # QID Type - see `struct qid` below bitfield qt = 1 @@ -107,8 +107,8 @@ struct stat = "stat_size[2,val=end-&kern_type]" # "O"pen flags (flags to pass to Topen and Tcreate) # Unused bits *must* be 0. bitfield o = 1 - "bit 0=subfield(mode)" # low bit of the 2-bit READ/WRITE/RDWR/EXEC enum - "bit 1=subfield(mode)" # high bit of the 2-bit READ/WRITE/RDWR/EXEC enum + "bit 0=num(MODE)" # low bit of the 2-bit READ/WRITE/RDWR/EXEC enum + "bit 1=num(MODE)" # high bit of the 2-bit READ/WRITE/RDWR/EXEC enum #"bit 2=unused" #"bit 3=unused" "bit 4=TRUNC" @@ -116,13 +116,12 @@ bitfield o = 1 "bit 6=RCLOSE" # remove-on-close #"bit 7=unused" - "alias READ = 0" # make available for this FID: Tread() - "alias WRITE = 1" # make available for this FID: Twrite() - "alias RDWR = 2" # make available for this FID: Tread() and Twrite() - "alias EXEC = 3" # make available for this FID: Tread() + "num(MODE) READ = 0" # make available for this FID: Tread() + "num(MODE) WRITE = 1" # make available for this FID: Twrite() + "num(MODE) RDWR = 2" # make available for this FID: Tread() and Twrite() + "num(MODE) EXEC = 3" # make available for this FID: Tread() - "alias MODE_MASK = 0b00000011" - "alias FLAG_MASK = 0b11111100" + "mask FLAG = 0b11111100" # A 9P2000 session goes: # diff --git a/lib9p/idl/2005-9P2000.u.9p b/lib9p/idl/2005-9P2000.u.9p index 72cc662..6c2f2dc 100644 --- a/lib9p/idl/2005-9P2000.u.9p +++ b/lib9p/idl/2005-9P2000.u.9p @@ -12,7 +12,7 @@ from ./2002-9P2000.9p import * # numeric user ID num nuid = 4 - "NONUID = ~0" + "NONUID = u32_max" num errno = 4 "NOERROR = 0" diff --git a/lib9p/idl/2010-9P2000.L.9p b/lib9p/idl/2010-9P2000.L.9p index 56fcd0d..d81a15b 100644 --- a/lib9p/idl/2010-9P2000.L.9p +++ b/lib9p/idl/2010-9P2000.L.9p @@ -39,8 +39,8 @@ num super_magic = 4 # protocol.h (and are different than the Linux kernel's values, which # vary by architecture). bitfield lo = 4 - "bit 0=subfield(mode)" # low bit of the 2-bit RDONLY/WRONLY/RDWR/NOACCESS enum - "bit 1=subfield(mode)" # high bit of the 2-bit RDONLY/WRONLY/RDWR/NOACCESS enum + "bit 0=num(MODE)" # low bit of the 2-bit RDONLY/WRONLY/RDWR/NOACCESS enum + "bit 1=num(MODE)" # high bit of the 2-bit RDONLY/WRONLY/RDWR/NOACCESS enum #"bit 2=unused" #"bit 3=unused" #"bit 4=unused" @@ -61,13 +61,12 @@ bitfield lo = 4 "bit 19=CLOEXEC" "bit 20=SYNC" - "alias RDONLY = 0" - "alias WRONLY = 1" - "alias RDWR = 2" - "alias NOACCESS = 3" + "num(MODE) RDONLY = 0" + "num(MODE) WRONLY = 1" + "num(MODE) RDWR = 2" + "num(MODE) NOACCESS = 3" - "alias MODE_MASK = 0b000000000000000000011" - "alias FLAG_MASK = 0b111111111111111000000" + "mask FLAG = 0b111111111111111000000" # "D"irentry "T"ype # @@ -77,11 +76,11 @@ num dt = 1 "PIPE = 1" "CHAR_DEV = 2" "DIRECTORY = 4" - "BLOCK_DEV = 6" + "BLOCK_DEV = 6" # proof it's not a bitfield "REGULAR = 8" - "SYMLINK = 10" - "SOCKET = 12" - "_WHITEOUT = 14" + "SYMLINK = 10" # proof it's not a bitfield + "SOCKET = 12" # proof it's not a bitfield + "_WHITEOUT = 14" # proof it's not a bitfield # Mode # @@ -89,10 +88,10 @@ num dt = 1 # instead of just 16? Who knows? bitfield mode = 4 #... - "bit 15=subfield(fmt)" # bit of the 4-bit FMT_ enum - "bit 14=subfield(fmt)" # bit of the 4-bit FMT_ enum - "bit 13=subfield(fmt)" # bit of the 4-bit FMT_ enum - "bit 12=subfield(fmt)" # bit of the 4-bit FMT_ enum + "bit 15=num(FMT)" # bit of the 4-bit FMT_ enum + "bit 14=num(FMT)" # bit of the 4-bit FMT_ enum + "bit 13=num(FMT)" # bit of the 4-bit FMT_ enum + "bit 12=num(FMT)" # bit of the 4-bit FMT_ enum #... "bit 11=PERM_SETGROUP" "bit 10=PERM_SETUSER" @@ -107,16 +106,15 @@ bitfield mode = 4 "bit 1=PERM_OTHER_W" "bit 0=PERM_OTHER_X" - "alias FMT_PIPE = LIB9P_DT_PIPE<<12" - "alias FMT_CHAR_DEV = LIB9P_DT_CHAR_DEV<<12" - "alias FMT_DIRECTORY = LIB9P_DT_DIRECTORY<<12" - "alias FMT_BLOCK_DEV = LIB9P_DT_BLOCK_DEV<<12" - "alias FMT_REGULAR = LIB9P_DT_REGULAR<<12" - "alias FMT_SYMLINK = LIB9P_DT_SYMLINK<<12" - "alias FMT_SOCKET = LIB9P_DT_SOCKET<<12" + "num(FMT) PIPE = dt.PIPE<<12" + "num(FMT) CHAR_DEV = dt.CHAR_DEV<<12" + "num(FMT) DIRECTORY = dt.DIRECTORY<<12" + "num(FMT) BLOCK_DEV = dt.BLOCK_DEV<<12" + "num(FMT) REGULAR = dt.REGULAR<<12" + "num(FMT) SYMLINK = dt.SYMLINK<<12" + "num(FMT) SOCKET = dt.SOCKET<<12" - "alias PERM_MASK = 0000777" # PERM_* - "alias FMT_MASK = 0170000" # _fmt_* + "mask PERM = 07777" # PERM_* # A boolean value that is for some reason 4 bytes wide. num b4 = 4 diff --git a/lib9p/idl/__init__.py b/lib9p/idl/__init__.py index 78246d2..2d09217 100644 --- a/lib9p/idl/__init__.py +++ b/lib9p/idl/__init__.py @@ -15,14 +15,27 @@ __all__ = [ # types "Type", "Primitive", + *["Expr", "ExprTok", "ExprOp", "ExprLit", "ExprSym", "ExprOff", "ExprNum"], "Number", - *["Bitfield", "Bit", "BitCat", "BitAlias"], - *["Struct", "StructMember", "Expr", "ExprOp", "ExprSym", "ExprLit"], + *["Bitfield", "Bit", "BitCat", "BitNum", "BitAlias"], + *["Struct", "StructMember"], "Message", ] # The syntax that this parses is described in `./0000-README.md`. +# Utilities #################################################################### + + +def get_type(env: dict[str, "Type"], name: str, tc: type["T"]) -> "T": + if name not in env: + raise NameError(f"Unknown type {name!r}") + ret = env[name] + if (not isinstance(ret, tc)) or (ret.__class__.__name__ != tc.__name__): + raise NameError(f"Type {ret.typname!r} is not a {tc.__name__}") + return ret + + # Types ######################################################################## @@ -51,13 +64,129 @@ class Primitive(enum.Enum): return self.value +class ExprOp: + op: typing.Literal["-", "+", "<<"] + + def __init__(self, op: typing.Literal["-", "+", "<<"]) -> None: + self.op = op + + +class ExprLit: + val: int + + def __init__(self, val: int) -> None: + self.val = val + + +class ExprSym: + symname: str + + def __init__(self, name: str) -> None: + self.symname = name + + +class ExprOff: + membname: str + + def __init__(self, name: str) -> None: + self.membname = name + + +class ExprNum: + numname: str + valname: str + + def __init__(self, numname: str, valname: str) -> None: + self.numname = numname + self.valname = valname + + +type ExprTok = ExprOp | ExprLit | ExprSym | ExprOff | ExprNum + + +class Expr: + tokens: typing.Sequence[ExprTok] + const: int | None + + def __init__( + self, env: dict[str, "Type"], tokens: typing.Sequence[ExprTok] = () + ) -> None: + self.tokens = tokens + self.const = self._const(env, tokens) + + def _const( + self, env: dict[str, "Type"], toks: typing.Sequence[ExprTok] + ) -> int | None: + if not toks: + return None + + def read_val() -> int | None: + nonlocal toks + assert toks + neg = False + match toks[0]: + case ExprOp(op="-"): + neg = True + toks = toks[1:] + assert not isinstance(toks[0], ExprOp) + val: int + match toks[0]: + case ExprLit(): + val = toks[0].val + case ExprSym(): + if m := re.fullmatch(r"^u(8|16|32|64)_max$", toks[0].symname): + n = int(m.group(1)) + val = (1 << n) - 1 + elif m := re.fullmatch(r"^s(8|16|32|64)_max$", toks[0].symname): + n = int(m.group(1)) + val = (1 << (n - 1)) - 1 + else: + return None + case ExprOff(): + return None + case ExprNum(): + num = get_type(env, toks[0].numname, Number) + if toks[0].valname not in num.vals: + raise NameError( + f"Type {toks[0].numname!r} does not have a value {toks[0].valname!r}" + ) + _val = num.vals[toks[0].valname].const + if _val is None: + return None + val = _val + toks = toks[1:] + return -val if neg else val + + ret = read_val() + if ret is None: + return None + while toks: + assert isinstance(toks[0], ExprOp) + op = toks[0].op + toks = toks[1:] + operand = read_val() + if operand is None: + return None + match op: + case "+": + ret = ret + operand + case "-": + ret = ret - operand + case "<<": + ret = ret << operand + return ret + + def __bool__(self) -> bool: + return len(self.tokens) > 0 + + class Number: typname: str in_versions: set[str] prim: Primitive - vals: dict[str, str] + vals: dict[str, Expr] def __init__(self) -> None: self.in_versions = set() @@ -74,11 +203,31 @@ class Number: return self.static_size -class BitCat(enum.Enum): - UNUSED = 1 - USED = 2 - RESERVED = 3 - SUBFIELD = 4 +class BitAlias: + bitname: str + in_versions: set[str] + val: Expr + + def __init__(self, name: str, val: Expr) -> None: + if val.const is None: + raise ValueError(f"{name!r} value is not constant") + self.bitname = name + self.in_versions = set() + self.val = val + + +class BitNum: + numname: str + mask: int + vals: dict[str, BitAlias] + + def __init__(self, name: str) -> None: + self.numname = name + self.mask = 0 + self.vals = {} + + +type BitCat = typing.Literal["UNUSED", "USED", "RESERVED"] | BitNum class Bit: @@ -91,33 +240,32 @@ class Bit: self.bitname = "" self.in_versions = set() self.num = num - self.cat = BitCat.UNUSED - - -class BitAlias: - bitname: str - in_versions: set[str] - val: str # FIXME: Don't have bitfield aliases be raw C expressions - - def __init__(self, name: str, val: str) -> None: - self.bitname = name - self.in_versions = set() - self.val = val + self.cat = "UNUSED" class Bitfield: typname: str in_versions: set[str] prim: Primitive + bits: list[Bit] - names: dict[str, Bit | BitAlias] + nums: dict[str, BitNum] + masks: dict[str, BitAlias] + aliases: dict[str, BitAlias] + + names: set[str] def __init__(self, name: str, prim: Primitive) -> None: self.typname = name self.in_versions = set() self.prim = prim + self.bits = [Bit(i) for i in range(prim.static_size * 8)] - self.names = {} + self.nums = {} + self.masks = {} + self.aliases = {} + + self.names = set() @property def static_size(self) -> int: @@ -130,37 +278,6 @@ class Bitfield: return self.static_size -class ExprLit: - val: int - - def __init__(self, val: int) -> None: - self.val = val - - -class ExprSym: - symname: str - - def __init__(self, name: str) -> None: - self.symname = name - - -class ExprOp: - op: typing.Literal["-", "+"] - - def __init__(self, op: typing.Literal["-", "+"]) -> None: - self.op = op - - -class Expr: - tokens: list[ExprLit | ExprSym | ExprOp] - - def __init__(self) -> None: - self.tokens = [] - - def __bool__(self) -> bool: - return len(self.tokens) > 0 - - class StructMember: # from left-to-right when parsing cnt: "StructMember| int | None" = None @@ -197,21 +314,12 @@ class StructMember: raise ValueError(f"list count may not have ,val=: {self.cnt.membname!r}") if self.cnt.max: # TODO: be more flexible? - if len(self.cnt.max.tokens) != 1: + val = self.cnt.max.const + if val is None: raise ValueError( - f"list count ,max= may only have 1 token: {self.cnt.membname!r}" + f"list count ,max= must be a constant value: {self.cnt.membname!r}" ) - match tok := self.cnt.max.tokens[0]: - case ExprLit(): - return tok.val - case ExprSym(symname="s32_max"): - return (1 << 31) - 1 - case ExprSym(symname="s64_max"): - return (1 << 63) - 1 - case _: - raise ValueError( - f'list count ,max= only allows literal, "s32_max", and "s64_max" tokens: {self.cnt.membname!r}' - ) + return val return (1 << (self.cnt.typ.value * 8)) - 1 @property @@ -283,6 +391,8 @@ T = typing.TypeVar("T", Number, Bitfield, Struct, Message) # Parse ######################################################################## +# common elements ###################### + re_priname = "(?:1|2|4|8)" # primitive names re_symname = "(?:[a-zA-Z_][a-zA-Z_0-9]*)" # "symbol" names; most *.9p-defined names re_symname_u = "(?:[A-Z_][A-Z_0-9]*)" # upper-case "symbol" names; bit names @@ -292,99 +402,186 @@ re_msgname = r"(?:[TR][a-zA-Z_0-9]*)" # names a message can be re_memtype = f"(?:{re_symname}|{re_priname})" # typenames that a struct member can be -re_expr = f"(?:(?:-|\\+|[0-9]+|&?{re_symname})+)" +valid_syms = [ + "end", + "u8_max", + "u16_max", + "u32_max", + "u64_max", + "s8_max", + "s16_max", + "s32_max", + "s64_max", +] -re_numspec = f"(?P<name>{re_symname})\\s*=\\s*(?P<val>\\S+)" +_re_expr_op = r"(?:-|\+|<<)" -re_bitspec_bit = ( - "bit\\s+(?P<bitnum>[0-9]+)\\s*=\\s*(?:" +_res_expr_val = { + "lit_2": r"0b[01]+", + "lit_8": r"0[0-7]+", + "lit_10": r"0(?![0-9bxX])|[1-9][0-9]*", + "lit_16": r"0[xX][0-9a-fA-F]+", + "sym": "|".join(valid_syms), # pre-defined symbols + "off": f"&{re_symname}", # offset of a field this struct + "num": f"{re_symname}\\.{re_symname}", # `num` values +} + +re_expr_tok = ( + "(?:" + "|".join( [ - f"(?P<name_used>{re_symname_u})", - f"reserved\\((?P<name_reserved>{re_symname_u})\\)", - f"subfield\\((?P<name_subfield>{re_symname_l})\\)", + f"(?P<op>{_re_expr_op})", + *[f"(?P<{k}>{v})" for k, v in _res_expr_val.items()], ] ) + ")" ) -re_bitspec_alias = f"alias\\s+(?P<name>{re_symname_u})\\s*=\\s*(?P<val>\\S+)" -re_memberspec = f"(?:(?P<cnt>{re_symname}|[1-9][0-9]*)\\*\\()?(?P<name>{re_symname})\\[(?P<typ>{re_memtype})(?:,max=(?P<max>{re_expr})|,val=(?P<val>{re_expr}))*\\]\\)?" +_re_expr_val = "(?:" + "|".join(_res_expr_val.values()) + ")" + +re_expr = f"(?:\\s*(?:-\\s*)?{_re_expr_val}\\s*(?:{_re_expr_op}\\s*(?:-\\s*)?{_re_expr_val}\\s*)*)" + + +def parse_expr(env: dict[str, Type], expr: str) -> Expr: + assert re.fullmatch(re_expr, expr) + tokens: list[ExprTok] = [] + for m in re.finditer(re_expr_tok, expr): + if tok := m.group("op"): + tokens.append(ExprOp(typing.cast(typing.Literal["-", "+", "<<"], tok))) + elif tok := m.group("lit_2"): + tokens.append(ExprLit(int(tok[2:], 2))) + elif tok := m.group("lit_8"): + tokens.append(ExprLit(int(tok[1:], 8))) + elif tok := m.group("lit_10"): + tokens.append(ExprLit(int(tok, 10))) + elif tok := m.group("lit_16"): + tokens.append(ExprLit(int(tok[2:], 16))) + elif tok := m.group("sym"): + tokens.append(ExprSym(tok)) + elif tok := m.group("off"): + tokens.append(ExprOff(tok[1:])) + elif tok := m.group("num"): + [numname, valname] = tok.split(".", 1) + tokens.append(ExprNum(numname, valname)) + else: + assert False + return Expr(env, tokens) -def parse_numspec(ver: str, n: Number, spec: str) -> None: +# numspec ############################## + +re_numspec = f"(?P<name>{re_symname})\\s*=\\s*(?P<val>{re_expr})" + + +def parse_numspec(env: dict[str, Type], ver: str, n: Number, spec: str) -> None: spec = spec.strip() if m := re.fullmatch(re_numspec, spec): name = m.group("name") - val = m.group("val") if name in n.vals: raise ValueError(f"{n.typname}: name {name!r} already assigned") + val = parse_expr(env, m.group("val")) + if val is None: + raise ValueError( + f"{n.typname}: {name!r} value is not constant: {m.group('val')!r}" + ) n.vals[name] = val else: raise SyntaxError(f"invalid num spec {spec!r}") -def parse_bitspec(ver: str, bf: Bitfield, spec: str) -> None: +# bitspec ############################## + +re_bitspec_bit = ( + "bit\\s+(?P<bitnum>[0-9]+)\\s*=\\s*(?:" + + "|".join( + [ + f"(?P<name_used>{re_symname_u})", + f"reserved\\((?P<name_reserved>{re_symname_u})\\)", + f"num\\((?P<name_num>{re_symname_u})\\)", + ] + ) + + ")" +) +re_bitspec_mask = f"mask\\s+(?P<name>{re_symname_u})\\s*=\\s*(?P<val>{re_expr})" +re_bitspec_alias = f"alias\\s+(?P<name>{re_symname_u})\\s*=\\s*(?P<val>{re_expr})" +re_bitspec_num = f"num\\((?P<num>{re_symname_u})\\)\\s+(?P<name>{re_symname_u})\\s*=\\s*(?P<val>{re_expr})" + + +def parse_bitspec(env: dict[str, Type], ver: str, bf: Bitfield, spec: str) -> None: spec = spec.strip() + def check_name(name: str, is_num: bool = False) -> None: + if name == "MASK": + raise ValueError(f"{bf.typname}: bit name may not be {'MASK'!r}: {name!r}") + if name.endswith("_MASK"): + raise ValueError( + f"{bf.typname}: bit name may not end with {'_MASK'!r}: {name!r}" + ) + if name in bf.names and not (is_num and name in bf.nums): + raise ValueError(f"{bf.typname}: bit name already assigned: {name!r}") + if m := re.fullmatch(re_bitspec_bit, spec): bitnum = int(m.group("bitnum")) if bitnum < 0 or bitnum >= len(bf.bits): raise ValueError(f"{bf.typname}: bit num {bitnum} out-of-bounds") bit = bf.bits[bitnum] - if bit.cat != BitCat.UNUSED: + if bit.cat != "UNUSED": raise ValueError(f"{bf.typname}: bit num {bitnum} already assigned") if name := m.group("name_used"): bit.bitname = name - bit.cat = BitCat.USED + bit.cat = "USED" bit.in_versions.add(ver) elif name := m.group("name_reserved"): bit.bitname = name - bit.cat = BitCat.RESERVED + bit.cat = "RESERVED" bit.in_versions.add(ver) - elif name := m.group("name_subfield"): + elif name := m.group("name_num"): bit.bitname = name - bit.cat = BitCat.SUBFIELD + if name not in bf.nums: + bf.nums[name] = BitNum(name) + bf.nums[name].mask |= 1 << bit.num + bit.cat = bf.nums[name] bit.in_versions.add(ver) if bit.bitname: - if bit.bitname in bf.names: - other = bf.names[bit.bitname] - if ( - isinstance(other, Bit) - and other.cat == bit.cat - and bit.cat == BitCat.SUBFIELD - ): - return - raise ValueError( - f"{bf.typname}: bit name {bit.bitname!r} already assigned" - ) - bf.names[bit.bitname] = bit + check_name(name, isinstance(bit.cat, BitNum)) + bf.names.add(bit.bitname) + elif m := re.fullmatch(re_bitspec_mask, spec): + mask = BitAlias(m.group("name"), parse_expr(env, m.group("val"))) + mask.in_versions.add(ver) + check_name(mask.bitname) + bf.masks[mask.bitname] = mask + bf.names.add(mask.bitname) elif m := re.fullmatch(re_bitspec_alias, spec): - alias = BitAlias(m.group("name"), m.group("val")) + alias = BitAlias(m.group("name"), parse_expr(env, m.group("val"))) alias.in_versions.add(ver) - if alias.bitname in bf.names: + check_name(alias.bitname) + bf.aliases[alias.bitname] = alias + bf.names.add(alias.bitname) + elif m := re.fullmatch(re_bitspec_num, spec): + numname = m.group("num") + alias = BitAlias(m.group("name"), parse_expr(env, m.group("val"))) + alias.in_versions.add(ver) + check_name(alias.bitname) + if numname not in bf.nums: + raise NameError( + f"{bf.typname}: nested num not allocated any bits: {numname!r}" + ) + assert alias.val.const is not None + if alias.val.const & ~bf.nums[numname].mask: raise ValueError( - f"{bf.typname}: bit name {alias.bitname!r} already assigned" + f"{bf.typname}: {alias.bitname!r} does not fit within bitmask: val={alias.val.const:b} mask={bf.nums[numname].mask}" ) - bf.names[alias.bitname] = alias + bf.nums[numname].vals[alias.bitname] = alias + bf.names.add(alias.bitname) else: raise SyntaxError(f"invalid bitfield spec {spec!r}") -def parse_expr(expr: str) -> Expr: - assert re.fullmatch(re_expr, expr) - ret = Expr() - for tok in re.split("([-+])", expr): - if tok in ("-", "+"): - # I, for the life of me, do not understand why I need this - # typing.cast() to keep mypy happy. - ret.tokens += [ExprOp(typing.cast(typing.Literal["-", "+"], tok))] - elif re.fullmatch("[0-9]+", tok): - ret.tokens += [ExprLit(int(tok))] - else: - ret.tokens += [ExprSym(tok)] - return ret +# struct members ####################### + + +re_memberspec = f"(?:(?P<cnt>{re_symname}|[1-9][0-9]*)\\*\\()?(?P<name>{re_symname})\\[(?P<typ>{re_memtype})(?:,max=(?P<max>{re_expr})|,val=(?P<val>{re_expr}))*\\]\\)?" def parse_members(ver: str, env: dict[str, Type], struct: Struct, specs: str) -> None: @@ -421,9 +618,9 @@ def parse_members(ver: str, env: dict[str, Type], struct: Struct, specs: str) -> raise ValueError( "',max=' may only be specified on a non-repeated numeric type" ) - member.max = parse_expr(maxstr) + member.max = parse_expr(env, maxstr) else: - member.max = Expr() + member.max = Expr(env) if valstr := m.group("val"): if ( @@ -433,13 +630,16 @@ def parse_members(ver: str, env: dict[str, Type], struct: Struct, specs: str) -> raise ValueError( "',val=' may only be specified on a non-repeated numeric type" ) - member.val = parse_expr(valstr) + member.val = parse_expr(env, valstr) else: - member.val = Expr() + member.val = Expr(env) struct.members += [member] +# main parser ########################## + + def re_string(grpname: str) -> str: return f'"(?P<{grpname}>[^"]*)"' @@ -471,15 +671,6 @@ def parse_file( "8": Primitive.u64, } - def get_type(name: str, tc: type[T]) -> T: - nonlocal env - if name not in env: - raise NameError(f"Unknown type {name!r}") - ret = env[name] - if (not isinstance(ret, tc)) or (ret.__class__.__name__ != tc.__name__): - raise NameError(f"Type {ret.typname!r} is not a {tc.__name__}") - return ret - with open(filename, "r", encoding="utf-8") as fh: prev: Type | None = None for lineno, line in enumerate(fh): @@ -510,12 +701,19 @@ def parse_file( typ.in_versions.add(version) case Bitfield(): typ.in_versions.add(version) - for bit in typ.bits: - if other_version in bit.in_versions: - bit.in_versions.add(version) - for val in typ.names.values(): - if other_version in val.in_versions: - val.in_versions.add(version) + for bf_bit in typ.bits: + if other_version in bf_bit.in_versions: + bf_bit.in_versions.add(version) + for bf_num in typ.nums.values(): + for bf_val in bf_num.vals.values(): + if other_version in bf_val.in_versions: + bf_val.in_versions.add(version) + for bf_mask in typ.masks.values(): + if other_version in bf_mask.in_versions: + bf_mask.in_versions.add(version) + for bf_alias in typ.aliases.values(): + if other_version in bf_alias.in_versions: + bf_alias.in_versions.add(version) case Struct(): # and Message() typ.in_versions.add(version) for member in typ.members: @@ -555,8 +753,8 @@ def parse_file( env[bf.typname] = bf prev = bf elif m := re.fullmatch(re_line_bitfield_, line): - bf = get_type(m.group("name"), Bitfield) - parse_bitspec(version, bf, m.group("member")) + bf = get_type(env, m.group("name"), Bitfield) + parse_bitspec(env, version, bf, m.group("member")) prev = bf elif m := re.fullmatch(re_line_struct, line): @@ -575,7 +773,7 @@ def parse_file( env[struct.typname] = struct prev = struct case "+=": - struct = get_type(m.group("name"), Struct) + struct = get_type(env, m.group("name"), Struct) parse_members(version, env, struct, m.group("members")) prev = struct @@ -593,16 +791,16 @@ def parse_file( env[msg.typname] = msg prev = msg case "+=": - msg = get_type(m.group("name"), Message) + msg = get_type(env, m.group("name"), Message) parse_members(version, env, msg, m.group("members")) prev = msg elif m := re.fullmatch(re_line_cont, line): match prev: case Bitfield(): - parse_bitspec(version, prev, m.group("specs")) + parse_bitspec(env, version, prev, m.group("specs")) case Number(): - parse_numspec(version, prev, m.group("specs")) + parse_numspec(env, version, prev, m.group("specs")) case Struct(): # and Message() parse_members(version, env, prev, m.group("specs")) case _: @@ -623,12 +821,6 @@ def parse_file( typs: list[UserType] = [x for x in env.values() if not isinstance(x, Primitive)] for typ in [typ for typ in typs if isinstance(typ, Struct)]: - valid_syms = [ - "end", - "s32_max", - "s64_max", - *["&" + m.membname for m in typ.members], - ] for member in typ.members: if ( not isinstance(member.typ, Primitive) @@ -638,9 +830,11 @@ def parse_file( f"{typ.typname}.{member.membname}: type {member.typ.typname} does not exist in {member.in_versions.difference(member.typ.in_versions)}" ) for tok in [*member.max.tokens, *member.val.tokens]: - if isinstance(tok, ExprSym) and tok.symname not in valid_syms: - raise ValueError( - f"{typ.typname}.{member.membname}: invalid sym: {tok.symname}" + if isinstance(tok, ExprOff) and not any( + m.membname == tok.membname for m in typ.members + ): + raise NameError( + f"{typ.typname}.{member.membname}: invalid offset: &{tok.membname}" ) return version, typs |