diff options
Diffstat (limited to 'lib9p/idl/__init__.py')
-rw-r--r-- | lib9p/idl/__init__.py | 522 |
1 files changed, 366 insertions, 156 deletions
diff --git a/lib9p/idl/__init__.py b/lib9p/idl/__init__.py index e7b3670..2d09217 100644 --- a/lib9p/idl/__init__.py +++ b/lib9p/idl/__init__.py @@ -15,14 +15,27 @@ __all__ = [ # types "Type", "Primitive", + *["Expr", "ExprTok", "ExprOp", "ExprLit", "ExprSym", "ExprOff", "ExprNum"], "Number", - *["Bitfield", "Bit", "BitCat", "BitAlias"], - *["Struct", "StructMember", "Expr", "ExprOp", "ExprSym", "ExprLit"], + *["Bitfield", "Bit", "BitCat", "BitNum", "BitAlias"], + *["Struct", "StructMember"], "Message", ] # The syntax that this parses is described in `./0000-README.md`. +# Utilities #################################################################### + + +def get_type(env: dict[str, "Type"], name: str, tc: type["T"]) -> "T": + if name not in env: + raise NameError(f"Unknown type {name!r}") + ret = env[name] + if (not isinstance(ret, tc)) or (ret.__class__.__name__ != tc.__name__): + raise NameError(f"Type {ret.typname!r} is not a {tc.__name__}") + return ret + + # Types ######################################################################## @@ -51,13 +64,129 @@ class Primitive(enum.Enum): return self.value +class ExprOp: + op: typing.Literal["-", "+", "<<"] + + def __init__(self, op: typing.Literal["-", "+", "<<"]) -> None: + self.op = op + + +class ExprLit: + val: int + + def __init__(self, val: int) -> None: + self.val = val + + +class ExprSym: + symname: str + + def __init__(self, name: str) -> None: + self.symname = name + + +class ExprOff: + membname: str + + def __init__(self, name: str) -> None: + self.membname = name + + +class ExprNum: + numname: str + valname: str + + def __init__(self, numname: str, valname: str) -> None: + self.numname = numname + self.valname = valname + + +type ExprTok = ExprOp | ExprLit | ExprSym | ExprOff | ExprNum + + +class Expr: + tokens: typing.Sequence[ExprTok] + const: int | None + + def __init__( + self, env: dict[str, "Type"], tokens: typing.Sequence[ExprTok] = () + ) -> None: + self.tokens = tokens + self.const = self._const(env, tokens) + + def _const( + self, env: dict[str, "Type"], toks: typing.Sequence[ExprTok] + ) -> int | None: + if not toks: + return None + + def read_val() -> int | None: + nonlocal toks + assert toks + neg = False + match toks[0]: + case ExprOp(op="-"): + neg = True + toks = toks[1:] + assert not isinstance(toks[0], ExprOp) + val: int + match toks[0]: + case ExprLit(): + val = toks[0].val + case ExprSym(): + if m := re.fullmatch(r"^u(8|16|32|64)_max$", toks[0].symname): + n = int(m.group(1)) + val = (1 << n) - 1 + elif m := re.fullmatch(r"^s(8|16|32|64)_max$", toks[0].symname): + n = int(m.group(1)) + val = (1 << (n - 1)) - 1 + else: + return None + case ExprOff(): + return None + case ExprNum(): + num = get_type(env, toks[0].numname, Number) + if toks[0].valname not in num.vals: + raise NameError( + f"Type {toks[0].numname!r} does not have a value {toks[0].valname!r}" + ) + _val = num.vals[toks[0].valname].const + if _val is None: + return None + val = _val + toks = toks[1:] + return -val if neg else val + + ret = read_val() + if ret is None: + return None + while toks: + assert isinstance(toks[0], ExprOp) + op = toks[0].op + toks = toks[1:] + operand = read_val() + if operand is None: + return None + match op: + case "+": + ret = ret + operand + case "-": + ret = ret - operand + case "<<": + ret = ret << operand + return ret + + def __bool__(self) -> bool: + return len(self.tokens) > 0 + + class Number: typname: str in_versions: set[str] prim: Primitive - vals: dict[str, str] + vals: dict[str, Expr] def __init__(self) -> None: self.in_versions = set() @@ -74,11 +203,31 @@ class Number: return self.static_size -class BitCat(enum.Enum): - UNUSED = 1 - USED = 2 - RESERVED = 3 - SUBFIELD = 4 +class BitAlias: + bitname: str + in_versions: set[str] + val: Expr + + def __init__(self, name: str, val: Expr) -> None: + if val.const is None: + raise ValueError(f"{name!r} value is not constant") + self.bitname = name + self.in_versions = set() + self.val = val + + +class BitNum: + numname: str + mask: int + vals: dict[str, BitAlias] + + def __init__(self, name: str) -> None: + self.numname = name + self.mask = 0 + self.vals = {} + + +type BitCat = typing.Literal["UNUSED", "USED", "RESERVED"] | BitNum class Bit: @@ -91,33 +240,32 @@ class Bit: self.bitname = "" self.in_versions = set() self.num = num - self.cat = BitCat.UNUSED - - -class BitAlias: - bitname: str - in_versions: set[str] - val: str # FIXME: Don't have bitfield aliases be raw C expressions - - def __init__(self, name: str, val: str) -> None: - self.bitname = name - self.in_versions = set() - self.val = val + self.cat = "UNUSED" class Bitfield: typname: str in_versions: set[str] prim: Primitive + bits: list[Bit] - names: dict[str, Bit | BitAlias] + nums: dict[str, BitNum] + masks: dict[str, BitAlias] + aliases: dict[str, BitAlias] + + names: set[str] def __init__(self, name: str, prim: Primitive) -> None: self.typname = name self.in_versions = set() self.prim = prim + self.bits = [Bit(i) for i in range(prim.static_size * 8)] - self.names = {} + self.nums = {} + self.masks = {} + self.aliases = {} + + self.names = set() @property def static_size(self) -> int: @@ -130,40 +278,9 @@ class Bitfield: return self.static_size -class ExprLit: - val: int - - def __init__(self, val: int) -> None: - self.val = val - - -class ExprSym: - symname: str - - def __init__(self, name: str) -> None: - self.symname = name - - -class ExprOp: - op: typing.Literal["-", "+"] - - def __init__(self, op: typing.Literal["-", "+"]) -> None: - self.op = op - - -class Expr: - tokens: list[ExprLit | ExprSym | ExprOp] - - def __init__(self) -> None: - self.tokens = [] - - def __bool__(self) -> bool: - return len(self.tokens) > 0 - - class StructMember: # from left-to-right when parsing - cnt: "StructMember | None" = None + cnt: "StructMember| int | None" = None membname: str typ: "Type" max: Expr @@ -174,6 +291,8 @@ class StructMember: @property def min_cnt(self) -> int: assert self.cnt + if isinstance(self.cnt, int): + return self.cnt if not isinstance(self.cnt.typ, Primitive): raise ValueError( f"list count must be an integer type: {self.cnt.membname!r}" @@ -185,6 +304,8 @@ class StructMember: @property def max_cnt(self) -> int: assert self.cnt + if isinstance(self.cnt, int): + return self.cnt if not isinstance(self.cnt.typ, Primitive): raise ValueError( f"list count must be an integer type: {self.cnt.membname!r}" @@ -193,21 +314,12 @@ class StructMember: raise ValueError(f"list count may not have ,val=: {self.cnt.membname!r}") if self.cnt.max: # TODO: be more flexible? - if len(self.cnt.max.tokens) != 1: + val = self.cnt.max.const + if val is None: raise ValueError( - f"list count ,max= may only have 1 token: {self.cnt.membname!r}" + f"list count ,max= must be a constant value: {self.cnt.membname!r}" ) - match tok := self.cnt.max.tokens[0]: - case ExprLit(): - return tok.val - case ExprSym(symname="s32_max"): - return (1 << 31) - 1 - case ExprSym(symname="s64_max"): - return (1 << 63) - 1 - case _: - raise ValueError( - f'list count ,max= only allows literal, "s32_max", and "s64_max" tokens: {self.cnt.membname!r}' - ) + return val return (1 << (self.cnt.typ.value * 8)) - 1 @property @@ -279,6 +391,8 @@ T = typing.TypeVar("T", Number, Bitfield, Struct, Message) # Parse ######################################################################## +# common elements ###################### + re_priname = "(?:1|2|4|8)" # primitive names re_symname = "(?:[a-zA-Z_][a-zA-Z_0-9]*)" # "symbol" names; most *.9p-defined names re_symname_u = "(?:[A-Z_][A-Z_0-9]*)" # upper-case "symbol" names; bit names @@ -288,99 +402,186 @@ re_msgname = r"(?:[TR][a-zA-Z_0-9]*)" # names a message can be re_memtype = f"(?:{re_symname}|{re_priname})" # typenames that a struct member can be -re_expr = f"(?:(?:-|\\+|[0-9]+|&?{re_symname})+)" +valid_syms = [ + "end", + "u8_max", + "u16_max", + "u32_max", + "u64_max", + "s8_max", + "s16_max", + "s32_max", + "s64_max", +] -re_numspec = f"(?P<name>{re_symname})\\s*=\\s*(?P<val>\\S+)" +_re_expr_op = r"(?:-|\+|<<)" -re_bitspec_bit = ( - "(?P<bitnum>[0-9]+)\\s*=\\s*(?:" +_res_expr_val = { + "lit_2": r"0b[01]+", + "lit_8": r"0[0-7]+", + "lit_10": r"0(?![0-9bxX])|[1-9][0-9]*", + "lit_16": r"0[xX][0-9a-fA-F]+", + "sym": "|".join(valid_syms), # pre-defined symbols + "off": f"&{re_symname}", # offset of a field this struct + "num": f"{re_symname}\\.{re_symname}", # `num` values +} + +re_expr_tok = ( + "(?:" + "|".join( [ - f"(?P<name_used>{re_symname_u})", - f"reserved\\((?P<name_reserved>{re_symname_u})\\)", - f"subfield\\((?P<name_subfield>{re_symname_l})\\)", + f"(?P<op>{_re_expr_op})", + *[f"(?P<{k}>{v})" for k, v in _res_expr_val.items()], ] ) + ")" ) -re_bitspec_alias = f"(?P<name>{re_symname_u})\\s*=\\s*(?P<val>\\S+)" -re_memberspec = f"(?:(?P<cnt>{re_symname})\\*\\()?(?P<name>{re_symname})\\[(?P<typ>{re_memtype})(?:,max=(?P<max>{re_expr})|,val=(?P<val>{re_expr}))*\\]\\)?" +_re_expr_val = "(?:" + "|".join(_res_expr_val.values()) + ")" +re_expr = f"(?:\\s*(?:-\\s*)?{_re_expr_val}\\s*(?:{_re_expr_op}\\s*(?:-\\s*)?{_re_expr_val}\\s*)*)" -def parse_numspec(ver: str, n: Number, spec: str) -> None: + +def parse_expr(env: dict[str, Type], expr: str) -> Expr: + assert re.fullmatch(re_expr, expr) + tokens: list[ExprTok] = [] + for m in re.finditer(re_expr_tok, expr): + if tok := m.group("op"): + tokens.append(ExprOp(typing.cast(typing.Literal["-", "+", "<<"], tok))) + elif tok := m.group("lit_2"): + tokens.append(ExprLit(int(tok[2:], 2))) + elif tok := m.group("lit_8"): + tokens.append(ExprLit(int(tok[1:], 8))) + elif tok := m.group("lit_10"): + tokens.append(ExprLit(int(tok, 10))) + elif tok := m.group("lit_16"): + tokens.append(ExprLit(int(tok[2:], 16))) + elif tok := m.group("sym"): + tokens.append(ExprSym(tok)) + elif tok := m.group("off"): + tokens.append(ExprOff(tok[1:])) + elif tok := m.group("num"): + [numname, valname] = tok.split(".", 1) + tokens.append(ExprNum(numname, valname)) + else: + assert False + return Expr(env, tokens) + + +# numspec ############################## + +re_numspec = f"(?P<name>{re_symname})\\s*=\\s*(?P<val>{re_expr})" + + +def parse_numspec(env: dict[str, Type], ver: str, n: Number, spec: str) -> None: spec = spec.strip() if m := re.fullmatch(re_numspec, spec): name = m.group("name") - val = m.group("val") if name in n.vals: raise ValueError(f"{n.typname}: name {name!r} already assigned") + val = parse_expr(env, m.group("val")) + if val is None: + raise ValueError( + f"{n.typname}: {name!r} value is not constant: {m.group('val')!r}" + ) n.vals[name] = val else: raise SyntaxError(f"invalid num spec {spec!r}") -def parse_bitspec(ver: str, bf: Bitfield, spec: str) -> None: +# bitspec ############################## + +re_bitspec_bit = ( + "bit\\s+(?P<bitnum>[0-9]+)\\s*=\\s*(?:" + + "|".join( + [ + f"(?P<name_used>{re_symname_u})", + f"reserved\\((?P<name_reserved>{re_symname_u})\\)", + f"num\\((?P<name_num>{re_symname_u})\\)", + ] + ) + + ")" +) +re_bitspec_mask = f"mask\\s+(?P<name>{re_symname_u})\\s*=\\s*(?P<val>{re_expr})" +re_bitspec_alias = f"alias\\s+(?P<name>{re_symname_u})\\s*=\\s*(?P<val>{re_expr})" +re_bitspec_num = f"num\\((?P<num>{re_symname_u})\\)\\s+(?P<name>{re_symname_u})\\s*=\\s*(?P<val>{re_expr})" + + +def parse_bitspec(env: dict[str, Type], ver: str, bf: Bitfield, spec: str) -> None: spec = spec.strip() + def check_name(name: str, is_num: bool = False) -> None: + if name == "MASK": + raise ValueError(f"{bf.typname}: bit name may not be {'MASK'!r}: {name!r}") + if name.endswith("_MASK"): + raise ValueError( + f"{bf.typname}: bit name may not end with {'_MASK'!r}: {name!r}" + ) + if name in bf.names and not (is_num and name in bf.nums): + raise ValueError(f"{bf.typname}: bit name already assigned: {name!r}") + if m := re.fullmatch(re_bitspec_bit, spec): bitnum = int(m.group("bitnum")) if bitnum < 0 or bitnum >= len(bf.bits): raise ValueError(f"{bf.typname}: bit num {bitnum} out-of-bounds") bit = bf.bits[bitnum] - if bit.cat != BitCat.UNUSED: + if bit.cat != "UNUSED": raise ValueError(f"{bf.typname}: bit num {bitnum} already assigned") if name := m.group("name_used"): bit.bitname = name - bit.cat = BitCat.USED + bit.cat = "USED" bit.in_versions.add(ver) elif name := m.group("name_reserved"): bit.bitname = name - bit.cat = BitCat.RESERVED + bit.cat = "RESERVED" bit.in_versions.add(ver) - elif name := m.group("name_subfield"): + elif name := m.group("name_num"): bit.bitname = name - bit.cat = BitCat.SUBFIELD + if name not in bf.nums: + bf.nums[name] = BitNum(name) + bf.nums[name].mask |= 1 << bit.num + bit.cat = bf.nums[name] bit.in_versions.add(ver) if bit.bitname: - if bit.bitname in bf.names: - other = bf.names[bit.bitname] - if ( - isinstance(other, Bit) - and other.cat == bit.cat - and bit.cat == BitCat.SUBFIELD - ): - return - raise ValueError( - f"{bf.typname}: bit name {bit.bitname!r} already assigned" - ) - bf.names[bit.bitname] = bit + check_name(name, isinstance(bit.cat, BitNum)) + bf.names.add(bit.bitname) + elif m := re.fullmatch(re_bitspec_mask, spec): + mask = BitAlias(m.group("name"), parse_expr(env, m.group("val"))) + mask.in_versions.add(ver) + check_name(mask.bitname) + bf.masks[mask.bitname] = mask + bf.names.add(mask.bitname) elif m := re.fullmatch(re_bitspec_alias, spec): - alias = BitAlias(m.group("name"), m.group("val")) + alias = BitAlias(m.group("name"), parse_expr(env, m.group("val"))) alias.in_versions.add(ver) - if alias.bitname in bf.names: + check_name(alias.bitname) + bf.aliases[alias.bitname] = alias + bf.names.add(alias.bitname) + elif m := re.fullmatch(re_bitspec_num, spec): + numname = m.group("num") + alias = BitAlias(m.group("name"), parse_expr(env, m.group("val"))) + alias.in_versions.add(ver) + check_name(alias.bitname) + if numname not in bf.nums: + raise NameError( + f"{bf.typname}: nested num not allocated any bits: {numname!r}" + ) + assert alias.val.const is not None + if alias.val.const & ~bf.nums[numname].mask: raise ValueError( - f"{bf.typname}: bit name {alias.bitname!r} already assigned" + f"{bf.typname}: {alias.bitname!r} does not fit within bitmask: val={alias.val.const:b} mask={bf.nums[numname].mask}" ) - bf.names[alias.bitname] = alias + bf.nums[numname].vals[alias.bitname] = alias + bf.names.add(alias.bitname) else: raise SyntaxError(f"invalid bitfield spec {spec!r}") -def parse_expr(expr: str) -> Expr: - assert re.fullmatch(re_expr, expr) - ret = Expr() - for tok in re.split("([-+])", expr): - if tok in ("-", "+"): - # I, for the life of me, do not understand why I need this - # typing.cast() to keep mypy happy. - ret.tokens += [ExprOp(typing.cast(typing.Literal["-", "+"], tok))] - elif re.fullmatch("[0-9]+", tok): - ret.tokens += [ExprLit(int(tok))] - else: - ret.tokens += [ExprSym(tok)] - return ret +# struct members ####################### + + +re_memberspec = f"(?:(?P<cnt>{re_symname}|[1-9][0-9]*)\\*\\()?(?P<name>{re_symname})\\[(?P<typ>{re_memtype})(?:,max=(?P<max>{re_expr})|,val=(?P<val>{re_expr}))*\\]\\)?" def parse_members(ver: str, env: dict[str, Type], struct: Struct, specs: str) -> None: @@ -401,29 +602,44 @@ def parse_members(ver: str, env: dict[str, Type], struct: Struct, specs: str) -> member.typ = env[m.group("typ")] if cnt := m.group("cnt"): - if len(struct.members) == 0 or struct.members[-1].membname != cnt: - raise ValueError(f"list count must be previous item: {cnt!r}") - cnt_mem = struct.members[-1] - member.cnt = cnt_mem - _ = member.max_cnt # force validation + if cnt.isnumeric(): + member.cnt = int(cnt) + else: + if len(struct.members) == 0 or struct.members[-1].membname != cnt: + raise ValueError(f"list count must be previous item: {cnt!r}") + member.cnt = struct.members[-1] + _ = member.max_cnt # force validation if maxstr := m.group("max"): - if (not isinstance(member.typ, Primitive)) or member.cnt: - raise ValueError("',max=' may only be specified on a non-repeated atom") - member.max = parse_expr(maxstr) + if ( + not isinstance(member.typ, Primitive) + and not isinstance(member.typ, Number) + ) or member.cnt: + raise ValueError( + "',max=' may only be specified on a non-repeated numeric type" + ) + member.max = parse_expr(env, maxstr) else: - member.max = Expr() + member.max = Expr(env) if valstr := m.group("val"): - if (not isinstance(member.typ, Primitive)) or member.cnt: - raise ValueError("',val=' may only be specified on a non-repeated atom") - member.val = parse_expr(valstr) + if ( + not isinstance(member.typ, Primitive) + and not isinstance(member.typ, Number) + ) or member.cnt: + raise ValueError( + "',val=' may only be specified on a non-repeated numeric type" + ) + member.val = parse_expr(env, valstr) else: - member.val = Expr() + member.val = Expr(env) struct.members += [member] +# main parser ########################## + + def re_string(grpname: str) -> str: return f'"(?P<{grpname}>[^"]*)"' @@ -455,15 +671,6 @@ def parse_file( "8": Primitive.u64, } - def get_type(name: str, tc: type[T]) -> T: - nonlocal env - if name not in env: - raise NameError(f"Unknown type {name!r}") - ret = env[name] - if (not isinstance(ret, tc)) or (ret.__class__.__name__ != tc.__name__): - raise NameError(f"Type {ret.typname!r} is not a {tc.__name__}") - return ret - with open(filename, "r", encoding="utf-8") as fh: prev: Type | None = None for lineno, line in enumerate(fh): @@ -494,12 +701,19 @@ def parse_file( typ.in_versions.add(version) case Bitfield(): typ.in_versions.add(version) - for bit in typ.bits: - if other_version in bit.in_versions: - bit.in_versions.add(version) - for val in typ.names.values(): - if other_version in val.in_versions: - val.in_versions.add(version) + for bf_bit in typ.bits: + if other_version in bf_bit.in_versions: + bf_bit.in_versions.add(version) + for bf_num in typ.nums.values(): + for bf_val in bf_num.vals.values(): + if other_version in bf_val.in_versions: + bf_val.in_versions.add(version) + for bf_mask in typ.masks.values(): + if other_version in bf_mask.in_versions: + bf_mask.in_versions.add(version) + for bf_alias in typ.aliases.values(): + if other_version in bf_alias.in_versions: + bf_alias.in_versions.add(version) case Struct(): # and Message() typ.in_versions.add(version) for member in typ.members: @@ -539,8 +753,8 @@ def parse_file( env[bf.typname] = bf prev = bf elif m := re.fullmatch(re_line_bitfield_, line): - bf = get_type(m.group("name"), Bitfield) - parse_bitspec(version, bf, m.group("member")) + bf = get_type(env, m.group("name"), Bitfield) + parse_bitspec(env, version, bf, m.group("member")) prev = bf elif m := re.fullmatch(re_line_struct, line): @@ -559,7 +773,7 @@ def parse_file( env[struct.typname] = struct prev = struct case "+=": - struct = get_type(m.group("name"), Struct) + struct = get_type(env, m.group("name"), Struct) parse_members(version, env, struct, m.group("members")) prev = struct @@ -577,16 +791,16 @@ def parse_file( env[msg.typname] = msg prev = msg case "+=": - msg = get_type(m.group("name"), Message) + msg = get_type(env, m.group("name"), Message) parse_members(version, env, msg, m.group("members")) prev = msg elif m := re.fullmatch(re_line_cont, line): match prev: case Bitfield(): - parse_bitspec(version, prev, m.group("specs")) + parse_bitspec(env, version, prev, m.group("specs")) case Number(): - parse_numspec(version, prev, m.group("specs")) + parse_numspec(env, version, prev, m.group("specs")) case Struct(): # and Message() parse_members(version, env, prev, m.group("specs")) case _: @@ -607,12 +821,6 @@ def parse_file( typs: list[UserType] = [x for x in env.values() if not isinstance(x, Primitive)] for typ in [typ for typ in typs if isinstance(typ, Struct)]: - valid_syms = [ - "end", - "s32_max", - "s64_max", - *["&" + m.membname for m in typ.members], - ] for member in typ.members: if ( not isinstance(member.typ, Primitive) @@ -622,9 +830,11 @@ def parse_file( f"{typ.typname}.{member.membname}: type {member.typ.typname} does not exist in {member.in_versions.difference(member.typ.in_versions)}" ) for tok in [*member.max.tokens, *member.val.tokens]: - if isinstance(tok, ExprSym) and tok.symname not in valid_syms: - raise ValueError( - f"{typ.typname}.{member.membname}: invalid sym: {tok.symname}" + if isinstance(tok, ExprOff) and not any( + m.membname == tok.membname for m in typ.members + ): + raise NameError( + f"{typ.typname}.{member.membname}: invalid offset: &{tok.membname}" ) return version, typs |