diff options
Diffstat (limited to 'lib9p/idl')
-rw-r--r-- | lib9p/idl/0000-README.md | 63 | ||||
-rw-r--r-- | lib9p/idl/0000-TODO.md | 4 | ||||
-rw-r--r-- | lib9p/idl/1992-9P0.9p.wip | 56 | ||||
-rw-r--r-- | lib9p/idl/1995-9P1.9p.wip | 2 | ||||
-rw-r--r-- | lib9p/idl/1996-Styx.9p.wip | 2 | ||||
-rw-r--r-- | lib9p/idl/2002-9P2000.9p | 83 | ||||
-rw-r--r-- | lib9p/idl/2005-9P2000.u.9p | 14 | ||||
-rw-r--r-- | lib9p/idl/2010-9P2000.L.9p | 186 | ||||
-rw-r--r-- | lib9p/idl/__init__.py | 522 |
9 files changed, 583 insertions, 349 deletions
diff --git a/lib9p/idl/0000-README.md b/lib9p/idl/0000-README.md index e19a1e8..84cf865 100644 --- a/lib9p/idl/0000-README.md +++ b/lib9p/idl/0000-README.md @@ -17,35 +17,62 @@ client->server requests, and R-messages are server->client responses type of a message is represented by a u8 ID; T-messages are even and R-messages are odd. -Messages are made up of the primitives; unsigned little-endian -integers, identified with the following single-character mnemonics: +9P messages are exchanged over a reliable bidirectional in-order octet +stream. Messages are made up of the primitives; unsigned +little-endian integers, identified with the following single-character +mnemonics: - 1 = u8 - 2 = u16le - 4 = u32le - 8 = u16le -Out of these primitives, we can make other numeric types, +Out of these primitives, we can make more complex types: + +## User-defined types + +### Numeric types num NUMNAME = PRIMITIVE_TYPE + "NAME=VAL"... + +Besides just being an alias for a primitive type, a numeric type may +define 0 or more named constants of that type, each wrapped in +"quotes". + +### Bitfields -bitfields, + bitfield BFNAME = PRIMITIVE_TYPE + "bit NBIT=NAME"... + "bit NBIT=reserved(NAME)"... + "bit NBIT=num(NUMNAME)"... + "alias NAME=VAL"... + "mask NAME=VAL"... + "num(NUMNAME) NAME=VAL"... - bitfield BFNAME = PRIMITIVE_TYPE "NBIT=NAME... ALIAS=VAL..." +The same NBIT may not be defined multiple times. The same NAME may +not be defined multiple times. -structures, + - A `reserved(...)` bit indicates that the bit is named but is not + allowed to be used. + - `num(...)` bits embed a numeric/enumerated field within a set of + bits. Once several bits have been allocated to a numeric field + with `bit NBIT=num(NUMNAME)`, constant values for that field may be + declared with `num(NUMNAME) NAME=VAL`. For each numeric field, a + `mask NUMNAME=BITMASK` is automatically declared. + - A `mask` defines a bitmask that selects several bits. + - An `alias` defines a convenience alias for a bit or set of bits. + +### Structures struct STRUCTNAME = "FIELDNAME[FIELDTYPE]..." -and messages (which are a special-case of structures). +Or a special-case for structs that are messages; `msg` has the same +syntax as `struct`, but has restrictions on the STRUCTNAME and the +first 3 fields must all be declared in the same way: msg Tname = "size[4,val=end-&size] typ[1,val=TYP] tag[tag] REST..." -Bitfield bit names may be wrapped in `reserved(...)` or -`subfield(...)`; reserved indicates that the bit is named but is not -allowed to be used, and subfield indicates that the bit is part of a -num/enum that is handled by an alias. - Struct fields that have numeric types (either primitives or `num` types) can add to their type `,val=` and/or `,max=` to specify what the exact value must be and/or what the maximum (inclusive) value is. @@ -59,11 +86,13 @@ can be - `&fieldname` to refer to the offset of a field name in that struct, - the special value `end` to refer to the offset of the end of the struct, - - the special value `s32_max` to refer to the constant value - `(1<<31)-1`, or - - the special value `s64_max` to refer to the constant value - `(1<<63)-1` + - the special value `u{8,16,32,64}_max` to refer to the constant + value `(1<<{n})-1`, or + - the special value `s{8,16,32,64}_max` to refer to the constant value + `(1<<({n}-1))-1`. + +## Parser A parser for this syntax is given in `__init__.py`. However, `__init__.py` places the somewhat arbitrary undocumented restrictions -on fields referenced as the count for a repeated field. +on fields referenced as the count of a repeated field. diff --git a/lib9p/idl/0000-TODO.md b/lib9p/idl/0000-TODO.md index d196ac9..e52902f 100644 --- a/lib9p/idl/0000-TODO.md +++ b/lib9p/idl/0000-TODO.md @@ -1,6 +1,6 @@ <!-- lib9p/idl/0000-TODO.md - Changes I intend to make to idl/__init__.py - and idl.gen + and proto.gen Copyright (C) 2025 Luke T. Shumaker <lukeshu@lukeshu.com> SPDX-License-Identifier: AGPL-3.0-or-later @@ -9,5 +9,3 @@ - Decide how to handle duplicate type names from different versions - Decide how to handle duplicate `enum lib9p_msg_type` names and values -- Clean up the iterate-over-all-msgids-in-a-version code -- Allow for const `.cnt` instead of only having previous-member `.cnt` diff --git a/lib9p/idl/1992-9P0.9p.wip b/lib9p/idl/1992-9P0.9p.wip index 086e8e4..a434ba2 100644 --- a/lib9p/idl/1992-9P0.9p.wip +++ b/lib9p/idl/1992-9P0.9p.wip @@ -37,40 +37,40 @@ struct errstr = "64*(txt[1])" # "O"pen flags (flags to pass to Topen and Tcreate) # Unused bits are *ignored*. bitfield o = 1 - "0=mode_0" # low bit of the 2-bit READ/WRITE/RDWR/EXEC enum - "1=mode_1" # high bit of the 2-bit READ/WRITE/RDWR/EXEC enum - #"2=unused" - #"3=unused" - "4=TRUNC" - #"5=_reserved_CEXEC" # close-on-exec - "6=RCLOSE" # remove-on-close - #"7=unused" + "bit 0=num(MODE)" # low bit of the 2-bit READ/WRITE/RDWR/EXEC enum + "bit 1=num(MODE)" # high bit of the 2-bit READ/WRITE/RDWR/EXEC enum + #"bit 2=unused" + #"bit 3=unused" + "bit 4=TRUNC" + "bit 5=reserved(CEXEC)" # close-on-exec + "bit 6=RCLOSE" # remove-on-close + #"bit 7=unused" - "READ = 0" # make available for this FID: Tread() - "WRITE = 1" # make available for this FID: Twrite() - "RDWR = 2" # make available for this FID: Tread() and Twrite() - "EXEC = 3" # make available for this FID: Tread() + "num(MODE) READ = 0" # make available for this FID: Tread() + "num(MODE) WRITE = 1" # make available for this FID: Twrite() + "num(MODE) RDWR = 2" # make available for this FID: Tread() and Twrite() + "num(MODE) EXEC = 3" # make available for this FID: Tread() - "MODE_MASK = 0b00000011" - "FLAG_MASK = 0b11111100" + "mask FLAG = 0b11111100" -# "C"??? "H"??? - file permissions and attributes +# "CH"annel flags - file permissions and attributes (a "channel" is +# what a file handle is called inside of the Plan 9 kernel). bitfield ch = 4 - "31=DIR" - "30=APPEND" - "29=EXCL" + "bit 31=DIR" + "bit 30=APPEND" + "bit 29=EXCL" #... - "8=OWNER_R" - "7=OWNER_W" - "6=OWNER_X" - "5=GROUP_R" - "4=GROUP_W" - "3=GROUP_X" - "2=OTHER_R" - "1=OTHER_W" - "0=OTHER_X" + "bit 8=OWNER_R" + "bit 7=OWNER_W" + "bit 6=OWNER_X" + "bit 5=GROUP_R" + "bit 4=GROUP_W" + "bit 3=GROUP_X" + "bit 2=OTHER_R" + "bit 1=OTHER_W" + "bit 0=OTHER_X" - "PERM_MASK=0777" # {OWNER,GROUP,OTHER}_{R,W,X} + "mask PERM=0777" # {OWNER,GROUP,OTHER}_{R,W,X} struct stat = "file_name[name]" "file_owner[name]" diff --git a/lib9p/idl/1995-9P1.9p.wip b/lib9p/idl/1995-9P1.9p.wip index 2caf39d..660e24a 100644 --- a/lib9p/idl/1995-9P1.9p.wip +++ b/lib9p/idl/1995-9P1.9p.wip @@ -43,7 +43,7 @@ from ./1992-9P0.9p import tag, fid, qid, name, errstr, o, ch, stat # draft RFC). As I understand it, CHMOUNT indicates that the file is # mounted by the kernel as a 9P transport; that the kernel has a lock # on doing I/O on it, so userspace can't do I/O on it. -bitfield ch += "28=MOUNT" +bitfield ch += "bit 28=_PLAN9_MOUNT" # Authentication uses DES encryption. The client obtains a ticket and # a nonce-key from a separate auth-server; how it does this is beyond diff --git a/lib9p/idl/1996-Styx.9p.wip b/lib9p/idl/1996-Styx.9p.wip index 6ba4509..3cb3774 100644 --- a/lib9p/idl/1996-Styx.9p.wip +++ b/lib9p/idl/1996-Styx.9p.wip @@ -40,7 +40,7 @@ msg Ropen = "typ[1,val=11] tag[tag] fid[fid] qid[qid]" msg Tcreate = "typ[1,val=12] tag[tag] fid[fid] name[name] perm[ch] mode[o]" msg Rcreate = "typ[1,val=13] tag[tag] fid[fid] qid[qid]" # For `offset:max`, see `fs.c` `f_read()` and `f_write()`. -# For `count:max`, see `styx.h:MAXFDATA'. +# For `count:max`, see `styx.h:MAXFDATA`. msg Tread = "typ[1,val=14] tag[tag] fid[fid] offset[8,max=s64_max] count[2,max=8192]" msg Rread = "typ[1,val=15] tag[tag] fid[fid] count[2,max=8192] pad[1] count*(data[1])" msg Twrite = "typ[1,val=16] tag[tag] fid[fid] offset[8,max=s64_max] count[2,max=8192] pad[1] count*(data[1])" diff --git a/lib9p/idl/2002-9P2000.9p b/lib9p/idl/2002-9P2000.9p index 2a4f7ed..2b51612 100644 --- a/lib9p/idl/2002-9P2000.9p +++ b/lib9p/idl/2002-9P2000.9p @@ -22,64 +22,64 @@ version "9P2000" # tag - identify a request/response pair num tag = 2 - "NOTAG = ~0" + "NOTAG = u16_max" # file identifier - like a UNIX file-descriptor num fid = 4 - "NOFID = ~0" + "NOFID = u32_max" # string - u16le `n`, then `n` bytes of UTF-8, without any nul-bytes struct s = "len[2] len*(utf8[1])" # "D"ir-entry "M"ode - file permissions and attributes bitfield dm = 4 - "31=DIR" - "30=APPEND" - "29=EXCL" - # DMMOUNT has been around in Plan 9 forever (CHMOUNT in <4e), + "bit 31=DIR" + "bit 30=APPEND" + "bit 29=EXCL" + # DMMOUNT has been around in Plan 9 since 2e (CHMOUNT in <4e), # but is undocumented, and is explicitly excluded from the # 9P2000 draft RFC. As I understand it, DMMOUNT indicates # that the file is mounted by the kernel as a 9P transport; # that the kernel has a lock on doing I/O on it, so userspace # can't do I/O on it. - "28=reserved(PLAN9_MOUNT)" - "27=AUTH" - "26=TMP" + "bit 28=_PLAN9_MOUNT" + "bit 27=AUTH" + "bit 26=TMP" #... - "8=OWNER_R" - "7=OWNER_W" - "6=OWNER_X" - "5=GROUP_R" - "4=GROUP_W" - "3=GROUP_X" - "2=OTHER_R" - "1=OTHER_W" - "0=OTHER_X" + "bit 8=OWNER_R" + "bit 7=OWNER_W" + "bit 6=OWNER_X" + "bit 5=GROUP_R" + "bit 4=GROUP_W" + "bit 3=GROUP_X" + "bit 2=OTHER_R" + "bit 1=OTHER_W" + "bit 0=OTHER_X" - "PERM_MASK=0777" # {OWNER,GROUP,OTHER}_{R,W,X} + "mask PERM=0777" # {OWNER,GROUP,OTHER}_{R,W,X} # QID Type - see `struct qid` below bitfield qt = 1 - "7=DIR" - "6=APPEND" - "5=EXCL" - "4=reserved(PLAN9_MOUNT)" # See "PLAN9_MOUNT" in "dm" above. - "3=AUTH" + "bit 7=DIR" + "bit 6=APPEND" + "bit 5=EXCL" + "bit 4=_PLAN9_MOUNT" # See "_PLAN9_MOUNT" in "dm" above. + "bit 3=AUTH" # Fun historical fact: QTTMP was a relatively late addition to # Plan 9, in 2003-12. - "2=TMP" - #"1=unused" + "bit 2=TMP" + #"bit 1=unused" # "The name QTFILE, defined to be zero, identifies the value # of the type for a plain file." - "FILE=0" + "alias FILE=0" # uni"Q"ue "ID"entification - "two files on the same server hierarchy # are the same if and only if their qids are the same" # # - "path" is a unique uint64_t that does most of the work in the # above statement about files being the same if their QIDs are the -# same; " If a file is deleted and recreated with the same name in +# same; "If a file is deleted and recreated with the same name in # the same directory, the old and new path components of the qids # should be different" # @@ -107,22 +107,21 @@ struct stat = "stat_size[2,val=end-&kern_type]" # "O"pen flags (flags to pass to Topen and Tcreate) # Unused bits *must* be 0. bitfield o = 1 - "0=subfield(mode)" # low bit of the 2-bit READ/WRITE/RDWR/EXEC enum - "1=subfield(mode)" # high bit of the 2-bit READ/WRITE/RDWR/EXEC enum - #"2=unused" - #"3=unused" - "4=TRUNC" - "5=reserved(CEXEC)" # close-on-exec - "6=RCLOSE" # remove-on-close - #"7=unused" + "bit 0=num(MODE)" # low bit of the 2-bit READ/WRITE/RDWR/EXEC enum + "bit 1=num(MODE)" # high bit of the 2-bit READ/WRITE/RDWR/EXEC enum + #"bit 2=unused" + #"bit 3=unused" + "bit 4=TRUNC" + "bit 5=reserved(CEXEC)" # close-on-exec + "bit 6=RCLOSE" # remove-on-close + #"bit 7=unused" - "READ = 0" # make available for this FID: Tread() - "WRITE = 1" # make available for this FID: Twrite() - "RDWR = 2" # make available for this FID: Tread() and Twrite() - "EXEC = 3" # make available for this FID: Tread() + "num(MODE) READ = 0" # make available for this FID: Tread() + "num(MODE) WRITE = 1" # make available for this FID: Twrite() + "num(MODE) RDWR = 2" # make available for this FID: Tread() and Twrite() + "num(MODE) EXEC = 3" # make available for this FID: Tread() - "MODE_MASK = 0b00000011" - "FLAG_MASK = 0b11111100" + "mask FLAG = 0b11111100" # A 9P2000 session goes: # diff --git a/lib9p/idl/2005-9P2000.u.9p b/lib9p/idl/2005-9P2000.u.9p index fefe3e9..6c2f2dc 100644 --- a/lib9p/idl/2005-9P2000.u.9p +++ b/lib9p/idl/2005-9P2000.u.9p @@ -12,7 +12,7 @@ from ./2002-9P2000.9p import * # numeric user ID num nuid = 4 - "NONUID = ~0" + "NONUID = u32_max" num errno = 4 "NOERROR = 0" @@ -27,10 +27,10 @@ msg Tattach += "n_uid[nuid]" msg Rerror += "errno[errno]" -bitfield dm += "23=DEVICE" - "21=NAMEDPIPE" - "20=SOCKET" - "19=SETUID" - "18=SETGID" +bitfield dm += "bit 23=DEVICE" + "bit 21=PIPE" + "bit 20=SOCKET" + "bit 19=SETUID" + "bit 18=SETGID" -bitfield qt += "1=SYMLINK" +bitfield qt += "bit 1=SYMLINK" diff --git a/lib9p/idl/2010-9P2000.L.9p b/lib9p/idl/2010-9P2000.L.9p index 7ac86a6..d81a15b 100644 --- a/lib9p/idl/2010-9P2000.L.9p +++ b/lib9p/idl/2010-9P2000.L.9p @@ -39,49 +39,48 @@ num super_magic = 4 # protocol.h (and are different than the Linux kernel's values, which # vary by architecture). bitfield lo = 4 - "0=subfield(mode)" # low bit of the 2-bit RDONLY/WRONLY/RDWR/NOACCESS enum - "1=subfield(mode)" # high bit of the 2-bit RDONLY/WRONLY/RDWR/NOACCESS enum - #"2=unused" - #"3=unused" - #"4=unused" - #"5=unused" - "6=CREATE" - "7=EXCL" - "8=NOCTTY" - "9=TRUNC" - "10=APPEND" - "11=NONBLOCK" - "12=DSYNC" - "13=BSD_FASYNC" - "14=DIRECT" - "15=LARGEFILE" - "16=DIRECTORY" - "17=NOFOLLOW" - "18=NOATIME" - "19=CLOEXEC" - "20=SYNC" - - "RDONLY = 0" - "WRONLY = 1" - "RDWR = 2" - "NOACCESS = 3" - - "MODE_MASK = 0b000000000000000000011" - "FLAG_MASK = 0b111111111111111000000" + "bit 0=num(MODE)" # low bit of the 2-bit RDONLY/WRONLY/RDWR/NOACCESS enum + "bit 1=num(MODE)" # high bit of the 2-bit RDONLY/WRONLY/RDWR/NOACCESS enum + #"bit 2=unused" + #"bit 3=unused" + #"bit 4=unused" + #"bit 5=unused" + "bit 6=CREATE" + "bit 7=EXCL" + "bit 8=NOCTTY" + "bit 9=TRUNC" + "bit 10=APPEND" + "bit 11=NONBLOCK" + "bit 12=DSYNC" + "bit 13=BSD_FASYNC" + "bit 14=DIRECT" + "bit 15=LARGEFILE" + "bit 16=DIRECTORY" + "bit 17=NOFOLLOW" + "bit 18=NOATIME" + "bit 19=CLOEXEC" + "bit 20=SYNC" + + "num(MODE) RDONLY = 0" + "num(MODE) WRONLY = 1" + "num(MODE) RDWR = 2" + "num(MODE) NOACCESS = 3" + + "mask FLAG = 0b111111111111111000000" # "D"irentry "T"ype # # These match the Linux kernel's values. num dt = 1 - "UNKNOWN = 0" - "NAMED_PIPE = 1" - "CHAR_DEV = 2" - "DIRECTORY = 4" - "BLOCK_DEV = 6" - "REGULAR = 8" - "SYMLINK = 10" - "SOCKET = 12" - "WHITEOUT = 14" + "UNKNOWN = 0" + "PIPE = 1" + "CHAR_DEV = 2" + "DIRECTORY = 4" + "BLOCK_DEV = 6" # proof it's not a bitfield + "REGULAR = 8" + "SYMLINK = 10" # proof it's not a bitfield + "SOCKET = 12" # proof it's not a bitfield + "_WHITEOUT = 14" # proof it's not a bitfield # Mode # @@ -89,34 +88,33 @@ num dt = 1 # instead of just 16? Who knows? bitfield mode = 4 #... - "15=subfield(fmt)" # bit of the 4-bit FMT_ enum - "14=subfield(fmt)" # bit of the 4-bit FMT_ enum - "13=subfield(fmt)" # bit of the 4-bit FMT_ enum - "12=subfield(fmt)" # bit of the 4-bit FMT_ enum + "bit 15=num(FMT)" # bit of the 4-bit FMT_ enum + "bit 14=num(FMT)" # bit of the 4-bit FMT_ enum + "bit 13=num(FMT)" # bit of the 4-bit FMT_ enum + "bit 12=num(FMT)" # bit of the 4-bit FMT_ enum #... - "11=PERM_SETGROUP" - "10=PERM_SETUSER" - "9=PERM_STICKY" - "8=PERM_OWNER_R" - "7=PERM_OWNER_W" - "6=PERM_OWNER_X" - "5=PERM_GROUP_R" - "4=PERM_GROUP_W" - "3=PERM_GROUP_X" - "2=PERM_OTHER_R" - "1=PERM_OTHER_W" - "0=PERM_OTHER_X" - - "FMT_NAMED_PIPE = LIB9P_DT_NAMED_PIPE<<12" - "FMT_CHAR_DEV = LIB9P_DT_CHAR_DEV<<12" - "FMT_DIRECTORY = LIB9P_DT_DIRECTORY<<12" - "FMT_BLOCK_DEV = LIB9P_DT_BLOCK_DEV<<12" - "FMT_REGULAR = LIB9P_DT_REGULAR<<12" - "FMT_SYMLINK = LIB9P_DT_SYMLINK<<12" - "FMT_SOCKET = LIB9P_DT_SOCKET<<12" - - "PERM_MASK = 0000777" # PERM_* - "FMT_MASK = 0170000" # _fmt_* + "bit 11=PERM_SETGROUP" + "bit 10=PERM_SETUSER" + "bit 9=PERM_STICKY" + "bit 8=PERM_OWNER_R" + "bit 7=PERM_OWNER_W" + "bit 6=PERM_OWNER_X" + "bit 5=PERM_GROUP_R" + "bit 4=PERM_GROUP_W" + "bit 3=PERM_GROUP_X" + "bit 2=PERM_OTHER_R" + "bit 1=PERM_OTHER_W" + "bit 0=PERM_OTHER_X" + + "num(FMT) PIPE = dt.PIPE<<12" + "num(FMT) CHAR_DEV = dt.CHAR_DEV<<12" + "num(FMT) DIRECTORY = dt.DIRECTORY<<12" + "num(FMT) BLOCK_DEV = dt.BLOCK_DEV<<12" + "num(FMT) REGULAR = dt.REGULAR<<12" + "num(FMT) SYMLINK = dt.SYMLINK<<12" + "num(FMT) SOCKET = dt.SOCKET<<12" + + "mask PERM = 07777" # PERM_* # A boolean value that is for some reason 4 bytes wide. num b4 = 4 @@ -125,35 +123,35 @@ num b4 = 4 # all other values are true also bitfield getattr = 8 - "0=MODE" - "1=NLINK" - "2=UID" - "3=GID" - "4=RDEV" - "5=ATIME" - "6=MTIME" - "7=CTIME" - "8=INO" - "9=SIZE" - "10=BLOCKS" - - "11=BTIME" - "12=GEN" - "13=DATA_VERSION" - - "BASIC=0x000007ff" # Mask for fields up to BLOCKS - "ALL =0x00003fff" # Mask for All fields above + "bit 0=MODE" + "bit 1=NLINK" + "bit 2=UID" + "bit 3=GID" + "bit 4=RDEV" + "bit 5=ATIME" + "bit 6=MTIME" + "bit 7=CTIME" + "bit 8=INO" + "bit 9=SIZE" + "bit 10=BLOCKS" + + "bit 11=BTIME" + "bit 12=GEN" + "bit 13=DATA_VERSION" + + "alias BASIC=0x000007ff" # Mask for fields up to BLOCKS + "alias ALL =0x00003fff" # Mask for All fields above bitfield setattr = 4 - "0=MODE" - "1=UID" - "2=GID" - "3=SIZE" - "4=ATIME" - "5=MTIME" - "6=CTIME" - "7=ATIME_SET" - "8=MTIME_SET" + "bit 0=MODE" + "bit 1=UID" + "bit 2=GID" + "bit 3=SIZE" + "bit 4=ATIME" + "bit 5=MTIME" + "bit 6=CTIME" + "bit 7=ATIME_SET" + "bit 8=MTIME_SET" num lock_type = 1 "RDLCK=0" @@ -161,8 +159,8 @@ num lock_type = 1 "UNLCK=2" bitfield lock_flags = 4 - "0=BLOCK" - "1=RECLAIM" + "bit 0=BLOCK" + "bit 1=RECLAIM" num lock_status = 1 "SUCCESS=0" diff --git a/lib9p/idl/__init__.py b/lib9p/idl/__init__.py index e7b3670..2d09217 100644 --- a/lib9p/idl/__init__.py +++ b/lib9p/idl/__init__.py @@ -15,14 +15,27 @@ __all__ = [ # types "Type", "Primitive", + *["Expr", "ExprTok", "ExprOp", "ExprLit", "ExprSym", "ExprOff", "ExprNum"], "Number", - *["Bitfield", "Bit", "BitCat", "BitAlias"], - *["Struct", "StructMember", "Expr", "ExprOp", "ExprSym", "ExprLit"], + *["Bitfield", "Bit", "BitCat", "BitNum", "BitAlias"], + *["Struct", "StructMember"], "Message", ] # The syntax that this parses is described in `./0000-README.md`. +# Utilities #################################################################### + + +def get_type(env: dict[str, "Type"], name: str, tc: type["T"]) -> "T": + if name not in env: + raise NameError(f"Unknown type {name!r}") + ret = env[name] + if (not isinstance(ret, tc)) or (ret.__class__.__name__ != tc.__name__): + raise NameError(f"Type {ret.typname!r} is not a {tc.__name__}") + return ret + + # Types ######################################################################## @@ -51,13 +64,129 @@ class Primitive(enum.Enum): return self.value +class ExprOp: + op: typing.Literal["-", "+", "<<"] + + def __init__(self, op: typing.Literal["-", "+", "<<"]) -> None: + self.op = op + + +class ExprLit: + val: int + + def __init__(self, val: int) -> None: + self.val = val + + +class ExprSym: + symname: str + + def __init__(self, name: str) -> None: + self.symname = name + + +class ExprOff: + membname: str + + def __init__(self, name: str) -> None: + self.membname = name + + +class ExprNum: + numname: str + valname: str + + def __init__(self, numname: str, valname: str) -> None: + self.numname = numname + self.valname = valname + + +type ExprTok = ExprOp | ExprLit | ExprSym | ExprOff | ExprNum + + +class Expr: + tokens: typing.Sequence[ExprTok] + const: int | None + + def __init__( + self, env: dict[str, "Type"], tokens: typing.Sequence[ExprTok] = () + ) -> None: + self.tokens = tokens + self.const = self._const(env, tokens) + + def _const( + self, env: dict[str, "Type"], toks: typing.Sequence[ExprTok] + ) -> int | None: + if not toks: + return None + + def read_val() -> int | None: + nonlocal toks + assert toks + neg = False + match toks[0]: + case ExprOp(op="-"): + neg = True + toks = toks[1:] + assert not isinstance(toks[0], ExprOp) + val: int + match toks[0]: + case ExprLit(): + val = toks[0].val + case ExprSym(): + if m := re.fullmatch(r"^u(8|16|32|64)_max$", toks[0].symname): + n = int(m.group(1)) + val = (1 << n) - 1 + elif m := re.fullmatch(r"^s(8|16|32|64)_max$", toks[0].symname): + n = int(m.group(1)) + val = (1 << (n - 1)) - 1 + else: + return None + case ExprOff(): + return None + case ExprNum(): + num = get_type(env, toks[0].numname, Number) + if toks[0].valname not in num.vals: + raise NameError( + f"Type {toks[0].numname!r} does not have a value {toks[0].valname!r}" + ) + _val = num.vals[toks[0].valname].const + if _val is None: + return None + val = _val + toks = toks[1:] + return -val if neg else val + + ret = read_val() + if ret is None: + return None + while toks: + assert isinstance(toks[0], ExprOp) + op = toks[0].op + toks = toks[1:] + operand = read_val() + if operand is None: + return None + match op: + case "+": + ret = ret + operand + case "-": + ret = ret - operand + case "<<": + ret = ret << operand + return ret + + def __bool__(self) -> bool: + return len(self.tokens) > 0 + + class Number: typname: str in_versions: set[str] prim: Primitive - vals: dict[str, str] + vals: dict[str, Expr] def __init__(self) -> None: self.in_versions = set() @@ -74,11 +203,31 @@ class Number: return self.static_size -class BitCat(enum.Enum): - UNUSED = 1 - USED = 2 - RESERVED = 3 - SUBFIELD = 4 +class BitAlias: + bitname: str + in_versions: set[str] + val: Expr + + def __init__(self, name: str, val: Expr) -> None: + if val.const is None: + raise ValueError(f"{name!r} value is not constant") + self.bitname = name + self.in_versions = set() + self.val = val + + +class BitNum: + numname: str + mask: int + vals: dict[str, BitAlias] + + def __init__(self, name: str) -> None: + self.numname = name + self.mask = 0 + self.vals = {} + + +type BitCat = typing.Literal["UNUSED", "USED", "RESERVED"] | BitNum class Bit: @@ -91,33 +240,32 @@ class Bit: self.bitname = "" self.in_versions = set() self.num = num - self.cat = BitCat.UNUSED - - -class BitAlias: - bitname: str - in_versions: set[str] - val: str # FIXME: Don't have bitfield aliases be raw C expressions - - def __init__(self, name: str, val: str) -> None: - self.bitname = name - self.in_versions = set() - self.val = val + self.cat = "UNUSED" class Bitfield: typname: str in_versions: set[str] prim: Primitive + bits: list[Bit] - names: dict[str, Bit | BitAlias] + nums: dict[str, BitNum] + masks: dict[str, BitAlias] + aliases: dict[str, BitAlias] + + names: set[str] def __init__(self, name: str, prim: Primitive) -> None: self.typname = name self.in_versions = set() self.prim = prim + self.bits = [Bit(i) for i in range(prim.static_size * 8)] - self.names = {} + self.nums = {} + self.masks = {} + self.aliases = {} + + self.names = set() @property def static_size(self) -> int: @@ -130,40 +278,9 @@ class Bitfield: return self.static_size -class ExprLit: - val: int - - def __init__(self, val: int) -> None: - self.val = val - - -class ExprSym: - symname: str - - def __init__(self, name: str) -> None: - self.symname = name - - -class ExprOp: - op: typing.Literal["-", "+"] - - def __init__(self, op: typing.Literal["-", "+"]) -> None: - self.op = op - - -class Expr: - tokens: list[ExprLit | ExprSym | ExprOp] - - def __init__(self) -> None: - self.tokens = [] - - def __bool__(self) -> bool: - return len(self.tokens) > 0 - - class StructMember: # from left-to-right when parsing - cnt: "StructMember | None" = None + cnt: "StructMember| int | None" = None membname: str typ: "Type" max: Expr @@ -174,6 +291,8 @@ class StructMember: @property def min_cnt(self) -> int: assert self.cnt + if isinstance(self.cnt, int): + return self.cnt if not isinstance(self.cnt.typ, Primitive): raise ValueError( f"list count must be an integer type: {self.cnt.membname!r}" @@ -185,6 +304,8 @@ class StructMember: @property def max_cnt(self) -> int: assert self.cnt + if isinstance(self.cnt, int): + return self.cnt if not isinstance(self.cnt.typ, Primitive): raise ValueError( f"list count must be an integer type: {self.cnt.membname!r}" @@ -193,21 +314,12 @@ class StructMember: raise ValueError(f"list count may not have ,val=: {self.cnt.membname!r}") if self.cnt.max: # TODO: be more flexible? - if len(self.cnt.max.tokens) != 1: + val = self.cnt.max.const + if val is None: raise ValueError( - f"list count ,max= may only have 1 token: {self.cnt.membname!r}" + f"list count ,max= must be a constant value: {self.cnt.membname!r}" ) - match tok := self.cnt.max.tokens[0]: - case ExprLit(): - return tok.val - case ExprSym(symname="s32_max"): - return (1 << 31) - 1 - case ExprSym(symname="s64_max"): - return (1 << 63) - 1 - case _: - raise ValueError( - f'list count ,max= only allows literal, "s32_max", and "s64_max" tokens: {self.cnt.membname!r}' - ) + return val return (1 << (self.cnt.typ.value * 8)) - 1 @property @@ -279,6 +391,8 @@ T = typing.TypeVar("T", Number, Bitfield, Struct, Message) # Parse ######################################################################## +# common elements ###################### + re_priname = "(?:1|2|4|8)" # primitive names re_symname = "(?:[a-zA-Z_][a-zA-Z_0-9]*)" # "symbol" names; most *.9p-defined names re_symname_u = "(?:[A-Z_][A-Z_0-9]*)" # upper-case "symbol" names; bit names @@ -288,99 +402,186 @@ re_msgname = r"(?:[TR][a-zA-Z_0-9]*)" # names a message can be re_memtype = f"(?:{re_symname}|{re_priname})" # typenames that a struct member can be -re_expr = f"(?:(?:-|\\+|[0-9]+|&?{re_symname})+)" +valid_syms = [ + "end", + "u8_max", + "u16_max", + "u32_max", + "u64_max", + "s8_max", + "s16_max", + "s32_max", + "s64_max", +] -re_numspec = f"(?P<name>{re_symname})\\s*=\\s*(?P<val>\\S+)" +_re_expr_op = r"(?:-|\+|<<)" -re_bitspec_bit = ( - "(?P<bitnum>[0-9]+)\\s*=\\s*(?:" +_res_expr_val = { + "lit_2": r"0b[01]+", + "lit_8": r"0[0-7]+", + "lit_10": r"0(?![0-9bxX])|[1-9][0-9]*", + "lit_16": r"0[xX][0-9a-fA-F]+", + "sym": "|".join(valid_syms), # pre-defined symbols + "off": f"&{re_symname}", # offset of a field this struct + "num": f"{re_symname}\\.{re_symname}", # `num` values +} + +re_expr_tok = ( + "(?:" + "|".join( [ - f"(?P<name_used>{re_symname_u})", - f"reserved\\((?P<name_reserved>{re_symname_u})\\)", - f"subfield\\((?P<name_subfield>{re_symname_l})\\)", + f"(?P<op>{_re_expr_op})", + *[f"(?P<{k}>{v})" for k, v in _res_expr_val.items()], ] ) + ")" ) -re_bitspec_alias = f"(?P<name>{re_symname_u})\\s*=\\s*(?P<val>\\S+)" -re_memberspec = f"(?:(?P<cnt>{re_symname})\\*\\()?(?P<name>{re_symname})\\[(?P<typ>{re_memtype})(?:,max=(?P<max>{re_expr})|,val=(?P<val>{re_expr}))*\\]\\)?" +_re_expr_val = "(?:" + "|".join(_res_expr_val.values()) + ")" +re_expr = f"(?:\\s*(?:-\\s*)?{_re_expr_val}\\s*(?:{_re_expr_op}\\s*(?:-\\s*)?{_re_expr_val}\\s*)*)" -def parse_numspec(ver: str, n: Number, spec: str) -> None: + +def parse_expr(env: dict[str, Type], expr: str) -> Expr: + assert re.fullmatch(re_expr, expr) + tokens: list[ExprTok] = [] + for m in re.finditer(re_expr_tok, expr): + if tok := m.group("op"): + tokens.append(ExprOp(typing.cast(typing.Literal["-", "+", "<<"], tok))) + elif tok := m.group("lit_2"): + tokens.append(ExprLit(int(tok[2:], 2))) + elif tok := m.group("lit_8"): + tokens.append(ExprLit(int(tok[1:], 8))) + elif tok := m.group("lit_10"): + tokens.append(ExprLit(int(tok, 10))) + elif tok := m.group("lit_16"): + tokens.append(ExprLit(int(tok[2:], 16))) + elif tok := m.group("sym"): + tokens.append(ExprSym(tok)) + elif tok := m.group("off"): + tokens.append(ExprOff(tok[1:])) + elif tok := m.group("num"): + [numname, valname] = tok.split(".", 1) + tokens.append(ExprNum(numname, valname)) + else: + assert False + return Expr(env, tokens) + + +# numspec ############################## + +re_numspec = f"(?P<name>{re_symname})\\s*=\\s*(?P<val>{re_expr})" + + +def parse_numspec(env: dict[str, Type], ver: str, n: Number, spec: str) -> None: spec = spec.strip() if m := re.fullmatch(re_numspec, spec): name = m.group("name") - val = m.group("val") if name in n.vals: raise ValueError(f"{n.typname}: name {name!r} already assigned") + val = parse_expr(env, m.group("val")) + if val is None: + raise ValueError( + f"{n.typname}: {name!r} value is not constant: {m.group('val')!r}" + ) n.vals[name] = val else: raise SyntaxError(f"invalid num spec {spec!r}") -def parse_bitspec(ver: str, bf: Bitfield, spec: str) -> None: +# bitspec ############################## + +re_bitspec_bit = ( + "bit\\s+(?P<bitnum>[0-9]+)\\s*=\\s*(?:" + + "|".join( + [ + f"(?P<name_used>{re_symname_u})", + f"reserved\\((?P<name_reserved>{re_symname_u})\\)", + f"num\\((?P<name_num>{re_symname_u})\\)", + ] + ) + + ")" +) +re_bitspec_mask = f"mask\\s+(?P<name>{re_symname_u})\\s*=\\s*(?P<val>{re_expr})" +re_bitspec_alias = f"alias\\s+(?P<name>{re_symname_u})\\s*=\\s*(?P<val>{re_expr})" +re_bitspec_num = f"num\\((?P<num>{re_symname_u})\\)\\s+(?P<name>{re_symname_u})\\s*=\\s*(?P<val>{re_expr})" + + +def parse_bitspec(env: dict[str, Type], ver: str, bf: Bitfield, spec: str) -> None: spec = spec.strip() + def check_name(name: str, is_num: bool = False) -> None: + if name == "MASK": + raise ValueError(f"{bf.typname}: bit name may not be {'MASK'!r}: {name!r}") + if name.endswith("_MASK"): + raise ValueError( + f"{bf.typname}: bit name may not end with {'_MASK'!r}: {name!r}" + ) + if name in bf.names and not (is_num and name in bf.nums): + raise ValueError(f"{bf.typname}: bit name already assigned: {name!r}") + if m := re.fullmatch(re_bitspec_bit, spec): bitnum = int(m.group("bitnum")) if bitnum < 0 or bitnum >= len(bf.bits): raise ValueError(f"{bf.typname}: bit num {bitnum} out-of-bounds") bit = bf.bits[bitnum] - if bit.cat != BitCat.UNUSED: + if bit.cat != "UNUSED": raise ValueError(f"{bf.typname}: bit num {bitnum} already assigned") if name := m.group("name_used"): bit.bitname = name - bit.cat = BitCat.USED + bit.cat = "USED" bit.in_versions.add(ver) elif name := m.group("name_reserved"): bit.bitname = name - bit.cat = BitCat.RESERVED + bit.cat = "RESERVED" bit.in_versions.add(ver) - elif name := m.group("name_subfield"): + elif name := m.group("name_num"): bit.bitname = name - bit.cat = BitCat.SUBFIELD + if name not in bf.nums: + bf.nums[name] = BitNum(name) + bf.nums[name].mask |= 1 << bit.num + bit.cat = bf.nums[name] bit.in_versions.add(ver) if bit.bitname: - if bit.bitname in bf.names: - other = bf.names[bit.bitname] - if ( - isinstance(other, Bit) - and other.cat == bit.cat - and bit.cat == BitCat.SUBFIELD - ): - return - raise ValueError( - f"{bf.typname}: bit name {bit.bitname!r} already assigned" - ) - bf.names[bit.bitname] = bit + check_name(name, isinstance(bit.cat, BitNum)) + bf.names.add(bit.bitname) + elif m := re.fullmatch(re_bitspec_mask, spec): + mask = BitAlias(m.group("name"), parse_expr(env, m.group("val"))) + mask.in_versions.add(ver) + check_name(mask.bitname) + bf.masks[mask.bitname] = mask + bf.names.add(mask.bitname) elif m := re.fullmatch(re_bitspec_alias, spec): - alias = BitAlias(m.group("name"), m.group("val")) + alias = BitAlias(m.group("name"), parse_expr(env, m.group("val"))) alias.in_versions.add(ver) - if alias.bitname in bf.names: + check_name(alias.bitname) + bf.aliases[alias.bitname] = alias + bf.names.add(alias.bitname) + elif m := re.fullmatch(re_bitspec_num, spec): + numname = m.group("num") + alias = BitAlias(m.group("name"), parse_expr(env, m.group("val"))) + alias.in_versions.add(ver) + check_name(alias.bitname) + if numname not in bf.nums: + raise NameError( + f"{bf.typname}: nested num not allocated any bits: {numname!r}" + ) + assert alias.val.const is not None + if alias.val.const & ~bf.nums[numname].mask: raise ValueError( - f"{bf.typname}: bit name {alias.bitname!r} already assigned" + f"{bf.typname}: {alias.bitname!r} does not fit within bitmask: val={alias.val.const:b} mask={bf.nums[numname].mask}" ) - bf.names[alias.bitname] = alias + bf.nums[numname].vals[alias.bitname] = alias + bf.names.add(alias.bitname) else: raise SyntaxError(f"invalid bitfield spec {spec!r}") -def parse_expr(expr: str) -> Expr: - assert re.fullmatch(re_expr, expr) - ret = Expr() - for tok in re.split("([-+])", expr): - if tok in ("-", "+"): - # I, for the life of me, do not understand why I need this - # typing.cast() to keep mypy happy. - ret.tokens += [ExprOp(typing.cast(typing.Literal["-", "+"], tok))] - elif re.fullmatch("[0-9]+", tok): - ret.tokens += [ExprLit(int(tok))] - else: - ret.tokens += [ExprSym(tok)] - return ret +# struct members ####################### + + +re_memberspec = f"(?:(?P<cnt>{re_symname}|[1-9][0-9]*)\\*\\()?(?P<name>{re_symname})\\[(?P<typ>{re_memtype})(?:,max=(?P<max>{re_expr})|,val=(?P<val>{re_expr}))*\\]\\)?" def parse_members(ver: str, env: dict[str, Type], struct: Struct, specs: str) -> None: @@ -401,29 +602,44 @@ def parse_members(ver: str, env: dict[str, Type], struct: Struct, specs: str) -> member.typ = env[m.group("typ")] if cnt := m.group("cnt"): - if len(struct.members) == 0 or struct.members[-1].membname != cnt: - raise ValueError(f"list count must be previous item: {cnt!r}") - cnt_mem = struct.members[-1] - member.cnt = cnt_mem - _ = member.max_cnt # force validation + if cnt.isnumeric(): + member.cnt = int(cnt) + else: + if len(struct.members) == 0 or struct.members[-1].membname != cnt: + raise ValueError(f"list count must be previous item: {cnt!r}") + member.cnt = struct.members[-1] + _ = member.max_cnt # force validation if maxstr := m.group("max"): - if (not isinstance(member.typ, Primitive)) or member.cnt: - raise ValueError("',max=' may only be specified on a non-repeated atom") - member.max = parse_expr(maxstr) + if ( + not isinstance(member.typ, Primitive) + and not isinstance(member.typ, Number) + ) or member.cnt: + raise ValueError( + "',max=' may only be specified on a non-repeated numeric type" + ) + member.max = parse_expr(env, maxstr) else: - member.max = Expr() + member.max = Expr(env) if valstr := m.group("val"): - if (not isinstance(member.typ, Primitive)) or member.cnt: - raise ValueError("',val=' may only be specified on a non-repeated atom") - member.val = parse_expr(valstr) + if ( + not isinstance(member.typ, Primitive) + and not isinstance(member.typ, Number) + ) or member.cnt: + raise ValueError( + "',val=' may only be specified on a non-repeated numeric type" + ) + member.val = parse_expr(env, valstr) else: - member.val = Expr() + member.val = Expr(env) struct.members += [member] +# main parser ########################## + + def re_string(grpname: str) -> str: return f'"(?P<{grpname}>[^"]*)"' @@ -455,15 +671,6 @@ def parse_file( "8": Primitive.u64, } - def get_type(name: str, tc: type[T]) -> T: - nonlocal env - if name not in env: - raise NameError(f"Unknown type {name!r}") - ret = env[name] - if (not isinstance(ret, tc)) or (ret.__class__.__name__ != tc.__name__): - raise NameError(f"Type {ret.typname!r} is not a {tc.__name__}") - return ret - with open(filename, "r", encoding="utf-8") as fh: prev: Type | None = None for lineno, line in enumerate(fh): @@ -494,12 +701,19 @@ def parse_file( typ.in_versions.add(version) case Bitfield(): typ.in_versions.add(version) - for bit in typ.bits: - if other_version in bit.in_versions: - bit.in_versions.add(version) - for val in typ.names.values(): - if other_version in val.in_versions: - val.in_versions.add(version) + for bf_bit in typ.bits: + if other_version in bf_bit.in_versions: + bf_bit.in_versions.add(version) + for bf_num in typ.nums.values(): + for bf_val in bf_num.vals.values(): + if other_version in bf_val.in_versions: + bf_val.in_versions.add(version) + for bf_mask in typ.masks.values(): + if other_version in bf_mask.in_versions: + bf_mask.in_versions.add(version) + for bf_alias in typ.aliases.values(): + if other_version in bf_alias.in_versions: + bf_alias.in_versions.add(version) case Struct(): # and Message() typ.in_versions.add(version) for member in typ.members: @@ -539,8 +753,8 @@ def parse_file( env[bf.typname] = bf prev = bf elif m := re.fullmatch(re_line_bitfield_, line): - bf = get_type(m.group("name"), Bitfield) - parse_bitspec(version, bf, m.group("member")) + bf = get_type(env, m.group("name"), Bitfield) + parse_bitspec(env, version, bf, m.group("member")) prev = bf elif m := re.fullmatch(re_line_struct, line): @@ -559,7 +773,7 @@ def parse_file( env[struct.typname] = struct prev = struct case "+=": - struct = get_type(m.group("name"), Struct) + struct = get_type(env, m.group("name"), Struct) parse_members(version, env, struct, m.group("members")) prev = struct @@ -577,16 +791,16 @@ def parse_file( env[msg.typname] = msg prev = msg case "+=": - msg = get_type(m.group("name"), Message) + msg = get_type(env, m.group("name"), Message) parse_members(version, env, msg, m.group("members")) prev = msg elif m := re.fullmatch(re_line_cont, line): match prev: case Bitfield(): - parse_bitspec(version, prev, m.group("specs")) + parse_bitspec(env, version, prev, m.group("specs")) case Number(): - parse_numspec(version, prev, m.group("specs")) + parse_numspec(env, version, prev, m.group("specs")) case Struct(): # and Message() parse_members(version, env, prev, m.group("specs")) case _: @@ -607,12 +821,6 @@ def parse_file( typs: list[UserType] = [x for x in env.values() if not isinstance(x, Primitive)] for typ in [typ for typ in typs if isinstance(typ, Struct)]: - valid_syms = [ - "end", - "s32_max", - "s64_max", - *["&" + m.membname for m in typ.members], - ] for member in typ.members: if ( not isinstance(member.typ, Primitive) @@ -622,9 +830,11 @@ def parse_file( f"{typ.typname}.{member.membname}: type {member.typ.typname} does not exist in {member.in_versions.difference(member.typ.in_versions)}" ) for tok in [*member.max.tokens, *member.val.tokens]: - if isinstance(tok, ExprSym) and tok.symname not in valid_syms: - raise ValueError( - f"{typ.typname}.{member.membname}: invalid sym: {tok.symname}" + if isinstance(tok, ExprOff) and not any( + m.membname == tok.membname for m in typ.members + ): + raise NameError( + f"{typ.typname}.{member.membname}: invalid offset: &{tok.membname}" ) return version, typs |