# build-aux/kicad_sexpr.py - A KiCad-compatible S-expression unmarshaler/marshaler # # Copyright (C) 2025 Luke T. Shumaker # SPDX-License-Identifier: AGPL-3.0-or-later # At first I wanted to use # https://gitlab.com/kicad/libraries/kicad-library-utils/-/blob/master/common/sexpr.py # but # 1. It doesn't differentiate between symbols and strings. # 2. Its license is unclear; Rosetta Code is GFDL-1.3-only, while # kicad-library-utils is GPL-3.0-or-later. # 3. It's just over-complicated for what it is. # # Instead, this parser is based on: # - https://gitlab.com/kicad/code/kicad/-/blob/master/libs/sexpr/sexpr_parser.cpp (the main syntax) # - https://gitlab.com/kicad/code/kicad/-/blob/master/common/dsnlexer.cpp (string escapes) # # See-also: # - https://dev-docs.kicad.org/en/components/sexpr/index.html import re import typing # pylint: disable=unused-variable __all__ = [ "Symbol", "Atom", "List", "Expr", "marshal", "unmarshal", ] class Symbol: name: str __match_args__ = ("name",) def __init__(self, name: str) -> None: self.name = name def __repr__(self) -> str: return f"Symbol({self.name!r})" def __eq__(self, other: typing.Any) -> bool: return isinstance(other, Symbol) and other.name == self.name type Atom = Symbol | str | int | float type List = list["Expr"] type Expr = Atom | List class _StrIterator: val: str filename: str row: int col: int off: int row_off: int def __init__(self, s: str, filename: str = "") -> None: self.val = s self.filename = filename self.row = 1 self.col = 0 self.off = 0 self.row_off = 0 def advance(self, n: int = 1) -> None: for _ in range(n): if self.val[self.off] == "\n": self.row += 1 self.col = 0 self.row_off = self.off + 1 else: self.col += 1 self.off += 1 def char(self) -> str | None: if self.off == len(self.val): return None return self.val[self.off] def syntax_error(self, msg: str, length: int = -1) -> SyntaxError: e = SyntaxError(msg) e.text = self.val[self.row_off :] if length > 0: e.text = e.text[:length] else: e.text = e.text.split("\n", 1)[0] e.filename = self.filename e.lineno = self.row e.end_lineno = e.lineno + e.text.count("\n") e.offset = self.col if length <= 0: e.end_offset = -1 else: e.end_offset = e.offset + len(e.text) return e _whitespace_characters = " \t\n\r\b\f\v" _re_atom_end = re.compile("[" + _whitespace_characters + "()]|$") _re_int = re.compile(r"^-?[0-9]+$") _re_float = re.compile(r"^-?[0-9]*\.[0-9]*$") def _unmarshal(iterator: _StrIterator) -> Expr | None: while True: char: str | None match char := iterator.char(): case None: # EOF return None case "(": iterator.advance() l: List = [] while True: item = _unmarshal(iterator) if item is None: break l.append(item) if iterator.char() != ")": raise iterator.syntax_error("Expected close-paren") iterator.advance() return l case ")": return None case '"': iterator.advance() s: str = "" while True: match char := iterator.char(): case None: raise iterator.syntax_error("Expected close-quote") case "\\": iterator.advance() match char := iterator.char(): case '"' | "\\": s += char iterator.advance() case "a": s += "\x07" iterator.advance() case "b": s += "\x08" iterator.advance() case "f": s += "\x0c" iterator.advance() case "n": s += "\n" iterator.advance() case "r": s += "\r" iterator.advance() case "t": s += "\x09" iterator.advance() case "t": s += "\x0b" iterator.advance() case "x": tbuf = "" iterator.advance() for _ in range(2): if ( char := iterator.char() ) and char in "0123456789abcdefABCDEF": tbuf += char iterator.advance() if tbuf: s += chr(int(tbuf, 16)) else: s += "x" case _: tbuf = "" for _ in range(3): if ( char := iterator.char() ) and char in "01234567": tbuf += char iterator.advance() if tbuf: s += chr(int(tbuf, 8)) else: s += "\\" case '"': iterator.advance() return s case _: iterator.advance() s += char case _: if char in _whitespace_characters: iterator.advance() else: end = _re_atom_end.search(iterator.val, iterator.off) assert end tok = iterator.val[iterator.off : end.start()] iterator.advance(len(tok)) if _re_int.fullmatch(tok): return int(tok, 10) if _re_float.fullmatch(tok): return float(tok) return Symbol(tok) def unmarshal(s: str, filename: str = "") -> Expr: iterator = _StrIterator(s, filename) o = _unmarshal(iterator) if o is None: raise iterator.syntax_error("Empty S-Expression") char: str | None while (char := iterator.char()) and char in _whitespace_characters: iterator.advance() if iterator.char(): raise iterator.syntax_error( "Extra text after S-Expression", len(iterator.val) - iterator.off ) return o def _marshal(o: Expr, indent: str) -> str: match o: case list(): ret = "(" seen_list = False for child in o: sep = " " if isinstance(child, list): seen_list = True if seen_list: sep = "\n" + indent + "\t" if len(o) > 0 and o[0] == Symbol("pts") and (ret.count("(") - 1) % 6: sep = " " if ret == "(": sep = "" ret += sep + _marshal(child, indent + "\t") if o and isinstance(o[-1], list): ret += "\n" + indent ret += ")" return ret case Symbol(): return o.name case str(): ret = '"' for char in o: match char: case "\r": ret += "\\r" case "\n": ret += "\\n" case "\\": ret += "\\\\" case '"': ret += '\\"' case _: ret += char ret += '"' return ret case int(): return str(o) case float(): return str(o) case _: assert False def marshal(o: Expr) -> str: return _marshal(o, "")