diff options
Diffstat (limited to 'build-aux/measurestack/vcg.py')
-rw-r--r-- | build-aux/measurestack/vcg.py | 97 |
1 files changed, 97 insertions, 0 deletions
diff --git a/build-aux/measurestack/vcg.py b/build-aux/measurestack/vcg.py new file mode 100644 index 0000000..39755e9 --- /dev/null +++ b/build-aux/measurestack/vcg.py @@ -0,0 +1,97 @@ +# build-aux/measurestack/vcg.py - Parse the "VCG" language +# +# Copyright (C) 2024-2025 Luke T. Shumaker <lukeshu@lukeshu.com> +# SPDX-License-Identifier: AGPL-3.0-or-later + +import re +import typing + +# pylint: disable=unused-variable +__all__ = [ + "VCGElem", + "parse_vcg", +] + +# Parse the "VCG" language +# +# https://www.rw.cdl.uni-saarland.de/people/sander/private/html/gsvcg1.html +# +# The formal syntax is found at +# ftp://ftp.cs.uni-sb.de/pub/graphics/vcg/vcg.tgz `doc/grammar.txt`. + + +class VCGElem: + typ: str + lineno: int + attrs: dict[str, str] + + +re_beg = re.compile(r"(edge|node):\s*\{\s*") +_re_tok = r"[a-zA-Z_][a-zA-Z0-9_]*" +_re_str = r'"(?:[^\"]|\\.)*"' +re_attr = re.compile("(" + _re_tok + r")\s*:\s*(" + _re_tok + "|" + _re_str + r")\s*") +re_end = re.compile(r"\}\s*$") +re_skip = re.compile(r"(graph:\s*\{\s*title\s*:\s*" + _re_str + r"\s*|\})\s*") +re_esc = re.compile(r"\\.") + + +def parse_vcg(reader: typing.TextIO) -> typing.Iterator[VCGElem]: + + for lineno, line in enumerate(reader): + pos = 0 + + def _raise(msg: str) -> typing.NoReturn: + nonlocal lineno + nonlocal line + nonlocal pos + e = SyntaxError(msg) + e.lineno = lineno + e.offset = pos + e.text = line + raise e + + if re_skip.fullmatch(line): + continue + + elem = VCGElem() + elem.lineno = lineno + + m = re_beg.match(line, pos=pos) + if not m: + _raise("does not look like a VCG line") + elem.typ = m.group(1) + pos = m.end() + + elem.attrs = {} + while True: + if re_end.match(line, pos=pos): + break + m = re_attr.match(line, pos=pos) + if not m: + _raise("unexpected character") + k = m.group(1) + v = m.group(2) + if k in elem.attrs: + _raise(f"duplicate key: {k!r}") + if v.startswith('"'): + + def unesc(esc: re.Match[str]) -> str: + match esc.group(0)[1:]: + case "n": + return "\n" + case '"': + return '"' + case "\\": + return "\\" + case _: + _raise(f"invalid escape code {esc.group(0)!r}") + + v = re_esc.sub(unesc, v[1:-1]) + elem.attrs[k] = v + pos = m.end() + + del _raise + del pos + del line + del lineno + yield elem |