From ee5abed3cda095115d5afb72c860819d9369fc45 Mon Sep 17 00:00:00 2001 From: "Luke T. Shumaker" Date: Mon, 31 Mar 2025 04:22:52 -0600 Subject: measurestack: Split into several files --- build-aux/measurestack/vcg.py | 97 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 build-aux/measurestack/vcg.py (limited to 'build-aux/measurestack/vcg.py') diff --git a/build-aux/measurestack/vcg.py b/build-aux/measurestack/vcg.py new file mode 100644 index 0000000..ca20b34 --- /dev/null +++ b/build-aux/measurestack/vcg.py @@ -0,0 +1,97 @@ +# build-aux/measurestack/vcg.py - Parse the "VCG" language +# +# Copyright (C) 2024-2025 Luke T. Shumaker +# SPDX-License-Identifier: AGPL-3.0-or-later + +import re +import typing + +# pylint: disable=unused-variable +__all__ = [ + "VCGElem", + "parse_vcg", +] + +# Parse the "VCG" language +# +# https://www.rw.cdl.uni-saarland.de/people/sander/private/html/gsvcg1.html +# +# The formal syntax is found at +# ftp://ftp.cs.uni-sb.de/pub/graphics/vcg/vcg.tgz `doc/grammar.txt`. 
class VCGElem:
    """A single ``node`` or ``edge`` element read from a VCG file.

    `parse_vcg` creates instances empty and fills in every attribute
    before yielding them.
    """

    # Element keyword: "node" or "edge".
    typ: str
    # 0-based index of the input line the element was read from.
    lineno: int
    # Attribute name -> value.  Quoted values have their surrounding
    # quotes removed and their backslash escapes decoded.
    attrs: dict[str, str]


def _vcg_syntax_error(msg: str, lineno: int, pos: int, line: str) -> SyntaxError:
    """Build (but do not raise) a SyntaxError that locates a problem in `line`."""
    err = SyntaxError(msg)
    err.lineno = lineno
    err.offset = pos
    err.text = line
    return err


def parse_vcg(reader: typing.TextIO) -> typing.Iterator[VCGElem]:
    """Parse a line-oriented subset of the VCG graph language.

    Each input line must be one of:

    - ``graph: { title: "..."`` or a lone ``}``, which are skipped;
    - ``node: { key: value ... }`` or ``edge: { key: value ... }``,
      which are yielded as a `VCGElem`.

    A value is either a bare identifier or a double-quoted string.  In
    quoted strings the escapes ``\\n``, ``\\"`` and ``\\\\`` are
    decoded.

    Args:
        reader: Text stream to read; it is consumed lazily, one line
            per yielded element (skipped lines aside).

    Yields:
        One `VCGElem` per ``node``/``edge`` line, in input order.

    Raises:
        SyntaxError: If a line is not recognized, contains an
            unparsable attribute, repeats an attribute key, or uses an
            unknown escape code.  ``lineno`` is the 0-based line index,
            ``offset`` the 0-based column where parsing stopped, and
            ``text`` the offending line.
    """
    re_beg = re.compile(r"(edge|node):\s*\{\s*")
    _re_tok = r"[a-zA-Z_][a-zA-Z0-9_]*"
    # A backslash must only ever be consumed together with the character
    # it escapes.  Excluding it from the "plain character" class is what
    # keeps an escaped quote (\") from terminating the string early.
    _re_str = r'"(?:[^\\"]|\\.)*"'
    re_attr = re.compile(
        "(" + _re_tok + r")\s*:\s*(" + _re_tok + "|" + _re_str + r")\s*"
    )
    re_end = re.compile(r"\}\s*$")
    re_skip = re.compile(r"(graph:\s*\{\s*title\s*:\s*" + _re_str + r"\s*|\})\s*")
    re_esc = re.compile(r"\\.")

    # Character following the backslash -> decoded character.
    escapes = {"n": "\n", '"': '"', "\\": "\\"}

    def unescape(body: str, lineno: int, pos: int, line: str) -> str:
        """Decode the backslash escapes in the inside of a quoted string."""

        def repl(esc: re.Match[str]) -> str:
            code = esc.group(0)
            if code[1:] not in escapes:
                raise _vcg_syntax_error(
                    f"invalid escape code {code!r}", lineno, pos, line
                )
            return escapes[code[1:]]

        return re_esc.sub(repl, body)

    for lineno, line in enumerate(reader):
        if re_skip.fullmatch(line):
            continue

        m = re_beg.match(line)
        if not m:
            raise _vcg_syntax_error("does not look like a VCG line", lineno, 0, line)

        elem = VCGElem()
        elem.lineno = lineno
        elem.typ = m.group(1)
        elem.attrs = {}
        pos = m.end()

        # Consume `key: value` pairs until the closing brace.
        while not re_end.match(line, pos=pos):
            m = re_attr.match(line, pos=pos)
            if not m:
                raise _vcg_syntax_error("unexpected character", lineno, pos, line)
            k = m.group(1)
            v = m.group(2)
            if k in elem.attrs:
                raise _vcg_syntax_error(f"duplicate key: {k!r}", lineno, pos, line)
            if v.startswith('"'):
                # Strip the surrounding quotes, then decode escapes.
                v = unescape(v[1:-1], lineno, pos, line)
            elem.attrs[k] = v
            pos = m.end()

        yield elem
Shumaker" Date: Mon, 31 Mar 2025 15:58:51 -0600 Subject: measurestack: Compile all regexes upfront --- build-aux/measurestack/vcg.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'build-aux/measurestack/vcg.py') diff --git a/build-aux/measurestack/vcg.py b/build-aux/measurestack/vcg.py index ca20b34..39755e9 100644 --- a/build-aux/measurestack/vcg.py +++ b/build-aux/measurestack/vcg.py @@ -26,16 +26,16 @@ class VCGElem: attrs: dict[str, str] +re_beg = re.compile(r"(edge|node):\s*\{\s*") +_re_tok = r"[a-zA-Z_][a-zA-Z0-9_]*" +_re_str = r'"(?:[^\"]|\\.)*"' +re_attr = re.compile("(" + _re_tok + r")\s*:\s*(" + _re_tok + "|" + _re_str + r")\s*") +re_end = re.compile(r"\}\s*$") +re_skip = re.compile(r"(graph:\s*\{\s*title\s*:\s*" + _re_str + r"\s*|\})\s*") +re_esc = re.compile(r"\\.") + + def parse_vcg(reader: typing.TextIO) -> typing.Iterator[VCGElem]: - re_beg = re.compile(r"(edge|node):\s*\{\s*") - _re_tok = r"[a-zA-Z_][a-zA-Z0-9_]*" - _re_str = r'"(?:[^\"]|\\.)*"' - re_attr = re.compile( - "(" + _re_tok + r")\s*:\s*(" + _re_tok + "|" + _re_str + r")\s*" - ) - re_end = re.compile(r"\}\s*$") - re_skip = re.compile(r"(graph:\s*\{\s*title\s*:\s*" + _re_str + r"\s*|\})\s*") - re_esc = re.compile(r"\\.") for lineno, line in enumerate(reader): pos = 0 -- cgit v1.2.3-2-g168b