summaryrefslogtreecommitdiff
path: root/build-aux/measurestack/vcg.py
blob: 39755e9b6f38b7bf099c0631e33119891c831c54 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# build-aux/measurestack/vcg.py - Parse the "VCG" language
#
# Copyright (C) 2024-2025  Luke T. Shumaker <lukeshu@lukeshu.com>
# SPDX-License-Identifier: AGPL-3.0-or-later

import re
import typing

# pylint: disable=unused-variable
__all__ = [
    "VCGElem",
    "parse_vcg",
]

# Parse the "VCG" language
#
# https://www.rw.cdl.uni-saarland.de/people/sander/private/html/gsvcg1.html
#
# The formal syntax is found at
# ftp://ftp.cs.uni-sb.de/pub/graphics/vcg/vcg.tgz `doc/grammar.txt`.


class VCGElem:
    """One `node: {...}` or `edge: {...}` element read from a VCG file.

    Instances are created empty and filled in field-by-field by
    `parse_vcg`; there is no custom constructor.
    """

    # Element keyword that opened the line: "edge" or "node".
    typ: str
    # Index of the source line as counted by `enumerate()` over the
    # reader, i.e. the first line of the input is 0.
    lineno: int
    # Attribute name -> value.  Values that were written as quoted
    # strings are stored without the quotes and with backslash
    # escapes expanded.
    attrs: dict[str, str]


# Start of an element: the `edge` or `node` keyword (captured) and the
# opening brace.
re_beg = re.compile(r"(edge|node):\s*\{\s*")
# A bare identifier; used for attribute names and for unquoted values.
_re_tok = r"[a-zA-Z_][a-zA-Z0-9_]*"
# A double-quoted string.  The body is a run of either ordinary
# characters (anything except a backslash or a double-quote) or
# two-character backslash escapes.  Backslash MUST be excluded from
# the first alternative; otherwise the backslash of an escaped quote
# (`\"`) is consumed as an ordinary character and the quote that
# follows it is mistaken for the end of the string.
_re_str = r'"(?:[^\\"]|\\.)*"'
# One `key: value` attribute; group 1 is the key, group 2 is the raw
# value (still quoted and escaped if it was a string).
re_attr = re.compile("(" + _re_tok + r")\s*:\s*(" + _re_tok + "|" + _re_str + r")\s*")
# The closing brace of an element, which must be the last thing on the
# line.
re_end = re.compile(r"\}\s*$")
# Lines that carry no element: the `graph: { title: "..."` header and a
# closing brace on a line of its own.
re_skip = re.compile(r"(graph:\s*\{\s*title\s*:\s*" + _re_str + r"\s*|\})\s*")
# A single backslash escape inside the body of a quoted string.
re_esc = re.compile(r"\\.")


# Replacement text for each backslash escape accepted inside a quoted
# attribute value, keyed by the character that follows the backslash.
_VCG_ESCAPES = {
    "n": "\n",
    '"': '"',
    "\\": "\\",
}


def _vcg_syntax_error(msg: str, lineno: int, pos: int, line: str) -> SyntaxError:
    """Build (but do not raise) a SyntaxError for a problem at `line[pos]`.

    `lineno` and `pos` are the parser's 0-based line index and
    character index.  `SyntaxError.lineno` and `SyntaxError.offset`
    are documented as 1-indexed, so both are shifted by one here.
    """
    err = SyntaxError(msg)
    err.lineno = lineno + 1
    err.offset = pos + 1
    err.text = line
    return err


def _vcg_unescape(body: str, lineno: int, pos: int, line: str) -> str:
    """Expand the backslash escapes in `body`, the inside of a quoted value.

    `lineno`, `pos` and `line` are only used to locate the error if an
    unknown escape is found; `pos` is the start of the attribute that
    holds the string.
    """

    def expand(esc: re.Match[str]) -> str:
        code = esc.group(0)
        if code[1:] not in _VCG_ESCAPES:
            raise _vcg_syntax_error(
                f"invalid escape code {code!r}", lineno, pos, line
            )
        return _VCG_ESCAPES[code[1:]]

    return re_esc.sub(expand, body)


def parse_vcg(reader: typing.TextIO) -> typing.Iterator[VCGElem]:
    """Parse VCG text line-by-line, yielding one VCGElem per element.

    Every line of `reader` must either match `re_skip` (the
    `graph: { title: "..."` header, or a lone closing brace), in which
    case it is ignored, or hold exactly one `edge: { ... }` or
    `node: { ... }` element made of `key: value` attributes.

    Each yielded element has `typ` set to "edge" or "node", `attrs`
    set to the attributes in the order they appeared (quoted values
    are unquoted and unescaped), and `lineno` set to the 0-based index
    of the line within `reader`.

    Raises SyntaxError if a line is not recognised, an attribute is
    malformed, a key is repeated within one element, or a string
    contains an escape other than `\\n`, `\\"` or `\\\\`.  The
    exception's `lineno` and `offset` are 1-indexed and `text` is the
    offending line.
    """
    for lineno, line in enumerate(reader):
        if re_skip.fullmatch(line):
            continue

        m = re_beg.match(line)
        if not m:
            raise _vcg_syntax_error("does not look like a VCG line", lineno, 0, line)

        elem = VCGElem()
        # Deliberately left 0-based: this value is part of the data
        # handed back to callers.
        elem.lineno = lineno
        elem.typ = m.group(1)
        elem.attrs = {}

        # Consume attributes one at a time until only the closing
        # brace is left on the line.
        pos = m.end()
        while not re_end.match(line, pos=pos):
            m = re_attr.match(line, pos=pos)
            if not m:
                raise _vcg_syntax_error("unexpected character", lineno, pos, line)
            k, v = m.group(1), m.group(2)
            if k in elem.attrs:
                raise _vcg_syntax_error(f"duplicate key: {k!r}", lineno, pos, line)
            if v.startswith('"'):
                # Quoted value: drop the surrounding quotes, then
                # expand escapes.
                v = _vcg_unescape(v[1:-1], lineno, pos, line)
            elem.attrs[k] = v
            pos = m.end()

        yield elem