measurestack: Split into several files

author: Luke T. Shumaker <lukeshu@lukeshu.com> 2025-03-31 04:22:52 -0600
committer: Luke T. Shumaker <lukeshu@lukeshu.com> 2025-03-31 04:26:10 -0600
commit: ee5abed3cda095115d5afb72c860819d9369fc45 (patch)
tree: 90e70afee34f5ce1714393d37063b3c17cbf3ad6 /build-aux/measurestack/analyze.py
parent: f5d598b06cd6cfdcfa1f89819a843b2b462e7a1c (diff)
1 files changed, 308 insertions, 0 deletions
diff --git a/build-aux/measurestack/analyze.py b/build-aux/measurestack/analyze.py
new file mode 100644
index 0000000..67edd5d
--- /dev/null
+++ b/build-aux/measurestack/analyze.py
@@ -0,0 +1,308 @@
+# build-aux/measurestack/analyze.py - Analyze stack sizes for compiled objects
+#
+# Copyright (C) 2024-2025  Luke T. Shumaker <lukeshu@lukeshu.com>
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+import re
+import typing
+
+from . import vcg
+
+# pylint: disable=unused-variable
+__all__ = [
+    "BaseName",
+    "QName",
+    "UsageKind",
+    "Node",
+    "AnalyzeResultVal",
+    "AnalyzeResultGroup",
+    "AnalyzeResult",
+    "analyze",
+]
+
+
+class BaseName:
+    _content: str
+
+    def __init__(self, content: str) -> None:
+        if ":" in content:
+            raise ValueError(f"invalid non-qualified name: {content!r}")
+        self._content = content
+
+    def __str__(self) -> str:
+        return self._content
+
+    def __repr__(self) -> str:
+        return f"BaseName({self._content!r})"
+
+    def __format__(self, fmt_spec: str, /) -> str:
+        return repr(self)
+
+    def __eq__(self, other: typing.Any) -> bool:
+        assert isinstance(
+            other, BaseName
+        ), f"comparing BaseName with {other.__class__.__name__}"
+        return self._content == other._content
+
+    def __lt__(self, other: "BaseName") -> bool:
+        return self._content < other._content
+
+    def __hash__(self) -> int:
+        return hash(self._content)
+
+    def as_qname(self) -> "QName":
+        return QName(self._content)
+
+
+class QName:
+    _content: str
+
+    def __init__(self, content: str) -> None:
+        self._content = content
+
+    def __str__(self) -> str:
+        return self._content
+
+    def __repr__(self) -> str:
+        return f"QName({self._content!r})"
+
+    def __format__(self, fmt_spec: str, /) -> str:
+        return repr(self)
+
+    def __eq__(self, other: typing.Any) -> bool:
+        assert isinstance(
+            other, QName
+        ), f"comparing QName with {other.__class__.__name__}"
+        return self._content == other._content
+
+    def __lt__(self, other: "QName") -> bool:
+        return self._content < other._content
+
+    def __hash__(self) -> int:
+        return hash(self._content)
+
+    def base(self) -> BaseName:
+        return BaseName(self._content.rsplit(":", 1)[-1].split(".", 1)[0])
+
+
+UsageKind: typing.TypeAlias = typing.Literal["static", "dynamic", "dynamic,bounded"]
+
+
+class Node:
+    # from .title (`static` and `__weak` functions are prefixed with
+    # the compilation unit .c file.  For static functions that's fine,
+    # but we'll have to handle it specially for __weak.).
+    funcname: QName
+    # .label is "{funcname}\n{location}\n{nstatic} bytes (static}\n{ndynamic} dynamic objects"
+    location: str
+    usage_kind: UsageKind
+    nstatic: int
+    ndynamic: int
+
+    # edges with .sourcename set to this node, val is if it's
+    # OK/expected that the function be missing.
+    calls: dict[QName, bool]
+
+
+class AnalyzeResultVal(typing.NamedTuple):
+    nstatic: int
+    cnt: int
+
+
+class AnalyzeResultGroup(typing.NamedTuple):
+    rows: dict[QName, AnalyzeResultVal]
+
+
+class AnalyzeResult(typing.NamedTuple):
+    groups: dict[str, AnalyzeResultGroup]
+    missing: set[QName]
+    dynamic: set[QName]
+
+    included_funcs: set[QName]
+
+
+class Application(typing.Protocol):
+    def extra_nodes(self) -> typing.Collection[Node]: ...
+    def indirect_callees(
+        self, elem: vcg.VCGElem
+    ) -> tuple[typing.Collection[QName], bool]: ...
+    def skip_call(self, chain: typing.Sequence[QName], funcname: QName) -> bool: ...
+
+
+def analyze(
+    *,
+    ci_fnames: typing.Collection[str],
+    app_func_filters: dict[str, typing.Callable[[QName], tuple[int, bool]]],
+    app: Application,
+    cfg_max_call_depth: int,
+) -> AnalyzeResult:
+    re_node_label = re.compile(
+        r"(?P<funcname>[^\n]+)\n"
+        + r"(?P<location>[^\n]+:[0-9]+:[0-9]+)\n"
+        + r"(?P<nstatic>[0-9]+) bytes \((?P<usage_kind>static|dynamic|dynamic,bounded)\)\n"
+        + r"(?P<ndynamic>[0-9]+) dynamic objects"
+        + r"(?:\n.*)*",
+        flags=re.MULTILINE,
+    )
+
+    graph: dict[QName, Node] = {}
+    qualified: dict[BaseName, set[QName]] = {}
+
+    def handle_elem(elem: vcg.VCGElem) -> None:
+        match elem.typ:
+            case "node":
+                node = Node()
+                node.calls = {}
+                skip = False
+                for k, v in elem.attrs.items():
+                    match k:
+                        case "title":
+                            node.funcname = QName(v)
+                        case "label":
+                            if elem.attrs.get("shape", "") != "ellipse":
+                                m = re_node_label.fullmatch(v)
+                                if not m:
+                                    raise ValueError(f"unexpected label value {v!r}")
+                                node.location = m.group("location")
+                                node.usage_kind = typing.cast(
+                                    UsageKind, m.group("usage_kind")
+                                )
+                                node.nstatic = int(m.group("nstatic"))
+                                node.ndynamic = int(m.group("ndynamic"))
+                        case "shape":
+                            if v != "ellipse":
+                                raise ValueError(f"unexpected shape value {v!r}")
+                            skip = True
+                        case _:
+                            raise ValueError(f"unknown edge key {k!r}")
+                if not skip:
+                    if node.funcname in graph:
+                        raise ValueError(f"duplicate node {node.funcname}")
+                    graph[node.funcname] = node
+                    if ":" in str(node.funcname):
+                        basename = node.funcname.base()
+                        if basename not in qualified:
+                            qualified[basename] = set()
+                        qualified[basename].add(node.funcname)
+            case "edge":
+                caller: QName | None = None
+                callee: QName | None = None
+                for k, v in elem.attrs.items():
+                    match k:
+                        case "sourcename":
+                            caller = QName(v)
+                        case "targetname":
+                            callee = QName(v)
+                        case "label":
+                            pass
+                        case _:
+                            raise ValueError(f"unknown edge key {k!r}")
+                if caller is None or callee is None:
+                    raise ValueError(f"incomplete edge: {elem.attrs!r}")
+                if caller not in graph:
+                    raise ValueError(f"unknown caller: {caller}")
+                if callee == QName("__indirect_call"):
+                    callees, missing_ok = app.indirect_callees(elem)
+                    for callee in callees:
+                        if callee not in graph[caller].calls:
+                            graph[caller].calls[callee] = missing_ok
+                else:
+                    graph[caller].calls[callee] = False
+            case _:
+                raise ValueError(f"unknown elem type {elem.typ!r}")
+
+    for ci_fname in ci_fnames:
+        with open(ci_fname, "r", encoding="utf-8") as fh:
+            for elem in vcg.parse_vcg(fh):
+                handle_elem(elem)
+
+    for node in app.extra_nodes():
+        if node.funcname in graph:
+            raise ValueError(f"duplicate node {node.funcname}")
+        graph[node.funcname] = node
+
+    missing: set[QName] = set()
+    dynamic: set[QName] = set()
+    included_funcs: set[QName] = set()
+
+    dbg = False
+
+    def resolve_funcname(funcname: QName) -> QName | None:
+        # Handle `ld --wrap` functions
+        if QName(f"__wrap_{str(funcname)}") in graph:
+            return QName(f"__wrap_{str(funcname)}")
+        if (
+            str(funcname).startswith("__real_")
+            and QName(str(funcname)[len("__real_") :]) in graph
+        ):
+            funcname = QName(str(funcname)[len("__real_") :])
+
+        # Usual case
+        if funcname in graph:
+            return funcname
+
+        # Handle `__weak` functions
+        if ":" not in str(funcname):
+            qnames = qualified.get(BaseName(str(funcname)), set())
+            if len(qnames) == 1:
+                return sorted(qnames)[0]
+
+        return None
+
+    track_inclusion: bool = True
+
+    def nstatic(
+        orig_funcname: QName,
+        chain: typing.Sequence[QName] = (),
+        missing_ok: bool = False,
+    ) -> int:
+        nonlocal dbg
+        nonlocal track_inclusion
+        funcname = resolve_funcname(orig_funcname)
+        if not funcname:
+            if chain and app.skip_call(chain, orig_funcname):
+                if dbg:
+                    print(f"//dbg: {'- '*len(chain)}{orig_funcname}\tskip missing")
+                return 0
+            if not missing_ok:
+                missing.add(orig_funcname)
+            if dbg:
+                print(f"//dbg: {'- '*len(chain)}{orig_funcname}\tmissing")
+            return 0
+        if chain and app.skip_call(chain, funcname):
+            if dbg:
+                print(f"//dbg: {'- '*len(chain)}{orig_funcname}\tskip")
+            return 0
+
+        if len(chain) == cfg_max_call_depth:
+            raise ValueError(f"max call depth exceeded: {[*chain, funcname]}")
+
+        node = graph[funcname]
+        if dbg:
+            print(f"//dbg: {'- '*len(chain)}{funcname}\t{node.nstatic}")
+        if node.usage_kind == "dynamic" or node.ndynamic > 0:
+            dynamic.add(funcname)
+        if track_inclusion:
+            included_funcs.add(funcname)
+        return node.nstatic + max(
+            [
+                0,
+                *[
+                    nstatic(call, [*chain, funcname], missing_ok)
+                    for call, missing_ok in node.calls.items()
+                ],
+            ]
+        )
+
+    groups: dict[str, AnalyzeResultGroup] = {}
+    for grp_name, grp_filter in app_func_filters.items():
+        rows: dict[QName, AnalyzeResultVal] = {}
+        for funcname in graph:
+            cnt, track_inclusion = grp_filter(funcname)
+            if cnt:
+                rows[funcname] = AnalyzeResultVal(nstatic=nstatic(funcname), cnt=cnt)
+        groups[grp_name] = AnalyzeResultGroup(rows=rows)
+
+    return AnalyzeResult(
+        groups=groups, missing=missing, dynamic=dynamic, included_funcs=included_funcs
+    )
author	Luke T. Shumaker <lukeshu@lukeshu.com>	2025-03-31 04:22:52 -0600
committer	Luke T. Shumaker <lukeshu@lukeshu.com>	2025-03-31 04:26:10 -0600
commit	ee5abed3cda095115d5afb72c860819d9369fc45 (patch)
tree	90e70afee34f5ce1714393d37063b3c17cbf3ad6 /build-aux/measurestack/analyze.py
parent	f5d598b06cd6cfdcfa1f89819a843b2b462e7a1c (diff)