diff options
Diffstat (limited to 'build-aux/measurestack/analyze.py')
-rw-r--r-- | build-aux/measurestack/analyze.py | 308 |
1 files changed, 308 insertions, 0 deletions
diff --git a/build-aux/measurestack/analyze.py b/build-aux/measurestack/analyze.py new file mode 100644 index 0000000..67edd5d --- /dev/null +++ b/build-aux/measurestack/analyze.py @@ -0,0 +1,308 @@ +# build-aux/measurestack/analyze.py - Analyze stack sizes for compiled objects +# +# Copyright (C) 2024-2025 Luke T. Shumaker <lukeshu@lukeshu.com> +# SPDX-License-Identifier: AGPL-3.0-or-later + +import re +import typing + +from . import vcg + +# pylint: disable=unused-variable +__all__ = [ + "BaseName", + "QName", + "UsageKind", + "Node", + "AnalyzeResultVal", + "AnalyzeResultGroup", + "AnalyzeResult", + "analyze", +] + + +class BaseName: + _content: str + + def __init__(self, content: str) -> None: + if ":" in content: + raise ValueError(f"invalid non-qualified name: {content!r}") + self._content = content + + def __str__(self) -> str: + return self._content + + def __repr__(self) -> str: + return f"BaseName({self._content!r})" + + def __format__(self, fmt_spec: str, /) -> str: + return repr(self) + + def __eq__(self, other: typing.Any) -> bool: + assert isinstance( + other, BaseName + ), f"comparing BaseName with {other.__class__.__name__}" + return self._content == other._content + + def __lt__(self, other: "BaseName") -> bool: + return self._content < other._content + + def __hash__(self) -> int: + return hash(self._content) + + def as_qname(self) -> "QName": + return QName(self._content) + + +class QName: + _content: str + + def __init__(self, content: str) -> None: + self._content = content + + def __str__(self) -> str: + return self._content + + def __repr__(self) -> str: + return f"QName({self._content!r})" + + def __format__(self, fmt_spec: str, /) -> str: + return repr(self) + + def __eq__(self, other: typing.Any) -> bool: + assert isinstance( + other, QName + ), f"comparing QName with {other.__class__.__name__}" + return self._content == other._content + + def __lt__(self, other: "QName") -> bool: + return self._content < other._content + + def __hash__(self) -> int: + return hash(self._content) + + def base(self) -> BaseName: + return BaseName(self._content.rsplit(":", 1)[-1].split(".", 1)[0]) + + +UsageKind: typing.TypeAlias = typing.Literal["static", "dynamic", "dynamic,bounded"] + + +class Node: + # from .title (`static` and `__weak` functions are prefixed with + # the compilation unit .c file. For static functions that's fine, + # but we'll have to handle it specially for __weak.). + funcname: QName + # .label is "{funcname}\n{location}\n{nstatic} bytes (static}\n{ndynamic} dynamic objects" + location: str + usage_kind: UsageKind + nstatic: int + ndynamic: int + + # edges with .sourcename set to this node, val is if it's + # OK/expected that the function be missing. + calls: dict[QName, bool] + + +class AnalyzeResultVal(typing.NamedTuple): + nstatic: int + cnt: int + + +class AnalyzeResultGroup(typing.NamedTuple): + rows: dict[QName, AnalyzeResultVal] + + +class AnalyzeResult(typing.NamedTuple): + groups: dict[str, AnalyzeResultGroup] + missing: set[QName] + dynamic: set[QName] + + included_funcs: set[QName] + + +class Application(typing.Protocol): + def extra_nodes(self) -> typing.Collection[Node]: ... + def indirect_callees( + self, elem: vcg.VCGElem + ) -> tuple[typing.Collection[QName], bool]: ... + def skip_call(self, chain: typing.Sequence[QName], funcname: QName) -> bool: ... + + +def analyze( + *, + ci_fnames: typing.Collection[str], + app_func_filters: dict[str, typing.Callable[[QName], tuple[int, bool]]], + app: Application, + cfg_max_call_depth: int, +) -> AnalyzeResult: + re_node_label = re.compile( + r"(?P<funcname>[^\n]+)\n" + + r"(?P<location>[^\n]+:[0-9]+:[0-9]+)\n" + + r"(?P<nstatic>[0-9]+) bytes \((?P<usage_kind>static|dynamic|dynamic,bounded)\)\n" + + r"(?P<ndynamic>[0-9]+) dynamic objects" + + r"(?:\n.*)*", + flags=re.MULTILINE, + ) + + graph: dict[QName, Node] = {} + qualified: dict[BaseName, set[QName]] = {} + + def handle_elem(elem: vcg.VCGElem) -> None: + match elem.typ: + case "node": + node = Node() + node.calls = {} + skip = False + for k, v in elem.attrs.items(): + match k: + case "title": + node.funcname = QName(v) + case "label": + if elem.attrs.get("shape", "") != "ellipse": + m = re_node_label.fullmatch(v) + if not m: + raise ValueError(f"unexpected label value {v!r}") + node.location = m.group("location") + node.usage_kind = typing.cast( + UsageKind, m.group("usage_kind") + ) + node.nstatic = int(m.group("nstatic")) + node.ndynamic = int(m.group("ndynamic")) + case "shape": + if v != "ellipse": + raise ValueError(f"unexpected shape value {v!r}") + skip = True + case _: + raise ValueError(f"unknown edge key {k!r}") + if not skip: + if node.funcname in graph: + raise ValueError(f"duplicate node {node.funcname}") + graph[node.funcname] = node + if ":" in str(node.funcname): + basename = node.funcname.base() + if basename not in qualified: + qualified[basename] = set() + qualified[basename].add(node.funcname) + case "edge": + caller: QName | None = None + callee: QName | None = None + for k, v in elem.attrs.items(): + match k: + case "sourcename": + caller = QName(v) + case "targetname": + callee = QName(v) + case "label": + pass + case _: + raise ValueError(f"unknown edge key {k!r}") + if caller is None or callee is None: + raise ValueError(f"incomplete edge: {elem.attrs!r}") + if caller not in graph: + raise ValueError(f"unknown caller: {caller}") + if callee == QName("__indirect_call"): + callees, missing_ok = app.indirect_callees(elem) + for callee in callees: + if callee not in graph[caller].calls: + graph[caller].calls[callee] = missing_ok + else: + graph[caller].calls[callee] = False + case _: + raise ValueError(f"unknown elem type {elem.typ!r}") + + for ci_fname in ci_fnames: + with open(ci_fname, "r", encoding="utf-8") as fh: + for elem in vcg.parse_vcg(fh): + handle_elem(elem) + + for node in app.extra_nodes(): + if node.funcname in graph: + raise ValueError(f"duplicate node {node.funcname}") + graph[node.funcname] = node + + missing: set[QName] = set() + dynamic: set[QName] = set() + included_funcs: set[QName] = set() + + dbg = False + + def resolve_funcname(funcname: QName) -> QName | None: + # Handle `ld --wrap` functions + if QName(f"__wrap_{str(funcname)}") in graph: + return QName(f"__wrap_{str(funcname)}") + if ( + str(funcname).startswith("__real_") + and QName(str(funcname)[len("__real_") :]) in graph + ): + funcname = QName(str(funcname)[len("__real_") :]) + + # Usual case + if funcname in graph: + return funcname + + # Handle `__weak` functions + if ":" not in str(funcname): + qnames = qualified.get(BaseName(str(funcname)), set()) + if len(qnames) == 1: + return sorted(qnames)[0] + + return None + + track_inclusion: bool = True + + def nstatic( + orig_funcname: QName, + chain: typing.Sequence[QName] = (), + missing_ok: bool = False, + ) -> int: + nonlocal dbg + nonlocal track_inclusion + funcname = resolve_funcname(orig_funcname) + if not funcname: + if chain and app.skip_call(chain, orig_funcname): + if dbg: + print(f"//dbg: {'- '*len(chain)}{orig_funcname}\tskip missing") + return 0 + if not missing_ok: + missing.add(orig_funcname) + if dbg: + print(f"//dbg: {'- '*len(chain)}{orig_funcname}\tmissing") + return 0 + if chain and app.skip_call(chain, funcname): + if dbg: + print(f"//dbg: {'- '*len(chain)}{orig_funcname}\tskip") + return 0 + + if len(chain) == cfg_max_call_depth: + raise ValueError(f"max call depth exceeded: {[*chain, funcname]}") + + node = graph[funcname] + if dbg: + print(f"//dbg: {'- '*len(chain)}{funcname}\t{node.nstatic}") + if node.usage_kind == "dynamic" or node.ndynamic > 0: + dynamic.add(funcname) + if track_inclusion: + included_funcs.add(funcname) + return node.nstatic + max( + [ + 0, + *[ + nstatic(call, [*chain, funcname], missing_ok) + for call, missing_ok in node.calls.items() + ], + ] + ) + + groups: dict[str, AnalyzeResultGroup] = {} + for grp_name, grp_filter in app_func_filters.items(): + rows: dict[QName, AnalyzeResultVal] = {} + for funcname in graph: + cnt, track_inclusion = grp_filter(funcname) + if cnt: + rows[funcname] = AnalyzeResultVal(nstatic=nstatic(funcname), cnt=cnt) + groups[grp_name] = AnalyzeResultGroup(rows=rows) + + return AnalyzeResult( + groups=groups, missing=missing, dynamic=dynamic, included_funcs=included_funcs + ) |