diff options
Diffstat (limited to 'build-aux/measurestack/analyze.py')
-rw-r--r-- | build-aux/measurestack/analyze.py | 429 |
1 files changed, 429 insertions, 0 deletions
diff --git a/build-aux/measurestack/analyze.py b/build-aux/measurestack/analyze.py new file mode 100644 index 0000000..a93874f --- /dev/null +++ b/build-aux/measurestack/analyze.py @@ -0,0 +1,429 @@ +# build-aux/measurestack/analyze.py - Analyze stack sizes for compiled objects +# +# Copyright (C) 2024-2025 Luke T. Shumaker <lukeshu@lukeshu.com> +# SPDX-License-Identifier: AGPL-3.0-or-later + +import re +import sys +import typing + +from . import vcg + +# pylint: disable=unused-variable +__all__ = [ + "BaseName", + "QName", + "UsageKind", + "Node", + "AnalyzeResultVal", + "AnalyzeResultGroup", + "AnalyzeResult", + "analyze", +] + +# types ######################################################################## + + +class BaseName: + # class ########################################################## + + _interned: dict[str, "BaseName"] = {} + + def __new__(cls, content: str) -> "BaseName": + if ":" in content: + raise ValueError(f"invalid non-qualified name: {content!r}") + content = sys.intern(content) + if content not in cls._interned: + self = super().__new__(cls) + self._content = content + cls._interned[content] = self + return cls._interned[content] + + # instance ####################################################### + + _content: str + + def __str__(self) -> str: + return self._content + + def __repr__(self) -> str: + return f"BaseName({self._content!r})" + + def __format__(self, fmt_spec: str, /) -> str: + return repr(self) + + def __eq__(self, other: typing.Any) -> bool: + assert isinstance( + other, BaseName + ), f"comparing BaseName with {other.__class__.__name__}" + return self._content == other._content + + def __lt__(self, other: "BaseName") -> bool: + return self._content < other._content + + def __hash__(self) -> int: + return self._content.__hash__() + + def as_qname(self) -> "QName": + return QName(self._content) + + +class QName: + # class ########################################################## + + _interned: dict[str, "QName"] = {} + + def __new__(cls, content: str) -> "QName": + content = sys.intern(content) + if content not in cls._interned: + self = super().__new__(cls) + self._content = content + self._base = None + cls._interned[content] = self + return cls._interned[content] + + # instance ####################################################### + + _content: str + _base: BaseName | None + + def __str__(self) -> str: + return self._content + + def __repr__(self) -> str: + return f"QName({self._content!r})" + + def __format__(self, fmt_spec: str, /) -> str: + return repr(self) + + def __eq__(self, other: typing.Any) -> bool: + assert isinstance( + other, QName + ), f"comparing QName with {other.__class__.__name__}" + return self._content == other._content + + def __lt__(self, other: "QName") -> bool: + return self._content < other._content + + def __hash__(self) -> int: + return self._content.__hash__() + + def base(self) -> BaseName: + if self._base is None: + self._base = BaseName(self._content.rsplit(":", 1)[-1].split(".", 1)[0]) + return self._base + + +UsageKind: typing.TypeAlias = typing.Literal["static", "dynamic", "dynamic,bounded"] + + +class Node: + # from .title (`static` and `__weak` functions are prefixed with + # the compilation unit .c file. For static functions that's fine, + # but we'll have to handle it specially for __weak.). + funcname: QName + # .label is "{funcname}\n{location}\n{nstatic} bytes (static}\n{ndynamic} dynamic objects" + location: str + usage_kind: UsageKind + nstatic: int + ndynamic: int + + # edges with .sourcename set to this node, val is if it's + # OK/expected that the function be missing. + calls: dict[QName, bool] + + +class AnalyzeResultVal(typing.NamedTuple): + nstatic: int + cnt: int + + +class AnalyzeResultGroup(typing.NamedTuple): + rows: dict[QName, AnalyzeResultVal] + + +class AnalyzeResult(typing.NamedTuple): + groups: dict[str, AnalyzeResultGroup] + missing: set[QName] + dynamic: set[QName] + + included_funcs: set[QName] + + +class SkipModel(typing.NamedTuple): + """Running the skipmodel calls `.fn(chain, ...)` with the chain + consisting of the last `.nchain` items (if .nchain is an int), or + the chain starting with the *last* occurance of `.nchain` (if + .nchain is a collection). If the chain is not that long or does + not contain a member of the collection, then .fn is not called and + the call is *not* skipped. + + """ + + nchain: int | typing.Collection[BaseName] + fn: typing.Callable[[typing.Sequence[QName], QName], bool] + + def __call__(self, chain: typing.Sequence[QName], call: QName) -> tuple[bool, int]: + if isinstance(self.nchain, int): + if len(chain) >= self.nchain: + _chain = chain[-self.nchain :] + return self.fn(_chain, call), len(_chain) + else: + for i in reversed(range(len(chain))): + if chain[i].base() in self.nchain: + _chain = chain[i - 1 :] + return self.fn(_chain, call), len(_chain) + return False, 0 + + +class Application(typing.Protocol): + def extra_nodes(self) -> typing.Collection[Node]: ... + def indirect_callees( + self, elem: vcg.VCGElem + ) -> tuple[typing.Collection[QName], bool]: ... + def skipmodels(self) -> dict[BaseName, SkipModel]: ... + + +# code ######################################################################### + +re_node_label = re.compile( + r"(?P<funcname>[^\n]+)\n" + + r"(?P<location>[^\n]+:[0-9]+:[0-9]+)\n" + + r"(?P<nstatic>[0-9]+) bytes \((?P<usage_kind>static|dynamic|dynamic,bounded)\)\n" + + r"(?P<ndynamic>[0-9]+) dynamic objects" + + r"(?:\n.*)*", + flags=re.MULTILINE, +) + + +class _Graph: + graph: dict[QName, Node] + qualified: dict[BaseName, QName] + + _resolve_cache: dict[QName, QName | None] + + def __init__(self) -> None: + self._resolve_cache = {} + + def _resolve_funcname(self, funcname: QName) -> QName | None: + s = str(funcname) + is_qualified = ":" in s + + # Handle `ld --wrap` functions + if not is_qualified: + with_wrap = QName(f"__wrap_{s}") + if with_wrap in self.graph: + return with_wrap + if s.startswith("__real_"): + without_real = QName(s[len("__real_") :]) + if without_real in self.graph: + funcname = without_real + + # Usual case + if funcname in self.graph: + return funcname + + # Handle `__weak`/`[[gnu::weak]]` functions + if not is_qualified: + return self.qualified.get(BaseName(s)) + + return None + + def resolve_funcname(self, funcname: QName) -> QName | None: + if funcname not in self._resolve_cache: + self._resolve_cache[funcname] = self._resolve_funcname(funcname) + return self._resolve_cache[funcname] + + +def _make_graph( + ci_fnames: typing.Collection[str], + app: Application, +) -> _Graph: + graph: dict[QName, Node] = {} + qualified: dict[BaseName, set[QName]] = {} + + def handle_elem(elem: vcg.VCGElem) -> None: + match elem.typ: + case "node": + node = Node() + node.calls = {} + skip = False + for k, v in elem.attrs.items(): + match k: + case "title": + node.funcname = QName(v) + case "label": + if elem.attrs.get("shape", "") != "ellipse": + m = re_node_label.fullmatch(v) + if not m: + raise ValueError(f"unexpected label value {v!r}") + node.location = m.group("location") + node.usage_kind = typing.cast( + UsageKind, m.group("usage_kind") + ) + node.nstatic = int(m.group("nstatic")) + node.ndynamic = int(m.group("ndynamic")) + case "shape": + if v != "ellipse": + raise ValueError(f"unexpected shape value {v!r}") + skip = True + case _: + raise ValueError(f"unknown edge key {k!r}") + if not skip: + if node.funcname in graph: + raise ValueError(f"duplicate node {node.funcname}") + graph[node.funcname] = node + if ":" in str(node.funcname): + basename = node.funcname.base() + if basename not in qualified: + qualified[basename] = set() + qualified[basename].add(node.funcname) + case "edge": + caller: QName | None = None + callee: QName | None = None + for k, v in elem.attrs.items(): + match k: + case "sourcename": + caller = QName(v) + case "targetname": + callee = QName(v) + case "label": + pass + case _: + raise ValueError(f"unknown edge key {k!r}") + if caller is None or callee is None: + raise ValueError(f"incomplete edge: {elem.attrs!r}") + if caller not in graph: + raise ValueError(f"unknown caller: {caller}") + if callee == QName("__indirect_call"): + callees, missing_ok = app.indirect_callees(elem) + for callee in callees: + if callee not in graph[caller].calls: + graph[caller].calls[callee] = missing_ok + else: + graph[caller].calls[callee] = False + case _: + raise ValueError(f"unknown elem type {elem.typ!r}") + + for ci_fname in ci_fnames: + with open(ci_fname, "r", encoding="utf-8") as fh: + for elem in vcg.parse_vcg(fh): + handle_elem(elem) + + for node in app.extra_nodes(): + if node.funcname in graph: + raise ValueError(f"duplicate node {node.funcname}") + graph[node.funcname] = node + + ret = _Graph() + ret.graph = graph + ret.qualified = {} + for bname, qnames in qualified.items(): + if len(qnames) == 1: + ret.qualified[bname] = next(name for name in qnames) + return ret + + +def analyze( + *, + ci_fnames: typing.Collection[str], + app_func_filters: dict[str, typing.Callable[[QName], tuple[int, bool]]], + app: Application, + cfg_max_call_depth: int, +) -> AnalyzeResult: + graphdata = _make_graph(ci_fnames, app) + + missing: set[QName] = set() + dynamic: set[QName] = set() + included_funcs: set[QName] = set() + + dbg = False + + track_inclusion: bool = True + + skipmodels = app.skipmodels() + for name, model in skipmodels.items(): + if isinstance(model.nchain, int): + assert model.nchain > 1 + else: + assert len(model.nchain) > 0 + + _nstatic_cache: dict[QName, int] = {} + + def _nstatic(chain: list[QName], funcname: QName) -> tuple[int, int]: + nonlocal dbg + nonlocal track_inclusion + + assert funcname in graphdata.graph + + node = graphdata.graph[funcname] + if dbg: + print(f"//dbg: {'- '*len(chain)}{funcname}\t{node.nstatic}") + if node.usage_kind == "dynamic" or node.ndynamic > 0: + dynamic.add(funcname) + if track_inclusion: + included_funcs.add(funcname) + + max_call_nstatic = 0 + max_call_nchain = 0 + + if node.calls: + skipmodel = skipmodels.get(funcname.base()) + chain.append(funcname) + if len(chain) == cfg_max_call_depth: + raise ValueError(f"max call depth exceeded: {chain}") + for call_orig_qname, call_missing_ok in node.calls.items(): + skip_nchain = 0 + # 1. Resolve + call_qname = graphdata.resolve_funcname(call_orig_qname) + if not call_qname: + if skipmodel: + skip, _ = skipmodel(chain, call_orig_qname) + if skip: + if dbg: + print( + f"//dbg: {'- '*len(chain)}{call_orig_qname}\tskip missing" + ) + continue + if not call_missing_ok: + missing.add(call_orig_qname) + if dbg: + print(f"//dbg: {'- '*len(chain)}{call_orig_qname}\tmissing") + continue + + # 2. Skip + if skipmodel: + skip, skip_nchain = skipmodel(chain, call_qname) + max_call_nchain = max(max_call_nchain, skip_nchain) + if skip: + if dbg: + print(f"//dbg: {'- '*len(chain)}{call_qname}\tskip") + continue + + # 3. Call + if skip_nchain == 0 and call_qname in _nstatic_cache: + max_call_nstatic = max(max_call_nstatic, _nstatic_cache[call_qname]) + else: + call_nstatic, call_nchain = _nstatic(chain, call_qname) + max_call_nstatic = max(max_call_nstatic, call_nstatic) + max_call_nchain = max(max_call_nchain, call_nchain) + if skip_nchain == 0 and call_nchain == 0: + _nstatic_cache[call_qname] = call_nstatic + chain.pop() + return node.nstatic + max_call_nstatic, max(0, max_call_nchain - 1) + + def nstatic(funcname: QName) -> int: + return _nstatic([], funcname)[0] + + groups: dict[str, AnalyzeResultGroup] = {} + for grp_name, grp_filter in app_func_filters.items(): + rows: dict[QName, AnalyzeResultVal] = {} + for funcname in graphdata.graph: + cnt, track_inclusion = grp_filter(funcname) + if cnt: + rows[funcname] = AnalyzeResultVal(nstatic=nstatic(funcname), cnt=cnt) + groups[grp_name] = AnalyzeResultGroup(rows=rows) + + return AnalyzeResult( + groups=groups, missing=missing, dynamic=dynamic, included_funcs=included_funcs + ) |