# build-aux/measurestack/analyze.py - Analyze stack sizes for compiled objects # # Copyright (C) 2024-2025 Luke T. Shumaker # SPDX-License-Identifier: AGPL-3.0-or-later import re import typing from . import vcg # pylint: disable=unused-variable __all__ = [ "BaseName", "QName", "UsageKind", "Node", "AnalyzeResultVal", "AnalyzeResultGroup", "AnalyzeResult", "analyze", ] class BaseName: _content: str def __init__(self, content: str) -> None: if ":" in content: raise ValueError(f"invalid non-qualified name: {content!r}") self._content = content def __str__(self) -> str: return self._content def __repr__(self) -> str: return f"BaseName({self._content!r})" def __format__(self, fmt_spec: str, /) -> str: return repr(self) def __eq__(self, other: typing.Any) -> bool: assert isinstance( other, BaseName ), f"comparing BaseName with {other.__class__.__name__}" return self._content == other._content def __lt__(self, other: "BaseName") -> bool: return self._content < other._content def __hash__(self) -> int: return hash(self._content) def as_qname(self) -> "QName": return QName(self._content) class QName: _content: str def __init__(self, content: str) -> None: self._content = content def __str__(self) -> str: return self._content def __repr__(self) -> str: return f"QName({self._content!r})" def __format__(self, fmt_spec: str, /) -> str: return repr(self) def __eq__(self, other: typing.Any) -> bool: assert isinstance( other, QName ), f"comparing QName with {other.__class__.__name__}" return self._content == other._content def __lt__(self, other: "QName") -> bool: return self._content < other._content def __hash__(self) -> int: return hash(self._content) def base(self) -> BaseName: return BaseName(self._content.rsplit(":", 1)[-1].split(".", 1)[0]) UsageKind: typing.TypeAlias = typing.Literal["static", "dynamic", "dynamic,bounded"] class Node: # from .title (`static` and `__weak` functions are prefixed with # the compilation unit .c file. For static functions that's fine, # but we'll have to handle it specially for __weak.). funcname: QName # .label is "{funcname}\n{location}\n{nstatic} bytes (static}\n{ndynamic} dynamic objects" location: str usage_kind: UsageKind nstatic: int ndynamic: int # edges with .sourcename set to this node, val is if it's # OK/expected that the function be missing. calls: dict[QName, bool] class AnalyzeResultVal(typing.NamedTuple): nstatic: int cnt: int class AnalyzeResultGroup(typing.NamedTuple): rows: dict[QName, AnalyzeResultVal] class AnalyzeResult(typing.NamedTuple): groups: dict[str, AnalyzeResultGroup] missing: set[QName] dynamic: set[QName] included_funcs: set[QName] class Application(typing.Protocol): def extra_nodes(self) -> typing.Collection[Node]: ... def indirect_callees( self, elem: vcg.VCGElem ) -> tuple[typing.Collection[QName], bool]: ... def skip_call(self, chain: typing.Sequence[QName], funcname: QName) -> bool: ... def analyze( *, ci_fnames: typing.Collection[str], app_func_filters: dict[str, typing.Callable[[QName], tuple[int, bool]]], app: Application, cfg_max_call_depth: int, ) -> AnalyzeResult: re_node_label = re.compile( r"(?P[^\n]+)\n" + r"(?P[^\n]+:[0-9]+:[0-9]+)\n" + r"(?P[0-9]+) bytes \((?Pstatic|dynamic|dynamic,bounded)\)\n" + r"(?P[0-9]+) dynamic objects" + r"(?:\n.*)*", flags=re.MULTILINE, ) graph: dict[QName, Node] = {} qualified: dict[BaseName, set[QName]] = {} def handle_elem(elem: vcg.VCGElem) -> None: match elem.typ: case "node": node = Node() node.calls = {} skip = False for k, v in elem.attrs.items(): match k: case "title": node.funcname = QName(v) case "label": if elem.attrs.get("shape", "") != "ellipse": m = re_node_label.fullmatch(v) if not m: raise ValueError(f"unexpected label value {v!r}") node.location = m.group("location") node.usage_kind = typing.cast( UsageKind, m.group("usage_kind") ) node.nstatic = int(m.group("nstatic")) node.ndynamic = int(m.group("ndynamic")) case "shape": if v != "ellipse": raise ValueError(f"unexpected shape value {v!r}") skip = True case _: raise ValueError(f"unknown edge key {k!r}") if not skip: if node.funcname in graph: raise ValueError(f"duplicate node {node.funcname}") graph[node.funcname] = node if ":" in str(node.funcname): basename = node.funcname.base() if basename not in qualified: qualified[basename] = set() qualified[basename].add(node.funcname) case "edge": caller: QName | None = None callee: QName | None = None for k, v in elem.attrs.items(): match k: case "sourcename": caller = QName(v) case "targetname": callee = QName(v) case "label": pass case _: raise ValueError(f"unknown edge key {k!r}") if caller is None or callee is None: raise ValueError(f"incomplete edge: {elem.attrs!r}") if caller not in graph: raise ValueError(f"unknown caller: {caller}") if callee == QName("__indirect_call"): callees, missing_ok = app.indirect_callees(elem) for callee in callees: if callee not in graph[caller].calls: graph[caller].calls[callee] = missing_ok else: graph[caller].calls[callee] = False case _: raise ValueError(f"unknown elem type {elem.typ!r}") for ci_fname in ci_fnames: with open(ci_fname, "r", encoding="utf-8") as fh: for elem in vcg.parse_vcg(fh): handle_elem(elem) for node in app.extra_nodes(): if node.funcname in graph: raise ValueError(f"duplicate node {node.funcname}") graph[node.funcname] = node missing: set[QName] = set() dynamic: set[QName] = set() included_funcs: set[QName] = set() dbg = False def resolve_funcname(funcname: QName) -> QName | None: # Handle `ld --wrap` functions if QName(f"__wrap_{str(funcname)}") in graph: return QName(f"__wrap_{str(funcname)}") if ( str(funcname).startswith("__real_") and QName(str(funcname)[len("__real_") :]) in graph ): funcname = QName(str(funcname)[len("__real_") :]) # Usual case if funcname in graph: return funcname # Handle `__weak` functions if ":" not in str(funcname): qnames = qualified.get(BaseName(str(funcname)), set()) if len(qnames) == 1: return sorted(qnames)[0] return None track_inclusion: bool = True def nstatic( orig_funcname: QName, chain: typing.Sequence[QName] = (), missing_ok: bool = False, ) -> int: nonlocal dbg nonlocal track_inclusion funcname = resolve_funcname(orig_funcname) if not funcname: if chain and app.skip_call(chain, orig_funcname): if dbg: print(f"//dbg: {'- '*len(chain)}{orig_funcname}\tskip missing") return 0 if not missing_ok: missing.add(orig_funcname) if dbg: print(f"//dbg: {'- '*len(chain)}{orig_funcname}\tmissing") return 0 if chain and app.skip_call(chain, funcname): if dbg: print(f"//dbg: {'- '*len(chain)}{orig_funcname}\tskip") return 0 if len(chain) == cfg_max_call_depth: raise ValueError(f"max call depth exceeded: {[*chain, funcname]}") node = graph[funcname] if dbg: print(f"//dbg: {'- '*len(chain)}{funcname}\t{node.nstatic}") if node.usage_kind == "dynamic" or node.ndynamic > 0: dynamic.add(funcname) if track_inclusion: included_funcs.add(funcname) return node.nstatic + max( [ 0, *[ nstatic(call, [*chain, funcname], missing_ok) for call, missing_ok in node.calls.items() ], ] ) groups: dict[str, AnalyzeResultGroup] = {} for grp_name, grp_filter in app_func_filters.items(): rows: dict[QName, AnalyzeResultVal] = {} for funcname in graph: cnt, track_inclusion = grp_filter(funcname) if cnt: rows[funcname] = AnalyzeResultVal(nstatic=nstatic(funcname), cnt=cnt) groups[grp_name] = AnalyzeResultGroup(rows=rows) return AnalyzeResult( groups=groups, missing=missing, dynamic=dynamic, included_funcs=included_funcs )