summaryrefslogtreecommitdiff
path: root/build-aux/measurestack/analyze.py
diff options
context:
space:
mode:
authorLuke T. Shumaker <lukeshu@lukeshu.com>2025-03-31 04:22:52 -0600
committerLuke T. Shumaker <lukeshu@lukeshu.com>2025-03-31 04:26:10 -0600
commitee5abed3cda095115d5afb72c860819d9369fc45 (patch)
tree90e70afee34f5ce1714393d37063b3c17cbf3ad6 /build-aux/measurestack/analyze.py
parentf5d598b06cd6cfdcfa1f89819a843b2b462e7a1c (diff)
measurestack: Split into several files
Diffstat (limited to 'build-aux/measurestack/analyze.py')
-rw-r--r--build-aux/measurestack/analyze.py308
1 files changed, 308 insertions, 0 deletions
diff --git a/build-aux/measurestack/analyze.py b/build-aux/measurestack/analyze.py
new file mode 100644
index 0000000..67edd5d
--- /dev/null
+++ b/build-aux/measurestack/analyze.py
@@ -0,0 +1,308 @@
+# build-aux/measurestack/analyze.py - Analyze stack sizes for compiled objects
+#
+# Copyright (C) 2024-2025 Luke T. Shumaker <lukeshu@lukeshu.com>
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+import re
+import typing
+
+from . import vcg
+
+# pylint: disable=unused-variable
+__all__ = [
+ "BaseName",
+ "QName",
+ "UsageKind",
+ "Node",
+ "AnalyzeResultVal",
+ "AnalyzeResultGroup",
+ "AnalyzeResult",
+ "analyze",
+]
+
+
+class BaseName:
+ _content: str
+
+ def __init__(self, content: str) -> None:
+ if ":" in content:
+ raise ValueError(f"invalid non-qualified name: {content!r}")
+ self._content = content
+
+ def __str__(self) -> str:
+ return self._content
+
+ def __repr__(self) -> str:
+ return f"BaseName({self._content!r})"
+
+ def __format__(self, fmt_spec: str, /) -> str:
+ return repr(self)
+
+ def __eq__(self, other: typing.Any) -> bool:
+ assert isinstance(
+ other, BaseName
+ ), f"comparing BaseName with {other.__class__.__name__}"
+ return self._content == other._content
+
+ def __lt__(self, other: "BaseName") -> bool:
+ return self._content < other._content
+
+ def __hash__(self) -> int:
+ return hash(self._content)
+
+ def as_qname(self) -> "QName":
+ return QName(self._content)
+
+
+class QName:
+ _content: str
+
+ def __init__(self, content: str) -> None:
+ self._content = content
+
+ def __str__(self) -> str:
+ return self._content
+
+ def __repr__(self) -> str:
+ return f"QName({self._content!r})"
+
+ def __format__(self, fmt_spec: str, /) -> str:
+ return repr(self)
+
+ def __eq__(self, other: typing.Any) -> bool:
+ assert isinstance(
+ other, QName
+ ), f"comparing QName with {other.__class__.__name__}"
+ return self._content == other._content
+
+ def __lt__(self, other: "QName") -> bool:
+ return self._content < other._content
+
+ def __hash__(self) -> int:
+ return hash(self._content)
+
+ def base(self) -> BaseName:
+ return BaseName(self._content.rsplit(":", 1)[-1].split(".", 1)[0])
+
+
+UsageKind: typing.TypeAlias = typing.Literal["static", "dynamic", "dynamic,bounded"]
+
+
+class Node:
+ # from .title (`static` and `__weak` functions are prefixed with
+ # the compilation unit .c file. For static functions that's fine,
+ # but we'll have to handle it specially for __weak.).
+ funcname: QName
+ # .label is "{funcname}\n{location}\n{nstatic} bytes (static}\n{ndynamic} dynamic objects"
+ location: str
+ usage_kind: UsageKind
+ nstatic: int
+ ndynamic: int
+
+ # edges with .sourcename set to this node, val is if it's
+ # OK/expected that the function be missing.
+ calls: dict[QName, bool]
+
+
+class AnalyzeResultVal(typing.NamedTuple):
+ nstatic: int
+ cnt: int
+
+
+class AnalyzeResultGroup(typing.NamedTuple):
+ rows: dict[QName, AnalyzeResultVal]
+
+
+class AnalyzeResult(typing.NamedTuple):
+ groups: dict[str, AnalyzeResultGroup]
+ missing: set[QName]
+ dynamic: set[QName]
+
+ included_funcs: set[QName]
+
+
+class Application(typing.Protocol):
+ def extra_nodes(self) -> typing.Collection[Node]: ...
+ def indirect_callees(
+ self, elem: vcg.VCGElem
+ ) -> tuple[typing.Collection[QName], bool]: ...
+ def skip_call(self, chain: typing.Sequence[QName], funcname: QName) -> bool: ...
+
+
+def analyze(
+ *,
+ ci_fnames: typing.Collection[str],
+ app_func_filters: dict[str, typing.Callable[[QName], tuple[int, bool]]],
+ app: Application,
+ cfg_max_call_depth: int,
+) -> AnalyzeResult:
+ re_node_label = re.compile(
+ r"(?P<funcname>[^\n]+)\n"
+ + r"(?P<location>[^\n]+:[0-9]+:[0-9]+)\n"
+ + r"(?P<nstatic>[0-9]+) bytes \((?P<usage_kind>static|dynamic|dynamic,bounded)\)\n"
+ + r"(?P<ndynamic>[0-9]+) dynamic objects"
+ + r"(?:\n.*)*",
+ flags=re.MULTILINE,
+ )
+
+ graph: dict[QName, Node] = {}
+ qualified: dict[BaseName, set[QName]] = {}
+
+ def handle_elem(elem: vcg.VCGElem) -> None:
+ match elem.typ:
+ case "node":
+ node = Node()
+ node.calls = {}
+ skip = False
+ for k, v in elem.attrs.items():
+ match k:
+ case "title":
+ node.funcname = QName(v)
+ case "label":
+ if elem.attrs.get("shape", "") != "ellipse":
+ m = re_node_label.fullmatch(v)
+ if not m:
+ raise ValueError(f"unexpected label value {v!r}")
+ node.location = m.group("location")
+ node.usage_kind = typing.cast(
+ UsageKind, m.group("usage_kind")
+ )
+ node.nstatic = int(m.group("nstatic"))
+ node.ndynamic = int(m.group("ndynamic"))
+ case "shape":
+ if v != "ellipse":
+ raise ValueError(f"unexpected shape value {v!r}")
+ skip = True
+ case _:
+ raise ValueError(f"unknown edge key {k!r}")
+ if not skip:
+ if node.funcname in graph:
+ raise ValueError(f"duplicate node {node.funcname}")
+ graph[node.funcname] = node
+ if ":" in str(node.funcname):
+ basename = node.funcname.base()
+ if basename not in qualified:
+ qualified[basename] = set()
+ qualified[basename].add(node.funcname)
+ case "edge":
+ caller: QName | None = None
+ callee: QName | None = None
+ for k, v in elem.attrs.items():
+ match k:
+ case "sourcename":
+ caller = QName(v)
+ case "targetname":
+ callee = QName(v)
+ case "label":
+ pass
+ case _:
+ raise ValueError(f"unknown edge key {k!r}")
+ if caller is None or callee is None:
+ raise ValueError(f"incomplete edge: {elem.attrs!r}")
+ if caller not in graph:
+ raise ValueError(f"unknown caller: {caller}")
+ if callee == QName("__indirect_call"):
+ callees, missing_ok = app.indirect_callees(elem)
+ for callee in callees:
+ if callee not in graph[caller].calls:
+ graph[caller].calls[callee] = missing_ok
+ else:
+ graph[caller].calls[callee] = False
+ case _:
+ raise ValueError(f"unknown elem type {elem.typ!r}")
+
+ for ci_fname in ci_fnames:
+ with open(ci_fname, "r", encoding="utf-8") as fh:
+ for elem in vcg.parse_vcg(fh):
+ handle_elem(elem)
+
+ for node in app.extra_nodes():
+ if node.funcname in graph:
+ raise ValueError(f"duplicate node {node.funcname}")
+ graph[node.funcname] = node
+
+ missing: set[QName] = set()
+ dynamic: set[QName] = set()
+ included_funcs: set[QName] = set()
+
+ dbg = False
+
+ def resolve_funcname(funcname: QName) -> QName | None:
+ # Handle `ld --wrap` functions
+ if QName(f"__wrap_{str(funcname)}") in graph:
+ return QName(f"__wrap_{str(funcname)}")
+ if (
+ str(funcname).startswith("__real_")
+ and QName(str(funcname)[len("__real_") :]) in graph
+ ):
+ funcname = QName(str(funcname)[len("__real_") :])
+
+ # Usual case
+ if funcname in graph:
+ return funcname
+
+ # Handle `__weak` functions
+ if ":" not in str(funcname):
+ qnames = qualified.get(BaseName(str(funcname)), set())
+ if len(qnames) == 1:
+ return sorted(qnames)[0]
+
+ return None
+
+ track_inclusion: bool = True
+
+ def nstatic(
+ orig_funcname: QName,
+ chain: typing.Sequence[QName] = (),
+ missing_ok: bool = False,
+ ) -> int:
+ nonlocal dbg
+ nonlocal track_inclusion
+ funcname = resolve_funcname(orig_funcname)
+ if not funcname:
+ if chain and app.skip_call(chain, orig_funcname):
+ if dbg:
+ print(f"//dbg: {'- '*len(chain)}{orig_funcname}\tskip missing")
+ return 0
+ if not missing_ok:
+ missing.add(orig_funcname)
+ if dbg:
+ print(f"//dbg: {'- '*len(chain)}{orig_funcname}\tmissing")
+ return 0
+ if chain and app.skip_call(chain, funcname):
+ if dbg:
+ print(f"//dbg: {'- '*len(chain)}{orig_funcname}\tskip")
+ return 0
+
+ if len(chain) == cfg_max_call_depth:
+ raise ValueError(f"max call depth exceeded: {[*chain, funcname]}")
+
+ node = graph[funcname]
+ if dbg:
+ print(f"//dbg: {'- '*len(chain)}{funcname}\t{node.nstatic}")
+ if node.usage_kind == "dynamic" or node.ndynamic > 0:
+ dynamic.add(funcname)
+ if track_inclusion:
+ included_funcs.add(funcname)
+ return node.nstatic + max(
+ [
+ 0,
+ *[
+ nstatic(call, [*chain, funcname], missing_ok)
+ for call, missing_ok in node.calls.items()
+ ],
+ ]
+ )
+
+ groups: dict[str, AnalyzeResultGroup] = {}
+ for grp_name, grp_filter in app_func_filters.items():
+ rows: dict[QName, AnalyzeResultVal] = {}
+ for funcname in graph:
+ cnt, track_inclusion = grp_filter(funcname)
+ if cnt:
+ rows[funcname] = AnalyzeResultVal(nstatic=nstatic(funcname), cnt=cnt)
+ groups[grp_name] = AnalyzeResultGroup(rows=rows)
+
+ return AnalyzeResult(
+ groups=groups, missing=missing, dynamic=dynamic, included_funcs=included_funcs
+ )