# build-aux/measurestack/analyze.py - Analyze stack sizes for compiled objects
#
# Copyright (C) 2024-2025  Luke T. Shumaker <lukeshu@lukeshu.com>
# SPDX-License-Identifier: AGPL-3.0-or-later

import re
import typing

from . import vcg

# pylint: disable=unused-variable
__all__ = [
    "BaseName",
    "QName",
    "UsageKind",
    "Node",
    "AnalyzeResultVal",
    "AnalyzeResultGroup",
    "AnalyzeResult",
    "analyze",
]


class BaseName:
    _content: str

    def __init__(self, content: str) -> None:
        if ":" in content:
            raise ValueError(f"invalid non-qualified name: {content!r}")
        self._content = content

    def __str__(self) -> str:
        return self._content

    def __repr__(self) -> str:
        return f"BaseName({self._content!r})"

    def __format__(self, fmt_spec: str, /) -> str:
        return repr(self)

    def __eq__(self, other: typing.Any) -> bool:
        assert isinstance(
            other, BaseName
        ), f"comparing BaseName with {other.__class__.__name__}"
        return self._content == other._content

    def __lt__(self, other: "BaseName") -> bool:
        return self._content < other._content

    def __hash__(self) -> int:
        return hash(self._content)

    def as_qname(self) -> "QName":
        return QName(self._content)


class QName:
    _content: str

    def __init__(self, content: str) -> None:
        self._content = content

    def __str__(self) -> str:
        return self._content

    def __repr__(self) -> str:
        return f"QName({self._content!r})"

    def __format__(self, fmt_spec: str, /) -> str:
        return repr(self)

    def __eq__(self, other: typing.Any) -> bool:
        assert isinstance(
            other, QName
        ), f"comparing QName with {other.__class__.__name__}"
        return self._content == other._content

    def __lt__(self, other: "QName") -> bool:
        return self._content < other._content

    def __hash__(self) -> int:
        return hash(self._content)

    def base(self) -> BaseName:
        return BaseName(self._content.rsplit(":", 1)[-1].split(".", 1)[0])


UsageKind: typing.TypeAlias = typing.Literal["static", "dynamic", "dynamic,bounded"]


class Node:
    # from .title (`static` and `__weak` functions are prefixed with
    # the compilation unit .c file.  For static functions that's fine,
    # but we'll have to handle it specially for __weak.).
    funcname: QName
    # .label is "{funcname}\n{location}\n{nstatic} bytes (static}\n{ndynamic} dynamic objects"
    location: str
    usage_kind: UsageKind
    nstatic: int
    ndynamic: int

    # edges with .sourcename set to this node, val is if it's
    # OK/expected that the function be missing.
    calls: dict[QName, bool]


class AnalyzeResultVal(typing.NamedTuple):
    nstatic: int
    cnt: int


class AnalyzeResultGroup(typing.NamedTuple):
    rows: dict[QName, AnalyzeResultVal]


class AnalyzeResult(typing.NamedTuple):
    groups: dict[str, AnalyzeResultGroup]
    missing: set[QName]
    dynamic: set[QName]

    included_funcs: set[QName]


class Application(typing.Protocol):
    def extra_nodes(self) -> typing.Collection[Node]: ...
    def indirect_callees(
        self, elem: vcg.VCGElem
    ) -> tuple[typing.Collection[QName], bool]: ...
    def skip_call(self, chain: typing.Sequence[QName], funcname: QName) -> bool: ...


def analyze(
    *,
    ci_fnames: typing.Collection[str],
    app_func_filters: dict[str, typing.Callable[[QName], tuple[int, bool]]],
    app: Application,
    cfg_max_call_depth: int,
) -> AnalyzeResult:
    re_node_label = re.compile(
        r"(?P<funcname>[^\n]+)\n"
        + r"(?P<location>[^\n]+:[0-9]+:[0-9]+)\n"
        + r"(?P<nstatic>[0-9]+) bytes \((?P<usage_kind>static|dynamic|dynamic,bounded)\)\n"
        + r"(?P<ndynamic>[0-9]+) dynamic objects"
        + r"(?:\n.*)*",
        flags=re.MULTILINE,
    )

    graph: dict[QName, Node] = {}
    qualified: dict[BaseName, set[QName]] = {}

    def handle_elem(elem: vcg.VCGElem) -> None:
        match elem.typ:
            case "node":
                node = Node()
                node.calls = {}
                skip = False
                for k, v in elem.attrs.items():
                    match k:
                        case "title":
                            node.funcname = QName(v)
                        case "label":
                            if elem.attrs.get("shape", "") != "ellipse":
                                m = re_node_label.fullmatch(v)
                                if not m:
                                    raise ValueError(f"unexpected label value {v!r}")
                                node.location = m.group("location")
                                node.usage_kind = typing.cast(
                                    UsageKind, m.group("usage_kind")
                                )
                                node.nstatic = int(m.group("nstatic"))
                                node.ndynamic = int(m.group("ndynamic"))
                        case "shape":
                            if v != "ellipse":
                                raise ValueError(f"unexpected shape value {v!r}")
                            skip = True
                        case _:
                            raise ValueError(f"unknown edge key {k!r}")
                if not skip:
                    if node.funcname in graph:
                        raise ValueError(f"duplicate node {node.funcname}")
                    graph[node.funcname] = node
                    if ":" in str(node.funcname):
                        basename = node.funcname.base()
                        if basename not in qualified:
                            qualified[basename] = set()
                        qualified[basename].add(node.funcname)
            case "edge":
                caller: QName | None = None
                callee: QName | None = None
                for k, v in elem.attrs.items():
                    match k:
                        case "sourcename":
                            caller = QName(v)
                        case "targetname":
                            callee = QName(v)
                        case "label":
                            pass
                        case _:
                            raise ValueError(f"unknown edge key {k!r}")
                if caller is None or callee is None:
                    raise ValueError(f"incomplete edge: {elem.attrs!r}")
                if caller not in graph:
                    raise ValueError(f"unknown caller: {caller}")
                if callee == QName("__indirect_call"):
                    callees, missing_ok = app.indirect_callees(elem)
                    for callee in callees:
                        if callee not in graph[caller].calls:
                            graph[caller].calls[callee] = missing_ok
                else:
                    graph[caller].calls[callee] = False
            case _:
                raise ValueError(f"unknown elem type {elem.typ!r}")

    for ci_fname in ci_fnames:
        with open(ci_fname, "r", encoding="utf-8") as fh:
            for elem in vcg.parse_vcg(fh):
                handle_elem(elem)

    for node in app.extra_nodes():
        if node.funcname in graph:
            raise ValueError(f"duplicate node {node.funcname}")
        graph[node.funcname] = node

    missing: set[QName] = set()
    dynamic: set[QName] = set()
    included_funcs: set[QName] = set()

    dbg = False

    def resolve_funcname(funcname: QName) -> QName | None:
        # Handle `ld --wrap` functions
        if QName(f"__wrap_{str(funcname)}") in graph:
            return QName(f"__wrap_{str(funcname)}")
        if (
            str(funcname).startswith("__real_")
            and QName(str(funcname)[len("__real_") :]) in graph
        ):
            funcname = QName(str(funcname)[len("__real_") :])

        # Usual case
        if funcname in graph:
            return funcname

        # Handle `__weak` functions
        if ":" not in str(funcname):
            qnames = qualified.get(BaseName(str(funcname)), set())
            if len(qnames) == 1:
                return sorted(qnames)[0]

        return None

    track_inclusion: bool = True

    def nstatic(
        orig_funcname: QName,
        chain: typing.Sequence[QName] = (),
        missing_ok: bool = False,
    ) -> int:
        nonlocal dbg
        nonlocal track_inclusion
        funcname = resolve_funcname(orig_funcname)
        if not funcname:
            if chain and app.skip_call(chain, orig_funcname):
                if dbg:
                    print(f"//dbg: {'- '*len(chain)}{orig_funcname}\tskip missing")
                return 0
            if not missing_ok:
                missing.add(orig_funcname)
            if dbg:
                print(f"//dbg: {'- '*len(chain)}{orig_funcname}\tmissing")
            return 0
        if chain and app.skip_call(chain, funcname):
            if dbg:
                print(f"//dbg: {'- '*len(chain)}{orig_funcname}\tskip")
            return 0

        if len(chain) == cfg_max_call_depth:
            raise ValueError(f"max call depth exceeded: {[*chain, funcname]}")

        node = graph[funcname]
        if dbg:
            print(f"//dbg: {'- '*len(chain)}{funcname}\t{node.nstatic}")
        if node.usage_kind == "dynamic" or node.ndynamic > 0:
            dynamic.add(funcname)
        if track_inclusion:
            included_funcs.add(funcname)
        return node.nstatic + max(
            [
                0,
                *[
                    nstatic(call, [*chain, funcname], missing_ok)
                    for call, missing_ok in node.calls.items()
                ],
            ]
        )

    groups: dict[str, AnalyzeResultGroup] = {}
    for grp_name, grp_filter in app_func_filters.items():
        rows: dict[QName, AnalyzeResultVal] = {}
        for funcname in graph:
            cnt, track_inclusion = grp_filter(funcname)
            if cnt:
                rows[funcname] = AnalyzeResultVal(nstatic=nstatic(funcname), cnt=cnt)
        groups[grp_name] = AnalyzeResultGroup(rows=rows)

    return AnalyzeResult(
        groups=groups, missing=missing, dynamic=dynamic, included_funcs=included_funcs
    )