gabriel / muse public
api_surface.py python
281 lines 9.7 KB
b4e8aaf2 feat(code): Phase 1 — lineage, api-surface, codemap, clones, checkout-s… Gabriel Cardona <cgcardona@gmail.com> 5d ago
1 """muse api-surface — public API surface tracking.
2
3 Shows which symbols in a snapshot are part of the public API, and how the
4 public API changed between two commits.
5
6 A symbol is **public** when all of the following hold:
7
8 * ``kind`` is one of: ``function``, ``async_function``, ``class``,
9 ``method``, ``async_method``
10 * ``name`` does not start with ``_`` (Python convention for private/internal)
11 * ``kind`` is not ``import``
12
13 Git cannot answer "what changed in the public API between v1.0 and v1.1?"
14 without an external diffing tool. Muse answers this in O(1) against committed
15 snapshots — no checkout required, no working-tree needed.
16
17 Usage::
18
19 muse api-surface
20 muse api-surface --commit HEAD~5
21 muse api-surface --diff main
22 muse api-surface --language Python
23 muse api-surface --json
24
25 With ``--diff REF``, shows a three-section report::
26
27 Public API surface — commit a1b2c3d4 vs e5f6a7b8
28 ──────────────────────────────────────────────────────────────
29
30 Added (3):
31 + src/billing.py::compute_tax function
32 + src/auth.py::refresh_token function
33 + src/models.py::User.to_json method
34
35 Removed (1):
36 - src/billing.py::compute_total function
37
38 Changed (2):
39 ~ src/billing.py::Invoice.pay method (signature_change)
40 ~ src/auth.py::validate_token function (impl_only)
41
42 Flags:
43
44 ``--commit, -c REF``
45 Show or compare from this commit (default: HEAD).
46
47 ``--diff REF``
48 Compare the commit from ``--commit`` against this ref.
49
50 ``--language LANG``
51 Filter to symbols in files of this language.
52
53 ``--json``
54 Emit results as JSON with a ``schema_version`` wrapper.
55 """
56 from __future__ import annotations
57
58 import json
59 import logging
60 import pathlib
61
62 import typer
63
64 from muse.core.errors import ExitCode
65 from muse.core.repo import require_repo
66 from muse.core.store import get_commit_snapshot_manifest, resolve_commit_ref
67 from muse.plugins.code._query import language_of, symbols_for_snapshot
68 from muse.plugins.code.ast_parser import SymbolRecord
69
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Typer application for this command; ``api_surface`` is registered on it as
# the callback.
app = typer.Typer()
73
# Symbol kinds that are eligible for the public API surface; ``_is_public``
# rejects every other kind.
_PUBLIC_KINDS: frozenset[str] = frozenset(
    (
        "function",
        "async_function",
        "class",
        "method",
        "async_method",
    )
)
77
78
def _read_repo_id(root: pathlib.Path) -> str:
    """Return the ``repo_id`` stored in ``<root>/.muse/repo.json`` as a string.

    The file is decoded explicitly as UTF-8 instead of the platform's locale
    encoding, so the result does not depend on the machine it runs on.

    Errors are not handled here: a missing or unreadable file, invalid JSON,
    or a document without a ``repo_id`` key propagate to the caller as the
    exceptions raised by ``read_text``, ``json.loads`` and the key lookup.
    """
    repo_json = root / ".muse" / "repo.json"
    payload = json.loads(repo_json.read_text(encoding="utf-8"))
    # str() keeps the return type stable even if the id was stored as a number.
    return str(payload["repo_id"])
81
82
def _read_branch(root: pathlib.Path) -> str:
    """Return the branch name recorded in ``<root>/.muse/HEAD``.

    Surrounding whitespace is dropped, a leading ``refs/heads/`` is removed
    when present, and the remainder is stripped once more before returning.
    """
    head_path = root / ".muse" / "HEAD"
    ref_text = head_path.read_text().strip()
    branch = ref_text.removeprefix("refs/heads/")
    return branch.strip()
86
87
def _is_public(name: str, kind: str) -> bool:
    """Return True when a symbol of this *kind* and *name* counts as public.

    The kind must be one of ``_PUBLIC_KINDS``, and the bare name (the text
    after the last ``.`` in *name*) must not start with an underscore.
    """
    if kind not in _PUBLIC_KINDS:
        return False
    bare_name = name.rpartition(".")[2]
    return not bare_name.startswith("_")
90
91
def _public_symbols(
    root: pathlib.Path,
    manifest: dict[str, str],
    language_filter: str | None,
) -> dict[str, SymbolRecord]:
    """Collect the public symbols of *manifest* into one address → record dict.

    ``symbols_for_snapshot`` is called with *language_filter* passed through;
    its per-file symbol trees are flattened and only records accepted by
    ``_is_public`` are kept. If two trees hold the same address, the later
    one wins.
    """
    trees = symbols_for_snapshot(root, manifest, language_filter=language_filter)
    return {
        address: record
        for tree in trees.values()
        for address, record in tree.items()
        if _is_public(record["name"], record["kind"])
    }
105
106
def _classify_change(old: SymbolRecord, new: SymbolRecord) -> str:
    """Classify the difference between two records of the same symbol.

    Returns one of:

    * ``"unchanged"`` — both records have the same ``content_id``
    * ``"impl_only"`` — content differs but ``signature_id`` is the same
    * ``"signature_change"`` — signature differs, ``body_hash`` is the same
    * ``"signature+impl"`` — both signature and ``body_hash`` differ
    """
    if old["content_id"] == new["content_id"]:
        return "unchanged"
    if old["signature_id"] == new["signature_id"]:
        return "impl_only"
    # The signature moved; the body hash tells us whether the body moved too.
    body_same = old["body_hash"] == new["body_hash"]
    return "signature_change" if body_same else "signature+impl"
116
117
class _ApiEntry:
    """One public symbol: its address, its symbol record and its language."""

    def __init__(self, address: str, rec: SymbolRecord, language: str) -> None:
        self.address = address
        self.rec = rec
        self.language = language

    def to_dict(self) -> dict[str, str]:
        """Return the flat dict form of this entry used in JSON output.

        ``content_id``, ``signature_id`` and ``body_hash`` are cut to their
        first 8 characters; the other fields are copied as they are.
        """
        rec = self.rec
        out: dict[str, str] = {"address": self.address}
        for key in ("kind", "name", "qualified_name"):
            out[key] = rec[key]
        out["language"] = self.language
        # Hash-like fields are shortened for display.
        for key in ("content_id", "signature_id", "body_hash"):
            out[key] = rec[key][:8]
        return out
135
136
def _entry_for(address: str, rec: SymbolRecord) -> _ApiEntry:
    """Build the ``_ApiEntry`` for *address*, looking the language up from the
    part of the address before the first ``::``."""
    return _ApiEntry(address, rec, language_of(address.split("::")[0]))


def _report_surface(
    short_id: str,
    surface: dict[str, SymbolRecord],
    language: str | None,
    as_json: bool,
) -> None:
    """Print every public symbol in *surface*, as JSON or as an aligned table."""
    entries = [_entry_for(addr, surface[addr]) for addr in sorted(surface)]

    if as_json:
        typer.echo(json.dumps(
            {
                "schema_version": 1,
                "commit": short_id,
                "language_filter": language,
                "total": len(entries),
                "symbols": [e.to_dict() for e in entries],
            },
            indent=2,
        ))
        return

    typer.echo(f"\nPublic API surface — commit {short_id}")
    if language:
        typer.echo(f" (language: {language})")
    typer.echo("─" * 62)
    if not entries:
        typer.echo(" (no public symbols found)")
        return
    # Pad every address to the longest one so the kind column lines up.
    max_addr = max(len(e.address) for e in entries)
    for e in entries:
        typer.echo(f" {e.address:<{max_addr}} {e.rec['kind']}")
    typer.echo(f"\n {len(entries)} public symbol(s)")


def _report_diff(
    short_id: str,
    base_short_id: str,
    current: dict[str, SymbolRecord],
    base: dict[str, SymbolRecord],
    language: str | None,
    as_json: bool,
) -> None:
    """Print the Added / Removed / Changed report of *current* against *base*."""
    added = {a: r for a, r in current.items() if a not in base}
    removed = {a: r for a, r in base.items() if a not in current}
    # address -> (record on the *current* side, change classification)
    changed: dict[str, tuple[SymbolRecord, str]] = {}
    for addr, new in current.items():
        if addr in base:
            cls = _classify_change(base[addr], new)
            if cls != "unchanged":
                changed[addr] = (new, cls)

    if as_json:
        typer.echo(json.dumps(
            {
                "schema_version": 1,
                "commit": short_id,
                "base_commit": base_short_id,
                "language_filter": language,
                "added": [
                    _entry_for(a, added[a]).to_dict() for a in sorted(added)
                ],
                "removed": [
                    _entry_for(a, removed[a]).to_dict() for a in sorted(removed)
                ],
                "changed": [
                    {**_entry_for(a, changed[a][0]).to_dict(), "change": changed[a][1]}
                    for a in sorted(changed)
                ],
            },
            indent=2,
        ))
        return

    typer.echo(f"\nPublic API surface — commit {short_id} vs {base_short_id}")
    if language:
        typer.echo(f" (language: {language})")
    typer.echo("─" * 62)

    # One shared column width for all three sections; 40 when nothing changed.
    max_addr = max(
        map(len, added.keys() | removed.keys() | changed.keys()),
        default=40,
    )

    if added:
        typer.echo(f"\nAdded ({len(added)}):")
        for addr in sorted(added):
            typer.echo(f" + {addr:<{max_addr}} {added[addr]['kind']}")

    if removed:
        typer.echo(f"\nRemoved ({len(removed)}):")
        for addr in sorted(removed):
            typer.echo(f" - {addr:<{max_addr}} {removed[addr]['kind']}")

    if changed:
        typer.echo(f"\nChanged ({len(changed)}):")
        for addr in sorted(changed):
            new, cls = changed[addr]
            typer.echo(f" ~ {addr:<{max_addr}} {new['kind']} ({cls})")

    total = len(added) + len(removed) + len(changed)
    if total == 0:
        typer.echo("\n ✅ No public API changes detected.")
    else:
        typer.echo(f"\n {total} public API change(s)")


@app.callback(invoke_without_command=True)
def api_surface(
    ctx: typer.Context,
    ref: str | None = typer.Option(
        None, "--commit", "-c", metavar="REF",
        help="Show surface at this commit (default: HEAD).",
    ),
    diff_ref: str | None = typer.Option(
        None, "--diff", metavar="REF",
        help="Compare HEAD (or --commit) against this ref.",
    ),
    language: str | None = typer.Option(
        None, "--language", "-l", metavar="LANG",
        help="Filter to this language (Python, Go, Rust, …).",
    ),
    as_json: bool = typer.Option(False, "--json", help="Emit results as JSON."),
) -> None:
    """Show the public API surface and how it changed between two commits.

    A symbol is public when its kind is function/class/method (not import) and
    its bare name does not start with ``_``.

    With ``--diff REF``, shows three sections: Added, Removed, Changed.
    Without ``--diff``, lists all public symbols at the given commit.

    This command runs against committed snapshots only — no working-tree
    parsing, no test execution.
    """
    # NOTE: the docstring above is left as written; it is kept out of the
    # refactor so any help text derived from it stays the same.
    # ``ctx`` is not used in the body; it stays in the signature unchanged.
    root = require_repo()
    repo_id = _read_repo_id(root)
    branch = _read_branch(root)

    commit = resolve_commit_ref(root, repo_id, branch, ref)
    if commit is None:
        typer.echo(f"❌ Commit '{ref or 'HEAD'}' not found.", err=True)
        raise typer.Exit(code=ExitCode.USER_ERROR)

    # A falsy manifest result is treated as an empty snapshot.
    manifest = get_commit_snapshot_manifest(root, commit.commit_id) or {}
    current_surface = _public_symbols(root, manifest, language)
    short_id = commit.commit_id[:8]

    if diff_ref is None:
        _report_surface(short_id, current_surface, language, as_json)
        return

    # Diff mode: resolve the second ref and compare the two surfaces.
    base_commit = resolve_commit_ref(root, repo_id, branch, diff_ref)
    if base_commit is None:
        typer.echo(f"❌ Diff ref '{diff_ref}' not found.", err=True)
        raise typer.Exit(code=ExitCode.USER_ERROR)

    base_manifest = get_commit_snapshot_manifest(root, base_commit.commit_id) or {}
    base_surface = _public_symbols(root, base_manifest, language)

    _report_diff(
        short_id,
        base_commit.commit_id[:8],
        current_surface,
        base_surface,
        language,
        as_json,
    )