gabriel / muse public
symbols.py python
265 lines 8.6 KB
bda49bdb feat: redesign .museignore as TOML with domain-scoped sections (#100) Gabriel Cardona <cgcardona@gmail.com> 5d ago
1 """muse symbols — list every semantic symbol in a snapshot.
2
3 This command is unique to Muse: Git stores files as blobs of text and has no
4 concept of the functions, classes, or methods inside them. ``muse symbols``
5 exposes the *semantic interior* of every source file in a commit — the full
6 symbol graph that the code plugin builds at commit time.
7
8 Output (default — human-readable table)::
9
10 src/utils.py
11 function calculate_total line 12 a3f2c9..
12 function _validate_amount line 28 cb4afa..
13 class Invoice line 45 1d2e3f..
14 method Invoice.to_dict line 52 4a5b6c..
15 method Invoice.from_dict line 61 7d8e9f..
16
17 src/models.py
18 class User line 8 b1c2d3..
19 method User.__init__ line 10 e4f5a6..
20 method User.save line 19 b7c8d9..
21
22 12 symbols across 2 files (Python: 12)
23
24 Flags:
25
26 ``--commit <ref>``
27 Inspect a specific commit instead of HEAD.
28
29 ``--kind <kind>``
30 Filter to symbols of a specific kind (``function``, ``async_function``,
31 ``class``, ``method``, ``async_method``, ``variable``, ``import``).
32
33 ``--file <path>``
34 Show symbols from a single file only.
35
36 ``--count``
37 Print only the total symbol count and per-language breakdown.
38
39 ``--json``
40 Emit the full symbol table as JSON for tooling integration.
41 """
42
43 from __future__ import annotations
44
45 import json
46 import logging
47 import pathlib
48 from typing import Literal
49
50 import typer
51
52 from muse.core.errors import ExitCode
53 from muse.core.object_store import read_object
54 from muse.core.repo import require_repo
55 from muse.core.store import (
56 get_commit_snapshot_manifest,
57 read_commit,
58 resolve_commit_ref,
59 )
60 from muse.plugins.code.ast_parser import (
61 SEMANTIC_EXTENSIONS,
62 SymbolRecord,
63 SymbolTree,
64 parse_symbols,
65 )
66
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)

# Typer application object; the ``symbols`` function is registered on it
# through ``@app.callback(invoke_without_command=True)``.
app = typer.Typer()
70
71 _KindFilter = Literal[
72 "function", "async_function", "class", "method", "async_method",
73 "variable", "import",
74 ]
75
76 _KIND_ICON: dict[str, str] = {
77 "function": "fn",
78 "async_function": "fn~",
79 "class": "class",
80 "method": "method",
81 "async_method": "method~",
82 "variable": "var",
83 "import": "import",
84 }
85
86
87 def _read_repo_id(root: pathlib.Path) -> str:
88 return str(json.loads((root / ".muse" / "repo.json").read_text())["repo_id"])
89
90
91 def _read_branch(root: pathlib.Path) -> str:
92 head_ref = (root / ".muse" / "HEAD").read_text().strip()
93 return head_ref.removeprefix("refs/heads/").strip()
94
95
def _is_semantic(file_path: str) -> bool:
    """Tell whether *file_path* has an extension listed in ``SEMANTIC_EXTENSIONS``.

    The comparison uses the lower-cased final suffix of the path, so the
    check is case-insensitive with respect to the path's own casing.
    """
    extension = pathlib.PurePosixPath(file_path).suffix
    return extension.lower() in SEMANTIC_EXTENSIONS
99
100
def _symbols_for_snapshot(
    root: pathlib.Path,
    manifest: dict[str, str],
    kind_filter: str | None,
    file_filter: str | None,
) -> dict[str, SymbolTree]:
    """Parse every semantic file of *manifest* into its symbol tree.

    Args:
        root: Repository root handed to ``read_object``.
        manifest: Mapping of file path → object ID for one snapshot.
        kind_filter: When truthy, keep only records whose ``kind`` equals it.
        file_filter: When truthy, consider only the path equal to it.

    Returns:
        A dict of file path → symbol tree, visited in sorted path order.
        Files whose tree ends up empty (after filtering) are left out.
    """
    trees: dict[str, SymbolTree] = {}

    for path in sorted(manifest):
        wanted = _is_semantic(path) and (not file_filter or path == file_filter)
        if not wanted:
            continue

        object_id = manifest[path]
        blob = read_object(root, object_id)
        if blob is None:
            # The object store has no entry for this ID: skip the file
            # rather than abort the whole listing.
            logger.debug("Object %s missing from store — skipping %s", object_id[:8], path)
            continue

        symbols_in_file = parse_symbols(blob, path)
        if kind_filter:
            symbols_in_file = {
                address: record
                for address, record in symbols_in_file.items()
                if record["kind"] == kind_filter
            }

        if symbols_in_file:
            trees[path] = symbols_in_file

    return trees
127
128
129 def _language_of(file_path: str) -> str:
130 suffix = pathlib.PurePosixPath(file_path).suffix.lower()
131 _SUFFIX_LANG: dict[str, str] = {
132 ".py": "Python", ".pyi": "Python",
133 ".ts": "TypeScript", ".tsx": "TypeScript",
134 ".js": "JavaScript", ".jsx": "JavaScript",
135 ".mjs": "JavaScript", ".cjs": "JavaScript",
136 ".go": "Go",
137 ".rs": "Rust",
138 ".java": "Java",
139 ".cs": "C#",
140 ".c": "C", ".h": "C",
141 ".cpp": "C++", ".cc": "C++", ".cxx": "C++", ".hpp": "C++", ".hxx": "C++",
142 ".rb": "Ruby",
143 ".kt": "Kotlin", ".kts": "Kotlin",
144 }
145 return _SUFFIX_LANG.get(suffix, suffix)
146
147
def _print_human(
    symbol_map: dict[str, SymbolTree],
    show_hashes: bool,
) -> None:
    """Write the human-readable symbol listing via ``typer.echo``.

    For each file: a blank line plus the path, then one row per symbol in
    ascending ``lineno`` order. A final line reports the totals per language.
    An empty *symbol_map* produces only a "no semantic symbols" notice.

    Args:
        symbol_map: File path → symbol tree, as built by
            ``_symbols_for_snapshot``.
        show_hashes: Append the first 8 characters of each record's
            ``content_id`` to its row.
    """
    if not symbol_map:
        typer.echo(" (no semantic symbols found)")
        return

    per_language: dict[str, int] = {}
    symbol_total = 0

    for path, symbols_in_file in symbol_map.items():
        file_count = len(symbols_in_file)
        language = _language_of(path)
        per_language[language] = per_language.get(language, 0) + file_count
        symbol_total += file_count

        typer.echo(f"\n{path}")
        # Sorting the values is enough: the address key is not displayed.
        for record in sorted(symbols_in_file.values(), key=lambda rec: rec["lineno"]):
            # Kinds without a short label are shown under their own name.
            label = _KIND_ICON.get(record["kind"], record["kind"])
            qualified = record["qualified_name"]
            lineno = record["lineno"]
            row = f" {label:<10} {qualified:<40} line {lineno:>4}"
            if show_hashes:
                row += f" {record['content_id'][:8]}.."
            typer.echo(row)

    breakdown = " ".join(
        f"{language}: {count}" for language, count in sorted(per_language.items())
    )
    typer.echo(f"\n{symbol_total} symbol(s) across {len(symbol_map)} file(s) ({breakdown})")
174
175
def _json_entry(address: str, record: SymbolRecord) -> dict[str, str | int]:
    """Build the JSON object emitted for one symbol record."""
    return {
        "address": address,
        "kind": record["kind"],
        "name": record["name"],
        "qualified_name": record["qualified_name"],
        "lineno": record["lineno"],
        "end_lineno": record["end_lineno"],
        "content_id": record["content_id"],
        "body_hash": record["body_hash"],
        "signature_id": record["signature_id"],
    }


def _emit_json(symbol_map: dict[str, SymbolTree]) -> None:
    """Write *symbol_map* to stdout as a 2-space-indented JSON document.

    The document maps each file path to a list of symbol objects ordered by
    ascending ``lineno``; each object carries the symbol's address plus the
    fields copied by ``_json_entry``.
    """
    payload: dict[str, list[dict[str, str | int]]] = {}
    for path, tree in symbol_map.items():
        by_line = sorted(tree.items(), key=lambda item: item[1]["lineno"])
        payload[path] = [_json_entry(address, record) for address, record in by_line]
    typer.echo(json.dumps(payload, indent=2))
194
195
@app.callback(invoke_without_command=True)
def symbols(
    ctx: typer.Context,
    ref: str | None = typer.Option(
        None, "--commit", "-c", metavar="REF",
        help="Commit ID or branch to inspect (default: HEAD).",
    ),
    kind_filter: str | None = typer.Option(
        None, "--kind", "-k", metavar="KIND",
        # Keep this list in step with ``_KindFilter`` / ``_KIND_ICON``.
        help="Filter to symbols of a specific kind "
        "(function, async_function, class, method, async_method, variable, import).",
    ),
    file_filter: str | None = typer.Option(
        None, "--file", "-f", metavar="PATH",
        help="Show symbols from a single file only.",
    ),
    count_only: bool = typer.Option(
        False, "--count", help="Print only the total count and language breakdown.",
    ),
    show_hashes: bool = typer.Option(
        False, "--hashes", help="Include content hashes in the output.",
    ),
    as_json: bool = typer.Option(
        False, "--json", help="Emit the full symbol table as JSON.",
    ),
) -> None:
    """List every semantic symbol (function, class, method…) in a snapshot.

    Unlike ``git grep`` or ``ctags``, ``muse symbols`` reads the semantic
    symbol graph produced by the domain plugin's AST analysis — stable,
    content-addressed identities for every symbol, independent of line numbers
    or formatting.

    Use ``--commit <ref>`` to inspect a historical snapshot. Use ``--kind``
    and ``--file`` to narrow the output. Use ``--json`` for tooling
    integration.
    """
    # NOTE: the docstring above is left untouched on purpose — Typer uses the
    # callback docstring as the command's help text.
    root = require_repo()
    repo_id = _read_repo_id(root)
    branch = _read_branch(root)

    # Resolve the requested ref; a missing commit ends the command with
    # a user error.
    commit = resolve_commit_ref(root, repo_id, branch, ref)
    if commit is None:
        label = ref or "HEAD"
        typer.echo(f"❌ Commit '{label}' not found.", err=True)
        raise typer.Exit(code=ExitCode.USER_ERROR)

    # A missing manifest and an empty one are both reported as "no files".
    manifest = get_commit_snapshot_manifest(root, commit.commit_id)
    if not manifest:
        typer.echo(f"❌ Snapshot for commit {commit.commit_id[:8]} has no files.", err=True)
        raise typer.Exit(code=ExitCode.USER_ERROR)

    symbol_map = _symbols_for_snapshot(root, manifest, kind_filter, file_filter)

    # Output mode precedence: --count, then --json, then the default table.
    if count_only:
        lang_counts: dict[str, int] = {}
        for file_path, tree in symbol_map.items():
            lang = _language_of(file_path)
            lang_counts[lang] = lang_counts.get(lang, 0) + len(tree)
        total = sum(lang_counts.values())
        lang_str = " ".join(f"{lang}: {count}" for lang, count in sorted(lang_counts.items()))
        typer.echo(f"{total} symbol(s) ({lang_str})")
    elif as_json:
        _emit_json(symbol_map)
    else:
        typer.echo(f'commit {commit.commit_id[:8]} "{commit.message}"')
        _print_human(symbol_map, show_hashes)