symbols.py
python
| 1 | """muse symbols — list every semantic symbol in a snapshot. |
| 2 | |
| 3 | This command is unique to Muse: Git stores files as blobs of text and has no |
| 4 | concept of the functions, classes, or methods inside them. ``muse symbols`` |
| 5 | exposes the *semantic interior* of every source file in a commit — the full |
| 6 | symbol graph that the code plugin builds at commit time. |
| 7 | |
| 8 | Output (default — human-readable table):: |
| 9 | |
| 10 | src/utils.py |
| 11 | function calculate_total line 12 a3f2c9.. |
| 12 | function _validate_amount line 28 cb4afa.. |
| 13 | class Invoice line 45 1d2e3f.. |
| 14 | method Invoice.to_dict line 52 4a5b6c.. |
| 15 | method Invoice.from_dict line 61 7d8e9f.. |
| 16 | |
| 17 | src/models.py |
| 18 | class User line 8 b1c2d3.. |
| 19 | method User.__init__ line 10 e4f5a6.. |
| 20 | method User.save line 19 b7c8d9.. |
| 21 | |
| 22 | 8 symbol(s) across 2 file(s) (Python: 8) |
| 23 | |
| 24 | Flags: |
| 25 | |
| 26 | ``--commit <ref>`` |
| 27 | Inspect a specific commit instead of HEAD. |
| 28 | |
| 29 | ``--kind <kind>`` |
| 30 | Filter to symbols of a specific kind (``function``, ``async_function``, |
| 31 | ``class``, ``method``, ``async_method``, ``variable``, ``import``). |
| 32 | |
| 33 | ``--file <path>`` |
| 34 | Show symbols from a single file only. |
| 35 | |
| 36 | ``--count`` |
| 37 | Print only the total symbol count and per-language breakdown. |
| 38 | |
| 39 | ``--json`` |
| 40 | Emit the full symbol table as JSON for tooling integration. |
| 41 | """ |
| 42 | |
| 43 | from __future__ import annotations |
| 44 | |
| 45 | import json |
| 46 | import logging |
| 47 | import pathlib |
| 48 | from typing import Literal |
| 49 | |
| 50 | import typer |
| 51 | |
| 52 | from muse.core.errors import ExitCode |
| 53 | from muse.core.object_store import read_object |
| 54 | from muse.core.repo import require_repo |
| 55 | from muse.core.store import ( |
| 56 | get_commit_snapshot_manifest, |
| 57 | read_commit, |
| 58 | read_current_branch, |
| 59 | resolve_commit_ref, |
| 60 | ) |
| 61 | from muse.plugins.code.ast_parser import ( |
| 62 | SEMANTIC_EXTENSIONS, |
| 63 | SymbolRecord, |
| 64 | SymbolTree, |
| 65 | parse_symbols, |
| 66 | ) |
| 67 | |
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Typer application object; the ``symbols`` callback is registered on it.
app = typer.Typer()

# Literal type enumerating the symbol kind names known to this module.
# NOTE(review): not referenced anywhere else in the visible part of this file.
_KindFilter = Literal[
    "function", "async_function", "class", "method", "async_method",
    "variable", "import",
]

# Short label printed for each symbol kind in the human-readable listing.
# ``_print_human`` falls back to the raw kind string for kinds missing here.
_KIND_ICON: dict[str, str] = {
    "function": "fn",
    "async_function": "fn~",
    "class": "class",
    "method": "method",
    "async_method": "method~",
    "variable": "var",
    "import": "import",
}
| 86 | |
| 87 | |
| 88 | def _read_repo_id(root: pathlib.Path) -> str: |
| 89 | return str(json.loads((root / ".muse" / "repo.json").read_text())["repo_id"]) |
| 90 | |
| 91 | |
def _read_branch(root: pathlib.Path) -> str:
    """Return the current branch of the repository rooted at *root*.

    Delegates to ``read_current_branch`` from ``muse.core.store``.
    """
    current_branch = read_current_branch(root)
    return current_branch
| 94 | |
| 95 | |
def _is_semantic(file_path: str) -> bool:
    """Report whether *file_path*'s extension is listed in ``SEMANTIC_EXTENSIONS``.

    The extension is lower-cased before the membership test.
    """
    return pathlib.PurePosixPath(file_path).suffix.lower() in SEMANTIC_EXTENSIONS
| 99 | |
| 100 | |
def _symbols_for_snapshot(
    root: pathlib.Path,
    manifest: dict[str, str],
    kind_filter: str | None,
    file_filter: str | None,
) -> dict[str, SymbolTree]:
    """Build the per-file symbol trees for a snapshot *manifest*.

    *manifest* maps file paths to object IDs and is walked in sorted path
    order.  A file contributes only when it has a semantic extension, equals
    *file_filter* (exact path comparison) if one is given, and its object can
    be read from the store.  When *kind_filter* is given, only records of
    that kind are kept.

    Returns a dict mapping file_path → SymbolTree; files left with no symbols
    are omitted.
    """
    trees: dict[str, SymbolTree] = {}
    for path in sorted(manifest):
        if not _is_semantic(path) or (file_filter and path != file_filter):
            continue

        oid = manifest[path]
        blob = read_object(root, oid)
        if blob is None:
            # A missing object is skipped rather than treated as an error.
            logger.debug("Object %s missing from store — skipping %s", oid[:8], path)
            continue

        symbols = parse_symbols(blob, path)
        if kind_filter:
            symbols = {
                address: record
                for address, record in symbols.items()
                if record["kind"] == kind_filter
            }
        if not symbols:
            continue
        trees[path] = symbols
    return trees
| 127 | |
| 128 | |
| 129 | def _language_of(file_path: str) -> str: |
| 130 | suffix = pathlib.PurePosixPath(file_path).suffix.lower() |
| 131 | _SUFFIX_LANG: dict[str, str] = { |
| 132 | ".py": "Python", ".pyi": "Python", |
| 133 | ".ts": "TypeScript", ".tsx": "TypeScript", |
| 134 | ".js": "JavaScript", ".jsx": "JavaScript", |
| 135 | ".mjs": "JavaScript", ".cjs": "JavaScript", |
| 136 | ".go": "Go", |
| 137 | ".rs": "Rust", |
| 138 | ".java": "Java", |
| 139 | ".cs": "C#", |
| 140 | ".c": "C", ".h": "C", |
| 141 | ".cpp": "C++", ".cc": "C++", ".cxx": "C++", ".hpp": "C++", ".hxx": "C++", |
| 142 | ".rb": "Ruby", |
| 143 | ".kt": "Kotlin", ".kts": "Kotlin", |
| 144 | } |
| 145 | return _SUFFIX_LANG.get(suffix, suffix) |
| 146 | |
| 147 | |
def _print_human(
    symbol_map: dict[str, SymbolTree],
    show_hashes: bool,
) -> None:
    """Print *symbol_map* as a per-file listing followed by a summary line.

    Each file path is printed on its own line, followed by one line per
    symbol ordered by starting line: kind label, qualified name, line number
    and, when *show_hashes* is true, the first eight characters of the
    symbol's ``content_id``.  The closing summary gives the total symbol
    count, the file count and a per-language breakdown.  An empty map prints
    only a "no semantic symbols found" notice.
    """
    if not symbol_map:
        typer.echo(" (no semantic symbols found)")
        return

    per_language: dict[str, int] = {}
    for path, symbols in symbol_map.items():
        language = _language_of(path)
        per_language[language] = per_language.get(language, 0) + len(symbols)

        typer.echo(f"\n{path}")
        for record in sorted(symbols.values(), key=lambda r: r["lineno"]):
            kind = record["kind"]
            # Kinds without a short label are shown under their raw name.
            label = _KIND_ICON.get(kind, kind)
            qualified = record["qualified_name"]
            lineno = record["lineno"]
            suffix = ""
            if show_hashes:
                suffix = f" {record['content_id'][:8]}.."
            typer.echo(f" {label:<10} {qualified:<40} line {lineno:>4}{suffix}")

    grand_total = sum(per_language.values())
    breakdown = " ".join(
        f"{language}: {count}" for language, count in sorted(per_language.items())
    )
    typer.echo(f"\n{grand_total} symbol(s) across {len(symbol_map)} file(s) ({breakdown})")
| 174 | |
| 175 | |
def _emit_json(symbol_map: dict[str, SymbolTree]) -> None:
    """Echo *symbol_map* as a JSON document indented by two spaces.

    The document maps each file path to a list of symbol entries ordered by
    starting line.  Every entry carries the symbol's address together with
    its kind, names, line span and hash identifiers.
    """
    payload: dict[str, list[dict[str, str | int]]] = {
        path: [
            {
                "address": address,
                "kind": record["kind"],
                "name": record["name"],
                "qualified_name": record["qualified_name"],
                "lineno": record["lineno"],
                "end_lineno": record["end_lineno"],
                "content_id": record["content_id"],
                "body_hash": record["body_hash"],
                "signature_id": record["signature_id"],
            }
            for address, record in sorted(
                symbols.items(), key=lambda item: item[1]["lineno"]
            )
        ]
        for path, symbols in symbol_map.items()
    }
    typer.echo(json.dumps(payload, indent=2))
| 194 | |
| 195 | |
@app.callback(invoke_without_command=True)
def symbols(
    ctx: typer.Context,
    ref: str | None = typer.Option(
        None, "--commit", "-c", metavar="REF",
        help="Commit ID or branch to inspect (default: HEAD).",
    ),
    kind_filter: str | None = typer.Option(
        None, "--kind", "-k", metavar="KIND",
        # The list mirrors the kinds in ``_KindFilter`` / ``_KIND_ICON``.
        help="Filter to symbols of a specific kind "
        "(function, async_function, class, method, async_method, variable, import).",
    ),
    file_filter: str | None = typer.Option(
        None, "--file", "-f", metavar="PATH",
        help="Show symbols from a single file only.",
    ),
    count_only: bool = typer.Option(
        False, "--count", help="Print only the total count and language breakdown.",
    ),
    show_hashes: bool = typer.Option(
        False, "--hashes", help="Include content hashes in the output.",
    ),
    as_json: bool = typer.Option(
        False, "--json", help="Emit the full symbol table as JSON.",
    ),
) -> None:
    """List every semantic symbol (function, class, method…) in a snapshot.

    Unlike ``git grep`` or ``ctags``, ``muse symbols`` reads the semantic
    symbol graph produced by the domain plugin's AST analysis — stable,
    content-addressed identities for every symbol, independent of line numbers
    or formatting.

    Use ``--commit <ref>`` to inspect a historical snapshot.  Use ``--kind``
    and ``--file`` to narrow the output.  Use ``--json`` for tooling
    integration.
    """
    root = require_repo()
    repo_id = _read_repo_id(root)
    branch = _read_branch(root)

    # A ``None`` result means the ref could not be resolved; report and exit.
    commit = resolve_commit_ref(root, repo_id, branch, ref)
    if commit is None:
        label = ref or "HEAD"
        typer.echo(f"❌ Commit '{label}' not found.", err=True)
        raise typer.Exit(code=ExitCode.USER_ERROR)

    # A missing manifest and an empty one are both reported as "no files".
    manifest = get_commit_snapshot_manifest(root, commit.commit_id) or {}
    if not manifest:
        typer.echo(f"❌ Snapshot for commit {commit.commit_id[:8]} has no files.", err=True)
        raise typer.Exit(code=ExitCode.USER_ERROR)

    symbol_map = _symbols_for_snapshot(root, manifest, kind_filter, file_filter)

    # ``--count`` is checked first, so it takes precedence over ``--json``.
    if count_only:
        total = sum(len(t) for t in symbol_map.values())
        lang_counts: dict[str, int] = {}
        for file_path, tree in symbol_map.items():
            lang = _language_of(file_path)
            lang_counts[lang] = lang_counts.get(lang, 0) + len(tree)
        lang_str = " ".join(f"{lang}: {count}" for lang, count in sorted(lang_counts.items()))
        typer.echo(f"{total} symbol(s) ({lang_str})")
        return

    if as_json:
        _emit_json(symbol_map)
        return

    # Default output: a commit header line followed by the per-file listing.
    typer.echo(f'commit {commit.commit_id[:8]} "{commit.message}"')
    _print_human(symbol_map, show_hashes)