cgcardona / muse public
grep.py python
168 lines 5.8 KB
bda49bdb feat: redesign .museignore as TOML with domain-scoped sections (#100) Gabriel Cardona <cgcardona@gmail.com> 1d ago
1 """muse grep — semantic symbol search across the symbol graph.
2
3 Unlike ``git grep`` which searches raw text lines, ``muse grep`` searches
4 the *typed symbol graph* — only returning actual symbol declarations with
5 their kind, file, line number, and stable content hash.
6
7 No false positives from comments, string literals, or call sites. Every
8 result is a real symbol that exists in the repository.
9
10 Usage::
11
12 muse grep "validate" # all symbols whose name contains "validate"
13 muse grep "^handle" --regex # names matching regex "^handle"
14 muse grep "Invoice" --kind class # only class symbols
15 muse grep "compute" --language Go # only Go symbols
16 muse grep "total" --commit HEAD~5 # search a historical snapshot
17
Output (content hashes are shown only with ``--hashes``)::

    src/billing.py::validate_amount   fn       line    8  a3f2c9d1..
    src/auth.py::validate_token       fn       line   14  cb4afa07..
    src/auth.py::Validator            class    line   22  1d2e3f90..
    src/auth.py::Validator.validate   method   line   28  4a5b6c3e..

    4 match(es) across 2 file(s)
26 """
27
28 from __future__ import annotations
29
30 import json
31 import logging
32 import pathlib
33 import re
34
35 import typer
36
37 from muse.core.errors import ExitCode
38 from muse.core.repo import require_repo
39 from muse.core.store import get_commit_snapshot_manifest, resolve_commit_ref
40 from muse.plugins.code._query import language_of, symbols_for_snapshot
41 from muse.plugins.code.ast_parser import SymbolRecord
42
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Typer application object; the ``grep`` callback is registered on it.
app = typer.Typer()

# Short display labels for symbol kinds in the plain-text output.
# A kind with no entry here is printed under its raw kind name, because the
# lookup in ``grep`` falls back to the kind itself.
_KIND_ICON: dict[str, str] = {
    "function": "fn",
    "async_function": "fn~",
    "class": "class",
    "method": "method",
    "async_method": "method~",
    "variable": "var",
    "import": "import",
}
56
57
def _read_repo_id(root: pathlib.Path) -> str:
    """Return the ``repo_id`` stored in ``<root>/.muse/repo.json`` as a string.

    The file is decoded explicitly as UTF-8 (the encoding JSON is defined in)
    rather than with the platform's locale encoding, so the result does not
    depend on the machine the command runs on.

    Args:
        root: Repository root directory containing the ``.muse`` folder.

    Returns:
        The ``repo_id`` value, coerced to ``str``.

    Raises:
        FileNotFoundError: If ``repo.json`` does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the document has no ``repo_id`` key.
    """
    repo_json = root / ".muse" / "repo.json"
    metadata = json.loads(repo_json.read_text(encoding="utf-8"))
    return str(metadata["repo_id"])
60
61
def _read_branch(root: pathlib.Path) -> str:
    """Return the branch name recorded in ``<root>/.muse/HEAD``.

    The file content is stripped of surrounding whitespace, a leading
    ``refs/heads/`` prefix is dropped when present, and the remainder is
    stripped once more before being returned.
    """
    head_file = root / ".muse" / "HEAD"
    raw_ref = head_file.read_text().strip()
    branch_name = raw_ref.removeprefix("refs/heads/")
    return branch_name.strip()
65
66
@app.callback(invoke_without_command=True)
def grep(
    ctx: typer.Context,
    pattern: str = typer.Argument(..., metavar="PATTERN", help="Name pattern to search for."),
    use_regex: bool = typer.Option(
        False, "--regex", "-e",
        help="Treat PATTERN as a regular expression (default: substring match).",
    ),
    kind_filter: str | None = typer.Option(
        None, "--kind", "-k", metavar="KIND",
        help="Restrict to symbols of this kind (function, class, method, …).",
    ),
    language_filter: str | None = typer.Option(
        None, "--language", "-l", metavar="LANG",
        help="Restrict to symbols from files of this language (Python, Go, …).",
    ),
    ref: str | None = typer.Option(
        None, "--commit", "-c", metavar="REF",
        help="Search a historical commit instead of HEAD.",
    ),
    show_hashes: bool = typer.Option(
        False, "--hashes", help="Include content hashes in output.",
    ),
    as_json: bool = typer.Option(
        False, "--json", help="Emit results as JSON.",
    ),
) -> None:
    """Search the symbol graph by name — not file text.

    ``muse grep`` searches the typed, content-addressed symbol graph.
    Every result is a real symbol declaration — no false positives from
    comments, string literals, or call sites.

    The ``--regex`` flag enables full Python regex syntax. Without it,
    PATTERN is matched as a plain substring of the symbol name. Matching
    is case-insensitive in both modes.

    The ``--hashes`` flag adds the 8-character content-ID prefix to each
    result, enabling downstream filtering by identity (e.g. find clones
    with ``muse query hash=<prefix>``).
    """
    root = require_repo()
    repo_id = _read_repo_id(root)
    branch = _read_branch(root)

    # ``ref`` is None when --commit was not given; the error message below
    # reports that case as 'HEAD'.
    commit = resolve_commit_ref(root, repo_id, branch, ref)
    if commit is None:
        typer.echo(f"❌ Commit '{ref or 'HEAD'}' not found.", err=True)
        raise typer.Exit(code=ExitCode.USER_ERROR)

    # A missing manifest is treated as an empty snapshot rather than an error.
    manifest = get_commit_snapshot_manifest(root, commit.commit_id) or {}

    # Substring mode is implemented by escaping PATTERN, so both modes share
    # a single compiled, case-insensitive regex.
    try:
        regex = re.compile(
            pattern if use_regex else re.escape(pattern), re.IGNORECASE
        )
    except re.error as exc:
        typer.echo(f"❌ Invalid regex pattern: {exc}", err=True)
        raise typer.Exit(code=ExitCode.USER_ERROR) from exc

    # Kind and language filtering is delegated to the snapshot query helper.
    symbol_map = symbols_for_snapshot(
        root, manifest,
        kind_filter=kind_filter,
        language_filter=language_filter,
    )

    # Filter by name pattern: files in sorted path order, symbols within a
    # file in line-number order.
    matches: list[tuple[str, str, SymbolRecord]] = []
    for file_path, tree in sorted(symbol_map.items()):
        for addr, rec in sorted(tree.items(), key=lambda kv: kv[1]["lineno"]):
            if regex.search(rec["name"]):
                matches.append((file_path, addr, rec))

    if as_json:
        # JSON mode always emits a list, which is empty when nothing matched.
        out: list[dict[str, str | int]] = []
        for _fp, addr, rec in matches:
            # The file is the part of the address before the first "::".
            addr_file = addr.split("::")[0]
            out.append({
                "address": addr,
                "kind": rec["kind"],
                "name": rec["name"],
                "qualified_name": rec["qualified_name"],
                "file": addr_file,
                "lineno": rec["lineno"],
                "language": language_of(addr_file),
                "content_id": rec["content_id"],
            })
        typer.echo(json.dumps(out, indent=2))
        return

    if not matches:
        typer.echo(f" (no symbols matching '{pattern}')")
        return

    files_seen: set[str] = set()
    for file_path, addr, rec in matches:
        files_seen.add(file_path)
        # Kinds without a short label are shown under their raw kind name.
        icon = _KIND_ICON.get(rec["kind"], rec["kind"])
        line = rec["lineno"]
        hash_part = f" {rec['content_id'][:8]}.." if show_hashes else ""
        typer.echo(f" {addr:<60} {icon:<10} line {line:>4}{hash_part}")

    typer.echo(f"\n{len(matches)} match(es) across {len(files_seen)} file(s)")