gabriel / muse public
query.py python
335 lines 12.0 KB
e35a0a2e feat(code): Phase 4 — metadata_id, canonical_key, composite refactor cl… Gabriel Cardona <cgcardona@gmail.com> 5d ago
1 """muse query — symbol graph predicate query (v2).
2
3 SQL for your codebase. A full predicate DSL over the typed, content-addressed
4 symbol graph — with OR, NOT, grouping, and an expanded field set.
5
6 v2 grammar::
7
8 expr = or_expr
9 or_expr = and_expr ( OR and_expr )*
10 and_expr = not_expr ( [AND] not_expr )* # implicit AND
11 not_expr = NOT primary | primary
12 primary = "(" expr ")" | atom
13 atom = KEY OP VALUE
14
15 Supported operators::
16
17 = exact match
18 ~= contains (case-insensitive)
19 ^= starts with (case-insensitive)
20 $= ends with (case-insensitive)
21 != not equal
22
23 Supported keys::
24
25 kind function | class | method | variable | import | …
26 language Python | Go | Rust | TypeScript | …
27 name bare symbol name
28 qualified_name dotted name (User.save)
29 file file path
30 hash content_id prefix (exact-body match)
31 body_hash body_hash prefix
32 signature_id signature_id prefix
33 lineno_gt symbol starts after line N
34 lineno_lt symbol starts before line N
35
36 Usage::
37
38 muse query "kind=function" "language=Python" "name~=validate"
39 muse query "(kind=function OR kind=method) name^=_"
40 muse query "NOT kind=import" "file~=billing"
41 muse query "hash=a3f2c9"
42 muse query "kind=function" "name$=_test" --commit HEAD~10
43 muse query "kind=function" "name~=validate" --all-commits
44 """
45 from __future__ import annotations
46
47 import json
48 import logging
49 import pathlib
50
51 import typer
52
53 from muse.core.errors import ExitCode
54 from muse.core.repo import require_repo
55 from muse.core.store import CommitRecord, get_all_commits, get_commit_snapshot_manifest, resolve_commit_ref
56 from muse.plugins.code._predicate import Predicate, PredicateError, parse_query
57 from muse.plugins.code._query import language_of, symbols_for_snapshot
58 from muse.plugins.code.ast_parser import SymbolRecord # used in _query_all_commits signature
59
60 logger = logging.getLogger(__name__)
61
62 app = typer.Typer()
63
# Short display labels per symbol kind, used when rendering match rows.
# A trailing "~" marks the async variant of a kind.  Call sites use
# dict.get(kind, kind), so unknown kinds fall back to the raw kind string.
_KIND_ICON: dict[str, str] = {
    "function": "fn",
    "async_function": "fn~",
    "class": "class",
    "method": "method",
    "async_method": "method~",
    "variable": "var",
    "import": "import",
}
73
74
def _read_repo_id(root: pathlib.Path) -> str:
    """Return the repo_id recorded in ``.muse/repo.json`` under *root*."""
    repo_meta = json.loads((root / ".muse" / "repo.json").read_text())
    return str(repo_meta["repo_id"])
77
78
def _read_branch(root: pathlib.Path) -> str:
    """Return the current branch name parsed from ``.muse/HEAD``.

    Strips the ``refs/heads/`` prefix when present; a detached/bare ref is
    returned as-is.
    """
    raw = (root / ".muse" / "HEAD").read_text()
    return raw.strip().removeprefix("refs/heads/").strip()
82
83
84 # Predicate parsing is handled by muse.plugins.code._predicate (v2 grammar).
85
86
class _HistoricalMatch:
    """A predicate match located in a historical commit (``--all-commits``).

    Bundles the symbol address and record together with the commit it was
    found in, plus a flag marking the oldest appearance of the symbol body.
    """

    def __init__(
        self,
        address: str,
        rec: SymbolRecord,
        commit: CommitRecord,
        first_seen: bool,
    ) -> None:
        self.address = address
        self.rec = rec
        self.commit = commit
        # True only on the oldest commit where this body was observed.
        self.first_seen = first_seen

    def to_dict(self) -> dict[str, str | int | bool]:
        """Serialize this match for ``--json`` output."""
        record = self.rec
        commit = self.commit
        payload: dict[str, str | int | bool] = {
            "address": self.address,
            "kind": record["kind"],
            "name": record["name"],
            "content_id": record["content_id"],
            "first_seen": self.first_seen,
            "commit_id": commit.commit_id,
            "commit_message": commit.message,
            "committed_at": commit.committed_at.isoformat(),
            "branch": commit.branch,
        }
        return payload
114
115
def _query_all_commits(
    root: pathlib.Path,
    filters: list[Predicate],
) -> list[_HistoricalMatch]:
    """Scan every recorded commit, oldest first, collecting predicate matches.

    Produces one :class:`_HistoricalMatch` per (address, commit) pair that
    satisfies every predicate.  ``first_seen`` is True on the oldest commit
    in which each ``content_id`` appears (keyed by content_id alone).
    """
    commits = sorted(get_all_commits(root), key=lambda c: c.committed_at)
    if not commits:
        return []

    matches: list[_HistoricalMatch] = []
    # content_id → commit_id of its first appearance (first_seen annotation).
    first_commit_for_content: dict[str, str] = {}

    for commit in commits:
        manifest = _manifest_for_commit(root, commit)
        if not manifest:
            # Missing/corrupt snapshot — nothing to match against.
            continue
        snapshot_symbols = symbols_for_snapshot(root, manifest)
        for path, tree in sorted(snapshot_symbols.items()):
            ordered = sorted(tree.items(), key=lambda kv: kv[1]["lineno"])
            for address, record in ordered:
                if any(not pred(path, record) for pred in filters):
                    continue
                content_id = record["content_id"]
                novel = content_id not in first_commit_for_content
                if novel:
                    first_commit_for_content[content_id] = commit.commit_id
                matches.append(_HistoricalMatch(address, record, commit, novel))

    return matches
151
152
def _manifest_for_commit(
    root: pathlib.Path,
    commit: CommitRecord,
) -> dict[str, str]:
    """Load the snapshot manifest for *commit*, returning ``{}`` on failure.

    Failure covers a missing snapshot file, invalid JSON, a non-object JSON
    top level, or a ``"manifest"`` value that is not a mapping.  Callers
    treat an empty manifest as "skip this commit".
    """
    snap_path = root / ".muse" / "snapshots" / f"{commit.snapshot_id}.json"
    if not snap_path.exists():
        return {}
    try:
        snapshot = json.loads(snap_path.read_text())
        manifest = snapshot.get("manifest", {})
    except (json.JSONDecodeError, OSError, AttributeError):
        # Corrupt JSON, unreadable file, or a top level with no .get()
        # (e.g. a JSON array).  The old code let AttributeError escape.
        return {}
    if not isinstance(manifest, dict):
        # "manifest" present but malformed (e.g. a string or a list).
        return {}
    return dict(manifest)
165
166
@app.callback(invoke_without_command=True)
def query(
    ctx: typer.Context,
    predicates: list[str] = typer.Argument(
        ..., metavar="PREDICATE...",
        help="One or more predicates, e.g. \"kind=function\" \"name~=validate\".",
    ),
    ref: str | None = typer.Option(
        None, "--commit", "-c", metavar="REF",
        help="Query a historical snapshot instead of HEAD.",
    ),
    all_commits: bool = typer.Option(
        False, "--all-commits",
        help=(
            "Search across ALL commits (every branch). "
            "Enables temporal hash= queries: find when a function body first appeared. "
            "Mutually exclusive with --commit."
        ),
    ),
    show_hashes: bool = typer.Option(
        False, "--hashes", help="Include content hashes in output.",
    ),
    as_json: bool = typer.Option(
        False, "--json", help="Emit results as JSON.",
    ),
) -> None:
    """Query the symbol graph with a predicate DSL.

    ``muse query`` is SQL for your codebase. Every predicate is evaluated
    against the typed, content-addressed symbol graph — not raw text.

    Predicate syntax: ``key=value`` (exact), ``key~=value`` (contains),
    ``key^=value`` (starts with), ``key$=value`` (ends with).

    The ``hash`` predicate finds every symbol whose normalized AST matches
    that content hash — duplicate function detection, clone tracking, and
    cross-module copy detection in one query.

    With ``--all-commits``, the query searches every commit ever recorded
    (across all branches), ordered oldest-first. The first time each unique
    ``content_id`` appears is marked. This enables temporal queries:
    "when did this function body first enter the repository?"

    \\b
    Examples::

        muse query "kind=function" "language=Python"
        muse query "hash=a3f2c9"
        muse query "hash=a3f2c9" --all-commits   # when did it first appear?
        muse query "name~=validate" --all-commits --json
    """
    root = require_repo()
    repo_id = _read_repo_id(root)
    branch = _read_branch(root)

    if not predicates:
        typer.echo("❌ At least one predicate is required.", err=True)
        raise typer.Exit(code=ExitCode.USER_ERROR)

    if all_commits and ref is not None:
        typer.echo("❌ --all-commits and --commit are mutually exclusive.", err=True)
        raise typer.Exit(code=ExitCode.USER_ERROR)

    # Parse predicates using the v2 grammar (OR / NOT / grouping supported).
    # Each CLI argument is joined with implicit AND; a single argument may
    # contain OR/NOT/parentheses.
    try:
        combined_predicate: Predicate = parse_query(predicates)
    except PredicateError as exc:
        typer.echo(f"❌ {exc}", err=True)
        # Chain the cause so debugging tools can surface the parse error.
        raise typer.Exit(code=ExitCode.USER_ERROR) from exc
    filters: list[Predicate] = [combined_predicate]

    # ----------------------------------------------------------------
    # --all-commits mode: temporal search across every recorded commit
    # ----------------------------------------------------------------
    if all_commits:
        historical = _query_all_commits(root, filters)
        if as_json:
            typer.echo(json.dumps(
                {
                    "schema_version": 2,
                    "mode": "all-commits",
                    "results": [h.to_dict() for h in historical],
                },
                indent=2,
            ))
            return
        if not historical:
            pred_display = " AND ".join(predicates)
            typer.echo(f" (no symbols matching: {pred_display} [searched all commits])")
            return
        # Deduplicate for display: show unique addresses with their first-seen commit.
        seen_addrs: set[str] = set()
        unique: list[_HistoricalMatch] = []
        for h in historical:
            if h.first_seen and h.address not in seen_addrs:
                seen_addrs.add(h.address)
                unique.append(h)
        pred_display = " AND ".join(predicates)
        typer.echo(f"\n{len(unique)} unique symbol(s) matching [{pred_display}] across all commits\n")
        for h in unique:
            date_str = h.commit.committed_at.strftime("%Y-%m-%d")
            short_id = h.commit.commit_id[:8]
            icon = _KIND_ICON.get(h.rec["kind"], h.rec["kind"])
            hash_part = f" {h.rec['content_id'][:8]}.." if show_hashes else ""
            branch_label = f" [{h.commit.branch}]" if h.commit.branch else ""
            typer.echo(
                f" {h.address:<60} {icon:<8}"
                f" first seen {short_id} {date_str}{branch_label}{hash_part}"
            )
        return

    # ----------------------------------------------------------------
    # Single-snapshot mode (default)
    # ----------------------------------------------------------------
    commit = resolve_commit_ref(root, repo_id, branch, ref)
    if commit is None:
        typer.echo(f"❌ Commit '{ref or 'HEAD'}' not found.", err=True)
        raise typer.Exit(code=ExitCode.USER_ERROR)

    manifest = get_commit_snapshot_manifest(root, commit.commit_id) or {}
    symbol_map = symbols_for_snapshot(root, manifest)

    # Apply all predicates, ordering output by file then line number.
    matches: list[tuple[str, str, SymbolRecord]] = []
    for file_path, tree in sorted(symbol_map.items()):
        for addr, rec in sorted(tree.items(), key=lambda kv: kv[1]["lineno"]):
            if all(f(file_path, rec) for f in filters):
                matches.append((file_path, addr, rec))

    if as_json:
        out: list[dict[str, str | int]] = []
        for fp, addr, rec in matches:
            out.append({
                "address": addr,
                "kind": rec["kind"],
                "name": rec["name"],
                "qualified_name": rec["qualified_name"],
                "file": fp,
                "lineno": rec["lineno"],
                "end_lineno": rec["end_lineno"],
                "language": language_of(fp),
                "content_id": rec["content_id"],
                "body_hash": rec["body_hash"],
                "signature_id": rec["signature_id"],
            })
        typer.echo(json.dumps(
            {"schema_version": 2, "commit": commit.commit_id[:8], "results": out},
            indent=2,
        ))
        return

    if not matches:
        pred_str = " AND ".join(predicates)
        typer.echo(f" (no symbols matching: {pred_str})")
        return

    files_seen: set[str] = set()
    for fp, addr, rec in matches:
        files_seen.add(fp)
        icon = _KIND_ICON.get(rec["kind"], rec["kind"])
        line = rec["lineno"]
        hash_part = f" {rec['content_id'][:8]}.." if show_hashes else ""
        typer.echo(f" {addr:<60} {icon:<10} line {line:>4}{hash_part}")

    pred_display = " AND ".join(predicates)
    typer.echo(f"\n{len(matches)} match(es) across {len(files_seen)} file(s) [{pred_display}]")