cgcardona / muse public
query_history.py python
244 lines 8.6 KB
d855b718 refactor: strip phase/v2 workflow labels from all source, tests, and docs Gabriel Cardona <cgcardona@gmail.com> 1d ago
1 """muse query-history — temporal symbol search across commit history.
2
3 Searches the commit history for symbols matching a predicate expression,
4 bounded by a commit range. Unlike ``muse query --all-commits``, this command
5 is focused on *change events* — it shows when each symbol first appeared, when
6 it was last seen, how many commits it survived, and what changes occurred.
7
8 It answers questions that are impossible in Git:
9
10 * "Find all public Python functions introduced after tag v1.0"
11 * "Show me every class whose signature changed in the last 50 commits"
12 * "Which functions were present in tag v1.0 but are gone in tag v2.0?"
13 * "Find methods renamed between two refs"
14
15 Usage::
16
17 muse query-history "kind=function" "language=Python"
18 muse query-history "name~=validate" --from v1.0 --to HEAD
19 muse query-history "kind=class" --from abc12345
20 muse query-history "file~=billing" "kind=function" --json
21
22 Output::
23
24 Symbol history — kind=function language=Python (42 commits)
25 ──────────────────────────────────────────────────────────────
26
27 src/billing.py::compute_total function [12 commits] 2026-01-01..2026-03-10
28 src/billing.py::compute_tax function [ 8 commits] 2026-01-15..2026-03-10
29 └─ introduced: a1b2c3d4 2026-01-15
30 └─ last seen: f7a8b9c0 2026-03-10
31
32 Flags:
33
34 ``--from REF``
35 Start of the commit range (exclusive; default: initial commit).
36
37 ``--to REF``
38 End of the commit range (inclusive; default: HEAD).
39
40 ``--json``
41 Emit results as JSON.
42 """
43 from __future__ import annotations
44
45 import json
46 import logging
47 import pathlib
48
49 import typer
50
51 from muse.core.errors import ExitCode
52 from muse.core.repo import require_repo
53 from muse.core.store import (
54 get_commit_snapshot_manifest,
55 resolve_commit_ref,
56 walk_commits_between,
57 )
58 from muse.plugins.code._predicate import PredicateError, parse_query
59 from muse.plugins.code._query import language_of, symbols_for_snapshot
60 from muse.plugins.code.ast_parser import SymbolRecord
61
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)

# Typer application for this command. ``query_history`` is attached to it
# further down with ``@app.callback(invoke_without_command=True)``.
app = typer.Typer()
65
66
def _read_repo_id(root: pathlib.Path) -> str:
    """Return the repository id stored in ``<root>/.muse/repo.json``.

    The file is decoded explicitly as UTF-8 (the JSON interchange encoding)
    instead of the platform's locale encoding, so the result does not depend
    on the machine the command runs on.

    Args:
        root: Repository root directory (the one containing ``.muse``).

    Returns:
        The value of the ``repo_id`` key, coerced to ``str``.

    Raises:
        FileNotFoundError: ``repo.json`` does not exist.
        UnicodeDecodeError: ``repo.json`` is not valid UTF-8.
        json.JSONDecodeError: ``repo.json`` is not valid JSON.
        KeyError: The JSON object has no ``repo_id`` key.
    """
    repo_json = root / ".muse" / "repo.json"
    metadata = json.loads(repo_json.read_text(encoding="utf-8"))
    return str(metadata["repo_id"])
69
70
def _read_branch(root: pathlib.Path) -> str:
    """Return the branch name recorded in ``<root>/.muse/HEAD``.

    The file is decoded explicitly as UTF-8 so that branch names containing
    non-ASCII characters are read identically on every platform, rather than
    through the locale's default encoding.

    Args:
        root: Repository root directory (the one containing ``.muse``).

    Returns:
        The content of ``HEAD`` with surrounding whitespace removed and a
        leading ``refs/heads/`` prefix dropped when present.  Content that
        does not start with that prefix is returned as-is (stripped).

    Raises:
        FileNotFoundError: ``HEAD`` does not exist.
        UnicodeDecodeError: ``HEAD`` is not valid UTF-8.
    """
    head_ref = (root / ".muse" / "HEAD").read_text(encoding="utf-8").strip()
    return head_ref.removeprefix("refs/heads/").strip()
74
75
class _SymbolHistory:
    """Running summary of one symbol's sightings across a commit range.

    Each call to :meth:`record` registers one commit in which the symbol was
    present.  The instance keeps the first and most recent sighting, the
    number of sightings, and the set of distinct content ids observed.
    """

    # Slice widths applied when serialising in :meth:`to_dict`.
    _SHORT_ID_LEN = 8   # leading characters kept from a commit id
    _DATE_LEN = 10      # leading characters kept from a timestamp string

    def __init__(self, address: str, kind: str, language: str) -> None:
        self.address = address
        self.kind = kind
        self.language = language
        # Empty strings / zero / empty set mean "nothing recorded yet".
        self.first_commit_id: str = ""
        self.first_committed_at: str = ""
        self.last_commit_id: str = ""
        self.last_committed_at: str = ""
        self.commit_count: int = 0
        self.content_ids: set[str] = set()

    @property
    def change_count(self) -> int:
        """Count of distinct content ids recorded; 1 means the symbol never changed."""
        return len(self.content_ids)

    def record(self, commit_id: str, committed_at: str, content_id: str) -> None:
        """Register one sighting of the symbol.

        The "first" fields are filled in for as long as ``first_commit_id``
        is still empty; the "last" fields are overwritten on every call.
        """
        if not self.first_commit_id:
            self.first_commit_id, self.first_committed_at = commit_id, committed_at
        self.last_commit_id, self.last_committed_at = commit_id, committed_at
        self.content_ids.add(content_id)
        self.commit_count += 1

    def to_dict(self) -> dict[str, str | int]:
        """Return a flat summary with shortened commit ids and timestamps."""
        id_len = self._SHORT_ID_LEN
        date_len = self._DATE_LEN
        summary: dict[str, str | int] = {
            "address": self.address,
            "kind": self.kind,
            "language": self.language,
            "commit_count": self.commit_count,
            "change_count": self.change_count,
        }
        summary["first_commit_id"] = self.first_commit_id[:id_len]
        summary["first_committed_at"] = self.first_committed_at[:date_len]
        summary["last_commit_id"] = self.last_commit_id[:id_len]
        summary["last_committed_at"] = self.last_committed_at[:date_len]
        return summary
116
117
@app.callback(invoke_without_command=True)
def query_history(
    ctx: typer.Context,
    predicates: list[str] = typer.Argument(
        ...,
        metavar="PREDICATE...",
        help='One or more predicates, e.g. "kind=function" "language=Python".',
    ),
    from_ref: str | None = typer.Option(
        None,
        "--from",
        metavar="REF",
        help="Start of range (exclusive; default: initial commit).",
    ),
    to_ref: str | None = typer.Option(
        None,
        "--to",
        metavar="REF",
        help="End of range (inclusive; default: HEAD).",
    ),
    as_json: bool = typer.Option(False, "--json", help="Emit results as JSON."),
) -> None:
    """Search commit history for symbols matching a predicate expression.

    Walks the commit range from ``--from`` to ``--to`` (oldest-first),
    collecting every snapshot where each matching symbol is present.

    Summarises: first appearance, last appearance, commit count, and number
    of distinct implementations (content_id changes).

    The predicate grammar is the same as ``muse query`` — supports OR, NOT,
    and parentheses.

    Examples::

        muse query-history "kind=function" "language=Python"
        muse query-history "name~=validate" --from v1.0 --to HEAD
        muse query-history "kind=class" --json
    """
    root = require_repo()
    repo_id = _read_repo_id(root)
    branch = _read_branch(root)

    # Defensive guard; the argument is declared required in the signature.
    if not predicates:
        typer.echo("❌ At least one predicate is required.", err=True)
        raise typer.Exit(code=ExitCode.USER_ERROR)

    try:
        matches = parse_query(predicates)
    except PredicateError as exc:
        typer.echo(f"❌ {exc}", err=True)
        raise typer.Exit(code=ExitCode.USER_ERROR)

    # Upper bound of the range; ``to_ref`` is passed through even when None,
    # and the error message then names HEAD.
    end_commit = resolve_commit_ref(root, repo_id, branch, to_ref)
    if end_commit is None:
        typer.echo(f"❌ --to ref '{to_ref or 'HEAD'}' not found.", err=True)
        raise typer.Exit(code=ExitCode.USER_ERROR)

    # Lower bound is optional: None is handed to the walker when --from
    # was not given.
    start_commit_id: str | None = None
    if from_ref is not None:
        start_commit = resolve_commit_ref(root, repo_id, branch, from_ref)
        if start_commit is None:
            typer.echo(f"❌ --from ref '{from_ref}' not found.", err=True)
            raise typer.Exit(code=ExitCode.USER_ERROR)
        start_commit_id = start_commit.commit_id

    # Materialise the walk, then order it by commit timestamp (ascending).
    ordered_commits = list(
        walk_commits_between(root, end_commit.commit_id, start_commit_id)
    )
    ordered_commits.sort(key=lambda commit: commit.committed_at)

    # One _SymbolHistory per symbol address, fed once per commit in which a
    # matching symbol is present.
    by_address: dict[str, _SymbolHistory] = {}
    for commit in ordered_commits:
        # A commit without a manifest is treated as an empty snapshot.
        manifest = get_commit_snapshot_manifest(root, commit.commit_id) or {}
        for file_path, tree in symbols_for_snapshot(root, manifest).items():
            for addr, rec in tree.items():
                if not matches(file_path, rec):
                    continue
                entry = by_address.get(addr)
                if entry is None:
                    entry = _SymbolHistory(
                        address=addr,
                        kind=rec["kind"],
                        language=language_of(file_path),
                    )
                    by_address[addr] = entry
                entry.record(
                    commit.commit_id,
                    commit.committed_at.isoformat(),
                    rec["content_id"],
                )

    # Dict keys equal each entry's ``address``, so sorting the keys yields
    # the entries ordered by address.
    summaries = [by_address[address] for address in sorted(by_address)]

    if as_json:
        payload = {
            "schema_version": 2,
            "to_commit": end_commit.commit_id[:8],
            "from_commit": start_commit_id[:8] if start_commit_id else None,
            "commits_scanned": len(ordered_commits),
            "symbols_found": len(summaries),
            "results": [summary.to_dict() for summary in summaries],
        }
        typer.echo(json.dumps(payload, indent=2))
        return

    joined_predicates = " AND ".join(predicates)
    typer.echo(
        f"\nSymbol history — {joined_predicates} ({len(ordered_commits)} commit(s) scanned)"
    )
    typer.echo("─" * 62)

    if not summaries:
        typer.echo(" (no matching symbols found in range)")
        return

    # Pad every address to the longest one so the columns line up.
    address_width = len(max((summary.address for summary in summaries), key=len))
    for summary in summaries:
        if summary.change_count > 1:
            change_label = f"{summary.change_count} version(s)"
        else:
            change_label = "stable"
        first_day = summary.first_committed_at[:10]
        last_day = summary.last_committed_at[:10]
        span = f"{first_day}..{last_day}"
        typer.echo(
            f" {summary.address:<{address_width}} {summary.kind:<14} "
            f"[{summary.commit_count:>3} commit(s)] {span} {change_label}"
        )
        if not summary.first_commit_id:
            continue
        typer.echo(f" └─ introduced: {summary.first_commit_id[:8]}")
        # Only show a separate "last seen" line when it differs from the
        # introducing commit.
        if summary.first_commit_id != summary.last_commit_id:
            typer.echo(f" └─ last seen: {summary.last_commit_id[:8]}")

    typer.echo(f"\n {len(summaries)} symbol(s) found")