query_history.py
python
| 1 | """muse query-history — temporal symbol search across commit history. |
| 2 | |
| 3 | Searches the commit history for symbols matching a predicate expression, |
| 4 | bounded by a commit range. Unlike ``muse query --all-commits``, this command |
| 5 | is focused on *change events* — it shows when each symbol first appeared, when |
| 6 | it was last seen, how many commits it survived, and what changes occurred. |
| 7 | |
| 8 | It answers questions that are impossible in Git: |
| 9 | |
| 10 | * "Find all public Python functions introduced after tag v1.0" |
| 11 | * "Show me every class whose signature changed in the last 50 commits" |
| 12 | * "Which functions were present in v1.0 but are gone in v2.0?" |
| 13 | * "Find methods renamed between two refs" |
| 14 | |
| 15 | Usage:: |
| 16 | |
| 17 | muse query-history "kind=function" "language=Python" |
| 18 | muse query-history "name~=validate" --from v1.0 --to HEAD |
| 19 | muse query-history "kind=class" --from abc12345 |
| 20 | muse query-history "file~=billing" "kind=function" --json |
| 21 | |
| 22 | Output:: |
| 23 | |
| 24 | Symbol history — kind=function language=Python (42 commits) |
| 25 | ────────────────────────────────────────────────────────────── |
| 26 | |
| 27 | src/billing.py::compute_total function [12 commits] 2026-01-01..2026-03-10 |
| 28 | src/billing.py::compute_tax function [ 8 commits] 2026-01-15..2026-03-10 |
| 29 | └─ introduced: a1b2c3d4 2026-01-15 |
| 30 | └─ last seen: f7a8b9c0 2026-03-10 |
| 31 | |
| 32 | Flags: |
| 33 | |
| 34 | ``--from REF`` |
| 35 | Start of the commit range (exclusive; default: initial commit). |
| 36 | |
| 37 | ``--to REF`` |
| 38 | End of the commit range (inclusive; default: HEAD). |
| 39 | |
| 40 | ``--json`` |
| 41 | Emit results as JSON with ``schema_version: 2``. |
| 42 | """ |
| 43 | from __future__ import annotations |
| 44 | |
| 45 | import json |
| 46 | import logging |
| 47 | import pathlib |
| 48 | |
| 49 | import typer |
| 50 | |
| 51 | from muse.core.errors import ExitCode |
| 52 | from muse.core.repo import require_repo |
| 53 | from muse.core.store import ( |
| 54 | get_commit_snapshot_manifest, |
| 55 | resolve_commit_ref, |
| 56 | walk_commits_between, |
| 57 | ) |
| 58 | from muse.plugins.code._predicate import PredicateError, parse_query |
| 59 | from muse.plugins.code._query import language_of, symbols_for_snapshot |
| 60 | from muse.plugins.code.ast_parser import SymbolRecord |
| 61 | |
| 62 | logger = logging.getLogger(__name__) |
| 63 | |
| 64 | app = typer.Typer() |
| 65 | |
| 66 | |
def _read_repo_id(root: pathlib.Path) -> str:
    """Return the ``repo_id`` entry of ``<root>/.muse/repo.json`` as a string.

    The file is parsed as JSON and the ``"repo_id"`` value is passed through
    ``str()``, so a non-string id is returned in its string form.
    """
    repo_json = root / ".muse" / "repo.json"
    metadata = json.loads(repo_json.read_text())
    return str(metadata["repo_id"])
| 69 | |
| 70 | |
def _read_branch(root: pathlib.Path) -> str:
    """Return the branch name stored in ``<root>/.muse/HEAD``.

    Surrounding whitespace is stripped from the file contents, a leading
    ``refs/heads/`` is dropped when present, and the remainder is stripped
    once more before being returned.
    """
    head_path = root / ".muse" / "HEAD"
    contents = head_path.read_text().strip()
    prefix = "refs/heads/"
    if contents.startswith(prefix):
        contents = contents[len(prefix):]
    return contents.strip()
| 74 | |
| 75 | |
class _SymbolHistory:
    """Running summary of one symbol's appearances across a commit range.

    Tracks the first and last commit in which the symbol was recorded, how
    many times it was recorded, and the set of distinct content ids seen.
    """

    def __init__(self, address: str, kind: str, language: str) -> None:
        # Identity of the symbol; set once at construction.
        self.address = address
        self.kind = kind
        self.language = language
        # Range endpoints stay empty strings until the first ``record`` call.
        self.first_commit_id: str = ""
        self.first_committed_at: str = ""
        self.last_commit_id: str = ""
        self.last_committed_at: str = ""
        # Number of ``record`` calls and the distinct content ids they carried.
        self.commit_count: int = 0
        self.content_ids: set[str] = set()

    @property
    def change_count(self) -> int:
        """Return how many distinct content ids have been recorded.

        A value of 1 means every recorded appearance had the same content.
        """
        return len(self.content_ids)

    def record(self, commit_id: str, committed_at: str, content_id: str) -> None:
        """Register one appearance of the symbol in a commit.

        The first call (while ``first_commit_id`` is still empty) fixes the
        "first" endpoint; every call overwrites the "last" endpoint, bumps
        ``commit_count`` and adds ``content_id`` to ``content_ids``.
        """
        if not self.first_commit_id:
            self.first_commit_id, self.first_committed_at = commit_id, committed_at
        self.last_commit_id, self.last_committed_at = commit_id, committed_at
        self.commit_count += 1
        self.content_ids.add(content_id)

    def to_dict(self) -> dict[str, str | int]:
        """Return a plain-dict summary of this history.

        Commit ids are cut to their first 8 characters and timestamps to
        their first 10 characters.
        """
        first_id = self.first_commit_id[:8]
        first_at = self.first_committed_at[:10]
        last_id = self.last_commit_id[:8]
        last_at = self.last_committed_at[:10]
        return {
            "address": self.address,
            "kind": self.kind,
            "language": self.language,
            "commit_count": self.commit_count,
            "change_count": self.change_count,
            "first_commit_id": first_id,
            "first_committed_at": first_at,
            "last_commit_id": last_id,
            "last_committed_at": last_at,
        }
| 116 | |
| 117 | |
@app.callback(invoke_without_command=True)
def query_history(
    ctx: typer.Context,
    predicates: list[str] = typer.Argument(
        ...,
        metavar="PREDICATE...",
        help='One or more predicates, e.g. "kind=function" "language=Python".',
    ),
    from_ref: str | None = typer.Option(
        None,
        "--from",
        metavar="REF",
        help="Start of range (exclusive; default: initial commit).",
    ),
    to_ref: str | None = typer.Option(
        None,
        "--to",
        metavar="REF",
        help="End of range (inclusive; default: HEAD).",
    ),
    as_json: bool = typer.Option(False, "--json", help="Emit results as JSON."),
) -> None:
    """Search commit history for symbols matching a predicate expression.

    Walks the commit range from ``--from`` to ``--to`` (oldest-first),
    collecting every snapshot where each matching symbol is present.

    Summarises: first appearance, last appearance, commit count, and number
    of distinct implementations (content_id changes).

    The predicate grammar is the same as ``muse query`` v2 — supports OR, NOT,
    and parentheses.

    Examples::

        muse query-history "kind=function" "language=Python"
        muse query-history "name~=validate" --from v1.0 --to HEAD
        muse query-history "kind=class" --json
    """
    repo_root = require_repo()
    repo_id = _read_repo_id(repo_root)
    branch = _read_branch(repo_root)

    # Reject an empty predicate list explicitly with a user-level error.
    if not predicates:
        typer.echo("❌ At least one predicate is required.", err=True)
        raise typer.Exit(code=ExitCode.USER_ERROR)

    # Compile the predicate strings into a single callable matcher.
    try:
        matches = parse_query(predicates)
    except PredicateError as err:
        typer.echo(f"❌ {err}", err=True)
        raise typer.Exit(code=ExitCode.USER_ERROR)

    # Resolve the end of the range; the message names HEAD when --to is unset.
    end_commit = resolve_commit_ref(repo_root, repo_id, branch, to_ref)
    if end_commit is None:
        typer.echo(f"❌ --to ref '{to_ref or 'HEAD'}' not found.", err=True)
        raise typer.Exit(code=ExitCode.USER_ERROR)

    # Resolve the optional start of the range; None means "no lower bound
    # was requested" and is handed to the walker unchanged.
    start_id: str | None = None
    if from_ref is not None:
        start_commit = resolve_commit_ref(repo_root, repo_id, branch, from_ref)
        if start_commit is None:
            typer.echo(f"❌ --from ref '{from_ref}' not found.", err=True)
            raise typer.Exit(code=ExitCode.USER_ERROR)
        start_id = start_commit.commit_id

    # Materialise the walk, then order it by commit timestamp (oldest first).
    commits = list(walk_commits_between(repo_root, end_commit.commit_id, start_id))
    commits.sort(key=lambda c: c.committed_at)

    # Build one _SymbolHistory per matching symbol address.
    by_address: dict[str, _SymbolHistory] = {}
    for commit in commits:
        manifest = get_commit_snapshot_manifest(repo_root, commit.commit_id) or {}
        for file_path, tree in symbols_for_snapshot(repo_root, manifest).items():
            for addr, rec in tree.items():
                if not matches(file_path, rec):
                    continue
                entry = by_address.get(addr)
                if entry is None:
                    # First sighting: kind and language are taken from here.
                    entry = by_address[addr] = _SymbolHistory(
                        address=addr,
                        kind=rec["kind"],
                        language=language_of(file_path),
                    )
                entry.record(
                    commit.commit_id,
                    commit.committed_at.isoformat(),
                    rec["content_id"],
                )

    # Each history's address equals its dict key, so sorting the keys gives
    # the same order as sorting the values by address.
    summaries = [by_address[addr] for addr in sorted(by_address)]

    if as_json:
        payload = {
            "schema_version": 2,
            "to_commit": end_commit.commit_id[:8],
            "from_commit": start_id[:8] if start_id else None,
            "commits_scanned": len(commits),
            "symbols_found": len(summaries),
            "results": [item.to_dict() for item in summaries],
        }
        typer.echo(json.dumps(payload, indent=2))
        return

    # Human-readable report.
    joined_predicates = " AND ".join(predicates)
    typer.echo(
        f"\nSymbol history — {joined_predicates} ({len(commits)} commit(s) scanned)"
    )
    typer.echo("─" * 62)

    if not summaries:
        typer.echo(" (no matching symbols found in range)")
        return

    # Pad every address to the longest one so the columns line up.
    addr_width = max(len(item.address) for item in summaries)
    for item in summaries:
        if item.change_count > 1:
            change_label = f"{item.change_count} version(s)"
        else:
            change_label = "stable"
        span = f"{item.first_committed_at[:10]}..{item.last_committed_at[:10]}"
        typer.echo(
            f" {item.address:<{addr_width}} {item.kind:<14} "
            f"[{item.commit_count:>3} commit(s)] {span} {change_label}"
        )
        if item.first_commit_id:
            typer.echo(f" └─ introduced: {item.first_commit_id[:8]}")
        # Only show "last seen" when it differs from the introducing commit.
        if item.first_commit_id != item.last_commit_id:
            typer.echo(f" └─ last seen: {item.last_commit_id[:8]}")

    typer.echo(f"\n {len(summaries)} symbol(s) found")