gabriel / muse public
detect_refactor.py python
356 lines 11.9 KB
e35a0a2e feat(code): Phase 4 — metadata_id, canonical_key, composite refactor cl… Gabriel Cardona <cgcardona@gmail.com> 5d ago
1 """muse detect-refactor — semantic refactoring detection across commits.
2
3 This command is impossible in Git. Git sees every refactoring operation as
4 a diff of text lines. A function extracted into a helper module? Delete lines
5 here, add lines there — no semantic connection. A class renamed? Every file
6 that imports it becomes a "modification". Muse understands *what actually
7 happened* at the symbol level.
8
9 ``muse detect-refactor`` scans the commit range and classifies every semantic
10 operation into one of five refactoring categories:
11
12 ``rename``
13 A symbol kept its body but changed its name. Detected via matching
14 ``body_hash`` across the before/after snapshot.
15
16 ``move``
17 A symbol's full ``content_id`` appears in a different file. The symbol
18 moved without change.
19
20 ``signature_change``
21 A symbol's name and body are unchanged; only its parameter list or return
22 type changed.
23
24 ``implementation_change``
25 A symbol's signature is stable; its internal logic changed.
26
27 ``extraction``
28 A new symbol whose body shares significant content with an existing symbol
29 — a function was factored out of another. (Heuristic: detected when a
30 new symbol appears at the same time an existing symbol shrinks.)
31
32 Output::
33
34 Semantic refactoring report
35 From: cb4afaed "Layer 2: add harmonic dimension"
36 To: a3f2c9e1 "Refactor: rename and move helpers"
37 ──────────────────────────────────────────────────────────────
38
39 RENAME src/utils.py::calculate_total
40 → compute_total
41 commit a3f2c9e1 "Rename: improve naming clarity"
42
43 MOVE src/utils.py::compute_total
44 → src/helpers.py::compute_total
45 commit 1d2e3faa "Move: extract helpers module"
46
47 SIGNATURE src/api.py::handle_request
48 parameters changed: (req, ctx) → (request, context, timeout)
49 commit 4b5c6d7e "API: add timeout parameter"
50
51 IMPLEMENTATION src/core.py::process_batch
52 implementation changed (signature stable)
53 commit 8f9a0b1c "Perf: vectorise batch processing"
54
55 ──────────────────────────────────────────────────────────────
56 4 refactoring operations detected
57 (1 rename · 1 move · 1 signature · 1 implementation)
58
59 Flags:
60
61 ``--from <ref>``
62 Start of the commit range (exclusive). Default: the initial commit.
63
64 ``--to <ref>``
65 End of the commit range (inclusive). Default: HEAD.
66
67 ``--kind <kind>``
68 Filter to one category: rename, move, signature, implementation.
69
70 ``--json``
71 Emit the full refactoring report as JSON.
72 """
73 from __future__ import annotations
74
75 import json
76 import logging
77 import pathlib
78
79 import typer
80
81 from muse.core.errors import ExitCode
82 from muse.core.repo import require_repo
83 from muse.core.store import CommitRecord, read_commit, resolve_commit_ref
84 from muse.domain import DomainOp
85
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Typer application object; the command function in this file registers
# itself on it through ``@app.callback``.
app = typer.Typer()

# The values accepted by the ``--kind`` option; anything else is rejected
# with a user error before any commit is read.
_VALID_KINDS = frozenset({"rename", "move", "signature", "implementation"})
91
92
93 def _read_repo_id(root: pathlib.Path) -> str:
94 return str(json.loads((root / ".muse" / "repo.json").read_text())["repo_id"])
95
96
97 def _read_branch(root: pathlib.Path) -> str:
98 head_ref = (root / ".muse" / "HEAD").read_text().strip()
99 return head_ref.removeprefix("refs/heads/").strip()
100
101
102 def _walk_commits(
103 root: pathlib.Path,
104 to_commit_id: str,
105 from_commit_id: str | None,
106 ) -> list[CommitRecord]:
107 """Collect commits from *to_commit_id* back to (but not including) *from_commit_id*."""
108 commits: list[CommitRecord] = []
109 seen: set[str] = set()
110 current_id: str | None = to_commit_id
111 while current_id and current_id not in seen:
112 seen.add(current_id)
113 if current_id == from_commit_id:
114 break
115 commit = read_commit(root, current_id)
116 if commit is None:
117 break
118 commits.append(commit)
119 current_id = commit.parent_commit_id
120 return commits
121
122
123 def _flat_child_ops(ops: list[DomainOp]) -> list[DomainOp]:
124 """Flatten PatchOp child_ops; return all leaf ops."""
125 result: list[DomainOp] = []
126 for op in ops:
127 if op["op"] == "patch":
128 result.extend(op["child_ops"])
129 else:
130 result.append(op)
131 return result
132
133
class RefactorEvent:
    """One refactoring occurrence, paired with the commit it was found in."""

    def __init__(
        self,
        kind: str,
        address: str,
        detail: str,
        commit: CommitRecord,
    ) -> None:
        # kind:    category name of the event (e.g. "rename", "move")
        # address: address of the op the event was derived from
        # detail:  human-readable description shown under the address
        # commit:  the commit record the op belongs to
        self.kind, self.address = kind, address
        self.detail, self.commit = detail, commit

    def to_dict(self) -> dict[str, str]:
        """Return the event as a flat mapping of strings, ready for JSON output.

        The commit is represented by its id, its message, and its
        ``committed_at`` timestamp in ISO 8601 form.
        """
        source = self.commit
        payload: dict[str, str] = {
            "kind": self.kind,
            "address": self.address,
            "detail": self.detail,
        }
        payload["commit_id"] = source.commit_id
        payload["commit_message"] = source.message
        payload["committed_at"] = source.committed_at.isoformat()
        return payload
158
159
160 def _classify_ops(commit: CommitRecord) -> list[RefactorEvent]:
161 """Extract refactoring events from *commit*'s structured delta."""
162 events: list[RefactorEvent] = []
163 if commit.structured_delta is None:
164 return events
165
166 all_ops = _flat_child_ops(commit.structured_delta["ops"])
167
168 for op in all_ops:
169 address = op["address"]
170
171 if op["op"] == "delete":
172 content_summary = op.get("content_summary", "")
173 if "moved to" in content_summary:
174 target = content_summary.split("moved to")[-1].strip()
175 events.append(RefactorEvent(
176 kind="move",
177 address=address,
178 detail=f"→ {target}",
179 commit=commit,
180 ))
181
182 elif op["op"] == "replace":
183 new_summary: str = op.get("new_summary", "")
184 old_summary: str = op.get("old_summary", "")
185
186 if new_summary.startswith("renamed to "):
187 new_name = new_summary.removeprefix("renamed to ").strip()
188 events.append(RefactorEvent(
189 kind="rename",
190 address=address,
191 detail=f"→ {new_name}",
192 commit=commit,
193 ))
194 elif new_summary.startswith("moved to "):
195 target = new_summary.removeprefix("moved to ").strip()
196 events.append(RefactorEvent(
197 kind="move",
198 address=address,
199 detail=f"→ {target}",
200 commit=commit,
201 ))
202 elif "signature" in new_summary or "signature" in old_summary:
203 detail = new_summary or f"{address} signature changed"
204 events.append(RefactorEvent(
205 kind="signature",
206 address=address,
207 detail=detail,
208 commit=commit,
209 ))
210 elif "implementation" in new_summary:
211 events.append(RefactorEvent(
212 kind="implementation",
213 address=address,
214 detail=new_summary,
215 commit=commit,
216 ))
217
218 return events
219
220
# Heading text printed in front of an event's address in the human-readable
# report, keyed by event kind.  Kinds without an entry fall back to the
# upper-cased kind name padded to 14 characters (see ``_print_human``).
_LABEL: dict[str, str] = {
    "rename": "RENAME ",
    "move": "MOVE ",
    "signature": "SIGNATURE ",
    "implementation": "IMPLEMENTATION",
}
227
228
def _print_human(
    events: list[RefactorEvent],
    from_label: str,
    to_label: str,
) -> None:
    """Write the human-readable refactoring report via ``typer.echo``.

    The report consists of a header naming the range (*from_label* /
    *to_label*), one three-line entry per event in the order given, and a
    footer with the total and a per-kind breakdown sorted by kind name.
    When *events* is empty only the header and a "nothing detected" line
    are printed.
    """
    rule = "─" * 62

    typer.echo("\nSemantic refactoring report")
    typer.echo(f"From: {from_label}")
    typer.echo(f"To: {to_label}")
    typer.echo(rule)

    if not events:
        typer.echo("\n (no semantic refactoring detected in this range)")
        return

    # Events are printed in the order received; the caller collects commits
    # newest-first, so the report is newest-first as well.
    for event in events:
        if event.kind in _LABEL:
            heading = _LABEL[event.kind]
        else:
            heading = event.kind.upper().ljust(14)
        typer.echo(f"\n{heading} {event.address}")
        typer.echo(f" {event.detail}")
        typer.echo(f' commit {event.commit.commit_id[:8]} "{event.commit.message}"')

    # Count how many events fell into each kind for the footer line.
    tally: dict[str, int] = {}
    for event in events:
        tally.setdefault(event.kind, 0)
        tally[event.kind] += 1
    breakdown = " · ".join(f"{count} {kind}" for kind, count in sorted(tally.items()))

    typer.echo("\n" + rule)
    typer.echo(f"{len(events)} refactoring operation(s) detected")
    typer.echo(f"({breakdown})")
258
259
@app.callback(invoke_without_command=True)
def detect_refactor(
    ctx: typer.Context,
    from_ref: str | None = typer.Option(
        None, "--from", metavar="REF",
        help="Start of range (exclusive). Default: initial commit.",
    ),
    to_ref: str | None = typer.Option(
        None, "--to", metavar="REF",
        help="End of range (inclusive). Default: HEAD.",
    ),
    kind_filter: str | None = typer.Option(
        None, "--kind", "-k", metavar="KIND",
        help="Filter to one category: rename, move, signature, implementation.",
    ),
    as_json: bool = typer.Option(
        False, "--json", help="Emit the full refactoring report as JSON.",
    ),
) -> None:
    """Detect semantic refactoring operations across a commit range.

    ``muse detect-refactor`` is impossible in Git. Git reports renames only
    as heuristic line-similarity guesses (``git diff --find-renames``); it
    has no concept of function identity, body hashes, or cross-file symbol
    continuity.

    Muse detects every semantic refactoring at the AST level:

    \\b
    - RENAME: same body, new name (``body_hash`` match)\n
    - MOVE: same content, new file (``content_id`` match)\n
    - SIGNATURE: name/body stable, parameters changed\n
    - IMPLEMENTATION: signature stable, logic changed\n

    Use ``--from`` / ``--to`` to scope the range. Without flags, scans the
    full history from the first commit to HEAD.
    """
    # Repository context needed to resolve refs.
    root = require_repo()
    repo_id = _read_repo_id(root)
    branch = _read_branch(root)

    # An unknown --kind is a user error; report it before resolving any ref.
    if kind_filter and kind_filter not in _VALID_KINDS:
        valid = ", ".join(sorted(_VALID_KINDS))
        typer.echo(f"❌ Unknown kind '{kind_filter}'. Valid: {valid}", err=True)
        raise typer.Exit(code=ExitCode.USER_ERROR)

    # Resolve the inclusive end of the range (``to_ref`` may be None).
    to_commit = resolve_commit_ref(root, repo_id, branch, to_ref)
    if to_commit is None:
        missing = to_ref or "HEAD"
        typer.echo(f"❌ Commit '{missing}' not found.", err=True)
        raise typer.Exit(code=ExitCode.USER_ERROR)

    # Resolve the exclusive start of the range, if one was given.
    from_commit_id: str | None = None
    if from_ref is not None:
        from_commit = resolve_commit_ref(root, repo_id, branch, from_ref)
        if from_commit is None:
            typer.echo(f"❌ Commit '{from_ref}' not found.", err=True)
            raise typer.Exit(code=ExitCode.USER_ERROR)
        from_commit_id = from_commit.commit_id

    # Classify every commit in the range, keeping only the requested kind
    # when a filter is active.
    all_events: list[RefactorEvent] = [
        event
        for commit in _walk_commits(root, to_commit.commit_id, from_commit_id)
        for event in _classify_ops(commit)
        if not kind_filter or event.kind == kind_filter
    ]

    # Labels for the report header; the start falls back to "initial commit"
    # when no --from was given or its record cannot be read.
    from_label = "initial commit"
    if from_commit_id is not None:
        start = read_commit(root, from_commit_id)
        if start is not None:
            from_label = f'{from_commit_id[:8]} "{start.message}"'
    to_label = f'{to_commit.commit_id[:8]} "{to_commit.message}"'

    if not as_json:
        _print_human(all_events, from_label, to_label)
        return

    report = {
        "schema_version": 2,
        "from": from_label,
        "to": to_label,
        "total": len(all_events),
        "events": [event.to_dict() for event in all_events],
    }
    typer.echo(json.dumps(report, indent=2))