gabriel / muse public
detect_refactor.py python
357 lines 11.9 KB
bda49bdb feat: redesign .museignore as TOML with domain-scoped sections (#100) Gabriel Cardona <cgcardona@gmail.com> 5d ago
1 """muse detect-refactor — semantic refactoring detection across commits.
2
3 This command is impossible in Git. Git sees every refactoring operation as
4 a diff of text lines. A function extracted into a helper module? Delete lines
5 here, add lines there — no semantic connection. A class renamed? Every file
6 that imports it becomes a "modification". Muse understands *what actually
7 happened* at the symbol level.
8
9 ``muse detect-refactor`` scans the commit range and classifies every semantic
10 operation into one of five refactoring categories:
11
12 ``rename``
13 A symbol kept its body but changed its name. Detected via matching
14 ``body_hash`` across the before/after snapshot.
15
16 ``move``
17 A symbol's full ``content_id`` appears in a different file. The symbol
18 moved without change.
19
20 ``signature_change``
21 A symbol's name and body are unchanged; only its parameter list or return
22 type changed.
23
24 ``implementation_change``
25 A symbol's signature is stable; its internal logic changed.
26
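``extraction``
    A new symbol whose body shares significant content with an existing symbol
    — a function was factored out of another. (Heuristic: detected when a
    new symbol appears at the same time an existing symbol shrinks.)
    Note: the classifier in this module does not currently emit this
    category; ``--kind`` accepts only rename, move, signature, and
    implementation.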
31
32 Output::
33
34 Semantic refactoring report
35 From: cb4afaed "Layer 2: add harmonic dimension"
36 To: a3f2c9e1 "Refactor: rename and move helpers"
37 ──────────────────────────────────────────────────────────────
38
39 RENAME src/utils.py::calculate_total
40 → compute_total
41 commit a3f2c9e1 "Rename: improve naming clarity"
42
43 MOVE src/utils.py::compute_total
44 → src/helpers.py::compute_total
45 commit 1d2e3faa "Move: extract helpers module"
46
47 SIGNATURE src/api.py::handle_request
48 parameters changed: (req, ctx) → (request, context, timeout)
49 commit 4b5c6d7e "API: add timeout parameter"
50
51 IMPLEMENTATION src/core.py::process_batch
52 implementation changed (signature stable)
53 commit 8f9a0b1c "Perf: vectorise batch processing"
54
55 ──────────────────────────────────────────────────────────────
56 4 refactoring operations detected
57 (1 rename · 1 move · 1 signature · 1 implementation)
58
59 Flags:
60
61 ``--from <ref>``
62 Start of the commit range (exclusive). Default: the initial commit.
63
64 ``--to <ref>``
65 End of the commit range (inclusive). Default: HEAD.
66
67 ``--kind <kind>``
68 Filter to one category: rename, move, signature, implementation.
69
70 ``--json``
71 Emit the full refactoring report as JSON.
72 """
73
74 from __future__ import annotations
75
76 import json
77 import logging
78 import pathlib
79
80 import typer
81
82 from muse.core.errors import ExitCode
83 from muse.core.repo import require_repo
84 from muse.core.store import CommitRecord, read_commit, resolve_commit_ref
85 from muse.domain import DomainOp
86
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Typer application object; the ``detect_refactor`` callback is registered on it.
app = typer.Typer()

# Values accepted by the ``--kind`` filter; anything else is rejected as a user error.
_VALID_KINDS = frozenset({"rename", "move", "signature", "implementation"})
92
93
def _read_repo_id(root: pathlib.Path) -> str:
    """Return the ``repo_id`` stored in ``<root>/.muse/repo.json`` as a string.

    The file is decoded explicitly as UTF-8 rather than with the platform's
    locale encoding, so non-ASCII content elsewhere in the file cannot break
    or corrupt the read on systems whose default encoding is not UTF-8.

    Raises whatever the underlying read/parse raises (``OSError`` for a
    missing or unreadable file, ``json.JSONDecodeError`` for invalid JSON,
    ``KeyError`` when ``repo_id`` is absent).
    """
    repo_json = root / ".muse" / "repo.json"
    metadata = json.loads(repo_json.read_text(encoding="utf-8"))
    return str(metadata["repo_id"])
96
97
def _read_branch(root: pathlib.Path) -> str:
    """Return the branch name recorded in ``<root>/.muse/HEAD``.

    The file content is stripped of surrounding whitespace, a leading
    ``refs/heads/`` prefix is dropped if present, and the remainder is
    stripped once more.
    """
    head_path = root / ".muse" / "HEAD"
    raw_ref = head_path.read_text().strip()
    branch_name = raw_ref.removeprefix("refs/heads/")
    return branch_name.strip()
101
102
def _walk_commits(
    root: pathlib.Path,
    to_commit_id: str,
    from_commit_id: str | None,
) -> list[CommitRecord]:
    """Gather commits by following ``parent_commit_id`` links backwards.

    The walk starts at *to_commit_id* (included) and stops when it reaches
    *from_commit_id* (excluded), runs out of parents, revisits an id it has
    already seen, or ``read_commit`` returns ``None``. Commits are returned
    in the order visited, i.e. starting from *to_commit_id*.
    """
    collected: list[CommitRecord] = []
    visited: set[str] = set()
    cursor: str | None = to_commit_id

    while True:
        # Stop on: end of chain, a cycle, or the exclusive lower bound.
        if not cursor or cursor in visited or cursor == from_commit_id:
            return collected
        visited.add(cursor)

        record = read_commit(root, cursor)
        if record is None:
            return collected

        collected.append(record)
        cursor = record.parent_commit_id
122
123
def _flat_child_ops(ops: list[DomainOp]) -> list[DomainOp]:
    """Return *ops* with each ``patch`` op replaced by its ``child_ops``.

    Only one level is expanded: the children of a ``patch`` op are copied
    through as-is, and every non-patch op is kept in place. Order is
    preserved.
    """
    return [
        leaf
        for op in ops
        for leaf in (op["child_ops"] if op["op"] == "patch" else (op,))
    ]
133
134
class RefactorEvent:
    """One refactoring operation found in a commit.

    Holds the event ``kind``, the ``address`` of the affected symbol, a
    human-readable ``detail`` string, and the ``commit`` it was found in.
    """

    def __init__(
        self,
        kind: str,
        address: str,
        detail: str,
        commit: CommitRecord,
    ) -> None:
        self.kind, self.address = kind, address
        self.detail, self.commit = detail, commit

    def to_dict(self) -> dict[str, str]:
        """Return a flat, string-valued mapping of this event and its commit."""
        source = self.commit
        return dict(
            kind=self.kind,
            address=self.address,
            detail=self.detail,
            commit_id=source.commit_id,
            commit_message=source.message,
            committed_at=source.committed_at.isoformat(),
        )
159
160
def _classify_ops(commit: CommitRecord) -> list[RefactorEvent]:
    """Turn the ops in *commit*'s structured delta into refactoring events.

    Returns an empty list when the commit has no structured delta. Otherwise
    each (flattened) op is inspected:

    - ``delete`` ops whose ``content_summary`` contains ``"moved to"`` yield
      a ``move`` event.
    - ``replace`` ops are classified from their summaries, first match wins:
      ``rename``, ``move``, ``signature``, ``implementation``.

    All other ops produce no event.
    """
    found: list[RefactorEvent] = []
    if commit.structured_delta is None:
        return found

    def record(kind: str, address: str, detail: str) -> None:
        # Every event in this call is attributed to the same commit.
        found.append(
            RefactorEvent(kind=kind, address=address, detail=detail, commit=commit)
        )

    for op in _flat_child_ops(commit.structured_delta["ops"]):
        address = op["address"]
        op_type = op["op"]

        if op_type == "delete":
            summary = op.get("content_summary", "")
            if "moved to" in summary:
                destination = summary.split("moved to")[-1].strip()
                record("move", address, f"→ {destination}")
            continue

        if op_type != "replace":
            continue

        new_summary: str = op.get("new_summary", "")
        old_summary: str = op.get("old_summary", "")

        if new_summary.startswith("renamed to "):
            renamed = new_summary.removeprefix("renamed to ").strip()
            record("rename", address, f"→ {renamed}")
        elif new_summary.startswith("moved to "):
            destination = new_summary.removeprefix("moved to ").strip()
            record("move", address, f"→ {destination}")
        elif "signature" in new_summary or "signature" in old_summary:
            # Fall back to a generic message when only the old summary
            # mentioned the signature and the new summary is empty.
            record(
                "signature",
                address,
                new_summary or f"{address} signature changed",
            )
        elif "implementation" in new_summary:
            record("implementation", address, new_summary)

    return found
220
221
# Display label for each event kind, used as the line prefix in the
# human-readable report.
# NOTE(review): the labels differ in length here, while the fallback in
# _print_human pads unknown kinds with ljust(14) — confirm whether these were
# meant to be padded to a common width.
_LABEL: dict[str, str] = {
    "rename": "RENAME ",
    "move": "MOVE ",
    "signature": "SIGNATURE ",
    "implementation": "IMPLEMENTATION",
}
228
229
def _print_human(
    events: list[RefactorEvent],
    from_label: str,
    to_label: str,
) -> None:
    """Write the human-readable refactoring report via ``typer.echo``.

    Prints a header with the range labels, then one three-line entry per
    event in the order given, then a footer with the total and a per-kind
    breakdown sorted by kind name. When *events* is empty, only the header
    and a "nothing detected" line are printed.
    """
    rule = "─" * 62
    header_lines = (
        "\nSemantic refactoring report",
        f"From: {from_label}",
        f"To: {to_label}",
        rule,
    )
    for header_line in header_lines:
        typer.echo(header_line)

    if not events:
        typer.echo("\n (no semantic refactoring detected in this range)")
        return

    # Events arrive newest-first because commits were collected newest-first.
    for event in events:
        prefix = _LABEL.get(event.kind, event.kind.upper().ljust(14))
        abbreviated = event.commit.commit_id[:8]
        typer.echo(f"\n{prefix} {event.address}")
        typer.echo(f" {event.detail}")
        typer.echo(f' commit {abbreviated} "{event.commit.message}"')

    typer.echo("\n" + rule)

    kinds = [event.kind for event in events]
    breakdown = " · ".join(
        f"{kinds.count(kind)} {kind}" for kind in sorted(set(kinds))
    )
    typer.echo(f"{len(events)} refactoring operation(s) detected")
    typer.echo(f"({breakdown})")
259
260
@app.callback(invoke_without_command=True)
def detect_refactor(
    ctx: typer.Context,
    from_ref: str | None = typer.Option(
        None,
        "--from",
        metavar="REF",
        help="Start of range (exclusive). Default: initial commit.",
    ),
    to_ref: str | None = typer.Option(
        None,
        "--to",
        metavar="REF",
        help="End of range (inclusive). Default: HEAD.",
    ),
    kind_filter: str | None = typer.Option(
        None,
        "--kind",
        "-k",
        metavar="KIND",
        help="Filter to one category: rename, move, signature, implementation.",
    ),
    as_json: bool = typer.Option(
        False,
        "--json",
        help="Emit the full refactoring report as JSON.",
    ),
) -> None:
    """Detect semantic refactoring operations across a commit range.

    ``muse detect-refactor`` is impossible in Git. Git reports renames only
    as heuristic line-similarity guesses (``git diff --find-renames``); it
    has no concept of function identity, body hashes, or cross-file symbol
    continuity.

    Muse detects every semantic refactoring at the AST level:

    \\b
    - RENAME: same body, new name (``body_hash`` match)\n
    - MOVE: same content, new file (``content_id`` match)\n
    - SIGNATURE: name/body stable, parameters changed\n
    - IMPLEMENTATION: signature stable, logic changed\n

    Use ``--from`` / ``--to`` to scope the range. Without flags, scans the
    full history from the first commit to HEAD.
    """
    root = require_repo()
    repo_id = _read_repo_id(root)
    branch = _read_branch(root)

    # Reject an unrecognised --kind before resolving any commit refs.
    if kind_filter and kind_filter not in _VALID_KINDS:
        valid_kinds = ", ".join(sorted(_VALID_KINDS))
        typer.echo(
            f"❌ Unknown kind '{kind_filter}'. Valid: {valid_kinds}",
            err=True,
        )
        raise typer.Exit(code=ExitCode.USER_ERROR)

    # Upper (inclusive) end of the range.
    end_commit = resolve_commit_ref(root, repo_id, branch, to_ref)
    if end_commit is None:
        missing = to_ref or "HEAD"
        typer.echo(f"❌ Commit '{missing}' not found.", err=True)
        raise typer.Exit(code=ExitCode.USER_ERROR)

    # Lower (exclusive) end of the range; None means walk to the root.
    start_id: str | None = None
    if from_ref is not None:
        start_commit = resolve_commit_ref(root, repo_id, branch, from_ref)
        if start_commit is None:
            typer.echo(f"❌ Commit '{from_ref}' not found.", err=True)
            raise typer.Exit(code=ExitCode.USER_ERROR)
        start_id = start_commit.commit_id

    history = _walk_commits(root, end_commit.commit_id, start_id)

    # Classify every commit, keeping only the requested kind when filtering.
    detected: list[RefactorEvent] = [
        event
        for commit in history
        for event in _classify_ops(commit)
        if not kind_filter or event.kind == kind_filter
    ]

    # The "from" label falls back to a fixed phrase when there is no lower
    # bound or its commit record cannot be read.
    from_label = "initial commit"
    if start_id is not None:
        start_record = read_commit(root, start_id)
        if start_record is not None:
            from_label = f'{start_id[:8]} "{start_record.message}"'
    to_label = f'{end_commit.commit_id[:8]} "{end_commit.message}"'

    if not as_json:
        _print_human(detected, from_label, to_label)
        return

    report = {
        "schema_version": 2,
        "from": from_label,
        "to": to_label,
        "total": len(detected),
        "events": [event.to_dict() for event in detected],
    }
    typer.echo(json.dumps(report, indent=2))