detect_refactor.py
python
| 1 | """muse detect-refactor — semantic refactoring detection across commits. |
| 2 | |
| 3 | This command is impossible in Git. Git sees every refactoring operation as |
| 4 | a diff of text lines. A function extracted into a helper module? Delete lines |
| 5 | here, add lines there — no semantic connection. A class renamed? Every file |
| 6 | that imports it becomes a "modification". Muse understands *what actually |
| 7 | happened* at the symbol level. |
| 8 | |
| 9 | ``muse detect-refactor`` scans the commit range and classifies every semantic |
| 10 | operation into one of five refactoring categories: |
| 11 | |
| 12 | ``rename`` |
| 13 | A symbol kept its body but changed its name. Detected via matching |
| 14 | ``body_hash`` across the before/after snapshot. |
| 15 | |
| 16 | ``move`` |
| 17 | A symbol's full ``content_id`` appears in a different file. The symbol |
| 18 | moved without change. |
| 19 | |
| 20 | ``signature_change`` |
| 21 | A symbol's name and body are unchanged; only its parameter list or return |
| 22 | type changed. |
| 23 | |
| 24 | ``implementation_change`` |
| 25 | A symbol's signature is stable; its internal logic changed. |
| 26 | |
| 27 | ``extraction`` |
| 28 | A new symbol whose body shares significant content with an existing symbol |
| 29 | — a function was factored out of another. (Heuristic: detected when a |
| 30 | new symbol appears at the same time an existing symbol shrinks.) |
| 31 | |
| 32 | Output:: |
| 33 | |
| 34 | Semantic refactoring report |
| 35 | From: cb4afaed "Layer 2: add harmonic dimension" |
| 36 | To: a3f2c9e1 "Refactor: rename and move helpers" |
| 37 | ────────────────────────────────────────────────────────────── |
| 38 | |
| 39 | RENAME src/utils.py::calculate_total |
| 40 | → compute_total |
| 41 | commit a3f2c9e1 "Rename: improve naming clarity" |
| 42 | |
| 43 | MOVE src/utils.py::compute_total |
| 44 | → src/helpers.py::compute_total |
| 45 | commit 1d2e3faa "Move: extract helpers module" |
| 46 | |
| 47 | SIGNATURE src/api.py::handle_request |
| 48 | parameters changed: (req, ctx) → (request, context, timeout) |
| 49 | commit 4b5c6d7e "API: add timeout parameter" |
| 50 | |
| 51 | IMPLEMENTATION src/core.py::process_batch |
| 52 | implementation changed (signature stable) |
| 53 | commit 8f9a0b1c "Perf: vectorise batch processing" |
| 54 | |
| 55 | ────────────────────────────────────────────────────────────── |
| 56 | 4 refactoring operations detected |
| 57 | (1 rename · 1 move · 1 signature · 1 implementation) |
| 58 | |
| 59 | Flags: |
| 60 | |
| 61 | ``--from <ref>`` |
| 62 | Start of the commit range (exclusive). Default: scan the full history, including the initial commit. |
| 63 | |
| 64 | ``--to <ref>`` |
| 65 | End of the commit range (inclusive). Default: HEAD. |
| 66 | |
| 67 | ``--kind <kind>`` |
| 68 | Filter to one category: rename, move, signature, implementation. |
| 69 | |
| 70 | ``--json`` |
| 71 | Emit the full refactoring report as JSON. |
| 72 | """ |
| 73 | from __future__ import annotations |
| 74 | |
| 75 | import json |
| 76 | import logging |
| 77 | import pathlib |
| 78 | |
| 79 | import typer |
| 80 | |
| 81 | from muse.core.errors import ExitCode |
| 82 | from muse.core.repo import require_repo |
| 83 | from muse.core.store import CommitRecord, read_commit, resolve_commit_ref |
| 84 | from muse.domain import DomainOp |
| 85 | |
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Typer application object; the command callback below is registered on it.
app = typer.Typer()

# Category names accepted by the ``--kind`` filter.
_VALID_KINDS = frozenset(("rename", "move", "signature", "implementation"))
| 91 | |
| 92 | |
def _read_repo_id(root: pathlib.Path) -> str:
    """Return the ``repo_id`` stored in ``<root>/.muse/repo.json``.

    The file is decoded explicitly as UTF-8 instead of with the platform's
    locale encoding, so the value read does not depend on the machine the
    command runs on.

    Args:
        root: Repository root directory (the parent of ``.muse``).

    Returns:
        The ``repo_id`` value, coerced to ``str``.

    Raises:
        OSError: If ``repo.json`` cannot be read.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If the JSON document has no ``repo_id`` key.
    """
    repo_json = root / ".muse" / "repo.json"
    document = json.loads(repo_json.read_text(encoding="utf-8"))
    return str(document["repo_id"])
| 95 | |
| 96 | |
def _read_branch(root: pathlib.Path) -> str:
    """Return the branch name recorded in ``<root>/.muse/HEAD``.

    The file content is stripped of surrounding whitespace, a leading
    ``refs/heads/`` prefix is removed when present, and the remainder is
    stripped once more before being returned.
    """
    head_path = root / ".muse" / "HEAD"
    ref = head_path.read_text().strip()
    branch = ref.removeprefix("refs/heads/")
    return branch.strip()
| 100 | |
| 101 | |
def _walk_commits(
    root: pathlib.Path,
    to_commit_id: str,
    from_commit_id: str | None,
) -> list[CommitRecord]:
    """Return the commits reachable from *to_commit_id*, newest first.

    The walk follows ``parent_commit_id`` links and stops when it reaches
    *from_commit_id* (which is not included), when a commit cannot be read,
    when there is no parent left, or when an id repeats.
    """
    history: list[CommitRecord] = []
    visited: set[str] = set()
    cursor: str | None = to_commit_id

    # ``visited`` guards against looping forever on a cyclic parent chain.
    while cursor and cursor not in visited and cursor != from_commit_id:
        visited.add(cursor)
        record = read_commit(root, cursor)
        if record is None:
            break
        history.append(record)
        cursor = record.parent_commit_id

    return history
| 121 | |
| 122 | |
def _flat_child_ops(ops: list[DomainOp]) -> list[DomainOp]:
    """Return every leaf op in *ops*, expanding ``patch`` ops recursively.

    A ``patch`` op is a container: it contributes its ``child_ops`` rather
    than itself. Children may themselves be ``patch`` ops, so the expansion
    recurses until only non-patch ops remain. The relative order of the
    leaves is preserved (depth-first, left to right).

    Args:
        ops: Ops as found in a structured delta; may be empty.

    Returns:
        A new list containing only non-``patch`` ops.
    """
    leaves: list[DomainOp] = []
    for op in ops:
        if op["op"] == "patch":
            # Recurse so that a patch nested inside a patch is expanded too;
            # otherwise it would survive here as a non-leaf.
            leaves.extend(_flat_child_ops(op["child_ops"]))
        else:
            leaves.append(op)
    return leaves
| 132 | |
| 133 | |
class RefactorEvent:
    """One refactoring operation detected in a commit.

    Instances carry four plain attributes set by the constructor: ``kind``
    (the category name), ``address`` (the address of the affected symbol),
    ``detail`` (a human-readable description) and ``commit`` (the commit
    record the event was found in).
    """

    def __init__(
        self,
        kind: str,
        address: str,
        detail: str,
        commit: CommitRecord,
    ) -> None:
        self.kind, self.address = kind, address
        self.detail, self.commit = detail, commit

    def to_dict(self) -> dict[str, str]:
        """Return the event as a flat, JSON-serialisable mapping.

        The commit is represented by its id, its message and its commit
        timestamp in ISO 8601 form.
        """
        source = self.commit
        payload: dict[str, str] = {}
        payload["kind"] = self.kind
        payload["address"] = self.address
        payload["detail"] = self.detail
        payload["commit_id"] = source.commit_id
        payload["commit_message"] = source.message
        payload["committed_at"] = source.committed_at.isoformat()
        return payload
| 158 | |
| 159 | |
def _signature_changed(summary: str) -> bool:
    """Return True when *summary* reports a changed signature.

    A summary that mentions the signature only to say it is *stable*, such
    as ``"implementation changed (signature stable)"``, does not count.
    """
    return "signature" in summary and "signature stable" not in summary


def _classify_ops(commit: CommitRecord) -> list[RefactorEvent]:
    """Extract refactoring events from *commit*'s structured delta.

    Classification is driven by the summary strings attached to each leaf op:

    * ``delete`` whose ``content_summary`` contains ``"moved to"`` -> ``move``
    * ``replace`` whose ``new_summary`` starts with ``"renamed to "`` -> ``rename``
    * ``replace`` whose ``new_summary`` starts with ``"moved to "`` -> ``move``
    * ``replace`` whose old or new summary reports a signature change -> ``signature``
    * ``replace`` whose ``new_summary`` mentions ``"implementation"`` -> ``implementation``

    Ops matching none of these rules produce no event.

    Args:
        commit: Commit whose ``structured_delta`` is inspected.

    Returns:
        Events in the order their ops appear; empty when the commit has no
        structured delta.
    """
    delta = commit.structured_delta
    if delta is None:
        return []

    events: list[RefactorEvent] = []
    for op in _flat_child_ops(delta["ops"]):
        classified: tuple[str, str] | None = None  # (kind, detail)

        if op["op"] == "delete":
            content_summary: str = op.get("content_summary", "")
            if "moved to" in content_summary:
                target = content_summary.split("moved to")[-1].strip()
                classified = ("move", f"→ {target}")

        elif op["op"] == "replace":
            new_summary: str = op.get("new_summary", "")
            old_summary: str = op.get("old_summary", "")

            if new_summary.startswith("renamed to "):
                new_name = new_summary.removeprefix("renamed to ").strip()
                classified = ("rename", f"→ {new_name}")
            elif new_summary.startswith("moved to "):
                target = new_summary.removeprefix("moved to ").strip()
                classified = ("move", f"→ {target}")
            elif _signature_changed(new_summary) or _signature_changed(old_summary):
                # A bare substring test for "signature" would also capture
                # "implementation changed (signature stable)" and leave the
                # implementation branch below unreachable for that summary.
                detail = new_summary or f"{op['address']} signature changed"
                classified = ("signature", detail)
            elif "implementation" in new_summary:
                classified = ("implementation", new_summary)

        if classified is not None:
            kind, detail = classified
            events.append(RefactorEvent(
                kind=kind,
                address=op["address"],
                detail=detail,
                commit=commit,
            ))

    return events
| 219 | |
| 220 | |
# Report column heading for each refactoring kind: the upper-cased kind name,
# right-padded to the width of the longest label ("IMPLEMENTATION", 14
# characters) so the symbol addresses printed after it line up in one column.
_LABEL: dict[str, str] = {
    kind: kind.upper().ljust(len("IMPLEMENTATION"))
    for kind in ("rename", "move", "signature", "implementation")
}
| 227 | |
| 228 | |
def _print_human(
    events: list[RefactorEvent],
    from_label: str,
    to_label: str,
) -> None:
    """Write the human-readable refactoring report with ``typer.echo``.

    The report has a header naming the range, one three-line entry per event
    (in the order given), and a footer with the total and per-kind counts.
    When *events* is empty only the header and a "nothing detected" line are
    written.
    """
    rule = "─" * 62
    header = (
        "\nSemantic refactoring report",
        f"From: {from_label}",
        f"To: {to_label}",
        rule,
    )
    for line in header:
        typer.echo(line)

    if not events:
        typer.echo("\n (no semantic refactoring detected in this range)")
        return

    # Events are echoed in the order received; counts per kind are tallied in
    # the same pass for the footer.
    tally: dict[str, int] = {}
    for event in events:
        heading = _LABEL.get(event.kind, event.kind.upper().ljust(14))
        abbreviated = event.commit.commit_id[:8]
        typer.echo(f"\n{heading} {event.address}")
        typer.echo(f" {event.detail}")
        typer.echo(f' commit {abbreviated} "{event.commit.message}"')
        tally[event.kind] = tally.get(event.kind, 0) + 1

    typer.echo("\n" + rule)
    breakdown = " · ".join(f"{tally[name]} {name}" for name in sorted(tally))
    typer.echo(f"{len(events)} refactoring operation(s) detected")
    typer.echo(f"({breakdown})")
| 258 | |
| 259 | |
@app.callback(invoke_without_command=True)
def detect_refactor(
    ctx: typer.Context,
    from_ref: str | None = typer.Option(
        None, "--from", metavar="REF",
        help="Start of range (exclusive). Default: initial commit.",
    ),
    to_ref: str | None = typer.Option(
        None, "--to", metavar="REF",
        help="End of range (inclusive). Default: HEAD.",
    ),
    kind_filter: str | None = typer.Option(
        None, "--kind", "-k", metavar="KIND",
        help="Filter to one category: rename, move, signature, implementation.",
    ),
    as_json: bool = typer.Option(
        False, "--json", help="Emit the full refactoring report as JSON.",
    ),
) -> None:
    """Detect semantic refactoring operations across a commit range.

    ``muse detect-refactor`` is impossible in Git. Git reports renames only
    as heuristic line-similarity guesses (``git diff --find-renames``); it
    has no concept of function identity, body hashes, or cross-file symbol
    continuity.

    Muse detects every semantic refactoring at the AST level:

    \b
    - RENAME: same body, new name (``body_hash`` match)
    - MOVE: same content, new file (``content_id`` match)
    - SIGNATURE: name/body stable, parameters changed
    - IMPLEMENTATION: signature stable, logic changed

    Use ``--from`` / ``--to`` to scope the range. Without flags, scans the
    full history from the first commit to HEAD.
    """
    # NOTE: this docstring doubles as the command's help text. The line holding
    # only the backspace escape above is Click's marker for "do not re-wrap the
    # next paragraph"; it must be the real control character, not an escaped
    # backslash followed by "b", or it is printed verbatim in --help.
    root = require_repo()
    repo_id = _read_repo_id(root)
    branch = _read_branch(root)

    # Reject an unknown --kind before resolving any commits.
    if kind_filter and kind_filter not in _VALID_KINDS:
        typer.echo(
            f"❌ Unknown kind '{kind_filter}'. "
            f"Valid: {', '.join(sorted(_VALID_KINDS))}",
            err=True,
        )
        raise typer.Exit(code=ExitCode.USER_ERROR)

    to_commit = resolve_commit_ref(root, repo_id, branch, to_ref)
    if to_commit is None:
        label = to_ref or "HEAD"
        typer.echo(f"❌ Commit '{label}' not found.", err=True)
        raise typer.Exit(code=ExitCode.USER_ERROR)

    # Without --from the walk runs to the start of history.
    from_commit_id: str | None = None
    from_label = "initial commit"
    if from_ref is not None:
        from_commit = resolve_commit_ref(root, repo_id, branch, from_ref)
        if from_commit is None:
            typer.echo(f"❌ Commit '{from_ref}' not found.", err=True)
            raise typer.Exit(code=ExitCode.USER_ERROR)
        from_commit_id = from_commit.commit_id
        # The resolved record already carries the message, so the commit does
        # not have to be read from the store a second time.
        from_label = f'{from_commit_id[:8]} "{from_commit.message}"'
    to_label = f'{to_commit.commit_id[:8]} "{to_commit.message}"'

    # Commits arrive newest-first; events keep that order.
    all_events: list[RefactorEvent] = [
        event
        for commit in _walk_commits(root, to_commit.commit_id, from_commit_id)
        for event in _classify_ops(commit)
        if not kind_filter or event.kind == kind_filter
    ]

    if as_json:
        typer.echo(json.dumps(
            {
                "schema_version": 2,
                "from": from_label,
                "to": to_label,
                "total": len(all_events),
                "events": [e.to_dict() for e in all_events],
            },
            indent=2,
        ))
        return

    _print_human(all_events, from_label, to_label)