coupling.py
python
| 1 | """muse coupling — file co-change analysis. |
| 2 | |
| 3 | Identifies files that change together most often. High co-change frequency |
| 4 | between two files signals a hidden dependency — they are logically coupled |
| 5 | even if there is no explicit import between them. |
| 6 | |
| 7 | This is structurally impossible in Git at the semantic level: Git could |
| 8 | count raw file modifications, but ``muse coupling`` counts only *semantic* |
| 9 | co-changes — commits where both files had AST-level symbol modifications, |
| 10 | not formatting-only edits (which Muse already separates from real changes). |
| 11 | |
| 12 | Usage:: |
| 13 | |
| 14 | muse coupling |
| 15 | muse coupling --top 20 |
| 16 | muse coupling --from HEAD~30 |
| 17 | |
| 18 | Output:: |
| 19 | |
| 20 | File co-change analysis — top 10 most coupled pairs |
| 21 | Commits analysed: 47 |
| 22 | |
| 23 | 1 src/billing.py ↔ src/models.py co-changed in 18 commits |
| 24 | 2 src/api.py ↔ src/auth.py co-changed in 12 commits |
| 25 | 3 src/billing.py ↔ tests/test_billing.py co-changed in 11 commits |
| 26 | |
| 27 | High coupling = hidden dependency. Consider extracting a shared interface. |
| 28 | """ |
| 29 | |
| 30 | from __future__ import annotations |
| 31 | |
| 32 | import argparse |
| 33 | import json |
| 34 | import logging |
| 35 | import pathlib |
| 36 | import sys |
| 37 | |
| 38 | from muse.core.errors import ExitCode |
| 39 | from muse.core.repo import require_repo |
| 40 | from muse.core.store import read_current_branch, resolve_commit_ref |
| 41 | from muse.plugins.code._query import file_pairs, touched_files, walk_commits_range |
| 42 | |
| 43 | logger = logging.getLogger(__name__) |
| 44 | |
| 45 | |
def _read_repo_id(root: pathlib.Path) -> str:
    """Return the ``repo_id`` stored in ``<root>/.muse/repo.json``.

    The file is parsed as JSON and the value under the ``"repo_id"`` key is
    returned, coerced to ``str``.

    Raises ``OSError`` if the file cannot be read, ``json.JSONDecodeError``
    if it is not valid JSON, and ``KeyError`` if ``"repo_id"`` is absent.
    """
    repo_json = root / ".muse" / "repo.json"
    # Decode explicitly as UTF-8 rather than relying on the platform's
    # locale-dependent default encoding.
    metadata = json.loads(repo_json.read_text(encoding="utf-8"))
    return str(metadata["repo_id"])
| 48 | |
| 49 | |
def _read_branch(root: pathlib.Path) -> str:
    """Return what ``read_current_branch`` reports for the repo at *root*.

    Thin wrapper kept so this module has a single local entry point for
    looking up the branch.
    """
    current_branch = read_current_branch(root)
    return current_branch
| 52 | |
| 53 | |
def register(subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]") -> None:
    """Register the coupling subcommand.

    Adds a ``coupling`` parser to *subparsers* with the options ``--top``/``-n``,
    ``--from``, ``--to``, ``--min`` and ``--json``, and sets ``run`` as the
    handler via the ``func`` default.
    """
    parser = subparsers.add_parser(
        "coupling",
        help="Find files that change together most often — hidden dependencies.",
        description=__doc__,
        # The module docstring contains literal usage and output examples;
        # argparse's default formatter would re-wrap them into one paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--top", "-n", type=int, default=20, metavar="N",
        help="Number of pairs to show (default: 20).",
    )
    parser.add_argument(
        "--from", default=None, metavar="REF", dest="from_ref",
        help="Exclusive start of the commit range (default: initial commit).",
    )
    parser.add_argument(
        "--to", default=None, metavar="REF", dest="to_ref",
        help="Inclusive end of the commit range (default: HEAD).",
    )
    parser.add_argument(
        "--min", type=int, default=2, metavar="N", dest="min_count",
        help="Minimum co-change count to include in results (default: 2).",
    )
    parser.add_argument(
        "--json", action="store_true", dest="as_json",
        help="Emit results as JSON.",
    )
    parser.set_defaults(func=run)
| 82 | |
| 83 | |
def run(args: argparse.Namespace) -> None:
    """Find files that change together most often — hidden dependencies.

    ``muse coupling`` identifies semantic co-change: file pairs that had
    AST-level symbol modifications in the same commit. This is stricter
    than raw file co-change — formatting-only edits and non-code files
    are excluded.

    High coupling between two files means they share unstated dependencies.
    Consider extracting a shared interface, a common module, or an
    explicit contract between them.

    Use ``--from`` / ``--to`` to scope the analysis to a sprint or release.
    Use ``--min`` to raise the minimum co-change threshold.

    Reads ``top``, ``from_ref``, ``to_ref``, ``min_count`` and ``as_json``
    from *args*. Prints either a JSON document or a human-readable table to
    stdout. Exits with ``ExitCode.USER_ERROR`` when ``--top`` is negative or
    when a ``--from`` / ``--to`` ref cannot be resolved.
    """
    top: int = args.top
    from_ref: str | None = args.from_ref
    to_ref: str | None = args.to_ref
    min_count: int = args.min_count
    as_json: bool = args.as_json

    # A negative limit would make the ``[:top]`` slice drop pairs from the
    # end of the ranking instead of capping it, so reject it up front.
    if top < 0:
        print(f"❌ --top must be zero or greater (got {top}).", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    root = require_repo()
    repo_id = _read_repo_id(root)
    branch = _read_branch(root)

    to_commit = resolve_commit_ref(root, repo_id, branch, to_ref)
    if to_commit is None:
        print(f"❌ Commit '{to_ref or 'HEAD'}' not found.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    from_commit_id: str | None = None
    if from_ref is not None:
        from_commit = resolve_commit_ref(root, repo_id, branch, from_ref)
        if from_commit is None:
            print(f"❌ Commit '{from_ref}' not found.", file=sys.stderr)
            raise SystemExit(ExitCode.USER_ERROR)
        from_commit_id = from_commit.commit_id

    commits = walk_commits_range(root, to_commit.commit_id, from_commit_id)

    # Count, per file pair, the number of commits in which both files appear
    # among the files touched by that commit's structured delta.
    pair_counts: dict[tuple[str, str], int] = {}
    for commit in commits:
        if commit.structured_delta is None:
            continue
        files = touched_files(commit.structured_delta["ops"])
        if len(files) < 2:
            continue
        for a, b in file_pairs(files):
            key = (a, b)
            pair_counts[key] = pair_counts.get(key, 0) + 1

    ranked = _rank_pairs(pair_counts, min_count, top)

    if as_json:
        print(json.dumps(
            {
                "commits_analysed": len(commits),
                "pairs": [{"file_a": a, "file_b": b, "co_changes": c} for (a, b), c in ranked],
            },
            indent=2,
        ))
        return

    _print_report(ranked, len(commits), min_count)


def _rank_pairs(
    pair_counts: dict[tuple[str, str], int],
    min_count: int,
    top: int,
) -> list[tuple[tuple[str, str], int]]:
    """Return at most *top* pairs with count >= *min_count*, highest first."""
    eligible = [(pair, cnt) for pair, cnt in pair_counts.items() if cnt >= min_count]
    # Break ties on the pair itself so the ranking (and therefore the cut-off
    # applied by ``top``) does not depend on dict insertion order.
    eligible.sort(key=lambda item: (-item[1], item[0]))
    return eligible[:top]


def _print_report(
    ranked: list[tuple[tuple[str, str], int]],
    commits_analysed: int,
    min_count: int,
) -> None:
    """Print the human-readable co-change table for *ranked* to stdout."""
    print(f"\nFile co-change analysis — top {len(ranked)} most coupled pairs")
    print(f"Commits analysed: {commits_analysed}")
    print("")

    if not ranked:
        print(f" (no file pairs co-changed {min_count}+ times)")
        return

    width = len(str(len(ranked)))
    # Align the ↔ separator.
    max_a = max(len(a) for (a, _), _ in ranked)
    # Keep the historical 50-column padding for the second file, but widen it
    # when a longer path would otherwise push the count column out of line.
    max_b = max(50, max(len(b) for (_, b), _ in ranked))
    for rank, ((a, b), count) in enumerate(ranked, 1):
        label = "commit" if count == 1 else "commits"
        print(
            f" {rank:>{width}} {a:<{max_a}} ↔ {b:<{max_b}} "
            f"co-changed in {count:>3} {label}"
        )

    print("")
    print("High coupling = hidden dependency. Consider extracting a shared interface.")
| 166 | print("") |
| 167 | print("High coupling = hidden dependency. Consider extracting a shared interface.") |