gabriel / muse public
coupling.py python
167 lines 5.7 KB
00373ad0 feat: migrate CLI from typer to argparse (POSIX-compliant, order-independent) Gabriel Cardona <gabriel@tellurstori.com> 1d ago
1 """muse coupling — file co-change analysis.
2
3 Identifies files that change together most often. High co-change frequency
4 between two files signals a hidden dependency — they are logically coupled
5 even if there is no explicit import between them.
6
7 This is structurally impossible in Git at the semantic level: Git could
8 count raw file modifications, but ``muse coupling`` counts only *semantic*
9 co-changes — commits where both files had AST-level symbol modifications,
10 not formatting-only edits (which Muse already separates from real changes).
11
12 Usage::
13
14 muse coupling
15 muse coupling --top 20
16 muse coupling --from HEAD~30
17
18 Output::
19
20 File co-change analysis — top 10 most coupled pairs
21 Commits analysed: 47
22
23 1 src/billing.py ↔ src/models.py co-changed in 18 commits
24 2 src/api.py ↔ src/auth.py co-changed in 12 commits
25 3 src/billing.py ↔ tests/test_billing.py co-changed in 11 commits
26
27 High coupling = hidden dependency. Consider extracting a shared interface.
28 """
29
30 from __future__ import annotations
31
32 import argparse
33 import json
34 import logging
35 import pathlib
36 import sys
37
38 from muse.core.errors import ExitCode
39 from muse.core.repo import require_repo
40 from muse.core.store import read_current_branch, resolve_commit_ref
41 from muse.plugins.code._query import file_pairs, touched_files, walk_commits_range
42
43 logger = logging.getLogger(__name__)
44
45
def _read_repo_id(root: pathlib.Path) -> str:
    """Return the ``repo_id`` stored in ``<root>/.muse/repo.json`` as a string.

    The file is read as raw bytes and handed to ``json.loads``, which detects
    the Unicode encoding (UTF-8, with or without BOM, UTF-16, UTF-32) itself.
    This keeps the result independent of the platform's locale encoding,
    which ``Path.read_text()`` falls back to when no ``encoding`` is given.

    Args:
        root: Repository root directory that contains the ``.muse`` folder.

    Returns:
        The ``repo_id`` value, coerced to ``str``.

    Raises:
        OSError: If ``repo.json`` cannot be read.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If the JSON document has no ``repo_id`` key.
    """
    repo_json = root / ".muse" / "repo.json"
    return str(json.loads(repo_json.read_bytes())["repo_id"])
48
49
def _read_branch(root: pathlib.Path) -> str:
    """Return whatever ``read_current_branch`` reports for the repo at *root*.

    Thin local alias around the store helper so the command body reads
    symmetrically with ``_read_repo_id``.
    """
    branch_name = read_current_branch(root)
    return branch_name
52
53
def register(subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]") -> None:
    """Register the ``coupling`` subcommand on *subparsers*.

    Adds a ``coupling`` parser with the following options and binds ``run``
    as its handler through the ``func`` default:

    * ``--top`` / ``-n`` (int, default 20) — number of pairs to show.
    * ``--from`` (stored as ``from_ref``) — exclusive start of the range.
    * ``--to`` (stored as ``to_ref``) — inclusive end of the range.
    * ``--min`` (int, stored as ``min_count``, default 2) — minimum
      co-change count for a pair to be reported.
    * ``--json`` (flag, stored as ``as_json``) — emit JSON instead of text.

    Args:
        subparsers: The sub-parser collection of the top-level CLI parser.
    """
    parser = subparsers.add_parser(
        "coupling",
        help="Find files that change together most often — hidden dependencies.",
        description=__doc__,
        # The module docstring contains literal usage and sample-output
        # blocks; the default formatter would re-wrap them into a single
        # paragraph, so keep the description verbatim in ``--help``.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--top", "-n", type=int, default=20, metavar="N",
        help="Number of pairs to show (default: 20).",
    )
    parser.add_argument(
        "--from", default=None, metavar="REF", dest="from_ref",
        help="Exclusive start of the commit range (default: initial commit).",
    )
    parser.add_argument(
        "--to", default=None, metavar="REF", dest="to_ref",
        help="Inclusive end of the commit range (default: HEAD).",
    )
    parser.add_argument(
        "--min", type=int, default=2, metavar="N", dest="min_count",
        help="Minimum co-change count to include in results (default: 2).",
    )
    parser.add_argument(
        "--json", action="store_true", dest="as_json",
        help="Emit results as JSON.",
    )
    # The CLI dispatcher is expected to call ``args.func(args)``.
    parser.set_defaults(func=run)
82
83
def run(args: argparse.Namespace) -> None:
    """Find files that change together most often — hidden dependencies.

    ``muse coupling`` identifies semantic co-change: file pairs that had
    AST-level symbol modifications in the same commit. This is stricter
    than raw file co-change — formatting-only edits and non-code files
    are excluded.

    High coupling between two files means they share unstated dependencies.
    Consider extracting a shared interface, a common module, or an
    explicit contract between them.

    Use ``--from`` / ``--to`` to scope the analysis to a sprint or release.
    Use ``--min`` to raise the minimum co-change threshold.

    Args:
        args: Parsed CLI namespace providing ``top``, ``from_ref``,
            ``to_ref``, ``min_count`` and ``as_json``.

    Raises:
        SystemExit: With ``ExitCode.USER_ERROR`` when ``--top`` is not a
            positive integer, or when the ``--to`` / ``--from`` ref cannot
            be resolved to a commit.
    """
    top: int = args.top
    from_ref: str | None = args.from_ref
    to_ref: str | None = args.to_ref
    min_count: int = args.min_count
    as_json: bool = args.as_json

    # ``top`` is used as a slice bound below. A negative value would silently
    # drop pairs from the *end* of the ranking instead of limiting the output,
    # so reject anything that is not a positive count up front.
    if top < 1:
        print(f"❌ --top must be a positive integer (got {top}).", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    root = require_repo()
    repo_id = _read_repo_id(root)
    branch = _read_branch(root)

    # Resolve the inclusive end of the range (``to_ref`` of None means HEAD
    # in the error message below).
    to_commit = resolve_commit_ref(root, repo_id, branch, to_ref)
    if to_commit is None:
        print(f"❌ Commit '{to_ref or 'HEAD'}' not found.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    # Resolve the optional exclusive start of the range.
    from_commit_id: str | None = None
    if from_ref is not None:
        from_commit = resolve_commit_ref(root, repo_id, branch, from_ref)
        if from_commit is None:
            print(f"❌ Commit '{from_ref}' not found.", file=sys.stderr)
            raise SystemExit(ExitCode.USER_ERROR)
        from_commit_id = from_commit.commit_id

    # NOTE(review): ``len(commits)`` is taken below, so this is presumed to be
    # a sized sequence rather than a one-shot iterator — confirm.
    commits = walk_commits_range(root, to_commit.commit_id, from_commit_id)

    # Count, per file pair, the number of commits in which both were touched.
    pair_counts: dict[tuple[str, str], int] = {}
    for commit in commits:
        if commit.structured_delta is None:
            continue
        files = touched_files(commit.structured_delta["ops"])
        if len(files) < 2:
            # A pair needs at least two touched files.
            continue
        for a, b in file_pairs(files):
            key = (a, b)
            pair_counts[key] = pair_counts.get(key, 0) + 1

    # Keep pairs at or above the threshold, most frequent first. ``sorted`` is
    # stable, so equally-frequent pairs keep their first-seen order.
    filtered = {pair: cnt for pair, cnt in pair_counts.items() if cnt >= min_count}
    ranked = sorted(filtered.items(), key=lambda kv: kv[1], reverse=True)[:top]

    if as_json:
        print(json.dumps(
            {
                "commits_analysed": len(commits),
                "pairs": [{"file_a": a, "file_b": b, "co_changes": c} for (a, b), c in ranked],
            },
            indent=2,
        ))
        return

    print(f"\nFile co-change analysis — top {len(ranked)} most coupled pairs")
    print(f"Commits analysed: {len(commits)}")
    print("")

    if not ranked:
        print(f" (no file pairs co-changed {min_count}+ times)")
        return

    rank_width = len(str(len(ranked)))
    # Size both path columns to the data so the ↔ separator and the trailing
    # count line up regardless of path length.
    width_a = max(len(a) for (a, _), _ in ranked)
    width_b = max(len(b) for (_, b), _ in ranked)
    for rank, ((a, b), count) in enumerate(ranked, 1):
        label = "commit" if count == 1 else "commits"
        print(
            f" {rank:>{rank_width}} {a:<{width_a}} ↔ {b:<{width_b}} "
            f"co-changed in {count:>3} {label}"
        )

    print("")
    print("High coupling = hidden dependency. Consider extracting a shared interface.")