cgcardona / muse public
test_refactor_classify.py python
434 lines 17.3 KB
e6786943 feat: upgrade to Python 3.14, drop from __future__ import annotations Gabriel Cardona <cgcardona@gmail.com> 1d ago
1 """Tests for muse/plugins/code/_refactor_classify.py.
2
3 Coverage
4 --------
5 classify_exact
6 - unchanged: same content_id
7 - rename: same body_hash, different name, same file
8 - move: same body_hash, same name, different file (an identical content_id is "unchanged")
9 - rename+move: same body_hash, different name, different file
10 - signature_only: same body_hash, different signature_id
11 - impl_only: same signature_id, different body_hash
12 - metadata_only: same body_hash + signature_id, different metadata_id
13 - full_rewrite: both signature and body changed
14
15 classify_composite
16 - Exact rename detected across batches
17 - Exact move paired across batches (identical content_id is classified "unchanged")
18 - Exact rename+move detected across batches
19 - Inferred extract (new symbol name inside old qualified_name)
20 - No false positives for completely unrelated symbols
21 - Empty inputs → empty results
22
23 RefactorClassification
24 - to_dict() contains the required keys; content ids are truncated to 8 characters
25 - confidence is rounded to 3 decimal places
26 - evidence list is preserved
27 """
28
29 import hashlib
30
31 import pytest
32
33 from muse.plugins.code._refactor_classify import (
34 RefactorClassification,
35 classify_composite,
36 classify_exact,
37 )
38 from muse.plugins.code.ast_parser import SymbolRecord
39
40
41 # ---------------------------------------------------------------------------
42 # Helpers
43 # ---------------------------------------------------------------------------
44
45
46 def _sha(text: str) -> str:
47 return hashlib.sha256(text.encode()).hexdigest()
48
49
def _rec(
    *,
    kind: str = "function",
    name: str = "func",
    qualified_name: str = "func",
    lineno: int = 1,
    end_lineno: int = 10,
    content_id: str | None = None,
    body_hash: str | None = None,
    signature_id: str | None = None,
    metadata_id: str = "",
    canonical_key: str = "",
) -> SymbolRecord:
    """Build a ``SymbolRecord`` for a test, filling in any hash not supplied.

    A falsy ``body_hash`` or ``signature_id`` (``None`` or ``""``) is replaced
    by a digest derived from *name*. A falsy ``content_id`` is replaced by a
    digest of the resolved body hash, signature id and ``metadata_id``
    concatenated in that order. All other arguments are passed through as-is.
    """
    resolved_body = body_hash
    if not resolved_body:
        resolved_body = _sha(f"body:{name}")

    resolved_sig = signature_id
    if not resolved_sig:
        resolved_sig = _sha(f"sig:{name}")

    # content_id is resolved last because its fallback depends on the two
    # hashes resolved above.
    resolved_content = content_id
    if not resolved_content:
        resolved_content = _sha(resolved_body + resolved_sig + metadata_id)

    return SymbolRecord(
        kind=kind,
        name=name,
        qualified_name=qualified_name,
        lineno=lineno,
        end_lineno=end_lineno,
        content_id=resolved_content,
        body_hash=resolved_body,
        signature_id=resolved_sig,
        metadata_id=metadata_id,
        canonical_key=canonical_key,
    )
78
79
def _same_body_rec(source: SymbolRecord, *, name: str, qualified_name: str = "") -> SymbolRecord:
    """Return a record with the same body_hash as *source* but a different name.

    ``kind``, ``lineno``, ``end_lineno``, ``body_hash`` and ``signature_id``
    are copied from *source*; ``metadata_id`` is copied when present and
    defaults to ``""`` otherwise; ``canonical_key`` is always ``""``.

    Args:
        source: Record whose hashes and line span are reused.
        name: Name given to the returned record.
        qualified_name: Qualified name of the returned record; *name* is used
            when this is empty.

    Returns:
        A new ``SymbolRecord`` whose ``content_id`` is hashed from the body
        hash, the signature id, the literal ``"renamed"`` and *name*.
    """
    body_hash = source["body_hash"]
    sig_id = source["signature_id"]
    return SymbolRecord(
        kind=source["kind"],
        name=name,
        qualified_name=qualified_name or name,
        lineno=source["lineno"],
        end_lineno=source["end_lineno"],
        # The new name is mixed into the digest so the content_id is intended
        # to differ from one hashed from the body and signature alone.
        content_id=_sha(body_hash + sig_id + "renamed" + name),
        body_hash=body_hash,
        signature_id=sig_id,
        metadata_id=source.get("metadata_id", ""),
        canonical_key="",
    )
97
98
99 # ---------------------------------------------------------------------------
100 # classify_exact — unchanged
101 # ---------------------------------------------------------------------------
102
103
class TestClassifyExactUnchanged:
    """``classify_exact`` when old and new share a ``content_id``."""

    def test_same_content_id_is_unchanged(self) -> None:
        """The same record at the same address is expected to be ``"unchanged"``."""
        record = _rec(name="f", content_id="abc123")
        address = "src/a.py::f"
        assert classify_exact(address, address, record, record) == "unchanged"
109
110
111 # ---------------------------------------------------------------------------
112 # classify_exact — rename (same file)
113 # ---------------------------------------------------------------------------
114
115
class TestClassifyExactRename:
    """Same-file pairs whose body hash and signature id are identical."""

    def test_same_body_different_name_same_file(self) -> None:
        """A new name over the same body in the same file is expected to be ``"rename"``."""
        body_hash = _sha("body_content")
        signature_id = _sha("signature")
        before = _rec(
            name="old_name",
            qualified_name="old_name",
            lineno=1,
            end_lineno=10,
            content_id=_sha(body_hash + signature_id + ""),
            body_hash=body_hash,
            signature_id=signature_id,
        )
        after = _rec(
            name="new_name",
            qualified_name="new_name",
            lineno=1,
            end_lineno=10,
            # Extra suffix so the two content ids are hashed from different text.
            content_id=_sha(body_hash + signature_id + "x"),
            body_hash=body_hash,
            signature_id=signature_id,
        )
        outcome = classify_exact("src/a.py::old_name", "src/a.py::new_name", before, after)
        assert outcome == "rename"

    def test_rename_requires_different_name(self) -> None:
        """With the name unchanged, a metadata-only difference is expected to be ``"metadata_only"``."""
        body_hash = _sha("body")
        signature_id = _sha("sig")
        before = _rec(
            name="same",
            qualified_name="same",
            lineno=1,
            end_lineno=5,
            content_id=_sha(body_hash + signature_id),
            body_hash=body_hash,
            signature_id=signature_id,
        )
        after = _rec(
            name="same",
            qualified_name="same",
            lineno=1,
            end_lineno=5,
            content_id=_sha(body_hash + signature_id + "meta"),
            body_hash=body_hash,
            signature_id=signature_id,
            metadata_id="meta",
        )
        address = "src/a.py::same"
        # Same name, same body, different metadata_id: not a rename.
        assert classify_exact(address, address, before, after) == "metadata_only"
153
154
155 # ---------------------------------------------------------------------------
156 # classify_exact — move (different file)
157 # ---------------------------------------------------------------------------
158
159
class TestClassifyExactMove:
    """Pairs whose old and new addresses are in different files."""

    def test_same_content_id_different_file_same_name(self) -> None:
        """An identical ``content_id`` is expected to be ``"unchanged"`` even across files."""
        record = _rec(name="compute", content_id="shared_content_id_abc")
        outcome = classify_exact(
            "src/billing.py::compute",
            "src/invoice.py::compute",
            record,
            record,
        )
        # A matching content_id is expected to win over the file difference.
        assert outcome == "unchanged"

    def test_same_body_same_name_different_file(self) -> None:
        """Same body and name in another file is expected to be ``"move"``."""
        body_hash = _sha("body")
        signature_id = _sha("sig")
        before = _rec(
            name="compute",
            qualified_name="compute",
            lineno=1,
            end_lineno=10,
            content_id=_sha(body_hash + signature_id + "old"),
            body_hash=body_hash,
            signature_id=signature_id,
        )
        after = _rec(
            name="compute",
            qualified_name="compute",
            lineno=20,
            end_lineno=30,
            content_id=_sha(body_hash + signature_id + "new"),
            body_hash=body_hash,
            signature_id=signature_id,
        )
        outcome = classify_exact(
            "src/billing.py::compute",
            "src/invoice.py::compute",
            before,
            after,
        )
        assert outcome == "move"

    def test_same_body_different_name_different_file(self) -> None:
        """Same body under a new name in another file is expected to be ``"rename+move"``."""
        body_hash = _sha("body")
        signature_id = _sha("sig")
        before = _rec(
            name="compute_total",
            qualified_name="compute_total",
            lineno=1,
            end_lineno=10,
            content_id=_sha(body_hash + signature_id + "old"),
            body_hash=body_hash,
            signature_id=signature_id,
        )
        after = _rec(
            name="invoice_total",
            qualified_name="invoice_total",
            lineno=5,
            end_lineno=15,
            content_id=_sha(body_hash + signature_id + "new"),
            body_hash=body_hash,
            signature_id=signature_id,
        )
        outcome = classify_exact(
            "src/billing.py::compute_total",
            "src/invoice.py::invoice_total",
            before,
            after,
        )
        assert outcome == "rename+move"
201
202
203 # ---------------------------------------------------------------------------
204 # classify_exact — signature_only / impl_only / metadata_only / full_rewrite
205 # ---------------------------------------------------------------------------
206
207
class TestClassifyExactKinds:
    """Same-name, same-address pairs that differ only in selected hashes."""

    def _make_pair(
        self,
        *,
        same_body: bool = True,
        same_sig: bool = True,
        same_meta: bool = True,
    ) -> tuple[SymbolRecord, SymbolRecord]:
        """Build an ``(old, new)`` pair of records named ``f``.

        Each flag that is true copies the matching hash from the old record to
        the new one; a false flag substitutes a digest of different text. The
        new ``content_id`` always has ``"x"`` appended to its input, so it is
        hashed from different text than the old one.
        """
        old_body = _sha("body_data")
        old_sig = _sha("sig_data")
        old_meta = _sha("meta_data")

        # Start from the old hashes and replace only what the flags ask for.
        new_body, new_sig, new_meta = old_body, old_sig, old_meta
        if not same_body:
            new_body = _sha("body_data_changed")
        if not same_sig:
            new_sig = _sha("sig_data_changed")
        if not same_meta:
            new_meta = _sha("meta_data_changed")

        before = _rec(
            name="f",
            qualified_name="f",
            lineno=1,
            end_lineno=10,
            content_id=_sha(old_body + old_sig + old_meta),
            body_hash=old_body,
            signature_id=old_sig,
            metadata_id=old_meta,
        )
        after = _rec(
            name="f",
            qualified_name="f",
            lineno=1,
            end_lineno=10,
            content_id=_sha(new_body + new_sig + new_meta + "x"),
            body_hash=new_body,
            signature_id=new_sig,
            metadata_id=new_meta,
        )
        return before, after

    def test_signature_only(self) -> None:
        """Only the signature id differs."""
        before, after = self._make_pair(same_body=True, same_sig=False)
        assert classify_exact("a.py::f", "a.py::f", before, after) == "signature_only"

    def test_impl_only(self) -> None:
        """Only the body hash differs."""
        before, after = self._make_pair(same_body=False, same_sig=True)
        assert classify_exact("a.py::f", "a.py::f", before, after) == "impl_only"

    def test_metadata_only(self) -> None:
        """Only the metadata id differs."""
        before, after = self._make_pair(same_body=True, same_sig=True, same_meta=False)
        assert classify_exact("a.py::f", "a.py::f", before, after) == "metadata_only"

    def test_full_rewrite(self) -> None:
        """Both the body hash and the signature id differ."""
        before, after = self._make_pair(same_body=False, same_sig=False)
        assert classify_exact("a.py::f", "a.py::f", before, after) == "full_rewrite"
255
256
257 # ---------------------------------------------------------------------------
258 # RefactorClassification — to_dict
259 # ---------------------------------------------------------------------------
260
261
class TestRefactorClassificationToDict:
    """Checks on the dictionary produced by ``RefactorClassification.to_dict()``."""

    def test_to_dict_contains_required_keys(self) -> None:
        """Addresses, classifications, confidence and evidence appear under their keys."""
        classification = RefactorClassification(
            old_address="src/a.py::f",
            new_address="src/a.py::g",
            old_rec=_rec(name="f"),
            new_rec=_rec(name="g"),
            exact="rename",
            inferred="none",
            confidence=1.0,
            evidence=["body_hash matches abc12345"],
        )
        payload = classification.to_dict()
        expected = {
            "old_address": "src/a.py::f",
            "new_address": "src/a.py::g",
            "exact_classification": "rename",
            "inferred_refactor": "none",
            "confidence": 1.0,
            "evidence": ["body_hash matches abc12345"],
        }
        for key, value in expected.items():
            assert payload[key] == value

    def test_to_dict_truncates_hashes(self) -> None:
        """64-character content ids are expected to come out 8 characters long."""
        before = _rec(name="f", content_id="a" * 64, body_hash="b" * 64, signature_id="c" * 64)
        after = _rec(name="g", content_id="d" * 64, body_hash="b" * 64, signature_id="c" * 64)
        payload = RefactorClassification("a.py::f", "a.py::g", before, after, "rename").to_dict()
        for key in ("old_content_id", "new_content_id"):
            assert len(str(payload[key])) == 8

    def test_to_dict_confidence_rounded(self) -> None:
        """Confidence is expected to be rounded to three decimal places."""
        classification = RefactorClassification(
            "a.py::f",
            "a.py::g",
            _rec(name="f"),
            _rec(name="g"),
            "full_rewrite",
            confidence=0.123456789,
        )
        assert classification.to_dict()["confidence"] == 0.123

    def test_default_evidence_is_empty_list(self) -> None:
        """Omitting ``evidence`` yields an empty list on the object and in the dict."""
        classification = RefactorClassification(
            "a.py::f",
            "a.py::g",
            _rec(name="f"),
            _rec(name="g"),
            "impl_only",
        )
        assert classification.evidence == []
        payload = classification.to_dict()
        assert payload["evidence"] == []
307
308
309 # ---------------------------------------------------------------------------
310 # classify_composite — exact detection
311 # ---------------------------------------------------------------------------
312
313
class TestClassifyCompositeExact:
    """``classify_composite`` pairing removed and added symbols by their hashes."""

    def test_rename_detected(self) -> None:
        """One removed and one added symbol with the same body in one file: ``"rename"``."""
        body_hash = _sha("shared_body")
        signature_id = _sha("sig")
        before = _rec(
            name="old_func",
            qualified_name="old_func",
            lineno=1,
            end_lineno=10,
            content_id=_sha(body_hash + signature_id + ""),
            body_hash=body_hash,
            signature_id=signature_id,
        )
        after = _rec(
            name="new_func",
            qualified_name="new_func",
            lineno=1,
            end_lineno=10,
            content_id=_sha(body_hash + signature_id + "changed"),
            body_hash=body_hash,
            signature_id=signature_id,
        )
        results = classify_composite(
            {"src/a.py::old_func": before},
            {"src/a.py::new_func": after},
        )
        assert len(results) == 1
        only = results[0]
        assert only.exact == "rename"
        assert only.old_address == "src/a.py::old_func"
        assert only.new_address == "src/a.py::new_func"

    def test_move_detected_via_content_id(self) -> None:
        """The same record under two file addresses is paired and expected to be ``"unchanged"``."""
        record = _rec(name="compute", content_id=_sha("exact_content"))
        results = classify_composite(
            {"src/billing.py::compute": record},
            {"src/invoice.py::compute": record},
        )
        assert len(results) == 1
        only = results[0]
        # A content_id match is expected to be classified as unchanged.
        assert only.exact == "unchanged"
        assert only.old_address == "src/billing.py::compute"
        assert only.new_address == "src/invoice.py::compute"

    def test_empty_inputs(self) -> None:
        """Two empty mappings are expected to produce an empty list."""
        assert classify_composite({}, {}) == []

    def test_no_match_different_everything(self) -> None:
        """Unrelated symbols must not crash the classifier.

        Only the return type is asserted; whether a name heuristic produces a
        result for this pair is deliberately left unchecked.
        """
        removed = {"a.py::alpha": _rec(name="alpha", body_hash=_sha("alpha_body"))}
        added = {"b.py::beta": _rec(name="beta", body_hash=_sha("beta_body"))}
        # No body_hash or content_id is shared between the two records.
        results = classify_composite(removed, added)
        assert isinstance(results, list)

    def test_rename_plus_move(self) -> None:
        """Same body under a new name in another file is expected to be ``"rename+move"``."""
        body_hash = _sha("shared_body_cross")
        signature_id = _sha("cross_sig")
        before = _rec(
            name="compute_a",
            qualified_name="compute_a",
            lineno=1,
            end_lineno=8,
            content_id=_sha(body_hash + signature_id + "old"),
            body_hash=body_hash,
            signature_id=signature_id,
        )
        after = _rec(
            name="compute_b",
            qualified_name="compute_b",
            lineno=20,
            end_lineno=28,
            content_id=_sha(body_hash + signature_id + "new"),
            body_hash=body_hash,
            signature_id=signature_id,
        )
        results = classify_composite(
            {"src/a.py::compute_a": before},
            {"src/b.py::compute_b": after},
        )
        assert len(results) == 1
        assert results[0].exact == "rename+move"

    def test_multiple_renames_at_once(self) -> None:
        """Two independent renames in one batch are expected to produce two results."""

        def _renamed_pair(stem: str) -> tuple[SymbolRecord, SymbolRecord]:
            # Records old_<stem> and new_<stem> share body and signature hashes.
            body_hash = _sha(f"body_{stem}")
            signature_id = _sha(f"sig_{stem}")
            before = _rec(
                name=f"old_{stem}",
                qualified_name=f"old_{stem}",
                lineno=1,
                end_lineno=5,
                content_id=_sha(body_hash + signature_id + "old"),
                body_hash=body_hash,
                signature_id=signature_id,
            )
            after = _rec(
                name=f"new_{stem}",
                qualified_name=f"new_{stem}",
                lineno=1,
                end_lineno=5,
                content_id=_sha(body_hash + signature_id + "new"),
                body_hash=body_hash,
                signature_id=signature_id,
            )
            return before, after

        removed: dict[str, SymbolRecord] = {}
        added: dict[str, SymbolRecord] = {}
        for stem in ("alpha", "beta"):
            before, after = _renamed_pair(stem)
            removed[f"a.py::old_{stem}"] = before
            added[f"a.py::new_{stem}"] = after

        results = classify_composite(removed, added)
        assert len(results) == 2
        seen_old = {item.old_address for item in results}
        assert {"a.py::old_alpha", "a.py::old_beta"} <= seen_old
413
414
415 # ---------------------------------------------------------------------------
416 # classify_composite — inferred extract
417 # ---------------------------------------------------------------------------
418
419
class TestClassifyCompositeInferred:
    """``classify_composite`` results that carry an inferred refactor kind."""

    def test_extract_heuristic_name_overlap(self) -> None:
        """Any ``"extract"`` result must be well-formed.

        "compute_total" is removed and "compute" is added; the new name is a
        substring of the old one. The heuristic may or may not fire, so the
        test passes with no checks when no result is tagged ``"extract"``.
        """
        removed = {"a.py::compute_total": _rec(name="compute_total", qualified_name="compute_total")}
        added = {"a.py::compute": _rec(name="compute", qualified_name="compute")}
        results = classify_composite(removed, added)

        extracted = [item for item in results if item.inferred == "extract"]
        # Verify no crash and the structure is correct.
        assert all(item.confidence >= 0.0 for item in extracted)
        assert all(isinstance(item.evidence, list) for item in extracted)