gabriel / muse public
test_cmd_bisect.py python
371 lines 14.0 KB
faec8c4d feat(hardening): add config/bisect CLI tests and fix bisect convergence bug Gabriel Cardona <gabriel@tellurstori.com> 2d ago
1 """Comprehensive tests for ``muse bisect`` — binary search for bad commits.
2
3 Coverage:
4 - Unit: bisect core functions (start, mark, skip, reset)
5 - Integration: CLI subcommands (start, bad, good, skip, log, reset)
6 - E2E: full bisect workflow resolving to a first-bad commit
7 - Security: invalid refs, session guard (no double-start), ref sanitization
8 - Stress: deep commit history bisect
9 """
10
11 from __future__ import annotations
12
13 import datetime
14 import json
15 import pathlib
16 import uuid
17
18 import pytest
19 from typer.testing import CliRunner
20
21 from muse.cli.app import cli
22 from muse.core.store import CommitRecord, SnapshotRecord, write_commit, write_snapshot
23 from muse.core.snapshot import compute_commit_id, compute_snapshot_id
24
25 runner = CliRunner()
26
27
28 # ---------------------------------------------------------------------------
29 # Helpers
30 # ---------------------------------------------------------------------------
31
32
def _init_repo(tmp_path: pathlib.Path) -> tuple[pathlib.Path, str]:
    """Lay out a bare ``.muse`` directory under *tmp_path*.

    Writes ``repo.json`` (fresh UUID repo id, ``midi`` domain, ``main`` default
    branch), points ``HEAD`` at ``refs/heads/main`` and creates the empty
    ``refs/heads``, ``snapshots``, ``commits`` and ``objects`` directories.

    Returns:
        ``(tmp_path, repo_id)`` — the repository root and the generated id.
    """
    repo_id = str(uuid.uuid4())
    dot_muse = tmp_path / ".muse"
    dot_muse.mkdir()

    repo_meta = {
        "repo_id": repo_id,
        "domain": "midi",
        "default_branch": "main",
        "created_at": "2026-01-01T00:00:00+00:00",
    }
    (dot_muse / "repo.json").write_text(json.dumps(repo_meta))
    (dot_muse / "HEAD").write_text("ref: refs/heads/main")

    (dot_muse / "refs" / "heads").mkdir(parents=True)
    for subdir in ("snapshots", "commits", "objects"):
        (dot_muse / subdir).mkdir()
    return tmp_path, repo_id
48
49
def _env(root: pathlib.Path) -> dict[str, str]:
    """Return the environment overrides handed to CLI invocations in this module.

    The mapping has a single entry, ``MUSE_REPO_ROOT``, holding ``str(root)``.
    """
    overrides: dict[str, str] = {}
    overrides["MUSE_REPO_ROOT"] = str(root)
    return overrides
52
53
def _make_commit(
    root: pathlib.Path,
    repo_id: str,
    *,
    branch: str = "main",
    message: str = "commit",
    parent_id: str | None = None,
) -> str:
    """Write one empty-manifest commit and point *branch* at it.

    Stores a snapshot with an empty manifest, then a commit record stamped
    with the current UTC time, and finally overwrites
    ``.muse/refs/heads/<branch>`` with the new commit id.

    Args:
        root: Repository root containing ``.muse``.
        repo_id: Repository id recorded on the commit.
        branch: Branch recorded on the commit; its ref file is rewritten.
        message: Commit message.
        parent_id: Parent commit id, or ``None`` for a commit without a parent.

    Returns:
        The id of the newly written commit.
    """
    empty_manifest: dict[str, str] = {}
    snapshot_id = compute_snapshot_id(empty_manifest)
    now = datetime.datetime.now(datetime.timezone.utc)

    # A falsy parent is hashed as "no parents".
    parents = [parent_id] if parent_id else []
    new_commit_id = compute_commit_id(
        parent_ids=parents,
        snapshot_id=snapshot_id,
        message=message,
        committed_at_iso=now.isoformat(),
    )

    snapshot = SnapshotRecord(snapshot_id=snapshot_id, manifest=empty_manifest)
    write_snapshot(root, snapshot)

    record = CommitRecord(
        commit_id=new_commit_id,
        repo_id=repo_id,
        branch=branch,
        snapshot_id=snapshot_id,
        message=message,
        committed_at=now,
        parent_commit_id=parent_id,
    )
    write_commit(root, record)

    # Advance the branch ref, creating its directory on first use.
    branch_ref = root / ".muse" / "refs" / "heads" / branch
    branch_ref.parent.mkdir(parents=True, exist_ok=True)
    branch_ref.write_text(new_commit_id)
    return new_commit_id
85
86
def _make_chain(root: pathlib.Path, repo_id: str, n: int) -> list[str]:
    """Build *n* commits in a straight line and return their ids, oldest first.

    Commit ``i`` carries the message ``commit-<i>`` and uses the previously
    created commit as its parent; the first commit has no parent.
    """
    chain: list[str] = []
    for index in range(n):
        previous = chain[-1] if chain else None
        chain.append(
            _make_commit(root, repo_id, message=f"commit-{index}", parent_id=previous)
        )
    return chain
96
97
98 # ---------------------------------------------------------------------------
99 # Unit tests — core bisect logic
100 # ---------------------------------------------------------------------------
101
102
class TestBisectCore:
    """Unit-level checks that call the ``muse.core.bisect`` functions directly."""

    def test_start_bisect_returns_result(self, tmp_path: pathlib.Path) -> None:
        """Starting yields either a next commit to test or a finished result."""
        root, repo_id = _init_repo(tmp_path)
        chain = _make_chain(root, repo_id, 4)
        oldest, newest = chain[0], chain[-1]
        from muse.core.bisect import start_bisect

        outcome = start_bisect(root, newest, [oldest], branch="main")

        assert outcome.next_to_test is not None or outcome.done

    def test_mark_bad_advances_search(self, tmp_path: pathlib.Path) -> None:
        """A finished result after ``mark_bad`` must name a first-bad commit."""
        root, repo_id = _init_repo(tmp_path)
        chain = _make_chain(root, repo_id, 8)
        oldest, newest = chain[0], chain[-1]
        from muse.core.bisect import mark_bad, start_bisect

        start_bisect(root, newest, [oldest], branch="main")
        outcome = mark_bad(root, newest)

        if outcome.done:
            assert outcome.first_bad is not None

    def test_mark_good_advances_search(self, tmp_path: pathlib.Path) -> None:
        """``mark_good`` on the known-good commit returns a result object."""
        root, repo_id = _init_repo(tmp_path)
        chain = _make_chain(root, repo_id, 8)
        oldest, newest = chain[0], chain[-1]
        from muse.core.bisect import mark_good, start_bisect

        start_bisect(root, newest, [oldest], branch="main")
        outcome = mark_good(root, oldest)

        assert outcome is not None

    def test_reset_clears_state(self, tmp_path: pathlib.Path) -> None:
        """A session is active after start and inactive after reset."""
        root, repo_id = _init_repo(tmp_path)
        chain = _make_chain(root, repo_id, 4)
        oldest, newest = chain[0], chain[-1]
        from muse.core.bisect import is_bisect_active, reset_bisect, start_bisect

        start_bisect(root, newest, [oldest], branch="main")
        assert is_bisect_active(root)

        reset_bisect(root)
        assert not is_bisect_active(root)

    def test_bisect_log_records_events(self, tmp_path: pathlib.Path) -> None:
        """Starting a session leaves at least one entry in the bisect log."""
        root, repo_id = _init_repo(tmp_path)
        chain = _make_chain(root, repo_id, 4)
        oldest, newest = chain[0], chain[-1]
        from muse.core.bisect import get_bisect_log, start_bisect

        start_bisect(root, newest, [oldest], branch="main")
        entries = get_bisect_log(root)

        assert len(entries) > 0
143
144
145 # ---------------------------------------------------------------------------
146 # Integration tests — CLI subcommands
147 # ---------------------------------------------------------------------------
148
149
class TestBisectCLI:
    """Integration tests that drive ``muse bisect`` through the CLI runner."""

    @staticmethod
    def _start_session(root: pathlib.Path, ids: list[str]) -> None:
        """Start a session (newest commit bad, oldest good) and require success.

        Tests that exercise behaviour *inside* a session call this so that a
        broken ``start`` fails loudly here, instead of letting a later
        "this command must fail" assertion pass merely because no session
        was ever created.
        """
        started = runner.invoke(
            cli, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]],
            env=_env(root),
        )
        assert started.exit_code == 0, started.output

    def test_start_requires_good_ref(self, tmp_path: pathlib.Path) -> None:
        """``start`` with only ``--bad`` fails and mentions the missing good ref."""
        root, repo_id = _init_repo(tmp_path)
        ids = _make_chain(root, repo_id, 2)
        result = runner.invoke(
            cli, ["bisect", "start", "--bad", ids[-1]],
            env=_env(root),
        )
        assert result.exit_code != 0
        assert "good" in result.output.lower()

    def test_start_with_bad_and_good(self, tmp_path: pathlib.Path) -> None:
        """``start`` with both refs succeeds and reports progress."""
        root, repo_id = _init_repo(tmp_path)
        ids = _make_chain(root, repo_id, 4)
        result = runner.invoke(
            cli, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]],
            env=_env(root), catch_exceptions=False,
        )
        assert result.exit_code == 0
        assert "started" in result.output.lower() or "next" in result.output.lower()

    def test_bad_without_session_fails(self, tmp_path: pathlib.Path) -> None:
        """``bad`` is rejected when no session has been started."""
        root, repo_id = _init_repo(tmp_path)
        ids = _make_chain(root, repo_id, 2)
        result = runner.invoke(cli, ["bisect", "bad", ids[-1]], env=_env(root))
        assert result.exit_code != 0

    def test_good_without_session_fails(self, tmp_path: pathlib.Path) -> None:
        """``good`` is rejected when no session has been started."""
        root, repo_id = _init_repo(tmp_path)
        ids = _make_chain(root, repo_id, 2)
        result = runner.invoke(cli, ["bisect", "good", ids[0]], env=_env(root))
        assert result.exit_code != 0

    def test_skip_without_session_fails(self, tmp_path: pathlib.Path) -> None:
        """``skip`` is rejected when no session has been started."""
        root, repo_id = _init_repo(tmp_path)
        ids = _make_chain(root, repo_id, 2)
        result = runner.invoke(cli, ["bisect", "skip", ids[0]], env=_env(root))
        assert result.exit_code != 0

    def test_reset_clears_session(self, tmp_path: pathlib.Path) -> None:
        """After ``reset`` the session is gone, so ``bad`` fails again."""
        root, repo_id = _init_repo(tmp_path)
        ids = _make_chain(root, repo_id, 4)
        self._start_session(root, ids)

        result = runner.invoke(cli, ["bisect", "reset"], env=_env(root), catch_exceptions=False)
        assert result.exit_code == 0

        # After reset, bad should fail
        result2 = runner.invoke(cli, ["bisect", "bad", ids[-1]], env=_env(root))
        assert result2.exit_code != 0

    def test_log_shows_entries(self, tmp_path: pathlib.Path) -> None:
        """``log`` succeeds while a session is active."""
        root, repo_id = _init_repo(tmp_path)
        ids = _make_chain(root, repo_id, 4)
        self._start_session(root, ids)

        result = runner.invoke(cli, ["bisect", "log"], env=_env(root), catch_exceptions=False)
        # NOTE(review): only the exit status is checked; the rendered log
        # format is not visible from this module, so content is not asserted.
        assert result.exit_code == 0

    def test_double_start_fails(self, tmp_path: pathlib.Path) -> None:
        """A second ``start`` during an active session is refused."""
        root, repo_id = _init_repo(tmp_path)
        ids = _make_chain(root, repo_id, 4)
        self._start_session(root, ids)

        result = runner.invoke(
            cli, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]],
            env=_env(root),
        )
        assert result.exit_code != 0
        assert "already" in result.output.lower()

    def test_bad_invalid_ref_fails(self, tmp_path: pathlib.Path) -> None:
        """``bad`` with a ref that matches no commit fails inside a session."""
        root, repo_id = _init_repo(tmp_path)
        ids = _make_chain(root, repo_id, 4)
        self._start_session(root, ids)

        result = runner.invoke(cli, ["bisect", "bad", "deadbeef"], env=_env(root))
        assert result.exit_code != 0

    def test_reset_without_session_succeeds(self, tmp_path: pathlib.Path) -> None:
        """reset when no session is active should not crash."""
        root, _ = _init_repo(tmp_path)
        result = runner.invoke(cli, ["bisect", "reset"], env=_env(root), catch_exceptions=False)
        assert result.exit_code == 0

    def test_log_empty_without_session(self, tmp_path: pathlib.Path) -> None:
        """``log`` without a session succeeds with blank or "no ..." output."""
        root, _ = _init_repo(tmp_path)
        result = runner.invoke(cli, ["bisect", "log"], env=_env(root), catch_exceptions=False)
        assert result.exit_code == 0
        text = result.output.lower()
        # A separate '"no bisect" in text' check would be implied by the
        # '"no" in text' check, so only the two distinct cases are tested.
        assert not text.strip() or "no" in text
247
248
249 # ---------------------------------------------------------------------------
250 # E2E tests
251 # ---------------------------------------------------------------------------
252
253
class TestBisectE2E:
    """End-to-end workflows: CLI ``start`` followed by core-level marking."""

    def test_full_bisect_workflow_2_commits(self, tmp_path: pathlib.Path) -> None:
        """Starting on an adjacent good/bad pair succeeds and logs the event."""
        root, repo_id = _init_repo(tmp_path)
        good_id, bad_id = _make_chain(root, repo_id, 2)

        started = runner.invoke(
            cli, ["bisect", "start", "--bad", bad_id, "--good", good_id],
            env=_env(root),
        )
        # Fail here if start is broken rather than on the log check below.
        assert started.exit_code == 0, started.output

        # With only 2 commits, bisect should already identify bad_id
        from muse.core.bisect import get_bisect_log
        log = get_bisect_log(root)
        assert len(log) >= 1

    def test_full_bisect_workflow_many_commits(self, tmp_path: pathlib.Path) -> None:
        """With a chain of 8 commits, bisect converges without error."""
        root, repo_id = _init_repo(tmp_path)
        ids = _make_chain(root, repo_id, 8)

        started = runner.invoke(
            cli, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]],
            env=_env(root), catch_exceptions=False,
        )
        assert started.exit_code == 0, started.output

        from muse.core.bisect import _load_state, is_bisect_active, mark_bad, mark_good

        # Simulate binary search: assume the bug was introduced at ids[4], so
        # ids[4] and later are "bad". Built once instead of slicing per step.
        bad_ids = frozenset(ids[4:])
        max_steps = 20
        steps = 0
        done = False
        while is_bisect_active(root) and steps < max_steps and not done:
            state = _load_state(root)
            if state is None:
                break
            remaining = state.get("remaining", [])
            if not remaining:
                break
            mid = remaining[len(remaining) // 2]
            if mid in bad_ids:
                result = mark_bad(root, mid)
            else:
                result = mark_good(root, mid)
            done = result.done
            steps += 1

        # Bisect should have converged or be close
        # NOTE(review): this also holds when the loop body never runs
        # (steps == 0); consider asserting on result.first_bad once its
        # exact semantics are confirmed against muse.core.bisect.
        assert done or steps < max_steps
303
304
305 # ---------------------------------------------------------------------------
306 # Security tests
307 # ---------------------------------------------------------------------------
308
309
class TestBisectSecurity:
    """Checks that hostile ref strings are rejected and not echoed raw."""

    @staticmethod
    def _start_session(root: pathlib.Path, ids: list[str]) -> None:
        """Start a session (newest bad, oldest good) and require it to succeed.

        Without this check the tests below could pass simply because ``start``
        failed and every later command was refused for lack of a session.
        """
        started = runner.invoke(
            cli, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]],
            env=_env(root),
        )
        assert started.exit_code == 0, started.output

    def test_ref_with_control_chars_is_rejected(self, tmp_path: pathlib.Path) -> None:
        """A ref containing ANSI escape sequences makes ``bad`` fail."""
        root, repo_id = _init_repo(tmp_path)
        ids = _make_chain(root, repo_id, 2)
        self._start_session(root, ids)

        # Inject control chars in a bad ref
        result = runner.invoke(cli, ["bisect", "bad", "\x1b[31minjection\x1b[0m"], env=_env(root))
        assert result.exit_code != 0

    def test_output_contains_no_ansi_on_invalid_ref(self, tmp_path: pathlib.Path) -> None:
        """An invalid ref is rejected and its escape sequence is not echoed."""
        root, repo_id = _init_repo(tmp_path)
        ids = _make_chain(root, repo_id, 2)
        self._start_session(root, ids)

        result = runner.invoke(cli, ["bisect", "bad", "nonexistent-ref\x1b[31m"], env=_env(root))
        # The ref matches no commit, so the command must fail as well as
        # keep the raw escape sequence out of what it prints.
        assert result.exit_code != 0
        assert "\x1b[31m" not in result.output
331
332
333 # ---------------------------------------------------------------------------
334 # Stress tests
335 # ---------------------------------------------------------------------------
336
337
class TestBisectStress:
    """Bisect over a deeper history than the other test classes use."""

    def test_bisect_50_commit_chain(self, tmp_path: pathlib.Path) -> None:
        """A 50-commit chain converges within ``max_steps`` (10) marks.

        48 commits lie strictly between the good and bad endpoints, so an
        ideal binary search needs ceil(log2(48)) = 6 marks; the remainder of
        the budget is headroom.
        """
        root, repo_id = _init_repo(tmp_path)
        ids = _make_chain(root, repo_id, 50)
        bad_start = 25  # regression introduced at index 25

        started = runner.invoke(
            cli, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]],
            env=_env(root),
        )
        assert started.exit_code == 0

        from muse.core.bisect import _load_state, is_bisect_active, mark_bad, mark_good

        # Position of every commit in the chain, built once so each step is a
        # single lookup instead of two linear scans of ``ids``.
        position = {commit_id: index for index, commit_id in enumerate(ids)}
        max_steps = 10  # ceil(log2(48)) = 6; allow generous headroom
        steps = 0
        done = False
        while is_bisect_active(root) and steps < max_steps and not done:
            state = _load_state(root)
            if state is None:
                break
            remaining = state.get("remaining", [])
            if not remaining:
                break
            mid = remaining[len(remaining) // 2]
            # Commits outside the chain get -1 and are therefore marked good.
            idx = position.get(mid, -1)
            # ``step_result`` is kept separate from ``started`` so one name
            # is never bound to two unrelated result types.
            if idx >= bad_start:
                step_result = mark_bad(root, mid)
            else:
                step_result = mark_good(root, mid)
            done = step_result.done
            steps += 1

        assert done or steps < max_steps, f"Bisect failed to converge in {steps} steps"