test_cmd_bisect.py
python
| 1 | """Comprehensive tests for ``muse bisect`` — binary search for bad commits. |
| 2 | |
| 3 | Coverage: |
| 4 | - Unit: bisect core functions (start, mark, skip, reset) |
| 5 | - Integration: CLI subcommands (start, bad, good, skip, log, reset) |
| 6 | - E2E: full bisect workflow resolving to a first-bad commit |
| 7 | - Security: invalid refs, session guard (no double-start), ref sanitization |
| 8 | - Stress: deep commit history bisect |
| 9 | """ |
| 10 | |
| 11 | from __future__ import annotations |
| 12 | |
| 13 | import datetime |
| 14 | import json |
| 15 | import pathlib |
| 16 | import uuid |
| 17 | |
| 18 | import pytest |
| 19 | from typer.testing import CliRunner |
| 20 | |
| 21 | from muse.cli.app import cli |
| 22 | from muse.core.store import CommitRecord, SnapshotRecord, write_commit, write_snapshot |
| 23 | from muse.core.snapshot import compute_commit_id, compute_snapshot_id |
| 24 | |
| 25 | runner = CliRunner() |
| 26 | |
| 27 | |
| 28 | # --------------------------------------------------------------------------- |
| 29 | # Helpers |
| 30 | # --------------------------------------------------------------------------- |
| 31 | |
| 32 | |
| 33 | def _init_repo(tmp_path: pathlib.Path) -> tuple[pathlib.Path, str]: |
| 34 | repo_id = str(uuid.uuid4()) |
| 35 | muse = tmp_path / ".muse" |
| 36 | muse.mkdir() |
| 37 | (muse / "repo.json").write_text( |
| 38 | json.dumps({"repo_id": repo_id, "domain": "midi", |
| 39 | "default_branch": "main", |
| 40 | "created_at": "2026-01-01T00:00:00+00:00"}) |
| 41 | ) |
| 42 | (muse / "HEAD").write_text("ref: refs/heads/main") |
| 43 | (muse / "refs" / "heads").mkdir(parents=True) |
| 44 | (muse / "snapshots").mkdir() |
| 45 | (muse / "commits").mkdir() |
| 46 | (muse / "objects").mkdir() |
| 47 | return tmp_path, repo_id |
| 48 | |
| 49 | |
| 50 | def _env(root: pathlib.Path) -> dict[str, str]: |
| 51 | return {"MUSE_REPO_ROOT": str(root)} |
| 52 | |
| 53 | |
def _make_commit(
    root: pathlib.Path,
    repo_id: str,
    *,
    branch: str = "main",
    message: str = "commit",
    parent_id: str | None = None,
) -> str:
    """Write one empty-manifest commit and point *branch* at it.

    A snapshot with an empty manifest and a commit referencing it are written
    through ``write_snapshot`` / ``write_commit``; the commit ID is then
    stored in ``.muse/refs/heads/<branch>``.

    Args:
        root: Repository root (the directory containing ``.muse``).
        repo_id: Repository identifier recorded on the commit.
        branch: Branch recorded on the commit and whose ref file is updated.
        message: Commit message; it is also an input to the commit ID.
        parent_id: ID of the parent commit, or ``None`` for a root commit.

    Returns:
        The ID of the newly written commit.
    """
    manifest: dict[str, str] = {}
    snap_id = compute_snapshot_id(manifest)
    committed_at = datetime.datetime.now(datetime.timezone.utc)
    commit_id = compute_commit_id(
        parent_ids=[parent_id] if parent_id else [],
        snapshot_id=snap_id,
        message=message,
        committed_at_iso=committed_at.isoformat(),
    )
    write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=manifest))
    write_commit(root, CommitRecord(
        commit_id=commit_id,
        repo_id=repo_id,
        branch=branch,
        snapshot_id=snap_id,
        message=message,
        committed_at=committed_at,
        parent_commit_id=parent_id,
    ))
    ref_file = root / ".muse" / "refs" / "heads" / branch
    ref_file.parent.mkdir(parents=True, exist_ok=True)
    # Encoding is pinned so the ref bytes do not depend on the locale of the
    # machine running the tests.
    ref_file.write_text(commit_id, encoding="utf-8")
    return commit_id
| 85 | |
| 86 | |
def _make_chain(root: pathlib.Path, repo_id: str, n: int) -> list[str]:
    """Build a linear history of *n* commits and return their IDs, oldest first.

    Commit ``i`` carries the message ``commit-<i>`` and has commit ``i - 1``
    as its parent; the first commit has no parent.
    """
    chain: list[str] = []
    for index in range(n):
        # The parent is whatever was appended last, or None for the root.
        previous = chain[-1] if chain else None
        chain.append(
            _make_commit(
                root, repo_id, message=f"commit-{index}", parent_id=previous
            )
        )
    return chain
| 96 | |
| 97 | |
| 98 | # --------------------------------------------------------------------------- |
| 99 | # Unit tests — core bisect logic |
| 100 | # --------------------------------------------------------------------------- |
| 101 | |
| 102 | |
class TestBisectCore:
    """Unit-level checks that call ``muse.core.bisect`` functions directly."""

    def test_start_bisect_returns_result(self, tmp_path: pathlib.Path) -> None:
        """Starting yields either a next candidate or a finished result."""
        from muse.core.bisect import start_bisect

        root, repo_id = _init_repo(tmp_path)
        chain = _make_chain(root, repo_id, 4)
        oldest, newest = chain[0], chain[-1]
        outcome = start_bisect(root, newest, [oldest], branch="main")
        assert not (outcome.next_to_test is None and not outcome.done)

    def test_mark_bad_advances_search(self, tmp_path: pathlib.Path) -> None:
        """If marking a commit bad finishes the search, a first-bad is reported."""
        from muse.core.bisect import mark_bad, start_bisect

        root, repo_id = _init_repo(tmp_path)
        chain = _make_chain(root, repo_id, 8)
        oldest, newest = chain[0], chain[-1]
        start_bisect(root, newest, [oldest], branch="main")
        outcome = mark_bad(root, newest)
        if outcome.done:
            assert outcome.first_bad is not None

    def test_mark_good_advances_search(self, tmp_path: pathlib.Path) -> None:
        """Marking a commit good returns a result object."""
        from muse.core.bisect import mark_good, start_bisect

        root, repo_id = _init_repo(tmp_path)
        chain = _make_chain(root, repo_id, 8)
        oldest, newest = chain[0], chain[-1]
        start_bisect(root, newest, [oldest], branch="main")
        outcome = mark_good(root, oldest)
        assert outcome is not None

    def test_reset_clears_state(self, tmp_path: pathlib.Path) -> None:
        """A session is active after start and inactive after reset."""
        from muse.core.bisect import is_bisect_active, reset_bisect, start_bisect

        root, repo_id = _init_repo(tmp_path)
        chain = _make_chain(root, repo_id, 4)
        oldest, newest = chain[0], chain[-1]
        start_bisect(root, newest, [oldest], branch="main")
        assert is_bisect_active(root)
        reset_bisect(root)
        assert not is_bisect_active(root)

    def test_bisect_log_records_events(self, tmp_path: pathlib.Path) -> None:
        """Starting a session leaves at least one entry in the bisect log."""
        from muse.core.bisect import get_bisect_log, start_bisect

        root, repo_id = _init_repo(tmp_path)
        chain = _make_chain(root, repo_id, 4)
        oldest, newest = chain[0], chain[-1]
        start_bisect(root, newest, [oldest], branch="main")
        entries = get_bisect_log(root)
        assert len(entries) >= 1
| 143 | |
| 144 | |
| 145 | # --------------------------------------------------------------------------- |
| 146 | # Integration tests — CLI subcommands |
| 147 | # --------------------------------------------------------------------------- |
| 148 | |
| 149 | |
class TestBisectCLI:
    """Integration tests that drive ``muse bisect`` through the CLI runner."""

    @staticmethod
    def _start_session(root: pathlib.Path, ids: list[str]) -> None:
        """Start a session (newest commit bad, oldest good) and require success.

        Asserting here keeps the tests that rely on an open session from
        passing vacuously: a later non-zero exit must come from the behavior
        under test, not from a session that never started.
        """
        started = runner.invoke(
            cli, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]],
            env=_env(root),
        )
        assert started.exit_code == 0, started.output

    def test_start_requires_good_ref(self, tmp_path: pathlib.Path) -> None:
        """``start`` with only ``--bad`` fails and mentions the good ref."""
        root, repo_id = _init_repo(tmp_path)
        ids = _make_chain(root, repo_id, 2)
        result = runner.invoke(
            cli, ["bisect", "start", "--bad", ids[-1]],
            env=_env(root)
        )
        assert result.exit_code != 0
        assert "good" in result.output.lower()

    def test_start_with_bad_and_good(self, tmp_path: pathlib.Path) -> None:
        """``start`` with both refs succeeds and reports progress."""
        root, repo_id = _init_repo(tmp_path)
        ids = _make_chain(root, repo_id, 4)
        result = runner.invoke(
            cli, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]],
            env=_env(root), catch_exceptions=False
        )
        assert result.exit_code == 0
        assert "started" in result.output.lower() or "next" in result.output.lower()

    def test_bad_without_session_fails(self, tmp_path: pathlib.Path) -> None:
        """``bad`` exits non-zero when no session is active."""
        root, repo_id = _init_repo(tmp_path)
        ids = _make_chain(root, repo_id, 2)
        result = runner.invoke(cli, ["bisect", "bad", ids[-1]], env=_env(root))
        assert result.exit_code != 0

    def test_good_without_session_fails(self, tmp_path: pathlib.Path) -> None:
        """``good`` exits non-zero when no session is active."""
        root, repo_id = _init_repo(tmp_path)
        ids = _make_chain(root, repo_id, 2)
        result = runner.invoke(cli, ["bisect", "good", ids[0]], env=_env(root))
        assert result.exit_code != 0

    def test_skip_without_session_fails(self, tmp_path: pathlib.Path) -> None:
        """``skip`` exits non-zero when no session is active."""
        root, repo_id = _init_repo(tmp_path)
        ids = _make_chain(root, repo_id, 2)
        result = runner.invoke(cli, ["bisect", "skip", ids[0]], env=_env(root))
        assert result.exit_code != 0

    def test_reset_clears_session(self, tmp_path: pathlib.Path) -> None:
        """After ``reset``, marking a commit fails because the session is gone."""
        root, repo_id = _init_repo(tmp_path)
        ids = _make_chain(root, repo_id, 4)
        self._start_session(root, ids)
        result = runner.invoke(cli, ["bisect", "reset"], env=_env(root), catch_exceptions=False)
        assert result.exit_code == 0
        # After reset, bad should fail
        result2 = runner.invoke(cli, ["bisect", "bad", ids[-1]], env=_env(root))
        assert result2.exit_code != 0

    def test_log_shows_entries(self, tmp_path: pathlib.Path) -> None:
        """``log`` succeeds while a session is active."""
        root, repo_id = _init_repo(tmp_path)
        ids = _make_chain(root, repo_id, 4)
        self._start_session(root, ids)
        result = runner.invoke(cli, ["bisect", "log"], env=_env(root), catch_exceptions=False)
        assert result.exit_code == 0

    def test_double_start_fails(self, tmp_path: pathlib.Path) -> None:
        """A second ``start`` during an active session is refused."""
        root, repo_id = _init_repo(tmp_path)
        ids = _make_chain(root, repo_id, 4)
        self._start_session(root, ids)
        result = runner.invoke(
            cli, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]],
            env=_env(root)
        )
        assert result.exit_code != 0
        assert "already" in result.output.lower()

    def test_bad_invalid_ref_fails(self, tmp_path: pathlib.Path) -> None:
        """``bad`` with a ref that matches no commit exits non-zero."""
        root, repo_id = _init_repo(tmp_path)
        ids = _make_chain(root, repo_id, 4)
        self._start_session(root, ids)
        result = runner.invoke(cli, ["bisect", "bad", "deadbeef"], env=_env(root))
        assert result.exit_code != 0

    def test_reset_without_session_succeeds(self, tmp_path: pathlib.Path) -> None:
        """reset when no session is active should not crash."""
        root, _ = _init_repo(tmp_path)
        result = runner.invoke(cli, ["bisect", "reset"], env=_env(root), catch_exceptions=False)
        assert result.exit_code == 0

    def test_log_empty_without_session(self, tmp_path: pathlib.Path) -> None:
        """``log`` without a session succeeds with empty or "no ..." output."""
        root, _ = _init_repo(tmp_path)
        result = runner.invoke(cli, ["bisect", "log"], env=_env(root), catch_exceptions=False)
        assert result.exit_code == 0
        output = result.output.lower()
        # A separate "no bisect" check would be redundant: "no" already
        # matches every output that contains it.
        assert output.strip() == "" or "no" in output
| 247 | |
| 248 | |
| 249 | # --------------------------------------------------------------------------- |
| 250 | # E2E tests |
| 251 | # --------------------------------------------------------------------------- |
| 252 | |
| 253 | |
class TestBisectE2E:
    """End-to-end bisect sessions started via the CLI and driven via the core API."""

    def test_full_bisect_workflow_2_commits(self, tmp_path: pathlib.Path) -> None:
        """Starting on a two-commit chain records at least one log entry."""
        root, repo_id = _init_repo(tmp_path)
        ids = _make_chain(root, repo_id, 2)
        good_id, bad_id = ids[0], ids[1]

        runner.invoke(
            cli, ["bisect", "start", "--bad", bad_id, "--good", good_id],
            env=_env(root)
        )
        # No commit lies between good_id and bad_id in a two-commit chain,
        # so there is nothing to mark; only the session log is checked.
        from muse.core.bisect import get_bisect_log
        log = get_bisect_log(root)
        assert len(log) >= 1

    def test_full_bisect_workflow_many_commits(self, tmp_path: pathlib.Path) -> None:
        """With a chain of 8 commits, bisect converges without error."""
        root, repo_id = _init_repo(tmp_path)
        ids = _make_chain(root, repo_id, 8)

        started = runner.invoke(
            cli, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]],
            env=_env(root), catch_exceptions=False
        )
        # Without an open session the loop below would never run and the
        # final assertion would pass without testing anything.
        assert started.exit_code == 0, started.output

        from muse.core.bisect import _load_state, is_bisect_active, mark_bad, mark_good
        # Simulate binary search: assume the bug was introduced at ids[4],
        # so ids[4] and every later commit are "bad". Built once, outside
        # the loop, for O(1) membership tests.
        bad_ids = set(ids[4:])
        max_steps = 20
        steps = 0
        done = False
        while is_bisect_active(root) and steps < max_steps and not done:
            state = _load_state(root)
            if state is None:
                break
            remaining = state.get("remaining", [])
            if not remaining:
                break
            mid = remaining[len(remaining) // 2]
            if mid in bad_ids:
                result = mark_bad(root, mid)
            else:
                result = mark_good(root, mid)
            done = result.done
            steps += 1

        # Bisect should have converged or be close
        assert done or steps < max_steps, (
            f"Bisect failed to converge in {steps} steps"
        )
| 303 | |
| 304 | |
| 305 | # --------------------------------------------------------------------------- |
| 306 | # Security tests |
| 307 | # --------------------------------------------------------------------------- |
| 308 | |
| 309 | |
class TestBisectSecurity:
    """Checks that hostile ref strings are rejected and not echoed raw."""

    @staticmethod
    def _open_session(tmp_path: pathlib.Path) -> pathlib.Path:
        """Create a two-commit repo, issue ``bisect start``, return the root.

        The outcome of the start command is intentionally not inspected here.
        """
        root, repo_id = _init_repo(tmp_path)
        chain = _make_chain(root, repo_id, 2)
        start_args = ["bisect", "start", "--bad", chain[-1], "--good", chain[0]]
        runner.invoke(cli, start_args, env=_env(root))
        return root

    def test_ref_with_control_chars_is_rejected(self, tmp_path: pathlib.Path) -> None:
        """A ref wrapped in ANSI escape sequences makes ``bad`` exit non-zero."""
        root = self._open_session(tmp_path)
        # Inject control chars in a bad ref
        hostile_ref = "\x1b[31minjection\x1b[0m"
        outcome = runner.invoke(cli, ["bisect", "bad", hostile_ref], env=_env(root))
        assert outcome.exit_code != 0

    def test_output_contains_no_ansi_on_invalid_ref(self, tmp_path: pathlib.Path) -> None:
        """The escape sequence from an invalid ref never reaches the output."""
        root = self._open_session(tmp_path)
        hostile_ref = "nonexistent-ref\x1b[31m"
        outcome = runner.invoke(cli, ["bisect", "bad", hostile_ref], env=_env(root))
        assert "\x1b[31m" not in outcome.output
| 331 | |
| 332 | |
| 333 | # --------------------------------------------------------------------------- |
| 334 | # Stress tests |
| 335 | # --------------------------------------------------------------------------- |
| 336 | |
| 337 | |
class TestBisectStress:
    """Convergence check on a deeper linear history."""

    def test_bisect_50_commit_chain(self, tmp_path: pathlib.Path) -> None:
        """A 50-commit chain converges within the 10-step budget.

        Halving the 48 commits between the good and bad endpoints needs
        about ceil(log2(48)) = 6 steps; the budget of 10 leaves headroom.
        """
        root, repo_id = _init_repo(tmp_path)
        ids = _make_chain(root, repo_id, 50)
        bad_start = 25  # regression introduced at index 25

        started = runner.invoke(
            cli, ["bisect", "start", "--bad", ids[-1], "--good", ids[0]],
            env=_env(root)
        )
        assert started.exit_code == 0

        from muse.core.bisect import _load_state, is_bisect_active, mark_bad, mark_good
        # Commit ID -> chain position, built once so each step is an O(1)
        # lookup instead of two linear scans of ``ids``.
        position = {commit_id: index for index, commit_id in enumerate(ids)}
        max_steps = 10  # ceil(log2(48)) = 6; allow generous headroom
        steps = 0
        done = False
        while is_bisect_active(root) and steps < max_steps and not done:
            state = _load_state(root)
            if state is None:
                break
            remaining = state.get("remaining", [])
            if not remaining:
                break
            mid = remaining[len(remaining) // 2]
            # An ID that is not part of the chain maps to -1 and is therefore
            # treated as "good", exactly as before.
            idx = position.get(mid, -1)
            if idx >= bad_start:
                result = mark_bad(root, mid)
            else:
                result = mark_good(root, mid)
            done = result.done
            steps += 1

        assert done or steps < max_steps, f"Bisect failed to converge in {steps} steps"