cgcardona / muse public
test_stress_merge_correctness.py python
377 lines 14.9 KB
119290fc Add mission-critical stress test suite (9 new files, 1716 tests total) (#76) Gabriel Cardona <cgcardona@gmail.com> 1d ago
1 """Adversarial stress tests for the three-way merge engine.
2
3 Covers:
4 - apply_merge edge cases: both sides delete the same file, theirs-only delete,
5 ours-only add, both add the same file with identical hash (clean).
6 - detect_conflicts: full combinatorial (empty sets, symmetric, one-sided).
7 - diff_snapshots: many files added / removed / modified.
8 - diff_snapshots then detect_conflicts → apply_merge pipeline correctness.
9 - Large manifest diffs (500 paths).
10 - MergeState round-trip with and without optional fields.
11 - Corrupt MERGE_STATE.json is silently ignored (returns None).
12 - apply_resolution raises FileNotFoundError for absent object.
13 """
14 from __future__ import annotations
15
16 import json
17 import pathlib
18 import secrets
19 import hashlib
20 import datetime
21
22 import pytest
23
24 from muse.core.merge_engine import (
25 MergeState,
26 apply_merge,
27 apply_resolution,
28 clear_merge_state,
29 detect_conflicts,
30 diff_snapshots,
31 read_merge_state,
32 write_merge_state,
33 )
34 from muse.core.object_store import write_object
35
36
37 # ---------------------------------------------------------------------------
38 # Helpers
39 # ---------------------------------------------------------------------------
40
41
42 def _h(label: str) -> str:
43 return hashlib.sha256(label.encode()).hexdigest()
44
45
@pytest.fixture
def repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Create ``.muse`` and ``.muse/objects`` under *tmp_path*.

    Returns *tmp_path* itself (the directory containing ``.muse``), not the
    ``.muse`` directory.
    """
    muse_dir = tmp_path.joinpath(".muse")
    muse_dir.mkdir()
    muse_dir.joinpath("objects").mkdir()
    return tmp_path
52
53
54 # ===========================================================================
55 # diff_snapshots — exhaustive
56 # ===========================================================================
57
58
class TestDiffSnapshotsExhaustive:
    """Check the set of paths ``diff_snapshots`` reports for manifest pairs."""

    def test_identical_manifests_no_diff(self) -> None:
        """A 100-entry manifest compared with itself reports no paths."""
        manifest = {f"file-{n}.mid": _h(f"content-{n}") for n in range(100)}
        changed = diff_snapshots(manifest, manifest)
        assert changed == set()

    def test_all_files_added(self) -> None:
        """Every path of the target is reported when the base is empty."""
        incoming: dict[str, str] = {}
        for n in range(50):
            incoming[f"new-{n}.mid"] = _h(f"new-{n}")
        assert diff_snapshots({}, incoming) == set(incoming)

    def test_all_files_removed(self) -> None:
        """Every path of the base is reported when the target is empty."""
        outgoing: dict[str, str] = {}
        for n in range(50):
            outgoing[f"old-{n}.mid"] = _h(f"old-{n}")
        assert diff_snapshots(outgoing, {}) == set(outgoing)

    def test_all_files_modified(self) -> None:
        """Same 50 paths on both sides, every hash different: all reported."""
        names = [f"f{n}.mid" for n in range(50)]
        before = {name: _h(f"v1-{n}") for n, name in enumerate(names)}
        after = {name: _h(f"v2-{n}") for n, name in enumerate(names)}
        assert diff_snapshots(before, after) == set(names)

    def test_mixed_add_remove_modify(self) -> None:
        """One add, one remove, one modify are reported; the kept path is not."""
        kept = _h("keep")
        before = {
            "keep.mid": kept,
            "remove.mid": _h("remove"),
            "modify.mid": _h("old"),
        }
        after = {
            "keep.mid": kept,
            "add.mid": _h("new"),
            "modify.mid": _h("new"),
        }
        changed = diff_snapshots(before, after)
        assert changed == {"remove.mid", "add.mid", "modify.mid"}
        assert "keep.mid" not in changed

    def test_500_file_manifest_correct_diff(self) -> None:
        """500-path base with 100 modified, 50 added and 50 removed paths."""

        def existing(n: int) -> str:
            # Name of the n-th path present in the base manifest.
            return f"path/to/file-{n:04d}.mid"

        def fresh(n: int) -> str:
            # Name of a path that only exists in the target manifest.
            return f"path/to/new-{n}.mid"

        before = {existing(n): _h(f"v1-{n}") for n in range(500)}

        modified = {existing(n) for n in range(100)}
        added = {fresh(n) for n in range(500, 550)}
        removed = {existing(n) for n in range(450, 500)}

        # Build the target from a copy of the base: rewrite, append, then drop.
        after = dict(before)
        after.update({existing(n): _h(f"v2-{n}") for n in range(100)})
        after.update({fresh(n): _h(f"new-{n}") for n in range(500, 550)})
        for n in range(450, 500):
            del after[existing(n)]

        assert diff_snapshots(before, after) == modified | added | removed

    def test_symmetric_diff_not_required(self) -> None:
        """A modified path is reported whichever manifest is passed first."""
        left = {"f.mid": _h("hash-a")}
        right = {"f.mid": _h("hash-b")}
        for first, second in ((left, right), (right, left)):
            assert diff_snapshots(first, second) == {"f.mid"}
115
116
117 # ===========================================================================
118 # detect_conflicts — exhaustive
119 # ===========================================================================
120
121
class TestDetectConflictsExhaustive:
    """Check ``detect_conflicts`` on empty, disjoint and overlapping path sets."""

    def test_empty_both_sides(self) -> None:
        """Two empty change sets produce no conflicts."""
        no_conflicts: set[str] = set()
        assert detect_conflicts(set(), set()) == no_conflicts

    def test_empty_ours(self) -> None:
        """Nothing changed on ours: no conflicts regardless of theirs."""
        theirs = {"a.mid", "b.mid"}
        assert detect_conflicts(set(), theirs) == set()

    def test_empty_theirs(self) -> None:
        """Nothing changed on theirs: no conflicts regardless of ours."""
        ours = {"a.mid", "b.mid"}
        assert detect_conflicts(ours, set()) == set()

    def test_full_overlap(self) -> None:
        """Passing the same 50-path set for both sides returns that set."""
        paths = {f"f{n}.mid" for n in range(50)}
        assert detect_conflicts(paths, paths) == paths

    def test_no_overlap(self) -> None:
        """Disjoint change sets produce no conflicts."""
        ours = {f"ours-{n}.mid" for n in range(25)}
        theirs = {f"theirs-{n}.mid" for n in range(25)}
        found = detect_conflicts(ours, theirs)
        assert found == set()

    def test_partial_overlap(self) -> None:
        """Only the path present in both change sets is reported."""
        ours = {"shared.mid", "only-ours.mid"}
        theirs = {"shared.mid", "only-theirs.mid"}
        found = detect_conflicts(ours, theirs)
        assert found == {"shared.mid"}

    def test_commutativity(self) -> None:
        """Swapping the two arguments does not change the result."""
        first = {f"f{n}" for n in range(30)}
        second = {f"f{n}" for n in range(20, 50)}
        forward = detect_conflicts(first, second)
        backward = detect_conflicts(second, first)
        assert forward == backward
150
151
152 # ===========================================================================
153 # apply_merge — exhaustive
154 # ===========================================================================
155
156
class TestApplyMergeExhaustive:
    """Check the manifest produced by ``apply_merge`` for add/delete/modify mixes."""

    def test_both_sides_delete_same_file_not_conflicting(self) -> None:
        """Both sides delete the same file — no conflict, file absent in merged."""
        path = "shared.mid"
        base = {path: _h("shared")}
        ours: dict[str, str] = {}
        theirs: dict[str, str] = {}
        ours_changed = {path}
        theirs_changed = {path}
        # The conflict set is empty: the caller decided this is not a conflict.
        merged = apply_merge(base, ours, theirs, ours_changed, theirs_changed, set())
        assert path not in merged

    def test_only_theirs_adds_file(self) -> None:
        """A file added only on theirs appears in the merge with their hash."""
        base: dict[str, str] = {}
        ours: dict[str, str] = {}
        added = _h("new")
        theirs = {"new.mid": added}
        merged = apply_merge(base, ours, theirs, set(), {"new.mid"}, set())
        assert merged["new.mid"] == added

    def test_only_ours_adds_file(self) -> None:
        """A file added only on ours appears in the merge with our hash."""
        base: dict[str, str] = {}
        theirs: dict[str, str] = {}
        added = _h("ours-new")
        ours = {"new.mid": added}
        merged = apply_merge(base, ours, theirs, {"new.mid"}, set(), set())
        assert merged["new.mid"] == added

    def test_both_add_same_file_same_hash_no_conflict(self) -> None:
        """Both sides independently add the same file with the same content hash — no conflict."""
        base: dict[str, str] = {}
        same = _h("identical-content")
        ours = {"new.mid": same}
        theirs = {"new.mid": same}
        # Identical hashes: the caller passes an empty conflict set.
        merged = apply_merge(base, ours, theirs, {"new.mid"}, {"new.mid"}, set())
        assert merged["new.mid"] == same

    def test_conflict_path_falls_back_to_base(self) -> None:
        """A path listed as conflicting keeps its base hash in the result."""
        path = "conflict.mid"
        base_hash = _h("base")
        base = {path: base_hash}
        ours = {path: _h("ours")}
        theirs = {path: _h("theirs")}
        merged = apply_merge(base, ours, theirs, {path}, {path}, {path})
        # Conflict paths are excluded → base value is kept.
        assert merged[path] == base_hash

    def test_theirs_deletion_removes_from_merged(self) -> None:
        """A file deleted only on theirs is absent from the merge."""
        base = {"f.mid": _h("f"), "g.mid": _h("g")}
        ours = dict(base)
        theirs = {"f.mid": base["f.mid"]}  # g.mid deleted on theirs
        merged = apply_merge(base, ours, theirs, set(), {"g.mid"}, set())
        assert "g.mid" not in merged

    def test_unrelated_changes_both_preserved(self) -> None:
        """Edits to different files on each side both land in the merge."""
        base = {"a.mid": _h("a0"), "b.mid": _h("b0"), "c.mid": _h("c0")}
        ours = {**base, "a.mid": _h("a1")}
        theirs = {**base, "b.mid": _h("b1")}
        merged = apply_merge(base, ours, theirs, {"a.mid"}, {"b.mid"}, set())
        expected = {"a.mid": _h("a1"), "b.mid": _h("b1"), "c.mid": _h("c0")}
        for path, oid in expected.items():
            assert merged[path] == oid

    def test_large_manifest_clean_merge(self) -> None:
        """200 files: 100 changed by ours, 100 changed by theirs, no overlap."""

        def name(n: int) -> str:
            # Zero-padded file name shared by all three manifests.
            return f"f{n:03d}.mid"

        ours_half = range(100)
        theirs_half = range(100, 200)

        base = {name(n): _h(f"v0-{n}") for n in range(200)}
        ours_edits = {name(n): _h(f"v-ours-{n}") for n in ours_half}
        theirs_edits = {name(n): _h(f"v-theirs-{n}") for n in theirs_half}

        ours = {**base, **ours_edits}
        theirs = {**base, **theirs_edits}
        ours_changed: set[str] = set(ours_edits)
        theirs_changed: set[str] = set(theirs_edits)

        merged = apply_merge(base, ours, theirs, ours_changed, theirs_changed, set())

        for n in ours_half:
            assert merged[name(n)] == _h(f"v-ours-{n}")
        for n in theirs_half:
            assert merged[name(n)] == _h(f"v-theirs-{n}")

    def test_pipeline_diff_detect_merge(self) -> None:
        """End-to-end: run diff → detect → apply and verify correctness.

        Scenario:
          base   = {conflict.mid, ours-only.mid, theirs-only.mid, untouched.mid}
          ours:   modifies conflict.mid, deletes ours-only.mid
          theirs: modifies conflict.mid, deletes theirs-only.mid

        Expected results:
          conflict.mid:    changed on both sides → stays at base value
          ours-only.mid:   deleted only by ours → absent in merged
          theirs-only.mid: deleted only by theirs → absent in merged
          untouched.mid:   neither side changed → stays at base
        """
        conflict_base = _h("c0")
        ours_only = _h("o0")
        theirs_only = _h("t0")
        untouched = _h("u0")

        base = {
            "conflict.mid": conflict_base,
            "ours-only.mid": ours_only,
            "theirs-only.mid": theirs_only,
            "untouched.mid": untouched,
        }
        # ours drops ours-only.mid and rewrites conflict.mid.
        ours = {
            "conflict.mid": _h("c-ours"),
            "theirs-only.mid": theirs_only,
            "untouched.mid": untouched,
        }
        # theirs drops theirs-only.mid and rewrites conflict.mid.
        theirs = {
            "conflict.mid": _h("c-theirs"),
            "ours-only.mid": ours_only,
            "untouched.mid": untouched,
        }

        ours_changed = diff_snapshots(base, ours)
        theirs_changed = diff_snapshots(base, theirs)
        conflicts = detect_conflicts(ours_changed, theirs_changed)
        merged = apply_merge(
            base, ours, theirs, ours_changed, theirs_changed, conflicts
        )

        # Paths that must survive with their base hash.
        assert merged["conflict.mid"] == conflict_base
        assert merged["untouched.mid"] == untouched
        # Paths deleted on exactly one side must be gone.
        assert "ours-only.mid" not in merged
        assert "theirs-only.mid" not in merged
288
289
290 # ===========================================================================
291 # MergeState I/O — adversarial
292 # ===========================================================================
293
294
class TestMergeStateIOAdversarial:
    """Round-trip and malformed-input checks for the merge-state helpers."""

    def test_conflict_paths_sorted_on_write(self, repo: pathlib.Path) -> None:
        """Conflict paths written out of order are read back sorted."""
        unsorted_paths = ["z.mid", "a.mid", "m.mid"]
        write_merge_state(
            repo,
            base_commit="b",
            ours_commit="o",
            theirs_commit="t",
            conflict_paths=unsorted_paths,
        )
        loaded = read_merge_state(repo)
        assert loaded is not None
        assert loaded.conflict_paths == ["a.mid", "m.mid", "z.mid"]

    def test_optional_other_branch_absent(self, repo: pathlib.Path) -> None:
        """``other_branch`` reads back as ``None`` when it was not written."""
        write_merge_state(
            repo,
            base_commit="b",
            ours_commit="o",
            theirs_commit="t",
            conflict_paths=[],
        )
        loaded = read_merge_state(repo)
        assert loaded is not None
        assert loaded.other_branch is None

    def test_corrupt_json_returns_none(self, repo: pathlib.Path) -> None:
        """A state file holding invalid JSON reads back as ``None``."""
        state_file = (repo / ".muse").joinpath("MERGE_STATE.json")
        state_file.write_text("{not valid json")
        loaded = read_merge_state(repo)
        assert loaded is None

    def test_empty_json_returns_none_gracefully(self, repo: pathlib.Path) -> None:
        """An empty state file reads back as ``None``."""
        state_file = (repo / ".muse").joinpath("MERGE_STATE.json")
        state_file.write_text("")
        loaded = read_merge_state(repo)
        assert loaded is None

    def test_missing_file_returns_none(self, repo: pathlib.Path) -> None:
        """Reading with no state file present yields ``None``."""
        loaded = read_merge_state(repo)
        assert loaded is None

    def test_clear_idempotent(self, repo: pathlib.Path) -> None:
        """Clearing twice with no state file on disk must not raise."""
        for _ in range(2):
            clear_merge_state(repo)

    def test_write_overwrite_previous(self, repo: pathlib.Path) -> None:
        """A second write replaces the values stored by the first."""
        write_merge_state(
            repo,
            base_commit="b1",
            ours_commit="o1",
            theirs_commit="t1",
            conflict_paths=["a.mid"],
        )
        write_merge_state(
            repo,
            base_commit="b2",
            ours_commit="o2",
            theirs_commit="t2",
            conflict_paths=["b.mid"],
        )
        loaded = read_merge_state(repo)
        assert loaded is not None
        assert loaded.base_commit == "b2"
        assert loaded.conflict_paths == ["b.mid"]

    def test_100_conflict_paths_round_trip(self, repo: pathlib.Path) -> None:
        """One hundred conflict paths survive a write/read cycle, sorted."""
        tracks = [f"track-{n:03d}.mid" for n in range(100)]
        write_merge_state(
            repo,
            base_commit="b",
            ours_commit="o",
            theirs_commit="t",
            conflict_paths=tracks,
        )
        loaded = read_merge_state(repo)
        assert loaded is not None
        assert loaded.conflict_paths == sorted(tracks)

    def test_merge_state_is_frozen_dataclass(self) -> None:
        """Assigning to a ``MergeState`` field raises instead of mutating it."""
        frozen = MergeState(conflict_paths=["a.mid"], base_commit="b")
        expected_errors = (AttributeError, TypeError)
        with pytest.raises(expected_errors):
            # Explicit dunder call rather than ``frozen.base_commit = ...``.
            frozen.__setattr__("base_commit", "new")
351
352
353 # ===========================================================================
354 # apply_resolution
355 # ===========================================================================
356
357
class TestApplyResolution:
    """Check that ``apply_resolution`` writes stored objects into ``muse-work``."""

    def test_resolution_restores_correct_content(self, repo: pathlib.Path) -> None:
        """The stored bytes end up at ``muse-work/beat.mid``."""
        payload = b"resolved content"
        object_id = hashlib.sha256(payload).hexdigest()
        write_object(repo, object_id, payload)

        workdir = repo / "muse-work"
        workdir.mkdir()
        apply_resolution(repo, "beat.mid", object_id)

        assert (workdir / "beat.mid").read_bytes() == payload

    def test_resolution_creates_nested_dirs(self, repo: pathlib.Path) -> None:
        """A nested target path is written although ``muse-work`` was never created here."""
        payload = b"nested file"
        object_id = hashlib.sha256(payload).hexdigest()
        write_object(repo, object_id, payload)

        apply_resolution(repo, "sub/dir/beat.mid", object_id)

        target = repo.joinpath("muse-work", "sub", "dir", "beat.mid")
        assert target.read_bytes() == payload

    def test_resolution_missing_object_raises(self, repo: pathlib.Path) -> None:
        """An object id that was never written raises ``FileNotFoundError``."""
        absent_id = "a" * 64
        with pytest.raises(FileNotFoundError):
            apply_resolution(repo, "beat.mid", absent_id)