cgcardona / muse public
test_stress_object_store.py python
288 lines 10.2 KB
119290fc Add mission-critical stress test suite (9 new files, 1716 tests total) (#76) Gabriel Cardona <cgcardona@gmail.com> 1d ago
1 """Stress tests for the content-addressed object store.
2
3 Exercises:
4 - Write-then-read round-trip for varied payload sizes (empty … 1 MB).
5 - Idempotency: writing the same object ID twice is a no-op.
6 - has_object before and after writes.
7 - object_path sharding: first two hex chars as directory.
8 - read_object returns None for absent objects.
9 - restore_object copies bytes faithfully.
10 - write_object_from_path uses copy semantics, not load.
11 - Content integrity: read(write(content)) == content.
12 - Multiple distinct objects coexist without collision.
13 """
14 from __future__ import annotations
15
16 import hashlib
17 import os
18 import pathlib
19 import secrets
20
21 import pytest
22
23 from muse.core.object_store import (
24 has_object,
25 object_path,
26 objects_dir,
27 read_object,
28 restore_object,
29 write_object,
30 write_object_from_path,
31 )
32
33
34 # ---------------------------------------------------------------------------
35 # Helpers
36 # ---------------------------------------------------------------------------
37
38
def _sha256(data: bytes) -> str:
    """Return the hexadecimal SHA-256 digest of *data*.

    The tests in this module use this digest as the object ID under which
    a payload is stored.
    """
    hasher = hashlib.sha256()
    hasher.update(data)
    return hasher.hexdigest()
41
42
@pytest.fixture
def repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Provide a per-test repository root that contains an empty ``.muse`` directory.

    The root is pytest's ``tmp_path`` itself, so a test that requests both
    ``repo`` and ``tmp_path`` receives the same directory twice.
    """
    muse_dir = tmp_path.joinpath(".muse")
    muse_dir.mkdir()
    return tmp_path
47
48
49 # ---------------------------------------------------------------------------
50 # Basic round-trip
51 # ---------------------------------------------------------------------------
52
53
class TestRoundTrip:
    """Write-then-read round-trips: the bytes read back must equal the bytes written."""

    def test_write_then_read_small(self, repo: pathlib.Path) -> None:
        """A short ASCII payload is read back unchanged."""
        payload = b"hello muse"
        key = _sha256(payload)
        write_object(repo, key, payload)
        stored = read_object(repo, key)
        assert stored == payload

    def test_write_then_read_empty(self, repo: pathlib.Path) -> None:
        """The zero-length payload is read back as empty bytes."""
        payload = b""
        key = _sha256(payload)
        write_object(repo, key, payload)
        stored = read_object(repo, key)
        assert stored == payload

    def test_write_then_read_single_byte(self, repo: pathlib.Path) -> None:
        """A lone NUL byte is read back unchanged."""
        payload = b"\x00"
        key = _sha256(payload)
        write_object(repo, key, payload)
        stored = read_object(repo, key)
        assert stored == payload

    def test_write_then_read_binary(self, repo: pathlib.Path) -> None:
        """A payload containing every byte value (repeated 100 times) is read back unchanged."""
        every_byte_value = bytes(range(256))
        payload = every_byte_value * 100
        key = _sha256(payload)
        write_object(repo, key, payload)
        stored = read_object(repo, key)
        assert stored == payload

    @pytest.mark.parametrize("size", [1, 100, 4096, 65536, 1_000_000])
    def test_write_then_read_various_sizes(self, repo: pathlib.Path, size: int) -> None:
        """Random payloads of several sizes report a fresh write and read back unchanged."""
        payload = secrets.token_bytes(size)
        key = _sha256(payload)
        newly_written = write_object(repo, key, payload)
        assert newly_written is True
        assert read_object(repo, key) == payload

    def test_content_integrity(self, repo: pathlib.Path) -> None:
        """The stored bytes come back exactly: neither truncated nor padded."""
        for i in range(20):
            payload = f"object-content-{i}-{'x' * i}".encode()
            key = _sha256(payload)
            write_object(repo, key, payload)
            round_tripped = read_object(repo, key)
            assert round_tripped == payload
            assert len(round_tripped) == len(payload)
95
96
97 # ---------------------------------------------------------------------------
98 # Idempotency
99 # ---------------------------------------------------------------------------
100
101
class TestIdempotency:
    """Repeated writes under one object ID must leave the first payload in place."""

    def test_double_write_returns_false_second_time(self, repo: pathlib.Path) -> None:
        """The first write is expected to return True, the repeat to return False."""
        payload = b"idempotent"
        key = _sha256(payload)
        first_result = write_object(repo, key, payload)
        assert first_result is True
        second_result = write_object(repo, key, payload)
        assert second_result is False

    def test_double_write_does_not_corrupt(self, repo: pathlib.Path) -> None:
        """A second write with different bytes under the same ID must not replace the stored bytes."""
        original = b"original content"
        key = _sha256(original)
        write_object(repo, key, original)
        # Reuse the ID with a different payload; the test expects the store to
        # skip this write silently rather than overwrite.
        intruder = b"different content"
        write_object(repo, key, intruder)
        stored = read_object(repo, key)
        assert stored == original

    def test_triple_write_stays_stable(self, repo: pathlib.Path) -> None:
        """Three identical writes still read back the original payload."""
        payload = b"triple-write"
        key = _sha256(payload)
        for _attempt in (1, 2, 3):
            write_object(repo, key, payload)
        stored = read_object(repo, key)
        assert stored == payload
123
124
125 # ---------------------------------------------------------------------------
126 # has_object
127 # ---------------------------------------------------------------------------
128
129
class TestHasObject:
    """``has_object`` must reflect exactly which IDs have been written."""

    def test_absent_before_write(self, repo: pathlib.Path) -> None:
        """An ID that was never written is reported as absent."""
        key = _sha256(b"not yet written")
        present = has_object(repo, key)
        assert not present

    def test_present_after_write(self, repo: pathlib.Path) -> None:
        """An ID is reported as present once its payload has been written."""
        payload = b"present"
        key = _sha256(payload)
        write_object(repo, key, payload)
        present = has_object(repo, key)
        assert present

    def test_other_objects_dont_shadow(self, repo: pathlib.Path) -> None:
        """Writing one object must not make a different, unwritten ID look present."""
        first_payload = b"object-a"
        second_payload = b"object-b"
        first_key = _sha256(first_payload)
        second_key = _sha256(second_payload)

        # Only the first object exists at this point.
        write_object(repo, first_key, first_payload)
        assert has_object(repo, first_key)
        assert not has_object(repo, second_key)

        # After its own write the second object becomes visible too.
        write_object(repo, second_key, second_payload)
        assert has_object(repo, second_key)
151
152
153 # ---------------------------------------------------------------------------
154 # Absent objects
155 # ---------------------------------------------------------------------------
156
157
class TestAbsentObjects:
    """Behaviour of the read-side API for IDs that were never written."""

    def test_read_absent_returns_none(self, repo: pathlib.Path) -> None:
        """Reading an unknown ID is expected to yield None."""
        missing_key = 64 * "a"
        outcome = read_object(repo, missing_key)
        assert outcome is None

    def test_restore_absent_returns_false(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        """Restoring an unknown ID is expected to return False and create no file."""
        missing_key = 64 * "b"
        target = tmp_path / "restored.bin"
        outcome = restore_object(repo, missing_key, target)
        assert outcome is False
        assert not target.exists()

    def test_has_object_false_for_random_id(self, repo: pathlib.Path) -> None:
        """Ten random 64-hex-character IDs are all reported as absent."""
        attempts = 10
        while attempts > 0:
            random_key = secrets.token_hex(32)
            assert not has_object(repo, random_key)
            attempts -= 1
173
174
175 # ---------------------------------------------------------------------------
176 # Sharding layout
177 # ---------------------------------------------------------------------------
178
179
class TestSharding:
    """On-disk layout checks: the first two ID characters name the shard directory."""

    def test_object_path_uses_first_two_chars_as_dir(self, repo: pathlib.Path) -> None:
        """The path's parent directory is the 2-char prefix; the file name is the remaining 62 chars."""
        oid = "ab" + "c" * 62
        path = object_path(repo, oid)
        assert path.parent.name == "ab"
        assert path.name == "c" * 62

    def test_objects_with_same_prefix_go_to_same_shard(self, repo: pathlib.Path) -> None:
        """Two IDs sharing a prefix resolve to the same parent directory."""
        oid1 = "ff" + "0" * 62
        oid2 = "ff" + "1" * 62
        assert object_path(repo, oid1).parent == object_path(repo, oid2).parent

    def test_objects_with_different_prefix_go_to_different_shards(self, repo: pathlib.Path) -> None:
        """Two IDs differing only in prefix resolve to different parent directories."""
        oid1 = "aa" + "x" * 62
        oid2 = "bb" + "x" * 62
        assert object_path(repo, oid1).parent != object_path(repo, oid2).parent

    def test_256_shards_can_all_be_created(self, repo: pathlib.Path) -> None:
        """Write one object per possible shard prefix (00-ff) and check every shard by name."""
        prefixes = [f"{i:02x}" for i in range(256)]
        for prefix in prefixes:
            data = f"shard-{prefix}".encode()
            # Force the shard by overwriting the first two hex characters of the
            # digest; these IDs are therefore not true hashes of their content.
            oid = prefix + _sha256(data)[2:]
            # Every ID is new to this repo, so each write must report a fresh write.
            assert write_object(repo, oid, data) is True
        # Compare names, not just the count: 256 wrongly named directories
        # would otherwise still pass. ``prefixes`` is already in sorted order.
        shards = sorted(d.name for d in objects_dir(repo).iterdir() if d.is_dir())
        assert shards == prefixes
206
207
208 # ---------------------------------------------------------------------------
209 # write_object_from_path
210 # ---------------------------------------------------------------------------
211
212
class TestWriteObjectFromPath:
    """Storing an object whose payload comes from a file on disk."""

    def test_from_path_round_trip(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        """A file's bytes stored via its path report a fresh write and read back unchanged."""
        payload = b"from-path-content"
        source_file = tmp_path / "source.bin"
        source_file.write_bytes(payload)
        key = _sha256(payload)
        newly_written = write_object_from_path(repo, key, source_file)
        assert newly_written is True
        assert read_object(repo, key) == payload

    def test_from_path_idempotent(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        """Storing the same file under the same ID a second time is expected to return False."""
        payload = b"idempotent-from-path"
        source_file = tmp_path / "idem.bin"
        source_file.write_bytes(payload)
        key = _sha256(payload)
        write_object_from_path(repo, key, source_file)
        repeat_result = write_object_from_path(repo, key, source_file)
        assert repeat_result is False
229
230
231 # ---------------------------------------------------------------------------
232 # restore_object
233 # ---------------------------------------------------------------------------
234
235
class TestRestoreObject:
    """Restoring stored objects to destination paths on disk."""

    def test_restore_round_trip(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        """A restored file is expected to hold exactly the bytes that were written."""
        payload = b"restore-me"
        key = _sha256(payload)
        write_object(repo, key, payload)
        target = tmp_path.joinpath("sub", "restored.bin")
        restored = restore_object(repo, key, target)
        assert restored is True
        assert target.read_bytes() == payload

    def test_restore_creates_parent_dirs(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        """Restoring into a not-yet-existing nested directory must produce the file."""
        payload = b"deep-restore"
        key = _sha256(payload)
        write_object(repo, key, payload)
        target = tmp_path.joinpath("a", "b", "c", "file.bin")
        restore_object(repo, key, target)
        assert target.exists()

    def test_restore_large_object_intact(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        """A 2,000,000-byte random payload is restored byte-for-byte."""
        payload = secrets.token_bytes(2_000_000)
        key = _sha256(payload)
        write_object(repo, key, payload)
        target = tmp_path / "large.bin"
        restore_object(repo, key, target)
        restored_bytes = target.read_bytes()
        assert restored_bytes == payload
260
261
262 # ---------------------------------------------------------------------------
263 # Multiple distinct objects
264 # ---------------------------------------------------------------------------
265
266
class TestMultipleObjects:
    """Many distinct objects written to one store must each stay retrievable."""

    def test_100_distinct_objects_coexist(self, repo: pathlib.Path) -> None:
        """Write 100 payloads of growing length, then read each one back unchanged."""
        written: dict[str, bytes] = {}
        for i in range(100):
            data = f"payload-{i:03d}-{'z' * i}".encode()
            oid = _sha256(data)
            write_object(repo, oid, data)
            written[oid] = data

        # Read back only after every write, so a later write that clobbered
        # an earlier object would be detected.
        for oid, data in written.items():
            assert read_object(repo, oid) == data

    def test_all_objects_independently_addressable(self, repo: pathlib.Path) -> None:
        """Verify no two distinct objects collide in the store."""
        written: dict[str, bytes] = {}
        for _ in range(50):
            data = secrets.token_bytes(64)
            oid = _sha256(data)
            write_object(repo, oid, data)
            written[oid] = data
        # All OIDs should be unique (probabilistic but essentially certain).
        assert len(written) == 50
        # Unique IDs alone say nothing about the store: confirm each object is
        # present and still returns its own payload after all writes are done.
        for oid, data in written.items():
            assert has_object(repo, oid)
            assert read_object(repo, oid) == data