cgcardona / muse public
test_stress_object_store.py python
287 lines 10.1 KB
e6786943 feat: upgrade to Python 3.14, drop from __future__ import annotations Gabriel Cardona <cgcardona@gmail.com> 1d ago
1 """Stress tests for the content-addressed object store.
2
3 Exercises:
4 - Write-then-read round-trip for varied payload sizes (0 bytes … 1 MB).
5 - Idempotency: writing the same object ID twice is a no-op.
6 - has_object before and after writes.
7 - object_path sharding: first two hex chars as directory.
8 - read_object returns None for absent objects.
9 - restore_object copies bytes faithfully.
10 - write_object_from_path uses copy semantics, not load.
11 - Content integrity: read(write(content)) == content.
12 - Multiple distinct objects coexist without collision.
13 """
14
15 import hashlib
16 import os
17 import pathlib
18 import secrets
19
20 import pytest
21
22 from muse.core.object_store import (
23 has_object,
24 object_path,
25 objects_dir,
26 read_object,
27 restore_object,
28 write_object,
29 write_object_from_path,
30 )
31
32
33 # ---------------------------------------------------------------------------
34 # Helpers
35 # ---------------------------------------------------------------------------
36
37
38 def _sha256(data: bytes) -> str:
39 return hashlib.sha256(data).hexdigest()
40
41
@pytest.fixture
def repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Create a ``.muse`` directory inside pytest's ``tmp_path`` and return ``tmp_path``."""
    muse_dir = tmp_path / ".muse"
    muse_dir.mkdir()
    return tmp_path
46
47
48 # ---------------------------------------------------------------------------
49 # Basic round-trip
50 # ---------------------------------------------------------------------------
51
52
class TestRoundTrip:
    """Write-then-read checks: bytes written under an ID must read back unchanged."""

    @staticmethod
    def _store_and_fetch(repo: pathlib.Path, payload: bytes) -> bytes | None:
        """Write *payload* under its SHA-256 ID and return what the store reads back."""
        key = _sha256(payload)
        write_object(repo, key, payload)
        return read_object(repo, key)

    def test_write_then_read_small(self, repo: pathlib.Path) -> None:
        """A short ASCII payload round-trips."""
        payload = b"hello muse"
        assert self._store_and_fetch(repo, payload) == payload

    def test_write_then_read_empty(self, repo: pathlib.Path) -> None:
        """A zero-length payload round-trips."""
        payload = b""
        assert self._store_and_fetch(repo, payload) == payload

    def test_write_then_read_single_byte(self, repo: pathlib.Path) -> None:
        """A single NUL byte round-trips."""
        payload = b"\x00"
        assert self._store_and_fetch(repo, payload) == payload

    def test_write_then_read_binary(self, repo: pathlib.Path) -> None:
        """A payload covering every byte value (repeated 100 times) round-trips."""
        payload = bytes(range(256)) * 100
        assert self._store_and_fetch(repo, payload) == payload

    @pytest.mark.parametrize("size", [1, 100, 4096, 65536, 1_000_000])
    def test_write_then_read_various_sizes(self, repo: pathlib.Path, size: int) -> None:
        """Random payloads of several sizes are reported as newly written and round-trip."""
        payload = secrets.token_bytes(size)
        key = _sha256(payload)
        was_written = write_object(repo, key, payload)
        assert was_written is True
        assert read_object(repo, key) == payload

    def test_content_integrity(self, repo: pathlib.Path) -> None:
        """Read back exactly what was written — not a truncated or padded version."""
        for i in range(20):
            payload = f"object-content-{i}-{'x' * i}".encode()
            fetched = self._store_and_fetch(repo, payload)
            assert fetched == payload
            assert len(fetched) == len(payload)
94
95
96 # ---------------------------------------------------------------------------
97 # Idempotency
98 # ---------------------------------------------------------------------------
99
100
class TestIdempotency:
    """Repeated writes under one object ID must leave the first content in place."""

    def test_double_write_returns_false_second_time(self, repo: pathlib.Path) -> None:
        """The first write is expected to return True, the repeat False."""
        payload = b"idempotent"
        key = _sha256(payload)
        first_result = write_object(repo, key, payload)
        assert first_result is True
        second_result = write_object(repo, key, payload)
        assert second_result is False

    def test_double_write_does_not_corrupt(self, repo: pathlib.Path) -> None:
        """A second write with different bytes under the same ID must not replace the content."""
        original = b"original content"
        key = _sha256(original)
        write_object(repo, key, original)
        # Same ID, mismatching bytes: the store is expected to skip this write silently.
        write_object(repo, key, b"different content")
        assert read_object(repo, key) == original

    def test_triple_write_stays_stable(self, repo: pathlib.Path) -> None:
        """Three identical writes still read back as the original payload."""
        payload = b"triple-write"
        key = _sha256(payload)
        for _attempt in range(3):
            write_object(repo, key, payload)
        assert read_object(repo, key) == payload
122
123
124 # ---------------------------------------------------------------------------
125 # has_object
126 # ---------------------------------------------------------------------------
127
128
class TestHasObject:
    """Presence checks before and after objects are written."""

    def test_absent_before_write(self, repo: pathlib.Path) -> None:
        """An ID that was never written is reported as absent."""
        key = _sha256(b"not yet written")
        assert not has_object(repo, key)

    def test_present_after_write(self, repo: pathlib.Path) -> None:
        """An ID is reported as present once its object has been written."""
        payload = b"present"
        key = _sha256(payload)
        write_object(repo, key, payload)
        assert has_object(repo, key)

    def test_other_objects_dont_shadow(self, repo: pathlib.Path) -> None:
        """Writing one object must not make a different, unwritten ID appear present."""
        first_payload, second_payload = b"object-a", b"object-b"
        first_key = _sha256(first_payload)
        second_key = _sha256(second_payload)

        write_object(repo, first_key, first_payload)
        assert has_object(repo, first_key)
        assert not has_object(repo, second_key)

        write_object(repo, second_key, second_payload)
        assert has_object(repo, second_key)
150
151
152 # ---------------------------------------------------------------------------
153 # Absent objects
154 # ---------------------------------------------------------------------------
155
156
class TestAbsentObjects:
    """Behaviour of the read/restore/has helpers for IDs that were never written."""

    def test_read_absent_returns_none(self, repo: pathlib.Path) -> None:
        """Reading an unknown ID is expected to yield None."""
        missing_key = "a" * 64
        assert read_object(repo, missing_key) is None

    def test_restore_absent_returns_false(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        """Restoring an unknown ID is expected to return False and create no file."""
        missing_key = "b" * 64
        target = tmp_path / "restored.bin"
        outcome = restore_object(repo, missing_key, target)
        assert outcome is False
        assert not target.exists()

    def test_has_object_false_for_random_id(self, repo: pathlib.Path) -> None:
        """Ten random 64-hex-character IDs are all reported as absent."""
        assert not any(has_object(repo, secrets.token_hex(32)) for _ in range(10))
172
173
174 # ---------------------------------------------------------------------------
175 # Sharding layout
176 # ---------------------------------------------------------------------------
177
178
class TestSharding:
    """Checks of the on-disk layout: the first two ID characters name the shard directory."""

    def test_object_path_uses_first_two_chars_as_dir(self, repo: pathlib.Path) -> None:
        """The parent directory is the 2-char prefix; the file name is the remaining 62 chars."""
        oid = "ab" + "c" * 62
        path = object_path(repo, oid)
        assert path.parent.name == "ab"
        assert path.name == "c" * 62

    def test_objects_with_same_prefix_go_to_same_shard(self, repo: pathlib.Path) -> None:
        """Two IDs sharing a prefix resolve to the same parent directory."""
        oid1 = "ff" + "0" * 62
        oid2 = "ff" + "1" * 62
        assert object_path(repo, oid1).parent == object_path(repo, oid2).parent

    def test_objects_with_different_prefix_go_to_different_shards(self, repo: pathlib.Path) -> None:
        """Two IDs with different prefixes resolve to different parent directories."""
        oid1 = "aa" + "x" * 62
        oid2 = "bb" + "x" * 62
        assert object_path(repo, oid1).parent != object_path(repo, oid2).parent

    def test_256_shards_can_all_be_created(self, repo: pathlib.Path) -> None:
        """Write one object per possible shard prefix (00-ff) and verify every shard exists."""
        prefixes = [f"{i:02x}" for i in range(256)]
        for prefix in prefixes:
            data = f"shard-{prefix}".encode()
            # Force the ID into the shard under test; the remaining 62 characters
            # come from the payload hash so the ID keeps its 64-character length.
            oid = prefix + _sha256(data)[2:]
            write_object(repo, oid, data)
        shards = [d.name for d in objects_dir(repo).iterdir() if d.is_dir()]
        assert len(shards) == 256
        # A bare count would also pass with 256 wrongly named directories,
        # so check that the directory names are exactly the expected prefixes.
        assert set(shards) == set(prefixes)
205
206
207 # ---------------------------------------------------------------------------
208 # write_object_from_path
209 # ---------------------------------------------------------------------------
210
211
class TestWriteObjectFromPath:
    """Storing an object whose bytes come from an existing file on disk."""

    def test_from_path_round_trip(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        """A file stored by path is reported as newly written and reads back byte-identical."""
        payload = b"from-path-content"
        source_file = tmp_path / "source.bin"
        source_file.write_bytes(payload)
        key = _sha256(payload)
        was_written = write_object_from_path(repo, key, source_file)
        assert was_written is True
        assert read_object(repo, key) == payload

    def test_from_path_idempotent(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        """Storing the same file under the same ID a second time is expected to return False."""
        payload = b"idempotent-from-path"
        source_file = tmp_path / "idem.bin"
        source_file.write_bytes(payload)
        key = _sha256(payload)
        write_object_from_path(repo, key, source_file)
        repeat_result = write_object_from_path(repo, key, source_file)
        assert repeat_result is False
228
229
230 # ---------------------------------------------------------------------------
231 # restore_object
232 # ---------------------------------------------------------------------------
233
234
class TestRestoreObject:
    """Copying stored objects back out to arbitrary destination paths."""

    def test_restore_round_trip(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        """A restored file is expected to report True and contain the stored bytes."""
        payload = b"restore-me"
        key = _sha256(payload)
        write_object(repo, key, payload)
        target = tmp_path / "sub" / "restored.bin"
        restored = restore_object(repo, key, target)
        assert restored is True
        assert target.read_bytes() == payload

    def test_restore_creates_parent_dirs(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        """Restoring into a not-yet-existing nested directory must produce the file."""
        payload = b"deep-restore"
        key = _sha256(payload)
        write_object(repo, key, payload)
        target = tmp_path.joinpath("a", "b", "c", "file.bin")
        restore_object(repo, key, target)
        assert target.exists()

    def test_restore_large_object_intact(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        """A 2,000,000-byte random payload is restored without alteration."""
        payload = secrets.token_bytes(2_000_000)
        key = _sha256(payload)
        write_object(repo, key, payload)
        target = tmp_path / "large.bin"
        restore_object(repo, key, target)
        assert target.read_bytes() == payload
259
260
261 # ---------------------------------------------------------------------------
262 # Multiple distinct objects
263 # ---------------------------------------------------------------------------
264
265
class TestMultipleObjects:
    """Many distinct objects stored side by side must each stay retrievable."""

    def test_100_distinct_objects_coexist(self, repo: pathlib.Path) -> None:
        """Write 100 distinct payloads, then read each one back unchanged."""
        written: dict[str, bytes] = {}
        for i in range(100):
            data = f"payload-{i:03d}-{'z' * i}".encode()
            oid = _sha256(data)
            write_object(repo, oid, data)
            written[oid] = data

        # Read only after every write has happened, so a later write that
        # clobbered an earlier object would be detected.
        for oid, data in written.items():
            assert read_object(repo, oid) == data

    def test_all_objects_independently_addressable(self, repo: pathlib.Path) -> None:
        """Verify no two distinct objects collide in the store."""
        written: dict[str, bytes] = {}
        for _ in range(50):
            data = secrets.token_bytes(64)
            oid = _sha256(data)
            write_object(repo, oid, data)
            written[oid] = data

        # All OIDs should be unique (probabilistic but essentially certain).
        assert len(written) == 50
        # Unique IDs alone say nothing about the store: also require that each
        # ID maps to its own path and still yields its own bytes after all writes.
        assert len({object_path(repo, oid) for oid in written}) == 50
        for oid, data in written.items():
            assert has_object(repo, oid)
            assert read_object(repo, oid) == data