tests/test_plumbing_hash_object.py · gabriel/muse

test_plumbing_hash_object.py python

229 lines 9.2 KB

99746394 feat(tests+docs): supercharge plumbing test suite and update reference doc Gabriel Cardona <gabriel@tellurstori.com> 2d ago

1	"""Tests for ``muse plumbing hash-object``.
2
3	Covers: SHA-256 correctness, ``--write`` flag storage, streaming safety,
4	idempotent writes, error cases (missing path, directory path, bad format,
5	no-repo write), text-format output, and a stress case with a 2 MiB file.
6	"""
7
8	from __future__ import annotations
9
10	import hashlib
11	import json
12	import pathlib
13
14	import pytest
15	from typer.testing import CliRunner
16
17	from muse.cli.app import cli
18	from muse.core.errors import ExitCode
19	from muse.core.object_store import has_object, object_path
20
21	runner = CliRunner()
22
23
24	# ---------------------------------------------------------------------------
25	# Helpers
26	# ---------------------------------------------------------------------------
27
28
29	def _sha(data: bytes) -> str:
30	return hashlib.sha256(data).hexdigest()
31
32
33	def _init_repo(path: pathlib.Path) -> pathlib.Path:
34	muse = path / ".muse"
35	(muse / "commits").mkdir(parents=True)
36	(muse / "snapshots").mkdir(parents=True)
37	(muse / "objects").mkdir(parents=True)
38	(muse / "refs" / "heads").mkdir(parents=True)
39	(muse / "HEAD").write_text("ref: refs/heads/main", encoding="utf-8")
40	(muse / "repo.json").write_text(
41	json.dumps({"repo_id": "test-repo", "domain": "midi"}), encoding="utf-8"
42	)
43	return path
44
45
46	def _env(repo: pathlib.Path) -> dict[str, str]:
47	return {"MUSE_REPO_ROOT": str(repo)}
48
49
50	def _file(tmp: pathlib.Path, name: str, content: bytes) -> pathlib.Path:
51	p = tmp / name
52	p.write_bytes(content)
53	return p
54
55
56	# ---------------------------------------------------------------------------
57	# Unit: SHA-256 correctness
58	# ---------------------------------------------------------------------------
59
60
class TestHashObjectUnit:
    """Check that ``hash-object`` reports the SHA-256 of the file contents."""

    def test_known_sha256_matches(self, tmp_path: pathlib.Path) -> None:
        """Text output equals the SHA-256 computed with ``hashlib``."""
        content = b"hello muse"
        expected = _sha(content)
        f = _file(tmp_path, "sample.mid", content)
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(
            cli, ["plumbing", "hash-object", "--format", "text", str(f)], env=_env(repo)
        )
        assert result.exit_code == 0, result.output
        assert result.stdout.strip() == expected

    def test_json_output_has_object_id_and_stored_false(self, tmp_path: pathlib.Path) -> None:
        """Default output is JSON with ``object_id`` and ``stored`` set to false."""
        content = b"json output"
        f = _file(tmp_path, "x.mid", content)
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(cli, ["plumbing", "hash-object", str(f)], env=_env(repo))
        assert result.exit_code == 0, result.output
        data = json.loads(result.stdout)
        assert data["object_id"] == _sha(content)
        assert data["stored"] is False

    def test_different_content_yields_different_id(self, tmp_path: pathlib.Path) -> None:
        """Two files with different bytes produce different IDs."""
        f1 = _file(tmp_path, "a.mid", b"aaa")
        f2 = _file(tmp_path, "b.mid", b"bbb")
        repo = _init_repo(tmp_path / "repo")
        r1 = runner.invoke(
            cli, ["plumbing", "hash-object", "--format", "text", str(f1)], env=_env(repo)
        )
        r2 = runner.invoke(
            cli, ["plumbing", "hash-object", "--format", "text", str(f2)], env=_env(repo)
        )
        # Without these checks, two failing invocations that print different
        # error text would satisfy the inequality below.
        assert r1.exit_code == 0, r1.output
        assert r2.exit_code == 0, r2.output
        assert r1.stdout.strip() != r2.stdout.strip()

    def test_empty_file_has_deterministic_sha256(self, tmp_path: pathlib.Path) -> None:
        """An empty file hashes to the SHA-256 of the empty byte string."""
        f = _file(tmp_path, "empty.mid", b"")
        expected = _sha(b"")
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(
            cli, ["plumbing", "hash-object", "--format", "text", str(f)], env=_env(repo)
        )
        assert result.exit_code == 0, result.output
        assert result.stdout.strip() == expected
98
99
100	# ---------------------------------------------------------------------------
101	# Integration: --write flag
102	# ---------------------------------------------------------------------------
103
104
class TestHashObjectWrite:
    """Exercise the ``--write`` / ``-w`` flag against the object store."""

    def test_write_stores_object_in_object_store(self, tmp_path: pathlib.Path) -> None:
        """``--write`` reports ``stored`` true and the object is present."""
        content = b"write me"
        f = _file(tmp_path, "w.mid", content)
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(cli, ["plumbing", "hash-object", "--write", str(f)], env=_env(repo))
        assert result.exit_code == 0, result.output
        data = json.loads(result.stdout)
        assert data["stored"] is True
        assert has_object(repo, data["object_id"])

    def test_write_stores_correct_bytes(self, tmp_path: pathlib.Path) -> None:
        """The bytes found at the object's path equal the input bytes."""
        content = b"check round-trip"
        f = _file(tmp_path, "rt.mid", content)
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(cli, ["plumbing", "hash-object", "--write", str(f)], env=_env(repo))
        assert result.exit_code == 0, result.output
        oid = json.loads(result.stdout)["object_id"]
        stored = object_path(repo, oid).read_bytes()
        assert stored == content

    def test_write_idempotent_second_call(self, tmp_path: pathlib.Path) -> None:
        """Writing the same file twice succeeds and leaves the object intact."""
        content = b"idempotent"
        f = _file(tmp_path, "i.mid", content)
        repo = _init_repo(tmp_path / "repo")
        args = ["plumbing", "hash-object", "--write", str(f)]
        r1 = runner.invoke(cli, args, env=_env(repo))
        r2 = runner.invoke(cli, args, env=_env(repo))
        assert r1.exit_code == 0, r1.output
        assert r2.exit_code == 0, r2.output
        oid = json.loads(r1.stdout)["object_id"]
        assert oid == json.loads(r2.stdout)["object_id"]
        # Matching IDs alone do not show the second write left the store
        # usable, so also check the stored object directly.
        assert has_object(repo, oid)
        assert object_path(repo, oid).read_bytes() == content

    def test_write_without_repo_exits_user_error(self, tmp_path: pathlib.Path) -> None:
        """``--write`` fails when the configured repo root does not exist."""
        f = _file(tmp_path, "norepo.mid", b"data")
        # MUSE_REPO_ROOT points at a path that was never created, so there is
        # no .muse directory to write into — the command should fail.
        result = runner.invoke(
            cli,
            ["plumbing", "hash-object", "--write", str(f)],
            env={"MUSE_REPO_ROOT": str(tmp_path / "nonexistent")},
        )
        assert result.exit_code != 0

    def test_short_write_flag_works(self, tmp_path: pathlib.Path) -> None:
        """``-w`` is accepted and reports ``stored`` true."""
        content = b"short flag"
        f = _file(tmp_path, "sf.mid", content)
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(cli, ["plumbing", "hash-object", "-w", str(f)], env=_env(repo))
        assert result.exit_code == 0, result.output
        assert json.loads(result.stdout)["stored"] is True
152
153
154	# ---------------------------------------------------------------------------
155	# Integration: error cases
156	# ---------------------------------------------------------------------------
157
158
class TestHashObjectErrors:
    """Check exit codes for invalid input, plus the ``-f`` short flag."""

    def test_nonexistent_path_exits_user_error(self, tmp_path: pathlib.Path) -> None:
        """A path that does not exist yields ``ExitCode.USER_ERROR``."""
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(
            cli, ["plumbing", "hash-object", str(tmp_path / "missing.mid")], env=_env(repo)
        )
        assert result.exit_code == ExitCode.USER_ERROR

    def test_directory_path_exits_user_error(self, tmp_path: pathlib.Path) -> None:
        """Passing a directory instead of a file yields ``ExitCode.USER_ERROR``."""
        repo = _init_repo(tmp_path / "repo")
        d = tmp_path / "subdir"
        d.mkdir()
        result = runner.invoke(cli, ["plumbing", "hash-object", str(d)], env=_env(repo))
        assert result.exit_code == ExitCode.USER_ERROR

    def test_bad_format_exits_user_error(self, tmp_path: pathlib.Path) -> None:
        """``--format yaml`` yields ``ExitCode.USER_ERROR``."""
        f = _file(tmp_path, "f.mid", b"data")
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(
            cli, ["plumbing", "hash-object", "--format", "yaml", str(f)], env=_env(repo)
        )
        assert result.exit_code == ExitCode.USER_ERROR

    def test_short_format_flag_works(self, tmp_path: pathlib.Path) -> None:
        """``-f text`` prints the SHA-256 of the file contents."""
        content = b"data"
        f = _file(tmp_path, "g.mid", content)
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(
            cli, ["plumbing", "hash-object", "-f", "text", str(f)], env=_env(repo)
        )
        assert result.exit_code == 0, result.output
        # Compare with the real digest; a length-only check would accept any
        # 64-character output.
        assert result.stdout.strip() == _sha(content)
188
189
190	# ---------------------------------------------------------------------------
191	# Stress: 2 MiB file handled without heap explosion
192	# ---------------------------------------------------------------------------
193
194
class TestHashObjectStress:
    """Run ``hash-object`` on larger inputs and on many small files."""

    def test_large_file_hashes_correctly(self, tmp_path: pathlib.Path) -> None:
        """A 2 MiB file of a repeated byte hashes to the ``hashlib`` digest."""
        two_mib = 2 * 1024 * 1024
        payload = b"X" * two_mib
        want = _sha(payload)
        big = _file(tmp_path, "big.bin", payload)
        repo = _init_repo(tmp_path / "repo")
        argv = ["plumbing", "hash-object", "--format", "text", str(big)]
        outcome = runner.invoke(cli, argv, env=_env(repo))
        assert outcome.exit_code == 0
        assert outcome.stdout.strip() == want

    def test_large_file_write_round_trip(self, tmp_path: pathlib.Path) -> None:
        """1 MiB of varied bytes written with ``--write`` reads back unchanged."""
        payload = bytes(range(256)) * 4096  # 256 * 4096 bytes = 1 MiB
        varied = _file(tmp_path, "varied.bin", payload)
        repo = _init_repo(tmp_path / "repo")
        argv = ["plumbing", "hash-object", "--write", str(varied)]
        outcome = runner.invoke(cli, argv, env=_env(repo))
        assert outcome.exit_code == 0
        parsed = json.loads(outcome.stdout)
        object_id = parsed["object_id"]
        stored_bytes = object_path(repo, object_id).read_bytes()
        assert stored_bytes == payload

    def test_100_distinct_files_all_unique_ids(self, tmp_path: pathlib.Path) -> None:
        """One hundred files with distinct contents yield one hundred distinct IDs."""
        repo = _init_repo(tmp_path / "repo")
        seen: set[str] = set()
        for index in range(100):
            payload = f"file-content-{index}".encode()
            source = _file(tmp_path, f"f{index}.mid", payload)
            argv = ["plumbing", "hash-object", "--format", "text", str(source)]
            outcome = runner.invoke(cli, argv, env=_env(repo))
            assert outcome.exit_code == 0
            seen.add(outcome.stdout.strip())
        assert len(seen) == 100

Content Address

Object ID (SHA-256)

d482822ba9c9a764621d2f94c6f31cb0d25c95dc08fff640e4a8637377d7c2cb

This file is immutable and content-addressed. The same SHA always refers to the same bytes, across every clone and every time.

File Info

Path tests/test_plumbing_hash_object.py

Lines 229

Size 9.2 KB

Language python

Ref a44ac734

Snapshot 5380f2421dc5…

Last Modified

99746394

feat(tests+docs): supercharge plumbing test suite and update reference doc

Gabriel Cardona <gabriel@tellurstori.com> 2d ago

View commit →

Links

Browse tree at a44ac734 All commits View raw