gabriel / muse public
test_plumbing_hash_object.py python
229 lines 9.2 KB
99746394 feat(tests+docs): supercharge plumbing test suite and update reference doc Gabriel Cardona <gabriel@tellurstori.com> 2d ago
1 """Tests for ``muse plumbing hash-object``.
2
3 Covers: SHA-256 correctness, ``--write`` flag storage, streaming safety,
4 idempotent writes, error cases (missing path, directory path, bad format,
5 no-repo write), text-format output, and a stress case with a 2 MiB file.
6 """
7
8 from __future__ import annotations
9
10 import hashlib
11 import json
12 import pathlib
13
14 import pytest
15 from typer.testing import CliRunner
16
17 from muse.cli.app import cli
18 from muse.core.errors import ExitCode
19 from muse.core.object_store import has_object, object_path
20
# Shared Typer/Click test runner; every test in this module invokes ``cli``
# through ``runner.invoke`` and inspects the returned result object.
runner = CliRunner()
22
23
24 # ---------------------------------------------------------------------------
25 # Helpers
26 # ---------------------------------------------------------------------------
27
28
def _sha(data: bytes) -> str:
    """Return the lowercase hex SHA-256 digest of *data*.

    Used as the reference value that CLI output is compared against.
    """
    hasher = hashlib.sha256()
    hasher.update(data)
    return hasher.hexdigest()
31
32
def _init_repo(path: pathlib.Path) -> pathlib.Path:
    """Create a minimal ``.muse`` layout under *path* and return *path*.

    The layout consists of the ``commits``, ``snapshots``, ``objects`` and
    ``refs/heads`` directories, a ``HEAD`` file pointing at ``refs/heads/main``
    and a ``repo.json`` file carrying a fixed repo id and domain.

    Raises:
        FileExistsError: if any of the directories already exists, because
            ``mkdir`` is called without ``exist_ok``.
    """
    dot_muse = path / ".muse"

    # Created in this order; the first mkdir also creates *path* and ``.muse``.
    layout = (
        ("commits",),
        ("snapshots",),
        ("objects",),
        ("refs", "heads"),
    )
    for parts in layout:
        dot_muse.joinpath(*parts).mkdir(parents=True)

    head_file = dot_muse / "HEAD"
    head_file.write_text("ref: refs/heads/main", encoding="utf-8")

    repo_meta = {"repo_id": "test-repo", "domain": "midi"}
    meta_file = dot_muse / "repo.json"
    meta_file.write_text(json.dumps(repo_meta), encoding="utf-8")

    return path
44
45
def _env(repo: pathlib.Path) -> dict[str, str]:
    """Build the environment mapping that points the CLI at *repo*.

    The result holds a single ``MUSE_REPO_ROOT`` entry whose value is the
    string form of *repo*; it is passed as ``env=`` to ``runner.invoke``.
    """
    overrides: dict[str, str] = {}
    overrides["MUSE_REPO_ROOT"] = str(repo)
    return overrides
48
49
def _file(tmp: pathlib.Path, name: str, content: bytes) -> pathlib.Path:
    """Write *content* to ``tmp / name`` and return the resulting path.

    Any existing file at that location is overwritten.
    """
    target = tmp.joinpath(name)
    with target.open("wb") as handle:
        handle.write(content)
    return target
54
55
56 # ---------------------------------------------------------------------------
57 # Unit: SHA-256 correctness
58 # ---------------------------------------------------------------------------
59
60
class TestHashObjectUnit:
    """Digest-correctness checks for ``hash-object`` run without ``--write``."""

    def test_known_sha256_matches(self, tmp_path: pathlib.Path) -> None:
        """Text output equals the SHA-256 hex digest of the file's bytes."""
        content = b"hello muse"
        expected = _sha(content)
        f = _file(tmp_path, "sample.mid", content)
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(
            cli,
            ["plumbing", "hash-object", "--format", "text", str(f)],
            env=_env(repo),
        )
        assert result.exit_code == 0, result.output
        assert result.stdout.strip() == expected

    def test_json_output_has_object_id_and_stored_false(
        self, tmp_path: pathlib.Path
    ) -> None:
        """With no ``--format`` the output parses as JSON with both keys set."""
        content = b"json output"
        f = _file(tmp_path, "x.mid", content)
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(
            cli, ["plumbing", "hash-object", str(f)], env=_env(repo)
        )
        assert result.exit_code == 0, result.output
        data = json.loads(result.stdout)
        assert data["object_id"] == _sha(content)
        assert data["stored"] is False

    def test_different_content_yields_different_id(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Two files with different bytes produce different ids."""
        f1 = _file(tmp_path, "a.mid", b"aaa")
        f2 = _file(tmp_path, "b.mid", b"bbb")
        repo = _init_repo(tmp_path / "repo")
        env = _env(repo)
        r1 = runner.invoke(
            cli, ["plumbing", "hash-object", "--format", "text", str(f1)], env=env
        )
        r2 = runner.invoke(
            cli, ["plumbing", "hash-object", "--format", "text", str(f2)], env=env
        )
        # Check both exit codes first: a failed run (empty stdout) paired with
        # a successful one would otherwise satisfy the inequality vacuously.
        assert r1.exit_code == 0, r1.output
        assert r2.exit_code == 0, r2.output
        assert r1.stdout.strip() != r2.stdout.strip()

    def test_empty_file_has_deterministic_sha256(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A zero-byte file hashes to the SHA-256 digest of ``b""``."""
        f = _file(tmp_path, "empty.mid", b"")
        expected = _sha(b"")
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(
            cli,
            ["plumbing", "hash-object", "--format", "text", str(f)],
            env=_env(repo),
        )
        assert result.exit_code == 0, result.output
        assert result.stdout.strip() == expected
98
99
100 # ---------------------------------------------------------------------------
101 # Integration: --write flag
102 # ---------------------------------------------------------------------------
103
104
class TestHashObjectWrite:
    """Checks that ``--write`` / ``-w`` stores the hashed file in the repo."""

    def test_write_stores_object_in_object_store(
        self, tmp_path: pathlib.Path
    ) -> None:
        """``--write`` reports ``stored: true`` and the object becomes visible."""
        content = b"write me"
        f = _file(tmp_path, "w.mid", content)
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(
            cli, ["plumbing", "hash-object", "--write", str(f)], env=_env(repo)
        )
        assert result.exit_code == 0, result.output
        data = json.loads(result.stdout)
        assert data["stored"] is True
        assert has_object(repo, data["object_id"])

    def test_write_stores_correct_bytes(self, tmp_path: pathlib.Path) -> None:
        """The stored object's bytes equal the source file's bytes."""
        content = b"check round-trip"
        f = _file(tmp_path, "rt.mid", content)
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(
            cli, ["plumbing", "hash-object", "--write", str(f)], env=_env(repo)
        )
        assert result.exit_code == 0, result.output
        oid = json.loads(result.stdout)["object_id"]
        stored = object_path(repo, oid).read_bytes()
        assert stored == content

    def test_write_idempotent_second_call(self, tmp_path: pathlib.Path) -> None:
        """Writing the same file twice succeeds and leaves the object intact."""
        content = b"idempotent"
        f = _file(tmp_path, "i.mid", content)
        repo = _init_repo(tmp_path / "repo")
        args = ["plumbing", "hash-object", "--write", str(f)]
        env = _env(repo)
        r1 = runner.invoke(cli, args, env=env)
        r2 = runner.invoke(cli, args, env=env)
        # Separate asserts so a failure report shows which call broke.
        assert r1.exit_code == 0, r1.output
        assert r2.exit_code == 0, r2.output
        oid = json.loads(r1.stdout)["object_id"]
        assert oid == json.loads(r2.stdout)["object_id"]
        # The repeated write must not remove or corrupt the stored object.
        assert has_object(repo, oid)
        assert object_path(repo, oid).read_bytes() == content

    def test_write_without_repo_exits_user_error(
        self, tmp_path: pathlib.Path
    ) -> None:
        """``--write`` fails when the configured repo root does not exist.

        Only a non-zero exit status is asserted; the specific exit code is
        not pinned by this test.
        """
        f = _file(tmp_path, "norepo.mid", b"data")
        # MUSE_REPO_ROOT points at a directory that was never created, so
        # there is no .muse repository to write into — should fail.
        result = runner.invoke(
            cli,
            ["plumbing", "hash-object", "--write", str(f)],
            env={"MUSE_REPO_ROOT": str(tmp_path / "nonexistent")},
        )
        assert result.exit_code != 0, result.output

    def test_short_write_flag_works(self, tmp_path: pathlib.Path) -> None:
        """``-w`` is accepted and reports ``stored: true`` like ``--write``."""
        content = b"short flag"
        f = _file(tmp_path, "sf.mid", content)
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(
            cli, ["plumbing", "hash-object", "-w", str(f)], env=_env(repo)
        )
        assert result.exit_code == 0, result.output
        assert json.loads(result.stdout)["stored"] is True
152
153
154 # ---------------------------------------------------------------------------
155 # Integration: error cases
156 # ---------------------------------------------------------------------------
157
158
class TestHashObjectErrors:
    """Argument-validation checks: bad paths and bad ``--format`` values.

    ``test_short_format_flag_works`` is a success-path test that lives here
    alongside the ``--format`` error case.
    """

    def test_nonexistent_path_exits_user_error(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A path that does not exist yields ``ExitCode.USER_ERROR``."""
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(
            cli,
            ["plumbing", "hash-object", str(tmp_path / "missing.mid")],
            env=_env(repo),
        )
        assert result.exit_code == ExitCode.USER_ERROR, result.output

    def test_directory_path_exits_user_error(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Passing a directory instead of a file yields ``ExitCode.USER_ERROR``."""
        repo = _init_repo(tmp_path / "repo")
        d = tmp_path / "subdir"
        d.mkdir()
        result = runner.invoke(
            cli, ["plumbing", "hash-object", str(d)], env=_env(repo)
        )
        assert result.exit_code == ExitCode.USER_ERROR, result.output

    def test_bad_format_exits_user_error(self, tmp_path: pathlib.Path) -> None:
        """An unsupported ``--format`` value yields ``ExitCode.USER_ERROR``."""
        f = _file(tmp_path, "f.mid", b"data")
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(
            cli,
            ["plumbing", "hash-object", "--format", "yaml", str(f)],
            env=_env(repo),
        )
        assert result.exit_code == ExitCode.USER_ERROR, result.output

    def test_short_format_flag_works(self, tmp_path: pathlib.Path) -> None:
        """``-f text`` is accepted and prints the file's SHA-256 digest."""
        content = b"data"
        f = _file(tmp_path, "g.mid", content)
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(
            cli, ["plumbing", "hash-object", "-f", "text", str(f)], env=_env(repo)
        )
        assert result.exit_code == 0, result.output
        # Compare against the real digest rather than only its length, so
        # arbitrary 64-character output cannot pass.
        assert result.stdout.strip() == _sha(content)
188
189
190 # ---------------------------------------------------------------------------
# Stress: large (1-2 MiB) files and 100 sequential invocations hash correctly
192 # ---------------------------------------------------------------------------
193
194
class TestHashObjectStress:
    """Larger inputs and repeated invocations.

    These tests check digest and round-trip correctness only; they do not
    measure memory use.
    """

    def test_large_file_hashes_correctly(self, tmp_path: pathlib.Path) -> None:
        """A 2 MiB file of a single repeated byte hashes to the expected id."""
        content = b"X" * (2 * 1024 * 1024)  # 2 MiB
        expected = _sha(content)
        f = _file(tmp_path, "big.bin", content)
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(
            cli,
            ["plumbing", "hash-object", "--format", "text", str(f)],
            env=_env(repo),
        )
        assert result.exit_code == 0, result.output
        assert result.stdout.strip() == expected

    def test_large_file_write_round_trip(self, tmp_path: pathlib.Path) -> None:
        """A 1 MiB file covering every byte value is stored byte-for-byte."""
        content = bytes(range(256)) * 4096  # 1 MiB of varied bytes
        f = _file(tmp_path, "varied.bin", content)
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(
            cli, ["plumbing", "hash-object", "--write", str(f)], env=_env(repo)
        )
        assert result.exit_code == 0, result.output
        oid = json.loads(result.stdout)["object_id"]
        assert object_path(repo, oid).read_bytes() == content

    def test_100_distinct_files_all_unique_ids(
        self, tmp_path: pathlib.Path
    ) -> None:
        """100 files with distinct content yield 100 distinct, correct ids."""
        repo = _init_repo(tmp_path / "repo")
        env = _env(repo)  # identical for every invocation, so build it once
        ids: set[str] = set()
        for i in range(100):
            content = f"file-content-{i}".encode()
            f = _file(tmp_path, f"f{i}.mid", content)
            result = runner.invoke(
                cli, ["plumbing", "hash-object", "--format", "text", str(f)], env=env
            )
            assert result.exit_code == 0, result.output
            object_id = result.stdout.strip()
            # Uniqueness alone would accept 100 distinct wrong values, so
            # each id is also checked against the reference digest.
            assert object_id == _sha(content)
            ids.add(object_id)
        assert len(ids) == 100