gabriel / muse public
test_plumbing_hash_object.py python
229 lines 9.3 KB
86000da9 fix: replace typer CliRunner with argparse-compatible test helper Gabriel Cardona <gabriel@tellurstori.com> 1d ago
1 """Tests for ``muse plumbing hash-object``.
2
3 Covers: SHA-256 correctness, ``--write`` flag storage, streaming safety,
4 idempotent writes, error cases (missing path, directory path, bad format,
5 no-repo write), text-format output, and a stress case with a 2 MiB file.
6 """
7
8 from __future__ import annotations
9
10 import hashlib
11 import json
12 import pathlib
13
14 import pytest
15 from tests.cli_test_helper import CliRunner
16
# Placeholder passed as the first argument of ``runner.invoke``.  Since the
# argparse migration the CliRunner helper ignores this argument.
cli = None
18 from muse.core.errors import ExitCode
19 from muse.core.object_store import has_object, object_path
20
# One module-level runner instance, shared by every test below.
runner: CliRunner = CliRunner()
22
23
24 # ---------------------------------------------------------------------------
25 # Helpers
26 # ---------------------------------------------------------------------------
27
28
def _sha(data: bytes) -> str:
    """Return the hexadecimal SHA-256 digest of *data*."""
    hasher = hashlib.sha256()
    hasher.update(data)
    return hasher.hexdigest()
31
32
def _init_repo(path: pathlib.Path) -> pathlib.Path:
    """Create a minimal ``.muse`` directory tree under *path*.

    Creates the ``commits``, ``snapshots``, ``objects`` and ``refs/heads``
    directories, a ``HEAD`` file pointing at ``refs/heads/main`` and a
    ``repo.json`` metadata file.

    Returns:
        *path* itself, so the call can be used inline.

    Raises:
        FileExistsError: if any of the directories already exists
            (``mkdir`` is called without ``exist_ok``).
    """
    dot_muse = path / ".muse"
    layout = (("commits",), ("snapshots",), ("objects",), ("refs", "heads"))
    for parts in layout:
        dot_muse.joinpath(*parts).mkdir(parents=True)

    head_file = dot_muse / "HEAD"
    head_file.write_text("ref: refs/heads/main", encoding="utf-8")

    metadata = {"repo_id": "test-repo", "domain": "midi"}
    meta_file = dot_muse / "repo.json"
    meta_file.write_text(json.dumps(metadata), encoding="utf-8")
    return path
44
45
def _env(repo: pathlib.Path) -> dict[str, str]:
    """Return the environment mapping with ``MUSE_REPO_ROOT`` set to *repo*.

    The tests pass this mapping as ``env=`` to ``runner.invoke``.
    """
    return dict(MUSE_REPO_ROOT=str(repo))
48
49
def _file(tmp: pathlib.Path, name: str, content: bytes) -> pathlib.Path:
    """Write *content* to ``tmp / name`` and return the path of that file."""
    target = tmp.joinpath(name)
    with target.open("wb") as handle:
        handle.write(content)
    return target
54
55
56 # ---------------------------------------------------------------------------
57 # Unit: SHA-256 correctness
58 # ---------------------------------------------------------------------------
59
60
class TestHashObjectUnit:
    """Checks that ``hash-object`` reports the SHA-256 of the file content."""

    def test_known_sha256_matches(self, tmp_path: pathlib.Path) -> None:
        """``--format text`` prints exactly the hashlib digest of the bytes."""
        content = b"hello muse"
        expected = _sha(content)
        f = _file(tmp_path, "sample.mid", content)
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(
            cli, ["plumbing", "hash-object", "--format", "text", str(f)], env=_env(repo)
        )
        assert result.exit_code == 0, result.output
        assert result.stdout.strip() == expected

    def test_json_output_has_object_id_and_stored_false(self, tmp_path: pathlib.Path) -> None:
        """Without ``--format`` or ``--write`` the output parses as JSON with
        the digest under ``object_id`` and ``stored`` set to ``False``."""
        content = b"json output"
        f = _file(tmp_path, "x.mid", content)
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(cli, ["plumbing", "hash-object", str(f)], env=_env(repo))
        assert result.exit_code == 0, result.output
        data = json.loads(result.stdout)
        assert data["object_id"] == _sha(content)
        assert data["stored"] is False

    def test_different_content_yields_different_id(self, tmp_path: pathlib.Path) -> None:
        """Two files with different bytes produce different object IDs."""
        f1 = _file(tmp_path, "a.mid", b"aaa")
        f2 = _file(tmp_path, "b.mid", b"bbb")
        repo = _init_repo(tmp_path / "repo")
        r1 = runner.invoke(cli, ["plumbing", "hash-object", "--format", "text", str(f1)], env=_env(repo))
        r2 = runner.invoke(cli, ["plumbing", "hash-object", "--format", "text", str(f2)], env=_env(repo))
        # Guard against a vacuous pass: two failed runs that print different
        # error text would otherwise satisfy the inequality below.
        assert r1.exit_code == 0, r1.output
        assert r2.exit_code == 0, r2.output
        assert r1.stdout.strip() != r2.stdout.strip()

    def test_empty_file_has_deterministic_sha256(self, tmp_path: pathlib.Path) -> None:
        """A zero-byte file hashes to the SHA-256 of the empty byte string."""
        f = _file(tmp_path, "empty.mid", b"")
        expected = _sha(b"")
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(cli, ["plumbing", "hash-object", "--format", "text", str(f)], env=_env(repo))
        assert result.exit_code == 0, result.output
        assert result.stdout.strip() == expected
98
99
100 # ---------------------------------------------------------------------------
101 # Integration: --write flag
102 # ---------------------------------------------------------------------------
103
104
class TestHashObjectWrite:
    """Behaviour of ``hash-object`` when ``--write`` / ``-w`` is given."""

    def test_write_stores_object_in_object_store(self, tmp_path: pathlib.Path) -> None:
        """``--write`` reports ``stored: true`` and the object then exists."""
        payload = b"write me"
        source = _file(tmp_path, "w.mid", payload)
        repo_root = _init_repo(tmp_path / "repo")
        argv = ["plumbing", "hash-object", "--write", str(source)]
        outcome = runner.invoke(cli, argv, env=_env(repo_root))
        assert outcome.exit_code == 0, outcome.output
        report = json.loads(outcome.stdout)
        assert report["stored"] is True
        object_id = report["object_id"]
        assert has_object(repo_root, object_id)

    def test_write_stores_correct_bytes(self, tmp_path: pathlib.Path) -> None:
        """The bytes at the stored object's path equal the input bytes."""
        payload = b"check round-trip"
        source = _file(tmp_path, "rt.mid", payload)
        repo_root = _init_repo(tmp_path / "repo")
        argv = ["plumbing", "hash-object", "--write", str(source)]
        outcome = runner.invoke(cli, argv, env=_env(repo_root))
        assert outcome.exit_code == 0
        report = json.loads(outcome.stdout)
        on_disk = object_path(repo_root, report["object_id"])
        assert on_disk.read_bytes() == payload

    def test_write_idempotent_second_call(self, tmp_path: pathlib.Path) -> None:
        """Writing the same file twice succeeds both times with one ID."""
        payload = b"idempotent"
        source = _file(tmp_path, "i.mid", payload)
        repo_root = _init_repo(tmp_path / "repo")
        argv = ["plumbing", "hash-object", "--write", str(source)]
        first = runner.invoke(cli, argv, env=_env(repo_root))
        second = runner.invoke(cli, argv, env=_env(repo_root))
        assert first.exit_code == 0 and second.exit_code == 0
        first_id = json.loads(first.stdout)["object_id"]
        second_id = json.loads(second.stdout)["object_id"]
        assert first_id == second_id

    def test_write_without_repo_exits_user_error(self, tmp_path: pathlib.Path) -> None:
        """``--write`` fails when the configured repo root does not exist."""
        source = _file(tmp_path, "norepo.mid", b"data")
        # MUSE_REPO_ROOT points at a path that was never created, so there is
        # no .muse directory to write into; a non-zero exit is expected.
        missing_root = tmp_path / "nonexistent"
        outcome = runner.invoke(
            cli,
            ["plumbing", "hash-object", "--write", str(source)],
            env={"MUSE_REPO_ROOT": str(missing_root)},
        )
        assert outcome.exit_code != 0

    def test_short_write_flag_works(self, tmp_path: pathlib.Path) -> None:
        """``-w`` is accepted and yields ``stored: true`` like ``--write``."""
        payload = b"short flag"
        source = _file(tmp_path, "sf.mid", payload)
        repo_root = _init_repo(tmp_path / "repo")
        argv = ["plumbing", "hash-object", "-w", str(source)]
        outcome = runner.invoke(cli, argv, env=_env(repo_root))
        assert outcome.exit_code == 0
        report = json.loads(outcome.stdout)
        assert report["stored"] is True
152
153
154 # ---------------------------------------------------------------------------
155 # Integration: error cases
156 # ---------------------------------------------------------------------------
157
158
class TestHashObjectErrors:
    """Invalid invocations of ``hash-object`` plus the ``-f`` short flag."""

    def test_nonexistent_path_exits_user_error(self, tmp_path: pathlib.Path) -> None:
        """A path that does not exist exits with ``ExitCode.USER_ERROR``."""
        repo_root = _init_repo(tmp_path / "repo")
        missing = tmp_path / "missing.mid"
        argv = ["plumbing", "hash-object", str(missing)]
        outcome = runner.invoke(cli, argv, env=_env(repo_root))
        assert outcome.exit_code == ExitCode.USER_ERROR

    def test_directory_path_exits_user_error(self, tmp_path: pathlib.Path) -> None:
        """Passing a directory exits with ``ExitCode.USER_ERROR``."""
        repo_root = _init_repo(tmp_path / "repo")
        folder = tmp_path / "subdir"
        folder.mkdir()
        argv = ["plumbing", "hash-object", str(folder)]
        outcome = runner.invoke(cli, argv, env=_env(repo_root))
        assert outcome.exit_code == ExitCode.USER_ERROR

    def test_bad_format_exits_user_error(self, tmp_path: pathlib.Path) -> None:
        """``--format yaml`` exits with ``ExitCode.USER_ERROR``."""
        source = _file(tmp_path, "f.mid", b"data")
        repo_root = _init_repo(tmp_path / "repo")
        argv = ["plumbing", "hash-object", "--format", "yaml", str(source)]
        outcome = runner.invoke(cli, argv, env=_env(repo_root))
        assert outcome.exit_code == ExitCode.USER_ERROR

    def test_short_format_flag_works(self, tmp_path: pathlib.Path) -> None:
        """``-f text`` succeeds and prints a 64-character value."""
        source = _file(tmp_path, "g.mid", b"data")
        repo_root = _init_repo(tmp_path / "repo")
        argv = ["plumbing", "hash-object", "-f", "text", str(source)]
        outcome = runner.invoke(cli, argv, env=_env(repo_root))
        assert outcome.exit_code == 0
        printed = outcome.stdout.strip()
        assert len(printed) == 64
188
189
190 # ---------------------------------------------------------------------------
191 # Stress: 2 MiB file handled without heap explosion
192 # ---------------------------------------------------------------------------
193
194
class TestHashObjectStress:
    """Larger inputs and a many-file run of ``hash-object``."""

    def test_large_file_hashes_correctly(self, tmp_path: pathlib.Path) -> None:
        """A 2 MiB file of repeated bytes hashes to the hashlib digest."""
        payload = b"X" * (2 << 20)  # 2 MiB
        want = _sha(payload)
        source = _file(tmp_path, "big.bin", payload)
        repo_root = _init_repo(tmp_path / "repo")
        argv = ["plumbing", "hash-object", "--format", "text", str(source)]
        outcome = runner.invoke(cli, argv, env=_env(repo_root))
        assert outcome.exit_code == 0
        assert outcome.stdout.strip() == want

    def test_large_file_write_round_trip(self, tmp_path: pathlib.Path) -> None:
        """A 1 MiB file written with ``--write`` reads back byte-for-byte."""
        one_cycle = bytes(range(256))
        payload = one_cycle * 4096  # 1 MiB of varied bytes
        source = _file(tmp_path, "varied.bin", payload)
        repo_root = _init_repo(tmp_path / "repo")
        argv = ["plumbing", "hash-object", "--write", str(source)]
        outcome = runner.invoke(cli, argv, env=_env(repo_root))
        assert outcome.exit_code == 0
        object_id = json.loads(outcome.stdout)["object_id"]
        stored_bytes = object_path(repo_root, object_id).read_bytes()
        assert stored_bytes == payload

    def test_100_distinct_files_all_unique_ids(self, tmp_path: pathlib.Path) -> None:
        """One hundred files with distinct content yield 100 distinct IDs."""
        repo_root = _init_repo(tmp_path / "repo")
        seen: set[str] = set()
        for index in range(100):
            payload = f"file-content-{index}".encode()
            source = _file(tmp_path, f"f{index}.mid", payload)
            argv = ["plumbing", "hash-object", "--format", "text", str(source)]
            outcome = runner.invoke(cli, argv, env=_env(repo_root))
            assert outcome.exit_code == 0
            seen.add(outcome.stdout.strip())
        assert len(seen) == 100