test_plumbing_hash_object.py
python
| 1 | """Tests for ``muse plumbing hash-object``. |
| 2 | |
| 3 | Covers: SHA-256 correctness, ``--write`` flag storage, streaming safety, |
| 4 | idempotent writes, error cases (missing path, directory path, bad format, |
| 5 | no-repo write), text-format output, and a stress case with a 2 MiB file. |
| 6 | """ |
| 7 | |
| 8 | from __future__ import annotations |
| 9 | |
| 10 | import hashlib |
| 11 | import json |
| 12 | import pathlib |
| 13 | |
| 14 | import pytest |
| 15 | from tests.cli_test_helper import CliRunner |
| 16 | |
| 17 | cli = None # argparse migration — CliRunner ignores this arg |
| 18 | from muse.core.errors import ExitCode |
| 19 | from muse.core.object_store import has_object, object_path |
| 20 | |
| 21 | runner = CliRunner() |
| 22 | |
| 23 | |
| 24 | # --------------------------------------------------------------------------- |
| 25 | # Helpers |
| 26 | # --------------------------------------------------------------------------- |
| 27 | |
| 28 | |
def _sha(data: bytes) -> str:
    """Return the hexadecimal SHA-256 digest of *data*."""
    hasher = hashlib.sha256()
    hasher.update(data)
    return hasher.hexdigest()
| 31 | |
| 32 | |
def _init_repo(path: pathlib.Path) -> pathlib.Path:
    """Create a bare ``.muse`` layout under *path* and return *path*.

    Creates the ``commits``, ``snapshots``, ``objects`` and ``refs/heads``
    directories, a ``HEAD`` file pointing at ``refs/heads/main``, and a
    ``repo.json`` holding a fixed repo id and domain.
    """
    dot_muse = path / ".muse"
    # parents=True creates *path* and ``.muse`` on the first iteration.
    for parts in (("commits",), ("snapshots",), ("objects",), ("refs", "heads")):
        dot_muse.joinpath(*parts).mkdir(parents=True)

    head_file = dot_muse / "HEAD"
    head_file.write_text("ref: refs/heads/main", encoding="utf-8")

    repo_meta = {"repo_id": "test-repo", "domain": "midi"}
    meta_file = dot_muse / "repo.json"
    meta_file.write_text(json.dumps(repo_meta), encoding="utf-8")
    return path
| 44 | |
| 45 | |
def _env(repo: pathlib.Path) -> dict[str, str]:
    """Return an env mapping with ``MUSE_REPO_ROOT`` set to *repo* as a string."""
    repo_root = str(repo)
    return dict(MUSE_REPO_ROOT=repo_root)
| 48 | |
| 49 | |
def _file(tmp: pathlib.Path, name: str, content: bytes) -> pathlib.Path:
    """Write *content* to ``tmp / name`` and return the resulting path."""
    target = tmp.joinpath(name)
    target.write_bytes(content)
    return target
| 54 | |
| 55 | |
| 56 | # --------------------------------------------------------------------------- |
| 57 | # Unit: SHA-256 correctness |
| 58 | # --------------------------------------------------------------------------- |
| 59 | |
| 60 | |
class TestHashObjectUnit:
    """Digest correctness of ``plumbing hash-object`` when ``--write`` is not given."""

    def test_known_sha256_matches(self, tmp_path: pathlib.Path) -> None:
        """Text output equals the SHA-256 hex digest of the file's bytes."""
        content = b"hello muse"
        expected = _sha(content)
        f = _file(tmp_path, "sample.mid", content)
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(
            cli, ["plumbing", "hash-object", "--format", "text", str(f)], env=_env(repo)
        )
        assert result.exit_code == 0, result.output
        assert result.stdout.strip() == expected

    def test_json_output_has_object_id_and_stored_false(self, tmp_path: pathlib.Path) -> None:
        """Default output is JSON carrying ``object_id`` and ``stored: false``."""
        content = b"json output"
        f = _file(tmp_path, "x.mid", content)
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(cli, ["plumbing", "hash-object", str(f)], env=_env(repo))
        assert result.exit_code == 0, result.output
        data = json.loads(result.stdout)
        assert data["object_id"] == _sha(content)
        assert data["stored"] is False

    def test_different_content_yields_different_id(self, tmp_path: pathlib.Path) -> None:
        """Two files with different bytes produce different ids."""
        f1 = _file(tmp_path, "a.mid", b"aaa")
        f2 = _file(tmp_path, "b.mid", b"bbb")
        repo = _init_repo(tmp_path / "repo")
        r1 = runner.invoke(cli, ["plumbing", "hash-object", "--format", "text", str(f1)], env=_env(repo))
        r2 = runner.invoke(cli, ["plumbing", "hash-object", "--format", "text", str(f2)], env=_env(repo))
        # Require success first: otherwise two failing runs that print
        # different error text would satisfy the inequality vacuously.
        assert r1.exit_code == 0, r1.output
        assert r2.exit_code == 0, r2.output
        assert r1.stdout.strip() != r2.stdout.strip()

    def test_empty_file_has_deterministic_sha256(self, tmp_path: pathlib.Path) -> None:
        """A zero-byte file hashes to the SHA-256 of the empty byte string."""
        f = _file(tmp_path, "empty.mid", b"")
        expected = _sha(b"")
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(cli, ["plumbing", "hash-object", "--format", "text", str(f)], env=_env(repo))
        assert result.exit_code == 0, result.output
        assert result.stdout.strip() == expected
| 98 | |
| 99 | |
| 100 | # --------------------------------------------------------------------------- |
| 101 | # Integration: --write flag |
| 102 | # --------------------------------------------------------------------------- |
| 103 | |
| 104 | |
class TestHashObjectWrite:
    """Behaviour of ``plumbing hash-object`` with ``--write`` / ``-w``."""

    def test_write_stores_object_in_object_store(self, tmp_path: pathlib.Path) -> None:
        """``--write`` reports ``stored: true`` and the object is then present."""
        content = b"write me"
        f = _file(tmp_path, "w.mid", content)
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(cli, ["plumbing", "hash-object", "--write", str(f)], env=_env(repo))
        assert result.exit_code == 0, result.output
        data = json.loads(result.stdout)
        assert data["stored"] is True
        assert has_object(repo, data["object_id"])

    def test_write_stores_correct_bytes(self, tmp_path: pathlib.Path) -> None:
        """The bytes at the object's path equal the source file's bytes."""
        content = b"check round-trip"
        f = _file(tmp_path, "rt.mid", content)
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(cli, ["plumbing", "hash-object", "--write", str(f)], env=_env(repo))
        assert result.exit_code == 0, result.output
        oid = json.loads(result.stdout)["object_id"]
        stored = object_path(repo, oid).read_bytes()
        assert stored == content

    def test_write_idempotent_second_call(self, tmp_path: pathlib.Path) -> None:
        """Writing the same file twice succeeds, yields one id, and keeps the data."""
        content = b"idempotent"
        f = _file(tmp_path, "i.mid", content)
        repo = _init_repo(tmp_path / "repo")
        args = ["plumbing", "hash-object", "--write", str(f)]
        r1 = runner.invoke(cli, args, env=_env(repo))
        r2 = runner.invoke(cli, args, env=_env(repo))
        assert r1.exit_code == 0, r1.output
        assert r2.exit_code == 0, r2.output
        oid = json.loads(r1.stdout)["object_id"]
        assert oid == json.loads(r2.stdout)["object_id"]
        # The repeated write must leave the stored object present and intact.
        assert has_object(repo, oid)
        assert object_path(repo, oid).read_bytes() == content

    def test_write_without_repo_exits_user_error(self, tmp_path: pathlib.Path) -> None:
        """``--write`` fails when the configured repo root does not exist.

        Only a non-zero exit is asserted; the exact exit code is not pinned.
        """
        f = _file(tmp_path, "norepo.mid", b"data")
        # MUSE_REPO_ROOT points at a directory that was never created, so
        # there is no .muse directory to write into — should fail.
        result = runner.invoke(
            cli, ["plumbing", "hash-object", "--write", str(f)],
            env={"MUSE_REPO_ROOT": str(tmp_path / "nonexistent")}
        )
        assert result.exit_code != 0

    def test_short_write_flag_works(self, tmp_path: pathlib.Path) -> None:
        """``-w`` is accepted and reports ``stored: true``."""
        content = b"short flag"
        f = _file(tmp_path, "sf.mid", content)
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(cli, ["plumbing", "hash-object", "-w", str(f)], env=_env(repo))
        assert result.exit_code == 0, result.output
        assert json.loads(result.stdout)["stored"] is True
| 152 | |
| 153 | |
| 154 | # --------------------------------------------------------------------------- |
| 155 | # Integration: error cases |
| 156 | # --------------------------------------------------------------------------- |
| 157 | |
| 158 | |
class TestHashObjectErrors:
    """Error exits of ``plumbing hash-object``, plus the ``-f`` short flag."""

    def test_nonexistent_path_exits_user_error(self, tmp_path: pathlib.Path) -> None:
        """A path that does not exist yields ``ExitCode.USER_ERROR``."""
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(
            cli, ["plumbing", "hash-object", str(tmp_path / "missing.mid")], env=_env(repo)
        )
        assert result.exit_code == ExitCode.USER_ERROR

    def test_directory_path_exits_user_error(self, tmp_path: pathlib.Path) -> None:
        """Passing a directory instead of a file yields ``ExitCode.USER_ERROR``."""
        repo = _init_repo(tmp_path / "repo")
        d = tmp_path / "subdir"
        d.mkdir()
        result = runner.invoke(cli, ["plumbing", "hash-object", str(d)], env=_env(repo))
        assert result.exit_code == ExitCode.USER_ERROR

    def test_bad_format_exits_user_error(self, tmp_path: pathlib.Path) -> None:
        """An unsupported ``--format`` value yields ``ExitCode.USER_ERROR``."""
        f = _file(tmp_path, "f.mid", b"data")
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(
            cli, ["plumbing", "hash-object", "--format", "yaml", str(f)], env=_env(repo)
        )
        assert result.exit_code == ExitCode.USER_ERROR

    def test_short_format_flag_works(self, tmp_path: pathlib.Path) -> None:
        """``-f text`` is accepted and prints the file's SHA-256 hex digest."""
        content = b"data"
        f = _file(tmp_path, "g.mid", content)
        repo = _init_repo(tmp_path / "repo")
        result = runner.invoke(cli, ["plumbing", "hash-object", "-f", "text", str(f)], env=_env(repo))
        assert result.exit_code == 0, result.output
        # Pin the digest itself: a bare length check would accept any
        # 64-character output.
        assert result.stdout.strip() == _sha(content)
| 188 | |
| 189 | |
| 190 | # --------------------------------------------------------------------------- |
| 191 | # Stress: 2 MiB file handled without heap explosion |
| 192 | # --------------------------------------------------------------------------- |
| 193 | |
| 194 | |
class TestHashObjectStress:
    """Larger inputs and many invocations of ``plumbing hash-object``."""

    def test_large_file_hashes_correctly(self, tmp_path: pathlib.Path) -> None:
        """A 2 MiB file of a repeated byte hashes to the expected digest."""
        payload = b"X" * (2 * 1024**2)  # 2 MiB
        big_file = _file(tmp_path, "big.bin", payload)
        repo_root = _init_repo(tmp_path / "repo")
        argv = ["plumbing", "hash-object", "--format", "text", str(big_file)]
        outcome = runner.invoke(cli, argv, env=_env(repo_root))
        assert outcome.exit_code == 0
        assert outcome.stdout.strip() == _sha(payload)

    def test_large_file_write_round_trip(self, tmp_path: pathlib.Path) -> None:
        """A 1 MiB file covering every byte value is stored unchanged by ``--write``."""
        payload = bytes(range(256)) * 4096  # 1 MiB of varied bytes
        varied_file = _file(tmp_path, "varied.bin", payload)
        repo_root = _init_repo(tmp_path / "repo")
        argv = ["plumbing", "hash-object", "--write", str(varied_file)]
        outcome = runner.invoke(cli, argv, env=_env(repo_root))
        assert outcome.exit_code == 0
        object_id = json.loads(outcome.stdout)["object_id"]
        stored_bytes = object_path(repo_root, object_id).read_bytes()
        assert stored_bytes == payload

    def test_100_distinct_files_all_unique_ids(self, tmp_path: pathlib.Path) -> None:
        """One hundred files with distinct contents yield one hundred distinct ids."""
        repo_root = _init_repo(tmp_path / "repo")
        seen: set[str] = set()
        for i in range(100):
            payload = f"file-content-{i}".encode()
            source = _file(tmp_path, f"f{i}.mid", payload)
            argv = ["plumbing", "hash-object", "--format", "text", str(source)]
            outcome = runner.invoke(cli, argv, env=_env(repo_root))
            assert outcome.exit_code == 0
            seen.add(outcome.stdout.strip())
        assert len(seen) == 100