test_code_manifest.py
python
| 1 | """Tests for the hierarchical code manifest in muse/plugins/code/manifest.py.""" |
| 2 | |
| 3 | import hashlib |
| 4 | import pathlib |
| 5 | import tempfile |
| 6 | |
| 7 | import pytest |
| 8 | |
| 9 | from muse.plugins.code.manifest import ( |
| 10 | CodeManifest, |
| 11 | ManifestFileDiff, |
| 12 | build_code_manifest, |
| 13 | diff_manifests, |
| 14 | read_code_manifest, |
| 15 | write_code_manifest, |
| 16 | ) |
| 17 | |
| 18 | |
| 19 | # --------------------------------------------------------------------------- |
| 20 | # Helpers |
| 21 | # --------------------------------------------------------------------------- |
| 22 | |
| 23 | |
def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Create a minimal repository skeleton under *tmp_path*.

    Ensures the ``.muse/objects`` directory exists so that objects can be
    written beneath it.

    Args:
        tmp_path: Directory that will act as the repository root.

    Returns:
        ``tmp_path`` itself, so the call can be used inline.
    """
    # parents=True creates ``.muse`` as well; exist_ok=True makes the helper
    # safe to call more than once on the same directory.
    (tmp_path / ".muse" / "objects").mkdir(parents=True, exist_ok=True)
    return tmp_path
| 29 | |
| 30 | |
def _write_object(root: pathlib.Path, content: bytes) -> str:
    """Store *content* under ``.muse/objects`` keyed by its SHA-256 digest.

    The object is written to ``<root>/.muse/objects/<first two hex
    chars>/<remaining hex chars>``.

    Args:
        root: Repository root directory.
        content: Raw bytes to store.

    Returns:
        The hexadecimal SHA-256 digest of *content*.
    """
    digest = hashlib.sha256(content).hexdigest()
    # Objects are sharded by the first two hex characters of the digest.
    obj_path = root / ".muse" / "objects" / digest[:2] / digest[2:]
    obj_path.parent.mkdir(parents=True, exist_ok=True)
    obj_path.write_bytes(content)
    return digest
| 37 | |
| 38 | |
| 39 | # --------------------------------------------------------------------------- |
| 40 | # build_code_manifest |
| 41 | # --------------------------------------------------------------------------- |
| 42 | |
| 43 | |
class TestBuildCodeManifest:
    """Tests for ``build_code_manifest``."""

    def test_empty_snapshot(self) -> None:
        """An empty file map yields zero files, no packages and zero symbols."""
        with tempfile.TemporaryDirectory() as tmp:
            root = _make_repo(pathlib.Path(tmp))
            manifest = build_code_manifest("s" * 64, {}, root)
            assert manifest["snapshot_id"] == "s" * 64
            assert manifest["total_files"] == 0
            assert manifest["packages"] == []
            assert manifest["total_symbols"] == 0

    def test_single_python_file(self) -> None:
        """One Python file yields one package holding one Python module."""
        with tempfile.TemporaryDirectory() as tmp:
            root = _make_repo(pathlib.Path(tmp))
            src = b"def foo():\n return 1\n"
            h = _write_object(root, src)
            manifest = build_code_manifest("s" * 64, {"src/utils.py": h}, root)
            assert manifest["total_files"] == 1
            assert manifest["semantic_files"] >= 1
            assert len(manifest["packages"]) == 1
            pkg = manifest["packages"][0]
            assert pkg["package"] == "src"
            assert len(pkg["modules"]) == 1
            mod = pkg["modules"][0]
            assert mod["module_path"] == "src/utils.py"
            assert mod["language"] == "Python"

    def test_groups_by_package(self) -> None:
        """Files in different directories are reported under separate packages."""
        with tempfile.TemporaryDirectory() as tmp:
            root = _make_repo(pathlib.Path(tmp))
            flat = {
                "src/a.py": _write_object(root, b"x = 1\n"),
                "src/b.py": _write_object(root, b"y = 2\n"),
                "tests/c.py": _write_object(root, b"z = 3\n"),
            }
            manifest = build_code_manifest("s" * 64, flat, root)
            assert manifest["total_files"] == 3
            packages = {pkg["package"] for pkg in manifest["packages"]}
            assert {"src", "tests"} <= packages

    def test_manifest_hash_stable(self) -> None:
        """Building twice from identical inputs gives the same manifest hash."""
        with tempfile.TemporaryDirectory() as tmp:
            root = _make_repo(pathlib.Path(tmp))
            src = b"x = 1\n"
            h = _write_object(root, src)
            m1 = build_code_manifest("s" * 64, {"a.py": h}, root)
            m2 = build_code_manifest("s" * 64, {"a.py": h}, root)
            assert m1["manifest_hash"] == m2["manifest_hash"]

    def test_non_semantic_file_has_empty_ast_hash(self) -> None:
        """A ``README.md`` entry has an empty AST hash and no symbols."""
        with tempfile.TemporaryDirectory() as tmp:
            root = _make_repo(pathlib.Path(tmp))
            h = _write_object(root, b"some binary or text content")
            manifest = build_code_manifest("s" * 64, {"README.md": h}, root)
            mod = manifest["packages"][0]["modules"][0]
            assert mod["ast_hash"] == ""
            assert mod["symbol_count"] == 0
| 104 | |
| 105 | |
| 106 | # --------------------------------------------------------------------------- |
| 107 | # diff_manifests |
| 108 | # --------------------------------------------------------------------------- |
| 109 | |
| 110 | |
class TestDiffManifests:
    """Tests for ``diff_manifests``."""

    def _build_simple(self, root: pathlib.Path, files: dict[str, bytes]) -> CodeManifest:
        """Write each file's content as an object and build a manifest from it.

        Args:
            root: Repository root prepared by ``_make_repo``.
            files: Mapping of file path to raw file content.

        Returns:
            The manifest built for snapshot id ``"snap"`` over those files.
        """
        flat: dict[str, str] = {
            path: _write_object(root, content) for path, content in files.items()
        }
        return build_code_manifest("snap", flat, root)

    def test_identical_manifests_no_diff(self) -> None:
        """Diffing a manifest against itself produces no records."""
        with tempfile.TemporaryDirectory() as tmp:
            root = _make_repo(pathlib.Path(tmp))
            base = self._build_simple(root, {"a.py": b"x = 1\n"})
            diffs = diff_manifests(base, base)
            assert diffs == []

    def test_added_file_detected(self) -> None:
        """A file present only in the target is reported as ``added``."""
        with tempfile.TemporaryDirectory() as tmp:
            root = _make_repo(pathlib.Path(tmp))
            base = self._build_simple(root, {"a.py": b"x = 1\n"})
            target = self._build_simple(root, {"a.py": b"x = 1\n", "b.py": b"y = 2\n"})
            diffs = diff_manifests(base, target)
            added = [d for d in diffs if d["change"] == "added"]
            assert any(d["path"] == "b.py" for d in added)

    def test_removed_file_detected(self) -> None:
        """A file present only in the base is reported as ``removed``."""
        with tempfile.TemporaryDirectory() as tmp:
            root = _make_repo(pathlib.Path(tmp))
            base = self._build_simple(root, {"a.py": b"x = 1\n", "b.py": b"y = 2\n"})
            target = self._build_simple(root, {"a.py": b"x = 1\n"})
            diffs = diff_manifests(base, target)
            removed = [d for d in diffs if d["change"] == "removed"]
            assert any(d["path"] == "b.py" for d in removed)

    def test_semantic_change_detected(self) -> None:
        """Changing a function's return value is flagged as a semantic change."""
        with tempfile.TemporaryDirectory() as tmp:
            root = _make_repo(pathlib.Path(tmp))
            base = self._build_simple(root, {"a.py": b"def foo():\n return 1\n"})
            target = self._build_simple(root, {"a.py": b"def foo():\n return 2\n"})
            diffs = diff_manifests(base, target)
            assert len(diffs) == 1
            assert diffs[0]["semantic_change"] is True

    def test_whitespace_only_change_non_semantic(self) -> None:
        """Trailing blank lines, if reported at all, are reported against ``a.py``."""
        # Whitespace-only changes: content_hash differs but ast_hash should be the same.
        with tempfile.TemporaryDirectory() as tmp:
            root = _make_repo(pathlib.Path(tmp))
            base = self._build_simple(root, {"a.py": b"def foo():\n return 1\n"})
            target = self._build_simple(root, {"a.py": b"def foo():\n return 1\n\n\n"})
            diffs = diff_manifests(base, target)
            # Whitespace diff may or may not change AST hash depending on parser.
            # Just assert we get a diff record with a path.
            if diffs:
                assert diffs[0]["path"] == "a.py"
| 163 | |
| 164 | |
| 165 | # --------------------------------------------------------------------------- |
| 166 | # Persistence |
| 167 | # --------------------------------------------------------------------------- |
| 168 | |
| 169 | |
class TestManifestPersistence:
    """Tests for ``write_code_manifest`` and ``read_code_manifest``."""

    def test_write_and_read_roundtrip(self) -> None:
        """A written manifest can be read back by its manifest hash."""
        with tempfile.TemporaryDirectory() as tmp:
            root = _make_repo(pathlib.Path(tmp))
            src = b"def my_fn():\n pass\n"
            h = _write_object(root, src)
            original = build_code_manifest("s" * 64, {"src/a.py": h}, root)

            write_code_manifest(root, original)
            loaded = read_code_manifest(root, original["manifest_hash"])

            assert loaded is not None
            assert loaded["snapshot_id"] == "s" * 64
            assert loaded["manifest_hash"] == original["manifest_hash"]
            assert len(loaded["packages"]) == len(original["packages"])

    def test_read_nonexistent_returns_none(self) -> None:
        """Reading an unknown manifest hash returns ``None``."""
        with tempfile.TemporaryDirectory() as tmp:
            root = _make_repo(pathlib.Path(tmp))
            result = read_code_manifest(root, "nonexistent_hash")
            assert result is None

    def test_write_idempotent(self) -> None:
        """Writing the same manifest twice succeeds and it stays readable."""
        with tempfile.TemporaryDirectory() as tmp:
            root = _make_repo(pathlib.Path(tmp))
            h = _write_object(root, b"x = 1\n")
            manifest = build_code_manifest("s" * 64, {"a.py": h}, root)
            write_code_manifest(root, manifest)
            write_code_manifest(root, manifest)  # second write should not error
            loaded = read_code_manifest(root, manifest["manifest_hash"])
            assert loaded is not None