snapshot.py
python
| 1 | """Snapshot and commit ID hashing — MuseHub-side implementation. |
| 2 | |
| 3 | This module provides the canonical server-side ID computation functions used |
| 4 | by MuseHub services and test fixtures. It intentionally mirrors the hashing |
| 5 | logic in ``muse.core.snapshot`` (the Muse CLI) so that IDs generated on the |
| 6 | server can be cross-verified against IDs sent by the CLI. |
| 7 | |
| 8 | CONTRACT: The separator constant ``_SEP`` and the hash construction algorithm |
| 9 | MUST remain identical to ``muse.core.snapshot._SEP`` and |
| 10 | ``muse.core.snapshot.compute_snapshot_id``. Any change to either side must |
| 11 | be applied to both simultaneously. A mismatch is a silent data-integrity bug. |
| 12 | """ |
| 13 | |
| 14 | import hashlib |
| 15 | |
# Separator (a single NUL character) placed between a path and its object
# ID, and between successive entries, when building the snapshot hash payload.
# Must match muse.core.snapshot._SEP exactly.
_SEP = "\x00"
| 18 | |
| 19 | |
def compute_snapshot_id(manifest: dict[str, str]) -> str:
    """Derive a snapshot ID from a ``path -> object_id`` manifest.

    Every manifest entry is rendered as ``path NUL object_id``. The rendered
    entries are sorted, joined with NUL, encoded with the ``str.encode``
    default, and hashed with SHA-256, so the iteration order of *manifest*
    does not influence the result.

    NUL is used as the separator to prevent collision attacks via filenames
    or object IDs containing the previously used ``|``/``:`` separators.

    Returns:
        The SHA-256 digest as a hexadecimal string.
    """
    entries = [f"{path}{_SEP}{oid}" for path, oid in manifest.items()]
    entries.sort()

    hasher = hashlib.sha256()
    hasher.update(_SEP.join(entries).encode())
    return hasher.hexdigest()
| 29 | |
| 30 | |
def compute_commit_id(
    parent_ids: list[str],
    snapshot_id: str,
    message: str,
    committed_at_iso: str,
) -> str:
    """Derive a commit ID by hashing the commit's identifying fields.

    The parent IDs are sorted and joined with ``|``. That string is then
    joined, again with ``|``, to the snapshot ID, the message and the
    timestamp string, in that order. The payload is encoded with the
    ``str.encode`` default and hashed with SHA-256.

    Because the parents are sorted first, their order in *parent_ids* does
    not affect the ID, and the caller's list is left unmodified.

    NOTE(review): fields are joined without escaping, so values that contain
    ``|`` can make different inputs yield the same payload. Changing the
    construction would change every existing commit ID -- confirm against
    the CLI implementation before touching it.

    Returns:
        The SHA-256 digest as a hexadecimal string.
    """
    parents_field = "|".join(sorted(parent_ids))
    canonical = "|".join(
        (parents_field, snapshot_id, message, committed_at_iso)
    )

    hasher = hashlib.sha256()
    hasher.update(canonical.encode())
    return hasher.hexdigest()