gabriel / musehub public
snapshot.py python
45 lines 1.5 KB
198cdd98 dev → main: final polish — snapshots in muse_push, elicitation docs, ca… Gabriel Cardona <cgcardona@gmail.com> 2d ago
1 """Snapshot and commit ID hashing — MuseHub-side implementation.
2
3 This module provides the canonical server-side ID computation functions used
4 by MuseHub services and test fixtures. It intentionally mirrors the hashing
5 logic in ``muse.core.snapshot`` (the Muse CLI) so that IDs generated on the
6 server can be cross-verified against IDs sent by the CLI.
7
8 CONTRACT: The separator constant ``_SEP`` and the hash construction algorithm
9 MUST remain identical to ``muse.core.snapshot._SEP`` and
10 ``muse.core.snapshot.compute_snapshot_id``. Any change to either side must
11 be applied to both simultaneously. A mismatch is a silent data-integrity bug.
12 """
13
14 import hashlib
15
# Separator placed between a path and its object ID, and between successive
# path/object-ID entries, when building the snapshot hash payload.
# Must match muse.core.snapshot._SEP exactly.
_SEP = "\x00"


def compute_snapshot_id(manifest: dict[str, str]) -> str:
    """Return the hex SHA-256 digest identifying a snapshot manifest.

    Each ``path -> object_id`` item is rendered as ``path NUL object_id``.
    The rendered entries are sorted, joined with another NUL, encoded as
    UTF-8 and hashed. Because the entries are sorted, the insertion order of
    *manifest* has no effect on the result. An empty manifest hashes the
    empty byte string.

    The NUL separator replaces the earlier ``|``/``:`` separators, which
    allowed collisions when a filename or object ID contained one of those
    characters.

    Args:
        manifest: Mapping of file path to object ID.

    Returns:
        The 64-character lowercase hexadecimal SHA-256 digest.
    """
    entries = [f"{path}{_SEP}{object_id}" for path, object_id in manifest.items()]
    entries.sort()
    canonical = _SEP.join(entries)
    return hashlib.sha256(canonical.encode("utf-8")).hexdigest()
29
30
def compute_commit_id(
    parent_ids: list[str],
    snapshot_id: str,
    message: str,
    committed_at_iso: str,
) -> str:
    """Return the hex SHA-256 digest identifying a commit.

    The hashed payload is the UTF-8 encoding of::

        <sorted parent IDs joined by "|">|<snapshot_id>|<message>|<committed_at_iso>

    Parent IDs are sorted before joining, so the order in which parents are
    supplied does not influence the resulting ID. With no parents the first
    field is empty and the payload starts with ``|``.

    NOTE(review): fields are joined with an unescaped ``|``, so inputs that
    themselves contain ``|`` (e.g. a commit message) can produce the same
    payload as a different split of the fields. Changing the layout would
    alter every commit ID; presumably it must stay in step with the Muse
    CLI's commit hashing -- confirm before touching it.

    Args:
        parent_ids: IDs of the parent commits (may be empty).
        snapshot_id: ID of the snapshot this commit points at.
        message: Commit message.
        committed_at_iso: Commit timestamp as a string; hashed verbatim.

    Returns:
        The 64-character lowercase hexadecimal SHA-256 digest.
    """
    parents_field = "|".join(sorted(parent_ids))
    canonical = "|".join((parents_field, snapshot_id, message, committed_at_iso))
    return hashlib.sha256(canonical.encode("utf-8")).hexdigest()