gabriel / muse public
store.py python
768 lines 26.7 KB
766ee24d feat: code domain leverages core invariants, query engine, manifests, p… Gabriel Cardona <gabriel@tellurstori.com> 5d ago
1 """File-based commit and snapshot store for the Muse VCS.
2
3 All commit and snapshot metadata is stored as JSON files under ``.muse/`` —
4 no external database required.
5
6 Layout
7 ------
8
9 .muse/
10 commits/<commit_id>.json — one JSON file per commit
11 snapshots/<snapshot_id>.json — one JSON file per snapshot manifest
12 tags/<repo_id>/<tag_id>.json — tag records
13 objects/<sha2>/<sha62> — content-addressed blobs (via object_store.py)
14 refs/heads/<branch> — branch HEAD pointers (plain text commit IDs)
15 HEAD — symbolic ref: "refs/heads/main"
16 repo.json — repository identity
17
18 Commit JSON schema
19 ------------------
20
21 {
22 "commit_id": "<sha256>",
23 "repo_id": "<uuid>",
24 "branch": "main",
25 "parent_commit_id": null | "<sha256>",
26 "parent2_commit_id": null | "<sha256>",
27 "snapshot_id": "<sha256>",
28 "message": "Add verse melody",
29 "author": "gabriel",
30 "committed_at": "2026-03-16T12:00:00+00:00",
31 "metadata": {}
32 }
33
34 Snapshot JSON schema
35 --------------------
36
37 {
38 "snapshot_id": "<sha256>",
39 "manifest": {"tracks/drums.mid": "<sha256>", ...},
40 "created_at": "2026-03-16T12:00:00+00:00"
41 }
42
43 All functions are synchronous — file I/O on a local ``.muse/`` directory
44 does not require async. This removes the SQLAlchemy/asyncpg dependency from
45 the CLI entirely.
46 """
47 from __future__ import annotations
48
49 import datetime
50 import json
51 import logging
52 import pathlib
53 import uuid
54 from dataclasses import dataclass, field
55 from typing import TypedDict
56
57 from muse.domain import SemVerBump, StructuredDelta
58
59 logger = logging.getLogger(__name__)
60
61 _COMMITS_DIR = "commits"
62 _SNAPSHOTS_DIR = "snapshots"
63 _TAGS_DIR = "tags"
64
65
66 # ---------------------------------------------------------------------------
67 # Wire-format TypedDicts (JSON-serialisable, used by to_dict / from_dict)
68 # ---------------------------------------------------------------------------
69
70
class CommitDict(TypedDict, total=False):
    """JSON-serialisable representation of a CommitRecord.

    Declared ``total=False``: every key may be absent in a record on disk;
    readers supply defaults (see :meth:`CommitRecord.from_dict`).

    ``structured_delta`` is the typed delta produced by the domain plugin's
    ``diff()`` at commit time. ``None`` on the initial commit (no parent to
    diff against).

    ``sem_ver_bump`` and ``breaking_changes`` are semantic versioning
    metadata. Absent (treated as ``"none"`` / ``[]``) for older records and
    non-code domains.

    Agent provenance fields (all optional, default ``""`` for older records):

    ``agent_id``       Stable identity string for the committing agent or human
                       (e.g. ``"counterpoint-bot"`` or ``"gabriel"``).
    ``model_id``       Model identifier when the author is an AI agent
                       (e.g. ``"claude-opus-4"``). Empty for human authors.
    ``toolchain_id``   Toolchain that produced the commit
                       (e.g. ``"cursor-agent-v2"``).
    ``prompt_hash``    SHA-256 of the instruction/prompt that triggered this
                       commit. Privacy-preserving: the hash identifies the
                       prompt without storing its content.
    ``signature``      HMAC-SHA256 hex digest of ``commit_id`` using the
                       agent's shared key. Verifiable with
                       :func:`muse.core.provenance.verify_commit_hmac`.
    ``signer_key_id``  Fingerprint of the signing key
                       (SHA-256[:16] of the raw key bytes).
    ``format_version`` Schema evolution counter. Each phase of the Muse
                       supercharge plan that extends the commit record bumps
                       this value. Readers use it to know which optional fields
                       are present:

        - ``1`` — base record (commit_id, snapshot_id, parent, message, author)
        - ``2`` — adds ``structured_delta`` (Phase 1: Typed Delta Algebra)
        - ``3`` — adds ``sem_ver_bump``, ``breaking_changes``
          (Phase 2: Domain Schema)
        - ``4`` — adds agent provenance: ``agent_id``, ``model_id``,
          ``toolchain_id``, ``prompt_hash``, ``signature``,
          ``signer_key_id`` (Phase 4: Agent Identity)
        - ``5`` — adds CRDT annotation fields: ``reviewed_by``
          (ORSet of reviewer IDs), ``test_runs``
          (GCounter of test-run events)

    Old records without this field default to ``1``.
    """

    commit_id: str
    repo_id: str
    branch: str
    snapshot_id: str
    message: str
    committed_at: str
    parent_commit_id: str | None
    parent2_commit_id: str | None
    author: str
    metadata: dict[str, str]
    structured_delta: StructuredDelta | None
    sem_ver_bump: SemVerBump
    breaking_changes: list[str]
    agent_id: str
    model_id: str
    toolchain_id: str
    prompt_hash: str
    signature: str
    signer_key_id: str
    format_version: int
    # CRDT-backed annotation fields (format_version >= 5).
    # ``reviewed_by`` is the logical state of an ORSet: a list of unique
    # reviewer identifiers. Merging two records takes the union (set join).
    # ``test_runs`` is a GCounter: monotonically increasing test-run count.
    # Both fields are absent in older records and default to [] / 0.
    reviewed_by: list[str]
    test_runs: int
145
class SnapshotDict(TypedDict):
    """JSON-serialisable representation of a SnapshotRecord."""

    snapshot_id: str
    # Repo-relative file path -> sha256 content hash (see module docstring).
    manifest: dict[str, str]
    # ISO-8601 timestamp string (datetime.isoformat()).
    created_at: str
152
153
class TagDict(TypedDict):
    """JSON-serialisable representation of a TagRecord."""

    tag_id: str
    repo_id: str
    commit_id: str
    tag: str
    # ISO-8601 timestamp string (datetime.isoformat()).
    created_at: str
162
163
class RemoteCommitPayload(TypedDict, total=False):
    """Wire format received from a remote during push/pull.

    All fields are optional because the payload may omit fields that are
    unknown to older protocol versions. Callers validate required fields
    before constructing a CommitRecord from this payload.
    """

    commit_id: str
    repo_id: str
    branch: str
    snapshot_id: str
    message: str
    committed_at: str
    parent_commit_id: str | None
    parent2_commit_id: str | None
    author: str
    metadata: dict[str, str]
    # Snapshot manifest (path -> content hash) shipped alongside the commit
    # so the receiver can create a stub snapshot (see store_pulled_commit).
    manifest: dict[str, str]
183
184
185 # ---------------------------------------------------------------------------
186 # Data classes
187 # ---------------------------------------------------------------------------
188
189
@dataclass
class CommitRecord:
    """A single immutable commit, persisted as ``.muse/commits/<id>.json``.

    This is the in-memory twin of the :class:`CommitDict` wire format; field
    semantics (including the ``format_version`` table and agent provenance
    fields) are documented there. Optional fields default to the values used
    when deserialising records written by older Muse versions, so old JSON
    loads without error.
    """

    commit_id: str
    repo_id: str
    branch: str
    snapshot_id: str
    message: str
    committed_at: datetime.datetime
    parent_commit_id: str | None = None
    parent2_commit_id: str | None = None
    author: str = ""
    metadata: dict[str, str] = field(default_factory=dict)
    structured_delta: StructuredDelta | None = None
    sem_ver_bump: SemVerBump = "none"
    breaking_changes: list[str] = field(default_factory=list)
    agent_id: str = ""
    model_id: str = ""
    toolchain_id: str = ""
    prompt_hash: str = ""
    signature: str = ""
    signer_key_id: str = ""
    #: Schema evolution counter — see :class:`CommitDict` for the version table.
    #: Version 5 adds ``reviewed_by`` (ORSet) and ``test_runs`` (GCounter).
    format_version: int = 5
    reviewed_by: list[str] = field(default_factory=list)
    test_runs: int = 0

    def to_dict(self) -> CommitDict:
        """Serialise to the JSON wire format (container fields are copied)."""
        wire: CommitDict = {
            "commit_id": self.commit_id,
            "repo_id": self.repo_id,
            "branch": self.branch,
            "snapshot_id": self.snapshot_id,
            "message": self.message,
            "committed_at": self.committed_at.isoformat(),
            "parent_commit_id": self.parent_commit_id,
            "parent2_commit_id": self.parent2_commit_id,
            "author": self.author,
            "metadata": dict(self.metadata),
            "structured_delta": self.structured_delta,
            "sem_ver_bump": self.sem_ver_bump,
            "breaking_changes": list(self.breaking_changes),
            "agent_id": self.agent_id,
            "model_id": self.model_id,
            "toolchain_id": self.toolchain_id,
            "prompt_hash": self.prompt_hash,
            "signature": self.signature,
            "signer_key_id": self.signer_key_id,
            "format_version": self.format_version,
            "reviewed_by": list(self.reviewed_by),
            "test_runs": self.test_runs,
        }
        return wire

    @classmethod
    def from_dict(cls, d: CommitDict) -> "CommitRecord":
        """Deserialise, tolerating records written by older format versions."""
        try:
            when = datetime.datetime.fromisoformat(d["committed_at"])
        except (ValueError, KeyError):
            # Missing or unparseable timestamp: fall back to "now" (UTC)
            # rather than failing the whole read.
            when = datetime.datetime.now(datetime.timezone.utc)
        return cls(
            commit_id=d["commit_id"],
            repo_id=d["repo_id"],
            branch=d["branch"],
            snapshot_id=d["snapshot_id"],
            message=d["message"],
            committed_at=when,
            parent_commit_id=d.get("parent_commit_id"),
            parent2_commit_id=d.get("parent2_commit_id"),
            author=d.get("author", ""),
            metadata=dict(d.get("metadata") or {}),
            structured_delta=d.get("structured_delta"),
            sem_ver_bump=d.get("sem_ver_bump", "none"),
            breaking_changes=list(d.get("breaking_changes") or []),
            agent_id=d.get("agent_id", ""),
            model_id=d.get("model_id", ""),
            toolchain_id=d.get("toolchain_id", ""),
            prompt_hash=d.get("prompt_hash", ""),
            signature=d.get("signature", ""),
            signer_key_id=d.get("signer_key_id", ""),
            # Records that predate the counter are format 1 by definition.
            format_version=d.get("format_version", 1),
            reviewed_by=list(d.get("reviewed_by") or []),
            test_runs=int(d.get("test_runs") or 0),
        )
283
284
@dataclass
class SnapshotRecord:
    """A content snapshot manifest, persisted as ``.muse/snapshots/<id>.json``."""

    snapshot_id: str
    # Repo-relative file path -> content hash (object ID).
    manifest: dict[str, str]
    created_at: datetime.datetime = field(
        default_factory=lambda: datetime.datetime.now(datetime.timezone.utc)
    )

    def to_dict(self) -> SnapshotDict:
        """Serialise to the JSON wire format (shares the manifest reference)."""
        wire: SnapshotDict = {
            "snapshot_id": self.snapshot_id,
            "manifest": self.manifest,
            "created_at": self.created_at.isoformat(),
        }
        return wire

    @classmethod
    def from_dict(cls, d: SnapshotDict) -> "SnapshotRecord":
        """Deserialise; a bad or missing timestamp falls back to now (UTC)."""
        try:
            when = datetime.datetime.fromisoformat(d["created_at"])
        except (ValueError, KeyError):
            when = datetime.datetime.now(datetime.timezone.utc)
        return cls(
            snapshot_id=d["snapshot_id"],
            manifest=dict(d.get("manifest") or {}),
            created_at=when,
        )
313
314
@dataclass
class TagRecord:
    """A human-readable semantic label attached to a specific commit."""

    tag_id: str
    repo_id: str
    commit_id: str
    tag: str
    created_at: datetime.datetime = field(
        default_factory=lambda: datetime.datetime.now(datetime.timezone.utc)
    )

    def to_dict(self) -> TagDict:
        """Serialise to the JSON wire format."""
        wire: TagDict = {
            "tag_id": self.tag_id,
            "repo_id": self.repo_id,
            "commit_id": self.commit_id,
            "tag": self.tag,
            "created_at": self.created_at.isoformat(),
        }
        return wire

    @classmethod
    def from_dict(cls, d: TagDict) -> "TagRecord":
        """Deserialise; a bad or missing timestamp falls back to now (UTC)."""
        try:
            when = datetime.datetime.fromisoformat(d["created_at"])
        except (ValueError, KeyError):
            when = datetime.datetime.now(datetime.timezone.utc)
        return cls(
            # Legacy tag files may predate tag_id — mint a fresh one.
            tag_id=d.get("tag_id", str(uuid.uuid4())),
            repo_id=d["repo_id"],
            commit_id=d["commit_id"],
            tag=d["tag"],
            created_at=when,
        )
349
350
351 # ---------------------------------------------------------------------------
352 # Path helpers
353 # ---------------------------------------------------------------------------
354
355
def _commits_dir(repo_root: pathlib.Path) -> pathlib.Path:
    """Directory that holds one JSON file per commit."""
    return repo_root.joinpath(".muse", _COMMITS_DIR)
358
359
def _snapshots_dir(repo_root: pathlib.Path) -> pathlib.Path:
    """Directory that holds one JSON file per snapshot manifest."""
    return repo_root.joinpath(".muse", _SNAPSHOTS_DIR)
362
363
def _tags_dir(repo_root: pathlib.Path, repo_id: str) -> pathlib.Path:
    """Per-repository directory that holds tag record files."""
    return repo_root.joinpath(".muse", _TAGS_DIR, repo_id)
366
367
def _commit_path(repo_root: pathlib.Path, commit_id: str) -> pathlib.Path:
    """Path of the JSON file that stores *commit_id*."""
    return _commits_dir(repo_root) / (commit_id + ".json")
370
371
def _snapshot_path(repo_root: pathlib.Path, snapshot_id: str) -> pathlib.Path:
    """Path of the JSON file that stores *snapshot_id*."""
    return _snapshots_dir(repo_root) / (snapshot_id + ".json")
374
375
376 # ---------------------------------------------------------------------------
377 # Commit operations
378 # ---------------------------------------------------------------------------
379
380
def write_commit(repo_root: pathlib.Path, commit: CommitRecord) -> None:
    """Persist a commit record to ``.muse/commits/<commit_id>.json``.

    Commits are immutable: if the file already exists the write is skipped
    (use :func:`overwrite_commit` for annotation updates).
    """
    target = _commit_path(repo_root, commit.commit_id)
    target.parent.mkdir(parents=True, exist_ok=True)
    if target.exists():
        logger.debug("⚠️ Commit %s already exists — skipped", commit.commit_id[:8])
        return
    payload = json.dumps(commit.to_dict(), indent=2)
    target.write_text(payload + "\n")
    logger.debug("✅ Stored commit %s branch=%r", commit.commit_id[:8], commit.branch)
390
391
def read_commit(repo_root: pathlib.Path, commit_id: str) -> CommitRecord | None:
    """Load a commit record by ID.

    Returns ``None`` when the file is absent or its JSON is corrupt
    (a warning is logged in the corrupt case).
    """
    path = _commit_path(repo_root, commit_id)
    if not path.exists():
        return None
    try:
        payload = json.loads(path.read_text())
        return CommitRecord.from_dict(payload)
    except (json.JSONDecodeError, KeyError) as exc:
        logger.warning("⚠️ Corrupt commit file %s: %s", path, exc)
        return None
402
403
def overwrite_commit(repo_root: pathlib.Path, commit: CommitRecord) -> None:
    """Overwrite an existing commit record on disk (e.g. for annotation updates).

    Unlike :func:`write_commit`, this always writes even when the file already
    exists. Use only for annotation fields (``reviewed_by``, ``test_runs``)
    that are semantically additive — never for rewriting history
    (commit_id, parent, snapshot, message).

    Args:
        repo_root: Repository root.
        commit: The updated commit record to persist.
    """
    target = _commit_path(repo_root, commit.commit_id)
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(json.dumps(commit.to_dict(), indent=2) + "\n")
    logger.debug("✅ Updated annotation on commit %s", commit.commit_id[:8])
420
421
def update_commit_metadata(
    repo_root: pathlib.Path,
    commit_id: str,
    key: str,
    value: str,
) -> bool:
    """Set a single string *key* in a commit's metadata dict and persist it.

    Returns ``True`` on success, ``False`` if the commit is not found.
    """
    record = read_commit(repo_root, commit_id)
    if record is None:
        logger.warning("⚠️ Commit %s not found — cannot update metadata", commit_id[:8])
        return False
    record.metadata[key] = value
    target = _commit_path(repo_root, commit_id)
    target.write_text(json.dumps(record.to_dict(), indent=2) + "\n")
    logger.debug("✅ Set %s=%r on commit %s", key, value, commit_id[:8])
    return True
441
442
def get_head_commit_id(repo_root: pathlib.Path, branch: str) -> str | None:
    """Return the commit ID stored in ``.muse/refs/heads/<branch>``.

    ``None`` when the ref file is absent or blank (an empty branch).
    """
    ref_path = repo_root / ".muse" / "refs" / "heads" / branch
    try:
        content = ref_path.read_text()
    except FileNotFoundError:
        return None
    head = content.strip()
    return head or None
450
451
def get_head_snapshot_id(
    repo_root: pathlib.Path,
    repo_id: str,
    branch: str,
) -> str | None:
    """Return the snapshot_id at HEAD of *branch*, or ``None``.

    ``None`` when the branch is empty or its HEAD commit cannot be read.
    """
    head_id = get_head_commit_id(repo_root, branch)
    if head_id is None:
        return None
    head = read_commit(repo_root, head_id)
    return head.snapshot_id if head is not None else None
465
466
def resolve_commit_ref(
    repo_root: pathlib.Path,
    repo_id: str,
    branch: str,
    ref: str | None,
) -> CommitRecord | None:
    """Resolve a commit reference to a ``CommitRecord``.

    *ref* may be:
    - ``None`` / ``"HEAD"`` (any case) — the most recent commit on *branch*.
    - A full commit SHA — looked up directly.
    - An abbreviated SHA — resolved by prefix scan.
    """
    if ref is None or ref.upper() == "HEAD":
        head_id = get_head_commit_id(repo_root, branch)
        return read_commit(repo_root, head_id) if head_id is not None else None

    # Exact match first; fall back to a prefix scan over the commits dir.
    exact = read_commit(repo_root, ref)
    return exact if exact is not None else _find_commit_by_prefix(repo_root, ref)
492
493
def _find_commit_by_prefix(
    repo_root: pathlib.Path, prefix: str
) -> CommitRecord | None:
    """Return the first readable commit whose ID starts with *prefix*, else ``None``."""
    commits_dir = _commits_dir(repo_root)
    if not commits_dir.exists():
        return None
    for candidate in commits_dir.glob(f"{prefix}*.json"):
        try:
            return CommitRecord.from_dict(json.loads(candidate.read_text()))
        except (json.JSONDecodeError, KeyError):
            # Corrupt file — keep scanning for another match.
            continue
    return None
507
508
def find_commits_by_prefix(
    repo_root: pathlib.Path, prefix: str
) -> list[CommitRecord]:
    """Return every readable commit whose ID starts with *prefix*."""
    commits_dir = _commits_dir(repo_root)
    if not commits_dir.exists():
        return []
    matches: list[CommitRecord] = []
    for candidate in commits_dir.glob(f"{prefix}*.json"):
        try:
            matches.append(CommitRecord.from_dict(json.loads(candidate.read_text())))
        except (json.JSONDecodeError, KeyError):
            # Corrupt file — skip it.
            continue
    return matches
523
524
def get_commits_for_branch(
    repo_root: pathlib.Path,
    repo_id: str,
    branch: str,
) -> list[CommitRecord]:
    """Return all commits on *branch*, newest first, by walking the parent chain.

    The walk stops at the initial commit, at a missing commit file, or on a
    cycle (guarded by the *visited* set).
    """
    history: list[CommitRecord] = []
    visited: set[str] = set()
    cursor = get_head_commit_id(repo_root, branch)
    while cursor and cursor not in visited:
        visited.add(cursor)
        record = read_commit(repo_root, cursor)
        if record is None:
            break
        history.append(record)
        cursor = record.parent_commit_id
    return history
542
543
def get_all_commits(repo_root: pathlib.Path) -> list[CommitRecord]:
    """Return every readable commit in the store (order not guaranteed)."""
    commits_dir = _commits_dir(repo_root)
    if not commits_dir.exists():
        return []
    records: list[CommitRecord] = []
    for commit_file in commits_dir.glob("*.json"):
        try:
            records.append(CommitRecord.from_dict(json.loads(commit_file.read_text())))
        except (json.JSONDecodeError, KeyError):
            # Corrupt file — skip it.
            continue
    return records
556
557
def walk_commits_between(
    repo_root: pathlib.Path,
    to_commit_id: str,
    from_commit_id: str | None = None,
    max_commits: int = 10_000,
) -> list[CommitRecord]:
    """Return commits reachable from *to_commit_id*, stopping before *from_commit_id*.

    Walks the parent chain from *to_commit_id* backwards. Returns commits in
    newest-first order (callers can reverse for oldest-first).

    Args:
        repo_root: Repository root.
        to_commit_id: Inclusive end of the range.
        from_commit_id: Exclusive start; ``None`` means walk to the initial commit.
        max_commits: Safety cap.

    Returns:
        List of ``CommitRecord`` objects, newest first.
    """
    chain: list[CommitRecord] = []
    visited: set[str] = set()
    cursor: str | None = to_commit_id
    while cursor:
        # Stop on a cycle or once the safety cap is reached.
        if cursor in visited or len(chain) >= max_commits:
            break
        visited.add(cursor)
        # The boundary commit itself is excluded from the result.
        if cursor == from_commit_id:
            break
        record = read_commit(repo_root, cursor)
        if record is None:
            break
        chain.append(record)
        cursor = record.parent_commit_id
    return chain
591
592
593 # ---------------------------------------------------------------------------
594 # Snapshot operations
595 # ---------------------------------------------------------------------------
596
597
def write_snapshot(repo_root: pathlib.Path, snapshot: SnapshotRecord) -> None:
    """Persist a snapshot record to ``.muse/snapshots/<snapshot_id>.json``.

    Snapshots are immutable: if the file already exists the write is skipped.
    """
    target = _snapshot_path(repo_root, snapshot.snapshot_id)
    target.parent.mkdir(parents=True, exist_ok=True)
    if target.exists():
        logger.debug("⚠️ Snapshot %s already exists — skipped", snapshot.snapshot_id[:8])
        return
    payload = json.dumps(snapshot.to_dict(), indent=2)
    target.write_text(payload + "\n")
    logger.debug("✅ Stored snapshot %s (%d files)", snapshot.snapshot_id[:8], len(snapshot.manifest))
607
608
def read_snapshot(repo_root: pathlib.Path, snapshot_id: str) -> SnapshotRecord | None:
    """Load a snapshot record by ID.

    Returns ``None`` when the file is absent or its JSON is corrupt
    (a warning is logged in the corrupt case).
    """
    path = _snapshot_path(repo_root, snapshot_id)
    if not path.exists():
        return None
    try:
        payload = json.loads(path.read_text())
        return SnapshotRecord.from_dict(payload)
    except (json.JSONDecodeError, KeyError) as exc:
        logger.warning("⚠️ Corrupt snapshot file %s: %s", path, exc)
        return None
619
620
def get_commit_snapshot_manifest(
    repo_root: pathlib.Path, commit_id: str
) -> dict[str, str] | None:
    """Return a copy of the file manifest for *commit_id*'s snapshot.

    ``None`` (with a warning) when the commit or its snapshot is missing.
    """
    commit = read_commit(repo_root, commit_id)
    if commit is None:
        logger.warning("⚠️ Commit %s not found", commit_id[:8])
        return None
    snapshot = read_snapshot(repo_root, commit.snapshot_id)
    if snapshot is not None:
        return dict(snapshot.manifest)
    logger.warning(
        "⚠️ Snapshot %s referenced by commit %s not found",
        commit.snapshot_id[:8],
        commit_id[:8],
    )
    return None
638
639
def get_head_snapshot_manifest(
    repo_root: pathlib.Path, repo_id: str, branch: str
) -> dict[str, str] | None:
    """Return a copy of the manifest at HEAD of *branch*, or ``None``."""
    head_snapshot_id = get_head_snapshot_id(repo_root, repo_id, branch)
    if head_snapshot_id is None:
        return None
    snapshot = read_snapshot(repo_root, head_snapshot_id)
    return dict(snapshot.manifest) if snapshot is not None else None
651
652
def get_all_object_ids(repo_root: pathlib.Path, repo_id: str) -> list[str]:
    """Return all object IDs referenced by any snapshot in this repo, sorted."""
    referenced: set[str] = set()
    for record in get_all_commits(repo_root):
        snap = read_snapshot(repo_root, record.snapshot_id)
        if snap is None:
            continue
        referenced.update(snap.manifest.values())
    return sorted(referenced)
661
662
663 # ---------------------------------------------------------------------------
664 # Tag operations
665 # ---------------------------------------------------------------------------
666
667
def write_tag(repo_root: pathlib.Path, tag: TagRecord) -> None:
    """Persist a tag record to ``.muse/tags/<repo_id>/<tag_id>.json``."""
    target = _tags_dir(repo_root, tag.repo_id) / f"{tag.tag_id}.json"
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(json.dumps(tag.to_dict(), indent=2) + "\n")
    logger.debug("✅ Stored tag %r on commit %s", tag.tag, tag.commit_id[:8])
675
676
def get_tags_for_commit(
    repo_root: pathlib.Path, repo_id: str, commit_id: str
) -> list[TagRecord]:
    """Return all readable tags attached to *commit_id*."""
    tags_dir = _tags_dir(repo_root, repo_id)
    if not tags_dir.exists():
        return []
    matches: list[TagRecord] = []
    for tag_file in tags_dir.glob("*.json"):
        try:
            record = TagRecord.from_dict(json.loads(tag_file.read_text()))
        except (json.JSONDecodeError, KeyError):
            # Corrupt tag file — skip it.
            continue
        if record.commit_id == commit_id:
            matches.append(record)
    return matches
693
694
def get_all_tags(repo_root: pathlib.Path, repo_id: str) -> list[TagRecord]:
    """Return every readable tag in this repository."""
    tags_dir = _tags_dir(repo_root, repo_id)
    if not tags_dir.exists():
        return []
    records: list[TagRecord] = []
    for tag_file in tags_dir.glob("*.json"):
        try:
            records.append(TagRecord.from_dict(json.loads(tag_file.read_text())))
        except (json.JSONDecodeError, KeyError):
            # Corrupt tag file — skip it.
            continue
    return records
707
708
709 # ---------------------------------------------------------------------------
710 # Remote sync helpers (push/pull)
711 # ---------------------------------------------------------------------------
712
713
def store_pulled_commit(
    repo_root: pathlib.Path, commit_data: RemoteCommitPayload
) -> bool:
    """Persist a commit received from a remote into local storage.

    Idempotent — silently skips if the commit already exists. Returns
    ``True`` if the record was newly written, ``False`` if it already
    existed or the payload lacks a ``commit_id``.
    """
    commit_id = commit_data.get("commit_id") or ""
    if not commit_id:
        logger.warning("⚠️ store_pulled_commit: missing commit_id — skipping")
        return False
    if read_commit(repo_root, commit_id) is not None:
        logger.debug("⚠️ Pulled commit %s already exists — skipped", commit_id[:8])
        return False

    snapshot_id = commit_data.get("snapshot_id") or ""
    record = CommitRecord.from_dict(
        CommitDict(
            commit_id=commit_id,
            repo_id=commit_data.get("repo_id") or "",
            branch=commit_data.get("branch") or "",
            snapshot_id=snapshot_id,
            message=commit_data.get("message") or "",
            committed_at=commit_data.get("committed_at") or "",
            parent_commit_id=commit_data.get("parent_commit_id"),
            parent2_commit_id=commit_data.get("parent2_commit_id"),
            author=commit_data.get("author") or "",
            metadata=dict(commit_data.get("metadata") or {}),
            # The typed delta is never shipped over the wire.
            structured_delta=None,
        )
    )
    write_commit(repo_root, record)

    # The snapshot may not exist locally yet — create a (possibly stub)
    # record from whatever manifest the remote sent along.
    if snapshot_id and read_snapshot(repo_root, snapshot_id) is None:
        write_snapshot(
            repo_root,
            SnapshotRecord(
                snapshot_id=snapshot_id,
                manifest=dict(commit_data.get("manifest") or {}),
            ),
        )

    return True
756
757
def store_pulled_object_metadata(
    repo_root: pathlib.Path, object_data: dict[str, str]
) -> bool:
    """Register an object descriptor received from a remote.

    The actual blob bytes are stored by ``object_store.write_object``; this
    hook exists for GC and push-delta bookkeeping. It is currently a no-op
    that always reports success, because objects are content-addressed files
    and presence in ``.muse/objects/`` is the ground truth.
    """
    return True