cgcardona / muse public
domain.py python
627 lines 24.2 KB
4e224376 feat(phase-4): CRDT semantics for convergent multi-agent writes (691 te… Gabriel Cardona <cgcardona@gmail.com> 2d ago
1 """MuseDomainPlugin — the six-interface protocol that defines a Muse domain.
2
3 Muse provides the DAG engine, content-addressed object store, branching,
4 lineage walking, topological log graph, and merge base finder. A domain plugin
5 implements these six interfaces and Muse does the rest.
6
7 The music plugin (``muse.plugins.music``) is the reference implementation.
8 Every other domain — scientific simulation, genomics, 3D spatial design,
9 spacetime — is a new plugin.
10
11 Phase 1 — Typed Delta Algebra
12 ------------------------------
13 ``StateDelta`` is now a ``StructuredDelta`` carrying a typed operation list
14 rather than the old opaque ``{added, removed, modified}`` path lists. Each
15 operation knows its kind (insert / delete / move / replace / patch), the
16 address it touched, and a content-addressed ID for the before/after content.
17
18 This replaces ``DeltaManifest`` entirely. Plugins that previously returned
19 ``DeltaManifest`` must now return ``StructuredDelta``.
20
21 Phase 2 — Domain Schema & Diff Algorithm Library
22 -------------------------------------------------
23 ``schema()`` is now the sixth protocol method. Plugins return a
24 ``DomainSchema`` declaring their data structure. The core engine uses this
25 declaration to drive diff algorithm selection via
26 :func:`~muse.core.diff_algorithms.diff_by_schema`, and the merge engine
27 (Phase 3) will use it for informed conflict detection.
28
29 Phase 3 — Operation-Level Merge Engine
30 ---------------------------------------
31 Plugins may optionally implement :class:`StructuredMergePlugin`, a sub-protocol
32 that adds ``merge_ops()``. When both branches have produced ``StructuredDelta``
33 from ``diff()``, the merge engine checks
34 ``isinstance(plugin, StructuredMergePlugin)`` and calls ``merge_ops()`` for
35 fine-grained, operation-level conflict detection. Non-supporting plugins fall
36 back to the existing file-level ``merge()`` path.
37
38 Phase 4 — CRDT Semantics for Convergent Multi-Agent Writes
39 ------------------------------------------------------------
40 Plugins may optionally implement :class:`CRDTPlugin`, a sub-protocol that
41 replaces ``merge()`` with ``join()``. ``join`` always succeeds — no conflict
42 state ever exists. This is the endgame for high-throughput multi-agent
43 scenarios: given any two :class:`CRDTSnapshotManifest` values, ``join``
44 produces a deterministic merged result regardless of message delivery order.
45
46 The core engine detects ``CRDTPlugin`` via ``isinstance`` at merge time.
47 ``DomainSchema.merge_mode == "crdt"`` signals that the CRDT path should be
48 taken.
49 """
50 from __future__ import annotations
51
52 import pathlib
53 from dataclasses import dataclass, field
54 from typing import TYPE_CHECKING, Literal, Protocol, TypedDict, runtime_checkable
55
56 if TYPE_CHECKING:
57 from muse.core.schema import CRDTDimensionSpec, DomainSchema
58
59
60 # ---------------------------------------------------------------------------
61 # Snapshot types (unchanged from pre-Phase-1)
62 # ---------------------------------------------------------------------------
63
64
class SnapshotManifest(TypedDict):
    """Manifest describing one content-addressed snapshot of domain state.

    Fields:

    - ``files`` — maps each workspace-relative POSIX path to the SHA-256
      digest of that file's content.
    - ``domain`` — names the plugin that produced the snapshot.
    """

    files: dict[str, str]
    domain: str
74
75
76 # ---------------------------------------------------------------------------
77 # Typed delta algebra — Phase 1
78 # ---------------------------------------------------------------------------
79
#: String address of a location within a domain's state graph.
#: File-level ops use a workspace-relative POSIX path; sub-file ops use a
#: domain-specific coordinate (e.g. ``"note:42"``).
DomainAddress = str
84
85
class InsertOp(TypedDict):
    """Operation recording that an element was added to a collection.

    Fields:

    - ``op`` — discriminator, always ``"insert"``.
    - ``address`` — the location the insertion touched.
    - ``position`` — integer index at which the element was inserted for
      ordered sequences; ``None`` for unordered sets.
    - ``content_id`` — SHA-256 of the inserted content: a blob already in the
      object store for file-level ops, or a deterministic hash of the
      element's canonical serialisation for sub-file ops.
    - ``content_summary`` — human-readable description of the inserted
      content, for display.
    """

    op: Literal["insert"]
    address: DomainAddress
    position: int | None
    content_id: str
    content_summary: str
101
102
class DeleteOp(TypedDict):
    """Operation recording that an element was removed from a collection.

    Fields:

    - ``op`` — discriminator, always ``"delete"``.
    - ``address`` — the location the deletion touched.
    - ``position`` — integer index that was removed for ordered sequences;
      ``None`` for unordered sets.
    - ``content_id`` — SHA-256 of the deleted content. Carrying it lets the
      operation be applied idempotently: an element that is already absent
      can simply be skipped.
    - ``content_summary`` — human-readable description of what was removed,
      shown by ``muse show``.
    """

    op: Literal["delete"]
    address: DomainAddress
    position: int | None
    content_id: str
    content_summary: str
118
119
class MoveOp(TypedDict):
    """Operation recording that an element changed position in a sequence.

    Moves only make sense in ordered collections, so both positions are
    mandatory integers (never ``None``).

    Fields:

    - ``op`` — discriminator, always ``"move"``.
    - ``address`` — the location the move touched.
    - ``from_position`` — source index, measured in the pre-move sequence.
    - ``to_position`` — destination index, measured in the post-move
      sequence.
    - ``content_id`` — identifies the moved element so the operation can be
      validated during replay.
    """

    op: Literal["move"]
    address: DomainAddress
    from_position: int
    to_position: int
    content_id: str
135
136
class ReplaceOp(TypedDict):
    """Operation recording an atomic, leaf-level change of an element's value.

    Fields:

    - ``op`` — discriminator, always ``"replace"``.
    - ``address`` — the location the replacement touched.
    - ``position`` — index within the container; ``None`` for unordered
      containers.
    - ``old_content_id`` / ``new_content_id`` — SHA-256 hashes of the content
      before and after the change. Three-way merge engines use them to spot
      concurrent conflicting edits: both sides start from the same
      ``old_content_id`` but end at different ``new_content_id`` values.
    - ``old_summary`` / ``new_summary`` — human-readable display strings,
      the counterpart of ``content_summary`` on :class:`InsertOp`.
    """

    op: Literal["replace"]
    address: DomainAddress
    position: int | None
    old_content_id: str
    new_content_id: str
    old_summary: str
    new_summary: str
156
157
#: The four non-recursive (leaf) operation types: insert, delete, move and
#: replace. ``PatchOp`` is excluded because it nests further ops.
LeafDomainOp = InsertOp | DeleteOp | MoveOp | ReplaceOp
160
161
class PatchOp(TypedDict):
    """Operation recording that a container element was modified internally.

    Fields:

    - ``op`` — discriminator, always ``"patch"``.
    - ``address`` — names the container that changed (e.g. a file path).
    - ``child_ops`` — the sub-element changes inside that container. In
      Phase 1 these are always leaf ops; the design calls for Phase 3 to
      introduce true recursion via a nested ``StructuredDelta`` when the
      operation-level merge engine requires it. The annotation itself
      admits any ``DomainOp``.
    - ``child_domain`` — identifies the sub-element domain (e.g.
      ``"midi_notes"`` for note-level ops inside a ``.mid`` file).
    - ``child_summary`` — human-readable description of the child changes,
      shown by ``muse show``.
    """

    op: Literal["patch"]
    address: DomainAddress
    child_ops: list[DomainOp]
    child_domain: str
    child_summary: str
180
181
#: Union of every operation type (the four leaf ops plus ``PatchOp``) —
#: the atoms of a ``StructuredDelta``.
DomainOp = LeafDomainOp | PatchOp
184
185
class StructuredDelta(TypedDict):
    """Rich, composable description of the change between two snapshots.

    The core engine stores a delta next to each commit record so that
    ``muse show`` and ``muse diff`` can render it without reloading full
    blobs.

    Fields:

    - ``domain`` — name of the domain the delta belongs to.
    - ``ops`` — ordered operations; applying them in sequence transforms
      ``base`` into ``target``.
    - ``summary`` — precomputed human-readable string such as
      ``"3 notes added, 1 note removed"``. The plugin computes it because
      only the plugin understands its domain semantics.
    """

    domain: str
    ops: list[DomainOp]
    summary: str
202
203
204 # ---------------------------------------------------------------------------
205 # Type aliases used in the protocol signatures
206 # ---------------------------------------------------------------------------
207
#: Live state: either an already-snapshotted manifest dict or a working
#: directory path. The music plugin accepts both forms — a ``Path`` for CLI
#: commit/status, and a ``SnapshotManifest`` dict for in-memory merge and
#: diff operations.
LiveState = SnapshotManifest | pathlib.Path

#: A content-addressed, immutable snapshot of state at a point in time.
#: Alias of ``SnapshotManifest``.
StateSnapshot = SnapshotManifest

#: The minimal change between two snapshots, expressed as a list of typed
#: domain operations. Alias of ``StructuredDelta``.
StateDelta = StructuredDelta
218
219
220 # ---------------------------------------------------------------------------
221 # Merge and drift result types
222 # ---------------------------------------------------------------------------
223
224
@dataclass
class MergeResult:
    """Outcome of a three-way merge between two divergent state lines.

    Attributes:
        merged: The reconciled snapshot.
        conflicts: Workspace-relative file paths that could not be
            auto-merged and require manual resolution. An empty list means
            the merge was clean. Turning these paths into user-facing
            messages is the CLI's responsibility.
        applied_strategies: For each path where a ``.museattributes`` rule
            overrode the default conflict behaviour, the strategy that was
            applied.
        dimension_reports: For each conflicting path, its per-dimension
            resolution detail.
        op_log: Ordered ``DomainOp`` entries applied to produce the merged
            snapshot. Empty for file-level merges; populated by plugins
            that implement operation-level merge (Phase 3).
    """

    merged: StateSnapshot
    # Mutable defaults go through default_factory so instances never share
    # the same list/dict object.
    conflicts: list[str] = field(default_factory=list)
    applied_strategies: dict[str, str] = field(default_factory=dict)
    dimension_reports: dict[str, dict[str, str]] = field(default_factory=dict)
    op_log: list[DomainOp] = field(default_factory=list)

    @property
    def is_clean(self) -> bool:
        """``True`` when no unresolvable conflicts remain."""
        # Idiomatic emptiness test instead of comparing len() against zero.
        return not self.conflicts
255
256
def _clean_tree_delta() -> StructuredDelta:
    """Build the default delta used when a report is created without one."""
    return StructuredDelta(domain="", ops=[], summary="working tree clean")


@dataclass
class DriftReport:
    """Difference between the committed snapshot and the current live state.

    Attributes:
        has_drift: ``True`` when the live state differs from the committed
            snapshot.
        summary: Human-readable description of what changed. Defaults to
            the empty string.
        delta: Machine-readable structured delta for programmatic
            consumers. When omitted, a fresh delta with an empty domain, no
            ops and the summary ``"working tree clean"`` is created for
            each instance.
    """

    has_drift: bool
    summary: str = ""
    delta: StateDelta = field(default_factory=_clean_tree_delta)
271
272
273 # ---------------------------------------------------------------------------
274 # The plugin protocol
275 # ---------------------------------------------------------------------------
276
277
@runtime_checkable
class MuseDomainPlugin(Protocol):
    """Structural protocol listing the six methods every domain plugin provides.

    Everything else comes from Muse itself: the DAG, branching, checkout,
    lineage walking, the ASCII log graph and the merge base finder. A domain
    that implements these six methods gets the full Muse VCS on top.

    The reference implementation is the music plugin
    (``muse.plugins.music``).
    """

    def snapshot(self, live_state: LiveState) -> StateSnapshot:
        """Capture the current live state as a serialisable, hashable snapshot.

        The returned ``SnapshotManifest`` must be JSON-serialisable: Muse
        derives a SHA-256 content address from its canonical JSON form and
        stores the snapshot as a blob under ``.muse/objects/``.

        **``.museignore`` contract** — when *live_state* is a
        ``pathlib.Path`` (the ``muse-work/`` directory), implementations
        **must** honour ``.museignore``: call
        :func:`muse.core.ignore.load_patterns` on the repository root and
        filter out every path matched by :func:`muse.core.ignore.is_ignored`.

        Args:
            live_state: A working-directory path or an existing manifest.

        Returns:
            The snapshot manifest for *live_state*.
        """
        ...

    def diff(
        self,
        base: StateSnapshot,
        target: StateSnapshot,
        *,
        repo_root: pathlib.Path | None = None,
    ) -> StateDelta:
        """Compute the structured delta that turns *base* into *target*.

        The result is a ``StructuredDelta`` whose ``ops`` form a minimal list
        of typed operations. The core engine stores it next to the commit
        record so ``muse show`` and ``muse diff`` can display it without
        reloading blobs.

        Plugins should:

        1. Compute ops at the finest granularity they can interpret.
        2. Give every op a meaningful ``content_summary`` string.
        3. When *repo_root* is provided, load sub-file content from the
           object store and emit ``PatchOp`` entries with note/element-level
           ``child_ops`` rather than coarse ``ReplaceOp`` entries.
        4. Compute a human-readable ``summary`` covering all ops.

        Args:
            base: Snapshot the delta starts from.
            target: Snapshot the delta leads to.
            repo_root: Optional repository root enabling sub-file diffing.

        Returns:
            The delta from *base* to *target*.
        """
        ...

    def merge(
        self,
        base: StateSnapshot,
        left: StateSnapshot,
        right: StateSnapshot,
        *,
        repo_root: pathlib.Path | None = None,
    ) -> MergeResult:
        """Three-way merge *left* and *right* against their common ancestor.

        **``.museattributes`` and multidimensional merge contract** — when
        *repo_root* is provided, implementations should:

        1. Load ``.museattributes`` via
           :func:`muse.core.attributes.load_attributes`.
        2. For each conflicting path, call
           :func:`muse.core.attributes.resolve_strategy` with the relevant
           dimension name (or ``"*"`` for file-level resolution).
        3. Apply the strategy that comes back:

           - ``"ours"`` — take the *left* version; remove from conflict list.
           - ``"theirs"`` — take the *right* version; remove from conflict
             list.
           - ``"manual"`` — force into the conflict list even if the engine
             would auto-resolve.
           - ``"auto"`` / ``"union"`` — defer to the engine's default logic.

        4. For formats with true multidimensional content (e.g. MIDI:
           melodic, rhythmic, harmonic, dynamic, structural), attempt a
           sub-file dimension merge before falling back to a file-level
           conflict.

        Args:
            base: The common ancestor (merge base).
            left: First divergent snapshot.
            right: Second divergent snapshot.
            repo_root: Optional repository root for ``.museattributes``.

        Returns:
            A ``MergeResult`` holding the reconciled snapshot and any
            unresolvable conflicts.
        """
        ...

    def drift(
        self,
        committed: StateSnapshot,
        live: LiveState,
    ) -> DriftReport:
        """Compare the committed snapshot with the current live state.

        ``muse status`` uses this to detect uncommitted changes.

        Args:
            committed: The last committed snapshot.
            live: The current live state.

        Returns:
            A ``DriftReport`` saying whether the live state has diverged
            from *committed* and, if so, by how much.
        """
        ...

    def apply(self, delta: StateDelta, live_state: LiveState) -> LiveState:
        """Apply *delta* on top of *live_state* and return the resulting state.

        ``muse checkout`` uses this to reconstruct a historical state.

        New content is identified by SHA-256 hash: ``content_id`` on an
        ``InsertOp`` and ``new_content_id`` on a ``ReplaceOp``. When
        *live_state* is a ``pathlib.Path`` the plugin reads that content
        from the object store. When *live_state* is a ``SnapshotManifest``,
        only ``DeleteOp`` and ``ReplaceOp`` at the file level can be applied
        in-memory.

        Args:
            delta: The delta to apply.
            live_state: The state the delta is applied to.

        Returns:
            The state after applying *delta*.
        """
        ...

    def schema(self) -> DomainSchema:
        """Declare the structural schema of this domain's state.

        The core engine calls this once, at plugin registration time. The
        returned :class:`~muse.core.schema.DomainSchema` must be stable and
        deterministic, and describes:

        - ``top_level`` — the primary collection structure (e.g. a set of
          files, a map of chromosome names to sequences).
        - ``dimensions`` — the semantic sub-dimensions of state (e.g.
          melodic, harmonic, dynamic, structural for music).
        - ``merge_mode`` — ``"three_way"`` (Phases 1–3) or ``"crdt"``
          (Phase 4).

        The schema drives algorithm selection in
        :func:`~muse.core.diff_algorithms.diff_by_schema` as well as the
        Phase 3 merge engine's conflict detection.

        See :mod:`muse.core.schema` for all available element schema types.
        """
        ...
411
412
413 # ---------------------------------------------------------------------------
414 # Phase 3 optional extension — structured (operation-level) merge
415 # ---------------------------------------------------------------------------
416
417
@runtime_checkable
class StructuredMergePlugin(MuseDomainPlugin, Protocol):
    """Optional sub-protocol for plugins that can merge at operation level.

    Implementing it unlocks sub-file auto-merge: two agents inserting notes
    at non-overlapping bars never conflict, because the merge engine reasons
    over ``DomainOp`` trees instead of file paths.

    Support is detected at runtime with::

        isinstance(plugin, StructuredMergePlugin)

    A plugin without ``merge_ops`` automatically falls back to the existing
    file-level ``merge()`` path — no changes required.

    The Phase 3 reference implementation is
    :class:`~muse.plugins.music.plugin.MusicPlugin`.
    """

    def merge_ops(
        self,
        base: StateSnapshot,
        ours_snap: StateSnapshot,
        theirs_snap: StateSnapshot,
        ours_ops: list[DomainOp],
        theirs_ops: list[DomainOp],
        *,
        repo_root: pathlib.Path | None = None,
    ) -> MergeResult:
        """Merge two op lists against a common base using domain knowledge.

        Called by the core merge engine once both branches have produced a
        ``StructuredDelta`` from ``diff()``. The plugin is expected to:

        1. Call :func:`muse.core.op_transform.merge_op_lists` to find the
           conflicting ``DomainOp`` pairs.
        2. For clean pairs, build the merged ``SnapshotManifest`` by applying
           the adjusted merged ops to *base*. *ours_snap* and *theirs_snap*
           supply the final content IDs of files touched by only one side,
           which is needed for ``PatchOp`` entries because they do not carry
           a ``new_content_id`` themselves.
        3. For conflicting pairs, consult ``.museattributes`` (when
           *repo_root* is provided) and either auto-resolve through the
           declared strategy or add the address to
           ``MergeResult.conflicts``.

        Implementations must be domain-aware: a ``.museattributes`` rule of
        ``merge=ours`` should follow this plugin's understanding of "ours"
        (the left branch content) rather than perform a raw file-level copy.

        Args:
            base: Common ancestor snapshot.
            ours_snap: Final snapshot of our branch.
            theirs_snap: Final snapshot of their branch.
            ours_ops: Operations from our branch delta (base → ours).
            theirs_ops: Operations from their branch delta (base → theirs).
            repo_root: Repository root for ``.museattributes`` lookup.

        Returns:
            A :class:`MergeResult` with the reconciled snapshot and any
            remaining unresolvable conflicts.
        """
        ...
481
482
483 # ---------------------------------------------------------------------------
484 # Phase 4 — CRDT snapshot manifest and CRDTPlugin protocol
485 # ---------------------------------------------------------------------------
486
487
class CRDTSnapshotManifest(TypedDict):
    """Snapshot manifest extended with CRDT metadata for CRDT-mode plugins.

    It carries every field of a standard snapshot manifest plus the state
    that must persist between commits for CRDT merging.

    Fields:

    - ``files`` — path → content hash, with the same semantics as
      :class:`SnapshotManifest`.
    - ``domain`` — name of the plugin that produced the snapshot.
    - ``vclock`` — vector clock ``{agent_id: event_count}`` recording the
      causal state of the snapshot. It is used to detect concurrent writes
      and to break LWW ties when two agents write at the same logical time.
    - ``crdt_state`` — maps each file path to the SHA-256 hash of its CRDT
      state blob in the object store. CRDT metadata (tombstones, RGA element
      IDs, OR-Set tokens) is kept here, apart from the content hashes, so
      the content-addressed store remains valid.
    - ``schema_version`` — always ``1`` for Phase 4.
    """

    files: dict[str, str]
    domain: str
    vclock: dict[str, int]
    crdt_state: dict[str, str]
    schema_version: Literal[1]
513
514
@runtime_checkable
class CRDTPlugin(MuseDomainPlugin, Protocol):
    """Optional sub-protocol for plugins with convergent CRDT merge semantics.

    A plugin implementing it swaps the three-way ``merge()`` for a
    mathematical ``join()`` on a lattice. ``join`` always succeeds:

    - **No conflict state ever exists.**
    - Any two replicas that have received the same set of writes converge to
      the same state, regardless of delivery order.
    - Millions of agents can write concurrently without coordination.

    ``join`` guarantees the three lattice laws:

    1. **Commutativity**: ``join(a, b) == join(b, a)``
    2. **Associativity**: ``join(join(a, b), c) == join(a, join(b, c))``
    3. **Idempotency**: ``join(a, a) == a``

    The core engine detects support at runtime via::

        isinstance(plugin, CRDTPlugin)

    and routes to ``join`` when ``DomainSchema.merge_mode == "crdt"``. A
    plugin that does not implement ``CRDTPlugin`` falls back to the existing
    three-way ``merge()`` path.

    Implementation checklist for plugin authors
    -------------------------------------------
    1. Override ``schema()`` so it returns a
       :class:`~muse.core.schema.DomainSchema` with ``merge_mode="crdt"``
       and a :class:`~muse.core.schema.CRDTDimensionSpec` for each CRDT
       dimension.
    2. Implement ``crdt_schema()`` to declare which CRDT primitive maps to
       each dimension.
    3. Implement ``join(a, b)`` using the CRDT primitives in
       :mod:`muse.core.crdts`.
    4. Implement ``to_crdt_state(snapshot)`` to lift a plain snapshot into
       CRDT state.
    5. Implement ``from_crdt_state(crdt)`` to materialise a CRDT state back
       to a plain snapshot for ``muse show`` and CLI display.
    """

    def crdt_schema(self) -> list[CRDTDimensionSpec]:
        """Declare the CRDT type used for each dimension.

        Dimensions that are not listed fall back to three-way merge.

        Returns:
            One :class:`~muse.core.schema.CRDTDimensionSpec` per dimension
            that uses CRDT semantics.
        """
        ...

    def join(
        self,
        a: CRDTSnapshotManifest,
        b: CRDTSnapshotManifest,
    ) -> CRDTSnapshotManifest:
        """Merge two CRDT snapshots by computing their lattice join.

        The operation is:

        - Commutative: ``join(a, b) == join(b, a)``
        - Associative: ``join(join(a, b), c) == join(a, join(b, c))``
        - Idempotent: ``join(a, a) == a``

        Together these properties guarantee convergence regardless of
        message order or delivery count.

        Implementations should use the CRDT primitives in
        :mod:`muse.core.crdts` (one primitive per declared CRDT dimension),
        compute the per-dimension joins, and then rebuild the ``files``
        manifest and ``vclock`` from the results.

        Args:
            a: First CRDT snapshot manifest.
            b: Second CRDT snapshot manifest.

        Returns:
            A new :class:`CRDTSnapshotManifest` that is the join of *a* and
            *b*.
        """
        ...

    def to_crdt_state(self, snapshot: StateSnapshot) -> CRDTSnapshotManifest:
        """Lift a plain snapshot into its CRDT state representation.

        Called when importing a snapshot created before this plugin opted
        into CRDT mode. Implementations should initialise fresh CRDT
        primitives from the snapshot content, with an empty vector clock.

        Args:
            snapshot: A plain :class:`StateSnapshot` to lift.

        Returns:
            A :class:`CRDTSnapshotManifest` with the same content and empty
            CRDT metadata (zero vector clock, empty ``crdt_state``).
        """
        ...

    def from_crdt_state(self, crdt: CRDTSnapshotManifest) -> StateSnapshot:
        """Materialise a CRDT state back into a plain snapshot.

        ``muse show``, ``muse status`` and other CLI commands use this when
        they need a standard :class:`StateSnapshot` view of a CRDT-mode
        snapshot.

        Args:
            crdt: A :class:`CRDTSnapshotManifest` to materialise.

        Returns:
            A plain :class:`StateSnapshot` with the visible (non-tombstoned)
            content.
        """
        ...