gabriel / muse public
domain.py python
952 lines 36.4 KB
95367f8d feat: implement muse rerere — reuse recorded conflict resolutions Gabriel Cardona <gabriel@tellurstori.com> 2d ago
1 """MuseDomainPlugin — the six-interface protocol that defines a Muse domain.
2
3 Muse provides the DAG engine, content-addressed object store, branching,
4 lineage walking, topological log graph, and merge base finder. A domain plugin
5 implements these six interfaces and Muse does the rest.
6
7 The MIDI plugin (``muse.plugins.midi``) is the reference implementation.
8 Every other domain — scientific simulation, genomics, 3D spatial design,
9 spacetime — is a new plugin.
10
11 Typed Delta Algebra
12 -------------------
13 ``StateDelta`` is a ``StructuredDelta`` carrying a typed operation list rather
14 than an opaque path list. Each operation knows its kind (insert / delete /
15 move / replace / patch), the address it touched, and a content-addressed ID
16 for the before/after content.
17
18 Domain Schema
19 -------------
20 ``schema()`` is the sixth protocol method. Plugins return a ``DomainSchema``
21 declaring their data structure. The core engine uses this declaration to drive
22 diff algorithm selection via :func:`~muse.core.diff_algorithms.diff_by_schema`.
23
24 Operational Transformation Merge
25 ---------------------------------
26 Plugins may optionally implement :class:`StructuredMergePlugin`, a sub-protocol
27 that adds ``merge_ops()``. When both branches have produced ``StructuredDelta``
28 from ``diff()``, the merge engine checks
29 ``isinstance(plugin, StructuredMergePlugin)`` and calls ``merge_ops()`` for
30 fine-grained, operation-level conflict detection. Non-supporting plugins fall
31 back to the existing file-level ``merge()`` path.
32
33 CRDT Convergent Merge
34 ---------------------
35 Plugins may optionally implement :class:`CRDTPlugin`, a sub-protocol that
36 replaces ``merge()`` with ``join()``. ``join`` always succeeds — no conflict
37 state ever exists. Given any two :class:`CRDTSnapshotManifest` values,
38 ``join`` produces a deterministic merged result regardless of message delivery
39 order.
40
41 The core engine detects ``CRDTPlugin`` via ``isinstance`` at merge time.
42 ``DomainSchema.merge_mode == "crdt"`` signals that the CRDT path should be
43 taken.
44 """
45
46 from __future__ import annotations
47
48 import pathlib
49 from dataclasses import dataclass, field
50 from typing import TYPE_CHECKING, Literal, NotRequired, Protocol, TypedDict, runtime_checkable
51
# Explicit public API of this module.  These are the names exported by
# ``from muse.domain import *``; callers may also import any of them directly,
# e.g. ``from muse.domain import MutateOp``.
__all__ = [
    "SnapshotManifest",
    "DomainAddress",
    "InsertOp",
    "DeleteOp",
    "MoveOp",
    "ReplaceOp",
    "FieldMutation",
    "MutateOp",
    "EntityProvenance",
    "LeafDomainOp",
    "PatchOp",
    "DomainOp",
    "SemVerBump",
    "StructuredDelta",
    "infer_sem_ver_bump",
    "LiveState",
    "StateSnapshot",
    "StateDelta",
    "ConflictRecord",
    "MergeResult",
    "DriftReport",
    "MuseDomainPlugin",
    "StructuredMergePlugin",
    "CRDTSnapshotManifest",
    "CRDTPlugin",
    "RererePlugin",
]
81
82 if TYPE_CHECKING:
83 from muse.core.schema import CRDTDimensionSpec, DomainSchema
84
85
86 # ---------------------------------------------------------------------------
87 # Snapshot types (unchanged from pre-Phase-1)
88 # ---------------------------------------------------------------------------
89
90
class SnapshotManifest(TypedDict):
    """Content-addressed manifest describing one snapshot of domain state.

    Keys:
        files: Maps each workspace-relative POSIX path to the SHA-256 digest
            of that file's content.
        domain: Identifies the plugin that produced the snapshot.
    """

    files: dict[str, str]
    domain: str
100
101
102 # ---------------------------------------------------------------------------
103 # Typed delta algebra
104 # ---------------------------------------------------------------------------
105
#: A domain-specific address identifying a location within the state graph.
#: For file-level ops this is a workspace-relative POSIX path; for sub-file
#: ops it is a domain-specific coordinate (e.g. ``"note:42"``).
#: This is a plain alias for ``str`` — the alias itself adds no validation.
DomainAddress = str
110
111
class InsertOp(TypedDict):
    """Records the insertion of one element into a collection.

    Keys:
        op: Always the literal ``"insert"``.
        address: Address the operation touched.
        position: Integer index at which the element was inserted when the
            collection is an ordered sequence; ``None`` for unordered sets.
        content_id: SHA-256 of the inserted content — either a blob already
            in the object store (file-level ops) or a deterministic hash of
            the element's canonical serialisation (sub-file ops).
        content_summary: Human-readable description of the inserted content.
    """

    op: Literal["insert"]
    address: DomainAddress
    position: int | None
    content_id: str
    content_summary: str
127
128
class DeleteOp(TypedDict):
    """Records the removal of one element from a collection.

    Keys:
        op: Always the literal ``"delete"``.
        address: Address the operation touched.
        position: Integer index that was removed when the collection is an
            ordered sequence; ``None`` for unordered sets.
        content_id: SHA-256 of the deleted content.  Carrying it lets the
            operation be applied idempotently: an element that is already
            absent can simply be skipped.
        content_summary: Human-readable description of what was removed,
            shown by ``muse show``.
    """

    op: Literal["delete"]
    address: DomainAddress
    position: int | None
    content_id: str
    content_summary: str
144
145
class MoveOp(TypedDict):
    """Records the repositioning of an element inside an ordered sequence.

    Moves only make sense for ordered collections, so both positions are
    mandatory integers.

    Keys:
        op: Always the literal ``"move"``.
        address: Address the operation touched.
        from_position: Source index, measured in the pre-move sequence.
        to_position: Destination index, measured in the post-move sequence.
        content_id: Identifies the element being moved so the operation can
            be validated during replay.
    """

    op: Literal["move"]
    address: DomainAddress
    from_position: int
    to_position: int
    content_id: str
161
162
class ReplaceOp(TypedDict):
    """Records an atomic, leaf-level change of an element's value.

    Keys:
        op: Always the literal ``"replace"``.
        address: Address the operation touched.
        position: Index within the container; ``None`` for unordered
            containers.
        old_content_id: SHA-256 of the content before the change.
        new_content_id: SHA-256 of the content after the change.  Together
            with ``old_content_id`` it lets three-way merge engines detect
            concurrent conflicting modifications (both sides changed the same
            ``old_content_id`` to different ``new_content_id`` values).
        old_summary: Human-readable description of the previous value, for
            display (analogous to ``content_summary`` on :class:`InsertOp`).
        new_summary: Human-readable description of the new value, for
            display.
    """

    op: Literal["replace"]
    address: DomainAddress
    position: int | None
    old_content_id: str
    new_content_id: str
    old_summary: str
    new_summary: str
182
183
class FieldMutation(TypedDict):
    """String-serialised before/after pair for one field of a :class:`MutateOp`.

    Both values are always strings.  That way typed primitives (int, float,
    bool) can be compared uniformly and the generic delta algebra does not
    have to carry domain-specific type information.  Each plugin formats the
    strings using its own domain conventions (e.g. ``"80"`` for a MIDI
    velocity, ``"C4"`` for a pitch name).

    Keys:
        old: Serialised value before the mutation.
        new: Serialised value after the mutation.
    """

    old: str
    new: str
195
196
class MutateOp(TypedDict):
    """Records which specific fields of a named entity were updated.

    :class:`ReplaceOp` swaps a whole element atomically; ``MutateOp`` instead
    lists the individual fields of a domain entity that changed.  This gives
    mutation tracking to domains that keep a stable entity identity separate
    from content equality.

    Example: a MIDI note's velocity goes from 80 to 100.  A pure content-hash
    model sees two different hashes and reports ``DeleteOp + InsertOp``.  With
    ``MutateOp`` and a stable ``entity_id`` the diff reports
    "velocity 80→100 on entity C4@bar4", so lineage is preserved.

    Keys:
        op: Always the literal ``"mutate"``.
        address: Address the operation touched.
        entity_id: Stable identifier of the mutated entity, assigned at first
            insertion and reused for every later mutation regardless of
            content changes.
        old_content_id: SHA-256 of the full element state before the
            mutation.
        new_content_id: SHA-256 of the full element state after the mutation.
            The two IDs enable three-way merge conflict detection identical
            to :class:`ReplaceOp`.
        fields: Maps a field name (e.g. ``"velocity"``, ``"start_tick"``) to
            a :class:`FieldMutation` holding the serialised old and new
            values.
        old_summary: Human-readable description of the previous state.
        new_summary: Human-readable description of the new state.
        position: Index within the containing ordered sequence; ``None`` for
            unordered containers.
    """

    op: Literal["mutate"]
    address: DomainAddress
    entity_id: str
    old_content_id: str
    new_content_id: str
    fields: dict[str, FieldMutation]
    old_summary: str
    new_summary: str
    position: int | None
234
235
class EntityProvenance(TypedDict, total=False):
    """Causal metadata for ops that create or modify tracked entities.

    Entity tracking is opt-in, so every key is optional (``total=False``).
    Plugins that implement stable entity identity fill these in when they
    build :class:`InsertOp`, :class:`MutateOp` or :class:`DeleteOp` entries;
    consumers that do not understand entity provenance can safely ignore
    them.

    Keys:
        entity_id: Stable domain-specific identifier of the entity (e.g. a
            UUID assigned at the note's first insertion).
        origin_op_id: ``op_id`` of the op that first created the entity.
        last_modified_op_id: ``op_id`` of the most recent op that touched the
            entity.
        created_at_commit: Short-form ID of the commit that first introduced
            the entity.
        actor_id: Agent or human identity that performed the op.
    """

    entity_id: str
    origin_op_id: str
    last_modified_op_id: str
    created_at_commit: str
    actor_id: str
263
264
#: The five non-recursive (leaf) operation types: insert, delete, move,
#: replace and mutate.  None of them carries nested ops.
LeafDomainOp = InsertOp | DeleteOp | MoveOp | ReplaceOp | MutateOp
267
268
269 class PatchOp(TypedDict):
270 """A container element was internally modified.
271
272 ``address`` names the container (e.g. a file path). ``child_ops`` lists
273 the sub-element changes inside that container. These are always
274 leaf ops in the current implementation; true recursion via a nested
275 ``StructuredDelta`` is reserved for a future release.
276
277 ``child_domain`` identifies the sub-element domain (e.g. ``"midi_notes"``
278 for note-level ops inside a ``.mid`` file). ``child_summary`` is a
279 human-readable description of the child changes for ``muse show``.
280
281 When ``from_address`` is present the container was simultaneously renamed
282 (moved from that path) *and* modified. ``address`` is the new path;
283 ``from_address`` is the old path. The child_ops list contains the
284 semantic diff between the old and new symbol trees.
285 """
286
287 op: Literal["patch"]
288 address: DomainAddress
289 child_ops: list[DomainOp]
290 child_domain: str
291 child_summary: str
292 from_address: NotRequired[DomainAddress]
293
294
#: Union of all operation types — the atoms of a ``StructuredDelta``.
#: Recursive by way of :class:`PatchOp`, whose ``child_ops`` is itself a list
#: of ``DomainOp``.
type DomainOp = LeafDomainOp | PatchOp
297
298
# Closed set of bump levels.  ``infer_sem_ver_bump`` ranks them
# none < patch < minor < major when combining the impact of several ops.
SemVerBump = Literal["major", "minor", "patch", "none"]
"""Semantic version impact of a delta.

``major`` Breaking change: public symbol deleted, renamed, or signature changed.
``minor`` Additive: new public symbol inserted.
``patch`` Implementation-only change: body changed, signature stable.
``none`` No semantic change (formatting, whitespace, metadata only).
"""
307
class StructuredDelta(TypedDict, total=False):
    """Rich, composable delta between two domain snapshots.

    The core engine stores the delta alongside commit records so that
    ``muse show`` and ``muse diff`` can display it without reloading full
    blobs.  The class is declared with ``total=False``, so every key is
    optional at the type level.

    Keys:
        domain: Name of the domain the delta belongs to.
        ops: Ordered list of operations that transforms ``base`` into
            ``target`` when applied in sequence.
        summary: Precomputed human-readable string, for example
            ``"3 notes added, 1 note removed"``.  Plugins compute it because
            only they understand their domain semantics.
        sem_ver_bump: (v2, optional) Semantic version impact of the delta, as
            computed by :func:`infer_sem_ver_bump`.  Absent for legacy
            records and for non-code domains that do not compute it.
        breaking_changes: (v2, optional) Symbol addresses whose public
            interface was removed or incompatibly changed.
    """

    domain: str
    ops: list[DomainOp]
    summary: str
    sem_ver_bump: SemVerBump
    breaking_changes: list[str]
333
334
335 # ---------------------------------------------------------------------------
336 # SemVer inference helper
337 # ---------------------------------------------------------------------------
338
339
def infer_sem_ver_bump(delta: "StructuredDelta") -> tuple[SemVerBump, list[str]]:
    """Infer the semantic version bump and breaking-change list from a delta.

    Each entry of ``delta["ops"]`` (a missing key is treated as an empty
    list) is classified, and the most severe classification wins
    (``none`` < ``patch`` < ``minor`` < ``major``):

    * ``patch`` ops are containers: their ``child_ops`` are classified
      recursively and the result is folded into the total.  The container's
      own address is not examined.
    * Ops on a non-public symbol are skipped.  A symbol is non-public when
      its innermost name — the text after the last ``::`` and then after the
      last ``.`` of the address — starts with ``_``.
    * ``delete`` of a public symbol → **major** (breaking: callers will
      fail).
    * ``insert`` of a public symbol → **minor** (additive).
    * ``replace`` of a public symbol:

      - ``new_summary`` starts with ``"renamed to "``, or either summary
        mentions ``"signature"`` → **major** (breaking).
      - otherwise, either summary mentions ``"implementation"`` → **patch**.
      - otherwise → **major**; an unclassified replacement is conservatively
        treated as breaking.

    * Any other op kind (e.g. ``move``, ``mutate``) does not affect the
      result.
    * No contributing ops → **none**.

    This function relies on the op address format used by code plugins
    (``<file>::<symbol>``) and the ``new_summary`` / ``old_summary``
    conventions from :func:`~muse.plugins.code.symbol_diff.diff_symbol_trees`.
    For non-code domains the heuristics may not apply — plugins should
    override by setting ``sem_ver_bump`` directly when constructing the
    delta.

    Args:
        delta: The structured delta whose ops are inspected.

    Returns:
        A ``(bump, breaking_changes)`` tuple where ``breaking_changes`` is a
        sorted, de-duplicated list of symbol addresses whose public contract
        changed.
    """
    # Severity rank of every bump level, built once per call instead of
    # rebuilding a list and scanning it twice for each comparison.
    rank: dict[str, int] = {"none": 0, "patch": 1, "minor": 2, "major": 3}

    def _is_public(address: str) -> bool:
        """Return True if the innermost symbol name does not start with ``_``."""
        # ``str.split`` always yields at least one element, so ``[-1]`` is safe.
        innermost = address.split("::")[-1].split(".")[-1]
        return not innermost.startswith("_")

    def _promote(current: SemVerBump, candidate: SemVerBump) -> SemVerBump:
        """Return whichever of the two bumps is more severe."""
        return candidate if rank[candidate] > rank[current] else current

    bump: SemVerBump = "none"
    breaking: list[str] = []

    for op in delta.get("ops", []):
        op_type = op.get("op", "")

        if op_type == "patch":
            # The second check is not redundant for type checkers: comparing
            # ``op["op"]`` narrows the union so ``child_ops`` is known to exist.
            if op["op"] == "patch":
                child_delta: StructuredDelta = {
                    "domain": "",
                    "ops": op["child_ops"],
                    "summary": "",
                }
                child_bump, child_breaking = infer_sem_ver_bump(child_delta)
                bump = _promote(bump, child_bump)
                breaking.extend(child_breaking)
            continue

        address = str(op.get("address", ""))
        if not _is_public(address):
            continue

        if op_type == "delete":
            bump = _promote(bump, "major")
            breaking.append(address)
        elif op_type == "insert":
            bump = _promote(bump, "minor")
        elif op_type == "replace":
            new_summary = str(op.get("new_summary", ""))
            old_summary = str(op.get("old_summary", ""))
            contract_changed = (
                new_summary.startswith("renamed to ")
                or "signature" in new_summary
                or "signature" in old_summary
            )
            body_only = "implementation" in new_summary or "implementation" in old_summary
            if body_only and not contract_changed:
                bump = _promote(bump, "patch")
            else:
                # Renames, signature changes and anything unrecognised all
                # count as breaking.
                bump = _promote(bump, "major")
                breaking.append(address)
        # Remaining op kinds ("move", "mutate", unknown) contribute nothing.

    return bump, sorted(set(breaking))
419
420
421 # ---------------------------------------------------------------------------
422 # Type aliases used in the protocol signatures
423 # ---------------------------------------------------------------------------
424
#: Live state is either an already-snapshotted manifest dict or a workdir path.
#: The MIDI plugin accepts both: a Path (for CLI commit/status) and a
#: SnapshotManifest dict (for in-memory merge and diff operations).
type LiveState = SnapshotManifest | pathlib.Path

#: A content-addressed, immutable snapshot of state at a point in time.
#: Currently an alias for :class:`SnapshotManifest`.
type StateSnapshot = SnapshotManifest

#: The minimal change between two snapshots — a list of typed domain operations.
#: Currently an alias for :class:`StructuredDelta`.
type StateDelta = StructuredDelta
435
436
437 # ---------------------------------------------------------------------------
438 # Merge and drift result types
439 # ---------------------------------------------------------------------------
440
441
@dataclass
class ConflictRecord:
    """Structured conflict record in a merge result (v2 taxonomy).

    Attributes:
        path: The workspace-relative file path in conflict.
        conflict_type: One of ``symbol_edit_overlap``, ``rename_edit``,
            ``move_edit``, ``delete_use``, ``dependency_conflict`` or
            ``file_level`` (legacy, no symbol info).  Defaults to
            ``file_level``.
        ours_summary: Short description of the ours-side change.
        theirs_summary: Short description of the theirs-side change.
        addresses: Symbol addresses involved (empty for file-level
            conflicts).
    """

    path: str
    conflict_type: str = "file_level"
    ours_summary: str = ""
    theirs_summary: str = ""
    addresses: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, str | list[str]]:
        """Return the record as a plain dict keyed by attribute name.

        The ``addresses`` value is a copy, so mutating the returned dict
        never alters this record.
        """
        return {
            "path": self.path,
            "conflict_type": self.conflict_type,
            "ours_summary": self.ours_summary,
            "theirs_summary": self.theirs_summary,
            # Copy so the caller does not receive a live reference to our list.
            "addresses": list(self.addresses),
        }
469
470
@dataclass
class MergeResult:
    """Outcome of a three-way merge between two divergent state lines.

    Attributes:
        merged: The reconciled snapshot.
        conflicts: Workspace-relative file paths that could not be
            auto-merged and need manual resolution.  An empty list means the
            merge was clean.  The CLI is responsible for turning these paths
            into user-facing messages.
        applied_strategies: For every path where a ``.museattributes`` rule
            overrode the default conflict behaviour, the strategy that was
            applied.
        dimension_reports: Per-dimension resolution detail, keyed by
            conflicting path.
        op_log: Ordered ``DomainOp`` entries applied to produce the merged
            snapshot.  Empty for file-level merges; populated by plugins that
            implement operation-level OT merge.
        conflict_records: (v2) Structured conflict metadata with a semantic
            taxonomy per conflicting path.  Populated by plugins that
            implement :class:`StructuredMergePlugin`.  May be empty even when
            ``conflicts`` is not (legacy file-level conflict).
    """

    merged: StateSnapshot
    conflicts: list[str] = field(default_factory=list)
    applied_strategies: dict[str, str] = field(default_factory=dict)
    dimension_reports: dict[str, dict[str, str]] = field(default_factory=dict)
    op_log: list[DomainOp] = field(default_factory=list)
    conflict_records: list[ConflictRecord] = field(default_factory=list)

    @property
    def is_clean(self) -> bool:
        """``True`` when the ``conflicts`` list is empty."""
        return not self.conflicts
507
508
@dataclass
class DriftReport:
    """Gap between the committed state and the current live state.

    Attributes:
        has_drift: ``True`` when the live state differs from the committed
            snapshot.
        summary: Human-readable description of what changed.
        delta: Machine-readable structured delta for programmatic consumers.
            When not supplied, a fresh empty delta is built whose summary is
            ``"working tree clean"``.
    """

    has_drift: bool
    summary: str = ""
    delta: StateDelta = field(
        default_factory=lambda: StructuredDelta(
            domain="",
            ops=[],
            summary="working tree clean",
        ),
    )
523
524
525 # ---------------------------------------------------------------------------
526 # The plugin protocol
527 # ---------------------------------------------------------------------------
528
529
@runtime_checkable
class MuseDomainPlugin(Protocol):
    """Protocol made up of the six methods every Muse domain plugin provides.

    A plugin supplies ``snapshot``, ``diff``, ``merge``, ``drift``, ``apply``
    and ``schema``.  Muse provides everything else: the DAG, branching,
    checkout, lineage walking, the ASCII log graph and the merge base finder.
    Implement the six methods and the domain gets the full Muse VCS for free.

    The MIDI plugin (``muse.plugins.midi``) is the reference implementation.
    """

    def snapshot(self, live_state: LiveState) -> StateSnapshot:
        """Capture the current live state as a serialisable, hashable snapshot.

        The returned ``SnapshotManifest`` must be JSON-serialisable: Muse
        computes a SHA-256 content address from its canonical JSON form and
        stores the snapshot as a blob in ``.muse/objects/``.

        **``.museignore`` contract** — when *live_state* is a
        ``pathlib.Path`` (the ``state/`` directory), implementations **must**
        honour ``.museignore`` by:

        1. calling :func:`muse.core.ignore.load_ignore_config` on the
           repository root,
        2. calling :func:`muse.core.ignore.resolve_patterns` with the active
           domain name, and
        3. filtering paths with :func:`muse.core.ignore.is_ignored`.

        Domain-specific patterns (``[domain.<name>]`` sections) apply only
        when the active domain matches.

        Args:
            live_state: The state to capture.

        Returns:
            The snapshot manifest for *live_state*.
        """
        ...

    def diff(
        self,
        base: StateSnapshot,
        target: StateSnapshot,
        *,
        repo_root: pathlib.Path | None = None,
    ) -> StateDelta:
        """Compute the structured delta between two snapshots.

        The result is a ``StructuredDelta`` whose ``ops`` is a minimal list
        of typed operations transforming *base* into *target*.  The core
        engine stores it alongside the commit record so that ``muse show``
        and ``muse diff`` can display it without reloading blobs.

        Plugins should:

        1. Compute ops at the finest granularity they can interpret.
        2. Give every op a meaningful ``content_summary`` string.
        3. When *repo_root* is provided, load sub-file content from the
           object store and emit ``PatchOp`` entries with note/element-level
           ``child_ops`` rather than coarse ``ReplaceOp`` entries.
        4. Compute a human-readable ``summary`` across all ops.

        Args:
            base: Snapshot the delta starts from.
            target: Snapshot the delta leads to.
            repo_root: Optional repository root, enabling sub-file diffs.

        Returns:
            The delta from *base* to *target*.
        """
        ...

    def merge(
        self,
        base: StateSnapshot,
        left: StateSnapshot,
        right: StateSnapshot,
        *,
        repo_root: pathlib.Path | None = None,
    ) -> MergeResult:
        """Three-way merge two divergent state lines against a common base.

        **``.museattributes`` and multidimensional merge contract** — when
        *repo_root* is provided, implementations should:

        1. Load ``.museattributes`` via
           :func:`muse.core.attributes.load_attributes`.
        2. For each conflicting path, call
           :func:`muse.core.attributes.resolve_strategy` with the relevant
           dimension name (or ``"*"`` for file-level resolution).
        3. Apply the returned strategy:

           - ``"ours"`` — take the *left* version; remove from conflict list.
           - ``"theirs"`` — take the *right* version; remove from conflict
             list.
           - ``"manual"`` — force into conflict list even if the engine would
             auto-resolve.
           - ``"auto"`` / ``"union"`` — defer to the engine's default logic.

        4. For domain formats that support true multidimensional content
           (e.g. MIDI: notes, pitch_bend, cc_volume, track_structure),
           attempt a sub-file dimension merge before falling back to a
           file-level conflict.

        Args:
            base: The common ancestor (merge base).
            left: First of the two divergent snapshots.
            right: Second of the two divergent snapshots.
            repo_root: Optional repository root for ``.museattributes``
                lookup.

        Returns:
            A ``MergeResult`` holding the reconciled snapshot and any
            unresolvable conflicts.
        """
        ...

    def drift(
        self,
        committed: StateSnapshot,
        live: LiveState,
    ) -> DriftReport:
        """Compare the committed state against the current live state.

        ``muse status`` uses this to detect uncommitted changes.

        Args:
            committed: The last committed snapshot.
            live: The current live state.

        Returns:
            A ``DriftReport`` describing whether the live state has diverged
            from the committed snapshot and, if so, by how much.
        """
        ...

    def apply(self, delta: StateDelta, live_state: LiveState) -> LiveState:
        """Apply a delta on top of a live state and return the new state.

        ``muse checkout`` uses this to reconstruct a historical state.

        New content is identified by a SHA-256 hash carried on the op
        (``content_id`` on ``InsertOp``, ``new_content_id`` on
        ``ReplaceOp``).  When *live_state* is a ``pathlib.Path``, the plugin
        reads that content from the object store.  When *live_state* is a
        ``SnapshotManifest``, only file-level ``DeleteOp`` and ``ReplaceOp``
        can be applied in-memory.

        Args:
            delta: The delta to apply.
            live_state: The state the delta is applied to.

        Returns:
            The state that results from applying *delta*.
        """
        ...

    def schema(self) -> DomainSchema:
        """Declare the structural schema of this domain's state.

        The core engine calls this once, at plugin registration time.  The
        returned :class:`~muse.core.schema.DomainSchema` must be stable and
        deterministic, and describes:

        - ``top_level`` — the primary collection structure (e.g. a set of
          files, a map of chromosome names to sequences).
        - ``dimensions`` — the semantic sub-dimensions of state (e.g. notes,
          pitch_bend, cc_volume, track_structure for MIDI).
        - ``merge_mode`` — ``"three_way"`` (OT merge) or ``"crdt"`` (CRDT
          convergent join).

        The schema drives algorithm selection in
        :func:`~muse.core.diff_algorithms.diff_by_schema` and the OT merge
        engine's conflict detection.

        See :mod:`muse.core.schema` for all available element schema types.

        Returns:
            The schema declaration for this domain.
        """
        ...
665
666
667 # ---------------------------------------------------------------------------
668 # Operational Transformation optional extension — structured (operation-level) merge
669 # ---------------------------------------------------------------------------
670
671
@runtime_checkable
class StructuredMergePlugin(MuseDomainPlugin, Protocol):
    """Optional sub-protocol for plugins that support operation-level merging.

    Implementing it gives a plugin sub-file auto-merge: two agents inserting
    notes at non-overlapping bars never produce a conflict, because the merge
    engine reasons over ``DomainOp`` trees instead of file paths.

    The merge engine detects support at runtime via::

        isinstance(plugin, StructuredMergePlugin)

    A plugin without ``merge_ops`` automatically falls back to the existing
    file-level ``merge()`` path — no changes required.

    The :class:`~muse.plugins.midi.plugin.MidiPlugin` is the reference
    implementation for OT-based merge.
    """

    def merge_ops(
        self,
        base: StateSnapshot,
        ours_snap: StateSnapshot,
        theirs_snap: StateSnapshot,
        ours_ops: list[DomainOp],
        theirs_ops: list[DomainOp],
        *,
        repo_root: pathlib.Path | None = None,
    ) -> MergeResult:
        """Merge two op lists against a common base using domain knowledge.

        The core merge engine calls this when both branches have produced a
        ``StructuredDelta`` from ``diff()``.  The plugin then:

        1. Calls :func:`muse.core.op_transform.merge_op_lists` to detect
           conflicting ``DomainOp`` pairs.
        2. For clean pairs, builds the merged ``SnapshotManifest`` by
           applying the adjusted merged ops to *base*.  *ours_snap* and
           *theirs_snap* are used to look up the final content IDs of files
           touched by only one side (needed for ``PatchOp`` entries, which
           do not carry a ``new_content_id`` directly).
        3. For conflicting pairs, consults ``.museattributes`` (when
           *repo_root* is provided) and either auto-resolves via the
           declared strategy or adds the address to
           ``MergeResult.conflicts``.

        Implementations must be domain-aware: a ``.museattributes`` rule of
        ``merge=ours`` should take this plugin's understanding of "ours"
        (the left branch content), not a raw file-level copy.

        Args:
            base: Common ancestor snapshot.
            ours_snap: Final snapshot of our branch.
            theirs_snap: Final snapshot of their branch.
            ours_ops: Operations from our branch delta (base → ours).
            theirs_ops: Operations from their branch delta (base → theirs).
            repo_root: Repository root for ``.museattributes`` lookup.

        Returns:
            A :class:`MergeResult` with the reconciled snapshot and any
            remaining unresolvable conflicts.
        """
        ...
735
736
737 # ---------------------------------------------------------------------------
738 # CRDT convergent merge — snapshot manifest and CRDTPlugin protocol
739 # ---------------------------------------------------------------------------
740
741
class CRDTSnapshotManifest(TypedDict):
    """Snapshot manifest extended with the metadata CRDT-mode plugins need.

    It has every field of a standard snapshot manifest plus CRDT-specific
    fields that persist CRDT state between commits.

    Keys:
        files: Path → content hash, with the same semantics as
            :class:`SnapshotManifest`.
        domain: Name of the plugin that produced the snapshot.
        vclock: Causal state of the snapshot as a vector clock
            ``{agent_id: event_count}``.  Used to detect concurrent writes
            and to resolve LWW tiebreaks when two agents write at the same
            logical time.
        crdt_state: Maps per-file-path CRDT state blobs to their SHA-256
            hashes in the object store.  CRDT metadata (tombstones, RGA
            element IDs, OR-Set tokens) lives here, separate from content
            hashes, so the content-addressed store remains valid.
        schema_version: The Muse package version (read from
            ``muse._version``).
    """

    files: dict[str, str]
    domain: str
    vclock: dict[str, int]
    crdt_state: dict[str, str]
    schema_version: str
767
768
769 @runtime_checkable
770 class CRDTPlugin(MuseDomainPlugin, Protocol):
771 """Optional extension for plugins that want convergent CRDT merge semantics.
772
773 Plugins implementing this protocol replace the three-way ``merge()`` with
774 a mathematical ``join()`` on a lattice. ``join`` always succeeds:
775
776 - **No conflict state ever exists.**
777 - Any two replicas that have received the same set of writes converge to
778 the same state, regardless of delivery order.
779 - Millions of agents can write concurrently without coordination.
780
781 The three lattice laws guaranteed by ``join``:
782
783 1. **Commutativity**: ``join(a, b) == join(b, a)``
784 2. **Associativity**: ``join(join(a, b), c) == join(a, join(b, c))``
785 3. **Idempotency**: ``join(a, a) == a``
786
787 The core engine detects support at runtime via::
788
789 isinstance(plugin, CRDTPlugin)
790
791 and routes to ``join`` when ``DomainSchema.merge_mode == "crdt"``.
792 Plugins that do not implement ``CRDTPlugin`` fall back to the existing
793 three-way ``merge()`` path.
794
795 Implementation checklist for plugin authors
796 -------------------------------------------
797 1. Override ``schema()`` to return a :class:`~muse.core.schema.DomainSchema`
798 with ``merge_mode="crdt"`` and :class:`~muse.core.schema.CRDTDimensionSpec`
799 for each CRDT dimension.
800 2. Implement ``crdt_schema()`` to declare which CRDT primitive maps to each
801 dimension.
802 3. Implement ``join(a, b)`` using the CRDT primitives in
803 :mod:`muse.core.crdts`.
804 4. Implement ``to_crdt_state(snapshot)`` to lift a plain snapshot into
805 CRDT state.
806 5. Implement ``from_crdt_state(crdt)`` to materialise a CRDT state back to
807 a plain snapshot for ``muse show`` and CLI display.
808 """
809
def crdt_schema(self) -> list[CRDTDimensionSpec]:
    """Enumerate the dimensions this plugin merges with CRDT semantics.

    Each entry is a :class:`~muse.core.schema.CRDTDimensionSpec` declaring
    the CRDT type used for one dimension.  Any dimension that is absent
    from the returned list falls back to three-way merge.

    Returns:
        The CRDT dimension declarations, one per CRDT-backed dimension.
    """
    ...
821
def join(self, a: CRDTSnapshotManifest, b: CRDTSnapshotManifest) -> CRDTSnapshotManifest:
    """Compute the lattice join of two CRDT snapshots.

    Convergence — independent of message order and of how many times a
    message is delivered — rests on three properties of this operation:

    - Commutative: ``join(a, b) == join(b, a)``
    - Associative: ``join(join(a, b), c) == join(a, join(b, c))``
    - Idempotent: ``join(a, a) == a``

    Implementations should build on the CRDT primitives in
    :mod:`muse.core.crdts`, using one primitive for each declared CRDT
    dimension: join dimension by dimension, then reconstruct the ``files``
    manifest and the ``vclock`` from those per-dimension results.

    Args:
        a: First CRDT snapshot manifest to combine.
        b: Second CRDT snapshot manifest to combine.

    Returns:
        A new :class:`CRDTSnapshotManifest` holding the join of *a* and *b*.
    """
    ...
851
def to_crdt_state(self, snapshot: StateSnapshot) -> CRDTSnapshotManifest:
    """Convert a plain snapshot into its CRDT state representation.

    This is the import path for snapshots that were created before the
    plugin opted into CRDT mode.  Implementations should seed fresh CRDT
    primitives from the snapshot content and start from an empty vector
    clock.

    Args:
        snapshot: The plain :class:`StateSnapshot` to convert.

    Returns:
        A :class:`CRDTSnapshotManifest` carrying the same content together
        with empty CRDT metadata (zero vector clock, empty ``crdt_state``).
    """
    ...
867
def from_crdt_state(self, crdt: CRDTSnapshotManifest) -> StateSnapshot:
    """Project a CRDT state down to a plain snapshot.

    ``muse show``, ``muse status``, and other CLI commands rely on this
    whenever they need a standard :class:`StateSnapshot` view of a
    CRDT-mode snapshot.

    Args:
        crdt: The :class:`CRDTSnapshotManifest` to materialise.

    Returns:
        A plain :class:`StateSnapshot` containing only the visible
        (non-tombstoned) content.
    """
    ...
882
883
884 # ---------------------------------------------------------------------------
885 # Rerere optional extension — domain-aware conflict fingerprinting
886 # ---------------------------------------------------------------------------
887
888
@runtime_checkable
class RererePlugin(MuseDomainPlugin, Protocol):
    """Optional extension for plugins that provide domain-aware rerere fingerprinting.

    The default rerere fingerprint is::

        SHA-256( min(ours_id, theirs_id) + ":" + max(ours_id, theirs_id) )

    This is content-addressed and commutative, but it can fail to recognise
    "the same conflict" when the two conflicting blobs differ by surrounding
    context (e.g. tempo metadata in MIDI, or import ordering in code).

    Plugins implementing this sub-protocol can return a richer fingerprint that
    captures only the *semantically meaningful* parts of the conflict — allowing
    rerere to recognise and replay resolutions across superficially different
    but semantically identical conflicts.

    The core engine detects support at runtime via::

        isinstance(plugin, RererePlugin)

    Plugins that do not implement this fall back to the default content
    fingerprint automatically — no changes required to the core rerere engine.

    Note that an ``isinstance`` check against a runtime-checkable protocol only
    confirms that the protocol's members are present on the plugin; it does not
    validate their signatures or the format of the returned fingerprint.
    Implementations are responsible for honouring the contract documented on
    :meth:`conflict_fingerprint`.

    Example: a MIDI plugin might compute::

        ours_notes = sorted_note_events(ours)
        theirs_notes = sorted_note_events(theirs)
        SHA-256( min(ours_notes, theirs_notes) + ":" + max(ours_notes, theirs_notes) )

    so that a re-timed MIDI file that otherwise has the same musical conflict
    matches an existing rerere record.  As in the default fingerprint, the two
    sides are put in a canonical (min/max) order before hashing so that the
    result does not depend on which side is "ours" and which is "theirs".
    """

    def conflict_fingerprint(
        self,
        path: str,
        ours_id: str,
        theirs_id: str,
        repo_root: pathlib.Path,
    ) -> str:
        """Return a stable fingerprint identifying this conflict's semantic shape.

        The returned string must be exactly 64 lowercase hex characters (the
        same format as a SHA-256 digest).  If the implementation cannot produce
        a valid fingerprint (e.g. the blob is not in the local store), it should
        raise an exception — the caller will fall back to the default fingerprint.

        The result must be:

        - **Deterministic**: the same inputs always produce the same fingerprint.
        - **Commutative**: ``fingerprint(p, a, b, root) == fingerprint(p, b, a, root)``
          — the order of ours/theirs must not matter.
        - **Collision-resistant**: different conflicts should produce different
          fingerprints with high probability.

        Args:
            path: Workspace-relative POSIX path of the conflicting file.
            ours_id: SHA-256 object ID of the "ours" blob.
            theirs_id: SHA-256 object ID of the "theirs" blob.
            repo_root: Repository root for loading blob content from the store.

        Returns:
            64-char lowercase hex fingerprint.

        Raises:
            Exception: If a valid fingerprint cannot be produced; the caller
                treats this as a signal to use the default fingerprint.
        """
        ...