cgcardona / muse public
domain.py python
444 lines 16.9 KB
53d2d9ce feat(phase-3): operation-level merge engine — OT-based auto-merge for n… Gabriel Cardona <cgcardona@gmail.com> 2d ago
1 """MuseDomainPlugin — the six-interface protocol that defines a Muse domain.
2
3 Muse provides the DAG engine, content-addressed object store, branching,
4 lineage walking, topological log graph, and merge base finder. A domain plugin
5 implements these six interfaces and Muse does the rest.
6
7 The music plugin (``muse.plugins.music``) is the reference implementation.
8 Every other domain — scientific simulation, genomics, 3D spatial design,
9 spacetime — is a new plugin.
10
11 Phase 1 — Typed Delta Algebra
12 ------------------------------
13 ``StateDelta`` is now a ``StructuredDelta`` carrying a typed operation list
14 rather than the old opaque ``{added, removed, modified}`` path lists. Each
15 operation knows its kind (insert / delete / move / replace / patch), the
16 address it touched, and a content-addressed ID for the before/after content.
17
18 This replaces ``DeltaManifest`` entirely. Plugins that previously returned
19 ``DeltaManifest`` must now return ``StructuredDelta``.
20
21 Phase 2 — Domain Schema & Diff Algorithm Library
22 -------------------------------------------------
23 ``schema()`` is now the sixth protocol method. Plugins return a
24 ``DomainSchema`` declaring their data structure. The core engine uses this
25 declaration to drive diff algorithm selection via
26 :func:`~muse.core.diff_algorithms.diff_by_schema`, and the merge engine
27 (Phase 3) will use it for informed conflict detection.
28
29 Phase 3 — Operation-Level Merge Engine
30 ---------------------------------------
31 Plugins may optionally implement :class:`StructuredMergePlugin`, a sub-protocol
32 that adds ``merge_ops()``. When both branches have produced ``StructuredDelta``
33 from ``diff()``, the merge engine checks
34 ``isinstance(plugin, StructuredMergePlugin)`` and calls ``merge_ops()`` for
35 fine-grained, operation-level conflict detection. Non-supporting plugins fall
36 back to the existing file-level ``merge()`` path.
37 """
38 from __future__ import annotations
39
40 import pathlib
41 from dataclasses import dataclass, field
42 from typing import TYPE_CHECKING, Literal, Protocol, TypedDict, runtime_checkable
43
44 if TYPE_CHECKING:
45 from muse.core.schema import DomainSchema
46
47
48 # ---------------------------------------------------------------------------
49 # Snapshot types (unchanged from pre-Phase-1)
50 # ---------------------------------------------------------------------------
51
52
class SnapshotManifest(TypedDict):
    """Content-addressed manifest describing one snapshot of domain state.

    Keys:
        files: Maps each workspace-relative POSIX path to the SHA-256 digest
            of that file's content.
        domain: Identifies the plugin that produced the snapshot.
    """

    # path -> SHA-256 content digest
    files: dict[str, str]
    # identifier of the producing plugin
    domain: str
62
63
64 # ---------------------------------------------------------------------------
65 # Typed delta algebra — Phase 1
66 # ---------------------------------------------------------------------------
67
#: Address of a location inside the state graph, expressed as a plain string.
#: File-level ops use a workspace-relative POSIX path; sub-file ops use a
#: domain-specific coordinate such as ``"note:42"``.
DomainAddress = str
72
73
class InsertOp(TypedDict):
    """Operation recording that an element was added to a collection.

    Keys:
        op: Always the literal ``"insert"``.
        address: Location the operation touched.
        position: Integer index of the insertion for ordered sequences;
            ``None`` for unordered sets.
        content_id: SHA-256 of the inserted content. For file-level ops this
            is a blob already in the object store; for sub-file ops it is a
            deterministic hash of the element's canonical serialisation.
        content_summary: Short textual description of the inserted content
            (presumably meant for human-readable display — confirm with
            the code that renders it).
    """

    op: Literal["insert"]
    address: DomainAddress
    position: int | None
    content_id: str
    content_summary: str
89
90
class DeleteOp(TypedDict):
    """Operation recording that an element was removed from a collection.

    The keys have the same names and types as those of ``InsertOp``, with
    ``op`` fixed to the literal ``"delete"``.

    NOTE(review): ``position``, ``content_id`` and ``content_summary``
    presumably describe the element that was removed — confirm against the
    code that produces these ops.
    """

    op: Literal["delete"]
    address: DomainAddress
    position: int | None
    content_id: str
    content_summary: str
99
100
class MoveOp(TypedDict):
    """Operation recording that an element changed position in an ordered sequence.

    Keys:
        op: Always the literal ``"move"``.
        address: Location the operation touched.
        from_position: Integer index the element was moved from.
        to_position: Integer index the element was moved to.
        content_id: Content-addressed ID associated with the op (presumably
            the hash of the moved element — confirm with producers).
    """

    op: Literal["move"]
    address: DomainAddress
    from_position: int
    to_position: int
    content_id: str
109
110
class ReplaceOp(TypedDict):
    """Operation recording an atomic, leaf-level change of an element's value.

    Keys:
        op: Always the literal ``"replace"``.
        address: Location the operation touched.
        position: Integer index, or ``None`` (presumably ``None`` for
            unordered collections, as with ``InsertOp`` — confirm).
        old_content_id: Content-addressed ID of the value before the change.
        new_content_id: Content-addressed ID of the value after the change.
        old_summary: Textual summary paired with the old content.
        new_summary: Textual summary paired with the new content.
    """

    op: Literal["replace"]
    address: DomainAddress
    position: int | None
    old_content_id: str
    new_content_id: str
    old_summary: str
    new_summary: str
121
122
#: Union of the four leaf (non-recursive) operation types.
LeafDomainOp = (
    InsertOp
    | DeleteOp
    | MoveOp
    | ReplaceOp
)
125
126
class PatchOp(TypedDict):
    """Operation recording an internal modification of a container element.

    Keys:
        op: Always the literal ``"patch"``.
        address: Names the container, e.g. a file path.
        child_ops: The sub-element changes made inside that container. The
            design notes for this type state that Phase 1 emits only leaf
            ops here and that Phase 3 introduces true recursion via a
            nested ``StructuredDelta`` when the operation-level merge
            engine requires it; the annotation is ``list[DomainOp]``.
        child_domain: Identifies the sub-element domain, e.g.
            ``"midi_notes"`` for note-level ops inside a ``.mid`` file.
        child_summary: Human-readable description of the child changes,
            used by ``muse show``.
    """

    op: Literal["patch"]
    address: DomainAddress
    child_ops: list[DomainOp]
    child_domain: str
    child_summary: str
145
146
#: Every operation type — the leaf ops plus ``PatchOp``. These are the atoms
#: that make up a ``StructuredDelta``.
DomainOp = (
    LeafDomainOp
    | PatchOp
)
149
150
class StructuredDelta(TypedDict):
    """Rich, composable description of the change between two domain snapshots.

    The core engine stores the delta alongside commit records so that
    ``muse show`` and ``muse diff`` can render it without reloading full
    blobs.

    Keys:
        domain: Domain identifier string for the delta.
        ops: Ordered operations that turn ``base`` into ``target`` when
            applied in sequence.
        summary: Precomputed human-readable text such as
            ``"3 notes added, 1 note removed"``. Plugins produce it because
            only they understand their domain's semantics.
    """

    domain: str
    ops: list[DomainOp]
    summary: str
167
168
169 # ---------------------------------------------------------------------------
170 # Type aliases used in the protocol signatures
171 # ---------------------------------------------------------------------------
172
#: Live state: either a working-directory path or a manifest dict that has
#: already been snapshotted. The music plugin accepts both forms — a Path for
#: CLI commit/status, a SnapshotManifest for in-memory merge and diff.
LiveState = SnapshotManifest | pathlib.Path

#: Immutable, content-addressed snapshot of state at one point in time.
StateSnapshot = SnapshotManifest

#: Minimal change between two snapshots, as a list of typed domain operations.
StateDelta = StructuredDelta
183
184
185 # ---------------------------------------------------------------------------
186 # Merge and drift result types
187 # ---------------------------------------------------------------------------
188
189
@dataclass
class MergeResult:
    """Result of a three-way merge between two divergent lines of state.

    Attributes:
        merged: The reconciled snapshot.
        conflicts: Workspace-relative file paths that could not be merged
            automatically and need manual resolution. An empty list means
            the merge was clean. Formatting user-facing messages from these
            paths is the CLI's responsibility.
        applied_strategies: For each path where a ``.museattributes`` rule
            overrode the default conflict behaviour, the strategy that was
            applied.
        dimension_reports: Per-dimension resolution detail, keyed by
            conflicting path.
        op_log: Ordered ``DomainOp`` entries applied to produce ``merged``.
            Empty for file-level merges; populated by plugins that implement
            operation-level merge (Phase 3).
    """

    merged: StateSnapshot
    # Container defaults use default_factory so each instance gets its own
    # fresh list/dict instead of sharing one object.
    conflicts: list[str] = field(default_factory=list)
    applied_strategies: dict[str, str] = field(default_factory=dict)
    dimension_reports: dict[str, dict[str, str]] = field(default_factory=dict)
    op_log: list[DomainOp] = field(default_factory=list)

    @property
    def is_clean(self) -> bool:
        """Return ``True`` when ``conflicts`` holds no entries."""
        unresolved_count = len(self.conflicts)
        return unresolved_count == 0
219
220
def _clean_tree_delta() -> StateDelta:
    """Build the placeholder delta used when a report is created without one."""
    return StructuredDelta(domain="", ops=[], summary="working tree clean")


@dataclass
class DriftReport:
    """Describes the gap between committed state and the current live state.

    Attributes:
        has_drift: ``True`` when the live state differs from the committed
            snapshot.
        summary: Human-readable description of what changed. Defaults to
            the empty string.
        delta: Machine-readable structured delta for programmatic
            consumers. When omitted, a fresh delta with an empty ``domain``,
            no ops and the summary ``"working tree clean"`` is created for
            each instance.
    """

    has_drift: bool
    summary: str = ""
    delta: StateDelta = field(default_factory=_clean_tree_delta)
235
236
237 # ---------------------------------------------------------------------------
238 # The plugin protocol
239 # ---------------------------------------------------------------------------
240
241
@runtime_checkable
class MuseDomainPlugin(Protocol):
    """Structural protocol listing the six methods every domain plugin provides.

    Muse supplies the rest of the version-control machinery — the DAG,
    branching, checkout, lineage walking, the ASCII log graph and the merge
    base finder. A domain that implements ``snapshot``, ``diff``, ``merge``,
    ``drift``, ``apply`` and ``schema`` gets the full Muse VCS for free.

    The protocol is ``@runtime_checkable``, so
    ``isinstance(obj, MuseDomainPlugin)`` is allowed. Such a check only
    confirms that the six members exist; it does not validate their
    signatures.

    Music is the reference implementation (``muse.plugins.music``).
    """

    def snapshot(self, live_state: LiveState) -> StateSnapshot:
        """Capture the current live state as a serialisable, hashable snapshot.

        Muse computes a SHA-256 content address from the canonical JSON form
        of the returned manifest and stores the snapshot as a blob in
        ``.muse/objects/``.

        **``.museignore`` contract** — when *live_state* is a
        ``pathlib.Path`` (the ``muse-work/`` directory), implementations
        **must** honour ``.museignore`` by calling
        :func:`muse.core.ignore.load_patterns` on the repository root and
        filtering out paths matched by :func:`muse.core.ignore.is_ignored`.

        Args:
            live_state: A working-directory path or an existing
                ``SnapshotManifest``.

        Returns:
            A ``SnapshotManifest``; it must be JSON-serialisable.
        """
        ...

    def diff(
        self,
        base: StateSnapshot,
        target: StateSnapshot,
        *,
        repo_root: pathlib.Path | None = None,
    ) -> StateDelta:
        """Compute the structured delta between two snapshots.

        Plugins should:

        1. Compute ops at the finest granularity they can interpret.
        2. Assign a meaningful ``content_summary`` string to each op.
        3. When ``repo_root`` is provided, load sub-file content from the
           object store and produce ``PatchOp`` entries with
           note/element-level ``child_ops`` instead of coarse ``ReplaceOp``
           entries.
        4. Compute a human-readable ``summary`` across all ops.

        The core engine stores the delta alongside the commit record so
        that ``muse show`` and ``muse diff`` can display it without
        reloading blobs.

        Args:
            base: Snapshot the delta starts from.
            target: Snapshot the delta leads to.
            repo_root: Optional repository root; enables sub-file diffing as
                described in step 3.

        Returns:
            A ``StructuredDelta`` whose ``ops`` is a minimal list of typed
            operations transforming ``base`` into ``target``.
        """
        ...

    def merge(
        self,
        base: StateSnapshot,
        left: StateSnapshot,
        right: StateSnapshot,
        *,
        repo_root: pathlib.Path | None = None,
    ) -> MergeResult:
        """Three-way merge two divergent state lines against a common base.

        **``.museattributes`` and multidimensional merge contract** — when
        *repo_root* is provided, implementations should:

        1. Load ``.museattributes`` via
           :func:`muse.core.attributes.load_attributes`.
        2. For each conflicting path, call
           :func:`muse.core.attributes.resolve_strategy` with the relevant
           dimension name (or ``"*"`` for file-level resolution).
        3. Apply the returned strategy:

           - ``"ours"`` — take the *left* version; remove from conflict list.
           - ``"theirs"`` — take the *right* version; remove from conflict
             list.
           - ``"manual"`` — force into conflict list even if the engine
             would auto-resolve.
           - ``"auto"`` / ``"union"`` — defer to the engine's default logic.

        4. For domain formats that support true multidimensional content
           (e.g. MIDI: melodic, rhythmic, harmonic, dynamic, structural),
           attempt sub-file dimension merge before falling back to a
           file-level conflict.

        Args:
            base: Common ancestor snapshot (the merge base).
            left: First divergent snapshot.
            right: Second divergent snapshot.
            repo_root: Optional repository root used for the
                ``.museattributes`` lookup.

        Returns:
            A ``MergeResult`` holding the reconciled snapshot and any
            unresolvable conflicts.
        """
        ...

    def drift(
        self,
        committed: StateSnapshot,
        live: LiveState,
    ) -> DriftReport:
        """Compare committed state against the current live state.

        Used by ``muse status`` to detect uncommitted changes.

        Args:
            committed: The last committed snapshot.
            live: The current live state.

        Returns:
            A ``DriftReport`` describing whether the live state has diverged
            from ``committed`` and, if so, by how much.
        """
        ...

    def apply(self, delta: StateDelta, live_state: LiveState) -> LiveState:
        """Apply a delta on top of a live state and return the resulting state.

        Used by ``muse checkout`` to reconstruct a historical state.

        The new content of an op is identified by a SHA-256 hash:
        ``InsertOp`` carries it in ``content_id`` and ``ReplaceOp`` carries
        it in ``new_content_id``. When ``live_state`` is a ``pathlib.Path``,
        the plugin reads that content from the object store. When
        ``live_state`` is a ``SnapshotManifest``, only ``DeleteOp`` and
        ``ReplaceOp`` at the file level can be applied in-memory.

        Args:
            delta: The structured delta to apply.
            live_state: The state the delta is applied to.

        Returns:
            The state that results from applying ``delta``.
        """
        ...

    def schema(self) -> DomainSchema:
        """Declare the structural schema of this domain's state.

        The core engine calls this once at plugin registration time. The
        schema drives :func:`~muse.core.diff_algorithms.diff_by_schema`
        algorithm selection and the Phase 3 merge engine's conflict
        detection.

        See :mod:`muse.core.schema` for all available element schema types.

        Returns:
            A stable, deterministic :class:`~muse.core.schema.DomainSchema`
            describing:

            - ``top_level`` — the primary collection structure (e.g. a set
              of files, a map of chromosome names to sequences).
            - ``dimensions`` — the semantic sub-dimensions of state (e.g.
              melodic, harmonic, dynamic, structural for music).
            - ``merge_mode`` — ``"three_way"`` (Phases 1–3) or ``"crdt"``
              (Phase 4).
        """
        ...
375
376
377 # ---------------------------------------------------------------------------
378 # Phase 3 optional extension — structured (operation-level) merge
379 # ---------------------------------------------------------------------------
380
381
@runtime_checkable
class StructuredMergePlugin(MuseDomainPlugin, Protocol):
    """Opt-in sub-protocol for plugins that can merge at operation level.

    A plugin that implements it gains sub-file auto-merge: two agents
    inserting notes at non-overlapping bars never produce a conflict,
    because the merge engine reasons over ``DomainOp`` trees rather than
    file paths.

    The merge engine detects support at runtime via::

        isinstance(plugin, StructuredMergePlugin)

    A plugin without ``merge_ops`` falls back to the existing file-level
    ``merge()`` path automatically — no changes are required.

    :class:`~muse.plugins.music.plugin.MusicPlugin` is the reference
    implementation for Phase 3.
    """

    def merge_ops(
        self,
        base: StateSnapshot,
        ours_snap: StateSnapshot,
        theirs_snap: StateSnapshot,
        ours_ops: list[DomainOp],
        theirs_ops: list[DomainOp],
        *,
        repo_root: pathlib.Path | None = None,
    ) -> MergeResult:
        """Merge two op lists against a common base using domain knowledge.

        The core merge engine calls this when both branches have produced a
        ``StructuredDelta`` from ``diff()``. The plugin is expected to:

        1. Call :func:`muse.core.op_transform.merge_op_lists` to detect
           conflicting ``DomainOp`` pairs.
        2. For clean pairs, build the merged ``SnapshotManifest`` by
           applying the adjusted merged ops to *base*. *ours_snap* and
           *theirs_snap* are used to look up the final content IDs of files
           touched by only one side — necessary for ``PatchOp`` entries,
           which do not carry a ``new_content_id`` directly.
        3. For conflicting pairs, consult ``.museattributes`` (when
           *repo_root* is provided) and either auto-resolve via the
           declared strategy or add the address to
           ``MergeResult.conflicts``.

        Implementations must be domain-aware: a ``.museattributes`` rule of
        ``merge=ours`` should take this plugin's understanding of "ours"
        (the left branch content), not a raw file-level copy.

        Args:
            base: Common ancestor snapshot.
            ours_snap: Final snapshot of our branch.
            theirs_snap: Final snapshot of their branch.
            ours_ops: Operations from our branch delta (base → ours).
            theirs_ops: Operations from their branch delta (base → theirs).
            repo_root: Repository root for ``.museattributes`` lookup.

        Returns:
            A :class:`MergeResult` with the reconciled snapshot and any
            remaining unresolvable conflicts.
        """
        ...