cgcardona / muse public
plugin.py python
456 lines 17.0 KB
d1dfb412 feat: implement 3 missing plan items — property tests, format_version, … Gabriel Cardona <gabriel@tellurstori.com> 1d ago
1 """Scaffold domain plugin — copy-paste template for a new Muse domain.
2
3 How to use this file
4 --------------------
5 1. Copy this entire ``scaffold/`` directory:
6 cp -r muse/plugins/scaffold muse/plugins/<your_domain>
7
8 2. Rename ``ScaffoldPlugin`` to ``<YourDomain>Plugin`` throughout.
9
3. Work through every ``TODO`` marker and replace the placeholder logic with
   your real implementation. Each method carries a detailed docstring
   explaining the contract.
12
13 4. Register the plugin in ``muse/plugins/registry.py``:
14 from muse.plugins.<your_domain>.plugin import <YourDomain>Plugin
15 _REGISTRY["<your_domain>"] = <YourDomain>Plugin()
16
17 5. Run ``muse init --domain <your_domain>`` in a project directory.
18
19 6. All 14 ``muse`` CLI commands work immediately — no core changes needed.
20
21 See ``docs/guide/plugin-authoring-guide.md`` for the full walkthrough including
22 Domain Schema, OT merge, and CRDT convergent merge extensions.
23
24 Protocol capabilities implemented here
25 ---------------------------------------
26 - Core: ``MuseDomainPlugin`` (required — 6 methods including ``schema()``)
27 - OT merge: ``StructuredMergePlugin`` (optional — remove if not needed)
28 - CRDT: ``CRDTPlugin`` (optional — remove if not needed)
29 """
30 from __future__ import annotations
31
32 import hashlib
33 import json
34 import pathlib
35
36 from muse.core.crdts import ORSet, VectorClock
37 from muse.core.diff_algorithms import snapshot_diff
38 from muse.core.op_transform import merge_op_lists
39 from muse.core.schema import (
40 CRDTDimensionSpec,
41 DimensionSpec,
42 DomainSchema,
43 SequenceSchema,
44 SetSchema,
45 )
46 from muse.domain import (
47 CRDTSnapshotManifest,
48 DomainOp,
49 DriftReport,
50 LiveState,
51 MergeResult,
52 SnapshotManifest,
53 StateDelta,
54 StateSnapshot,
55 StructuredDelta,
56 )
57
# ---------------------------------------------------------------------------
# TODO: replace with your domain name and the file extension(s) you version.
# ---------------------------------------------------------------------------
# Domain identifier written into the ``domain`` field of every manifest and of
# the ``DomainSchema`` that this plugin builds.
_DOMAIN_NAME = "scaffold"
# Glob pattern handed to ``Path.rglob`` by ``snapshot()`` to select the files
# that get hashed into the manifest.
_FILE_GLOB = "*.scaffold"  # e.g. "*.mid" for music, "*.fasta" for genomics
63
64
class ScaffoldPlugin:
    """Scaffold implementation — work through every ``TODO`` and add real code.

    This class satisfies all three optional protocol levels (Phases 2–4) via
    structural duck-typing — no explicit inheritance from the Protocol classes
    is needed or desired (see ``MidiPlugin`` for the reference example).

    If your domain only needs Phases 1–2, delete ``merge_ops`` and the four
    CRDT methods (together with the private ``_load_labels`` helper they use).

    See ``docs/guide/plugin-authoring-guide.md`` for detailed guidance.
    """

    # ------------------------------------------------------------------
    # MuseDomainPlugin — required core protocol
    # ------------------------------------------------------------------

    def snapshot(self, live_state: LiveState) -> StateSnapshot:
        """Capture the current working tree as a content-addressed manifest.

        Walk every domain file under ``live_state`` and hash its raw bytes with
        SHA-256. Returns a ``SnapshotManifest`` with ``files`` and ``domain``.

        Only regular files are hashed: directories (or dangling symlinks) whose
        name happens to match the glob are skipped instead of raising.

        Args:
            live_state: Either a ``pathlib.Path`` pointing to the working tree
                directory, or a ``SnapshotManifest`` dict for in-memory use.

        Returns:
            A ``SnapshotManifest`` mapping workspace-relative POSIX paths to
            their SHA-256 content digests. When ``live_state`` is already a
            manifest it is returned unchanged.
        """
        if not isinstance(live_state, pathlib.Path):
            # SnapshotManifest dict path — used by merge / diff in memory
            return live_state

        files: dict[str, str] = {}
        for path in sorted(live_state.rglob(_FILE_GLOB)):
            # rglob matches on the name only, so a directory called
            # e.g. ``cache.scaffold`` would match too; it cannot be hashed.
            if not path.is_file():
                continue
            digest = hashlib.sha256(path.read_bytes()).hexdigest()
            # as_posix() keeps manifest keys identical on every platform;
            # str() would produce backslash-separated keys on Windows.
            files[path.relative_to(live_state).as_posix()] = digest
        return SnapshotManifest(files=files, domain=_DOMAIN_NAME)

    def diff(
        self,
        base: StateSnapshot,
        target: StateSnapshot,
        *,
        repo_root: pathlib.Path | None = None,
    ) -> StateDelta:
        """Compute the typed operation list between two snapshots.

        For a file-level implementation this is set algebra on the ``files``
        dict: paths in target but not base → ``InsertOp``, paths in base but
        not target → ``DeleteOp``, paths in both with different hashes →
        ``ReplaceOp``.

        For sub-file granularity (Phases 2–3), parse each file and diff its
        internal elements using ``diff_by_schema()`` from
        ``muse.core.diff_algorithms``.

        Args:
            base: Snapshot of the earlier state (e.g. HEAD).
            target: Snapshot of the later state (e.g. working tree).
            repo_root: Optional repository root. Not used by this
                implementation.

        Returns:
            A ``StructuredDelta`` whose ``ops`` list describes every change.
        """
        # snapshot_diff provides the "auto diff" promised by Phase 2: any plugin
        # that declares a DomainSchema can call this instead of writing file-set
        # algebra from scratch. For sub-file granularity, build PatchOps on top.
        return snapshot_diff(self.schema(), base, target)

    def merge(
        self,
        base: StateSnapshot,
        left: StateSnapshot,
        right: StateSnapshot,
        *,
        repo_root: pathlib.Path | None = None,
    ) -> MergeResult:
        """Three-way merge at file granularity (fallback for cherry-pick etc.).

        Implements standard three-way logic:
        - left and right agree → use the consensus
        - only one side changed → take that side
        - both sides changed differently → conflict

        If you implement OT merge (``merge_ops``), this method is only called
        for ``muse cherry-pick`` and other non-OT operations.

        Args:
            base: Common ancestor snapshot.
            left: Snapshot from the current branch (ours).
            right: Snapshot from the incoming branch (theirs).
            repo_root: Path to the repository root for ``.museattributes``.
                ``None`` in tests and non-file-system contexts. Not read by
                this implementation.

        Returns:
            A ``MergeResult`` with ``merged`` snapshot, ``conflicts`` path list,
            ``applied_strategies``, and ``dimension_reports``. Only ``merged``
            and ``conflicts`` are set explicitly here.
        """
        base_files = base["files"]
        left_files = left["files"]
        right_files = right["files"]

        merged: dict[str, str] = dict(base_files)
        conflicts: list[str] = []

        all_paths = set(base_files) | set(left_files) | set(right_files)
        for path in sorted(all_paths):
            b_val = base_files.get(path)
            l_val = left_files.get(path)
            r_val = right_files.get(path)

            # ``None`` means "absent on that side"; a resolved value of ``None``
            # therefore means the path is deleted in the merge result.
            resolved: str | None
            if l_val == r_val:
                # Both sides agree — consensus wins (including both deleted)
                resolved = l_val
            elif b_val == l_val:
                # Only right changed
                resolved = r_val
            elif b_val == r_val:
                # Only left changed
                resolved = l_val
            else:
                # Both changed differently — conflict; keep left as placeholder
                # (falling back to right, then base, when left deleted the path)
                conflicts.append(path)
                resolved = l_val or r_val or b_val or ""

            if resolved is None:
                merged.pop(path, None)
            else:
                merged[path] = resolved

        return MergeResult(
            merged=SnapshotManifest(files=merged, domain=_DOMAIN_NAME),
            conflicts=conflicts,
        )

    def drift(self, committed: StateSnapshot, live: LiveState) -> DriftReport:
        """Report how much the working tree has drifted from the last commit.

        Called by ``muse status``. Produces a ``DriftReport`` dataclass with
        ``has_drift``, ``summary``, and ``delta`` fields.

        Args:
            committed: The last committed snapshot.
            live: Current live state (path or snapshot manifest).

        Returns:
            A ``DriftReport`` describing what has changed since the last commit.
        """
        current = self.snapshot(live)
        delta = self.diff(committed, current)
        # Any op at all means the working tree differs from the commit.
        has_drift = len(delta["ops"]) > 0
        return DriftReport(
            has_drift=has_drift,
            summary=delta["summary"],
            delta=delta,
        )

    def apply(self, delta: StateDelta, live_state: LiveState) -> LiveState:
        """Apply a delta to the working tree.

        Called by ``muse checkout`` after the core engine has already restored
        file-level objects from the object store. Use this hook for any
        domain-level post-processing (e.g. recompiling derived artefacts,
        updating an index).

        For most domains this is a no-op — the core engine handles file
        restoration and nothing more is needed.

        Args:
            delta: The typed operation list to apply.
            live_state: Current live state.

        Returns:
            The updated live state (here: ``live_state`` unchanged).
        """
        # TODO: add domain-level post-processing if needed.
        return live_state

    # ------------------------------------------------------------------
    # Domain schema — required
    # ------------------------------------------------------------------

    def schema(self) -> DomainSchema:
        """Declare the structural shape of this domain's data.

        The schema drives diff algorithm selection, the ``muse domains``
        capability display, and routing between three-way and CRDT merge.

        Returns:
            A ``DomainSchema`` describing the top-level element type, semantic
            dimensions, merge mode, and schema version.
        """
        # TODO: replace with your domain's actual elements and dimensions.
        return DomainSchema(
            domain=_DOMAIN_NAME,
            description=(
                "Scaffold domain — replace this description with your domain's purpose. "
                "TODO: update domain, description, top_level, and dimensions."
            ),
            top_level=SetSchema(
                kind="set",
                element_type="record",  # TODO: rename to your element type
                identity="by_content",
            ),
            dimensions=[
                DimensionSpec(
                    name="primary",
                    description=(
                        "Primary data dimension. "
                        "TODO: rename and describe what this dimension represents."
                    ),
                    schema=SequenceSchema(
                        kind="sequence",
                        element_type="record",  # TODO: rename
                        identity="by_position",
                        diff_algorithm="lcs",
                        alphabet=None,
                    ),
                    independent_merge=True,
                ),
                DimensionSpec(
                    name="metadata",
                    description=(
                        "Metadata / annotation dimension. "
                        "TODO: rename or remove if not applicable."
                    ),
                    schema=SetSchema(
                        kind="set",
                        element_type="label",  # TODO: rename
                        identity="by_content",
                    ),
                    independent_merge=True,
                ),
            ],
            merge_mode="three_way",  # TODO: change to "crdt" if implementing CRDT convergent merge
            schema_version=1,
        )

    # ------------------------------------------------------------------
    # StructuredMergePlugin — optional OT merge extension
    # Remove this method if your domain does not need sub-file OT merge.
    # (The protocol is satisfied structurally; there is no base class to drop.)
    # ------------------------------------------------------------------

    def merge_ops(
        self,
        base: StateSnapshot,
        ours_snap: StateSnapshot,
        theirs_snap: StateSnapshot,
        ours_ops: list[DomainOp],
        theirs_ops: list[DomainOp],
        *,
        repo_root: pathlib.Path | None = None,
    ) -> MergeResult:
        """Operation-level three-way merge using Operational Transformation.

        The core engine calls this when both branches have a ``StructuredDelta``.
        ``merge_op_lists`` determines which ops commute (auto-mergeable) and
        which conflict (need human resolution).

        Args:
            base: Common ancestor snapshot.
            ours_snap: Our branch's final snapshot.
            theirs_snap: Their branch's final snapshot.
            ours_ops: Our branch's typed operation list.
            theirs_ops: Their branch's typed operation list.
            repo_root: Repository root path for ``.museattributes`` loading;
                forwarded to ``merge``.

        Returns:
            A ``MergeResult`` whose ``merged`` snapshot comes from the
            file-level ``merge``. ``conflicts`` holds the sorted, de-duplicated
            addresses of our side's conflicting ops when OT reports any;
            otherwise the file-level conflict list is used.
        """
        result = merge_op_lists(
            base_ops=[],
            ours_ops=ours_ops,
            theirs_ops=theirs_ops,
        )

        # Report each conflicting address once, in a stable order.
        conflicts: list[str] = []
        if result.conflict_ops:
            conflicts = sorted(
                {our_op["address"] for our_op, _their_op in result.conflict_ops}
            )

        # TODO: reconstruct the merged snapshot from merged_ops for finer
        # granularity. This fallback re-runs the file-level three-way merge
        # and uses the OT conflict list as the authoritative conflict set.
        # NOTE(review): when OT reports no conflicts the file-level conflict
        # list is still returned, because ``merged`` is file-level and keeps a
        # placeholder for any path both sides changed — revisit once the
        # snapshot is rebuilt from ops.
        fallback = self.merge(base, ours_snap, theirs_snap, repo_root=repo_root)
        return MergeResult(
            merged=fallback.merged,
            conflicts=conflicts if conflicts else fallback.conflicts,
            applied_strategies=fallback.applied_strategies,
            dimension_reports=fallback.dimension_reports,
        )

    # ------------------------------------------------------------------
    # CRDTPlugin — optional convergent merge extension
    # Remove these methods if your domain does not need convergent
    # multi-agent join semantics.
    # (The protocol is satisfied structurally; there is no base class to drop.)
    # ------------------------------------------------------------------

    def crdt_schema(self) -> list[CRDTDimensionSpec]:
        """Declare which dimensions use which CRDT primitive.

        Returns:
            One ``CRDTDimensionSpec`` per CRDT-enabled dimension.
        """
        # TODO: replace with your domain's CRDT dimensions.
        return [
            CRDTDimensionSpec(
                name="labels",
                description="Annotation labels — concurrent adds win.",
                crdt_type="or_set",
                independent_merge=True,
            ),
        ]

    @staticmethod
    def _load_labels(manifest: CRDTSnapshotManifest) -> ORSet:
        """Deserialise the ``labels`` OR-Set stored in ``manifest``.

        A manifest without a ``labels`` entry yields a fresh ``ORSet()`` — the
        same empty state that ``to_crdt_state`` serialises — instead of passing
        an empty dict to ``ORSet.from_dict``.
        """
        raw = manifest["crdt_state"].get("labels")
        if raw is None:
            return ORSet()
        return ORSet.from_dict(json.loads(raw))

    def join(
        self,
        a: CRDTSnapshotManifest,
        b: CRDTSnapshotManifest,
    ) -> CRDTSnapshotManifest:
        """Convergent join of two CRDT snapshot manifests.

        ``join`` always succeeds — no conflict state ever exists.

        Args:
            a: First CRDT snapshot manifest.
            b: Second CRDT snapshot manifest.

        Returns:
            The joined manifest: merged vector clock and joined ``labels``
            OR-Set, with ``files`` taken from ``a``.
        """
        # TODO: join each CRDT dimension declared in crdt_schema().
        vc_a = VectorClock.from_dict(a["vclock"])
        vc_b = VectorClock.from_dict(b["vclock"])
        merged_vc = vc_a.merge(vc_b)

        # ORSet stores per-label OR-Set state serialised as JSON strings
        merged_labels = self._load_labels(a).join(self._load_labels(b))

        # NOTE(review): ``files`` is copied from ``a`` only, so ``b``'s file map
        # never influences the result and the output is not symmetric in
        # ``a``/``b`` when their files differ. TODO: choose a file-level join
        # policy for your domain.
        return CRDTSnapshotManifest(
            files=a["files"],
            domain=_DOMAIN_NAME,
            vclock=merged_vc.to_dict(),
            crdt_state={"labels": json.dumps(merged_labels.to_dict())},
            schema_version=1,
        )

    def to_crdt_state(self, snapshot: StateSnapshot) -> CRDTSnapshotManifest:
        """Lift a plain snapshot into CRDT state.

        Called when merging a snapshot produced before CRDT mode was enabled,
        or when bootstrapping CRDT state for the first time.

        Args:
            snapshot: A plain ``SnapshotManifest``.

        Returns:
            A ``CRDTSnapshotManifest`` with empty CRDT state: a fresh vector
            clock and an empty ``labels`` OR-Set.
        """
        return CRDTSnapshotManifest(
            files=snapshot["files"],
            domain=_DOMAIN_NAME,
            vclock=VectorClock().to_dict(),
            crdt_state={"labels": json.dumps(ORSet().to_dict())},
            schema_version=1,
        )

    def from_crdt_state(self, crdt: CRDTSnapshotManifest) -> StateSnapshot:
        """Materialise a CRDT manifest back into a plain snapshot.

        Called after a CRDT join to produce the snapshot the core engine writes
        to the commit record. Only ``files`` is carried over; the vector clock
        and CRDT state are dropped.

        Args:
            crdt: A ``CRDTSnapshotManifest``.

        Returns:
            A plain ``SnapshotManifest``.
        """
        return SnapshotManifest(files=crdt["files"], domain=_DOMAIN_NAME)