gabriel / muse public
plugin.py python
492 lines 18.5 KB
f288fedc perf+fix: muse status — eliminate rglob descent, stat cache, single rep… Gabriel Cardona <gabriel@tellurstori.com> 3d ago
1 """Scaffold domain plugin — copy-paste template for a new Muse domain.
2
3 How to use this file
4 --------------------
5 1. Copy this entire ``scaffold/`` directory:
6 cp -r muse/plugins/scaffold muse/plugins/<your_domain>
7
8 2. Rename ``ScaffoldPlugin`` to ``<YourDomain>Plugin`` throughout.
9
3. Replace every ``TODO`` placeholder with your domain's real implementation.
   The methods ship with working file-level defaults, and each one carries a
   detailed docstring explaining the contract.
12
13 4. Register the plugin in ``muse/plugins/registry.py``:
14 from muse.plugins.<your_domain>.plugin import <YourDomain>Plugin
15 _REGISTRY["<your_domain>"] = <YourDomain>Plugin()
16
17 5. Run ``muse init --domain <your_domain>`` in a project directory.
18
19 6. All 14 ``muse`` CLI commands work immediately — no core changes needed.
20
21 See ``docs/guide/plugin-authoring-guide.md`` for the full walkthrough including
22 Domain Schema, OT merge, and CRDT convergent merge extensions.
23
24 Protocol capabilities implemented here
25 ---------------------------------------
26 - Core: ``MuseDomainPlugin`` (required — 6 methods including ``schema()``)
27 - OT merge: ``StructuredMergePlugin`` (optional — remove if not needed)
28 - CRDT: ``CRDTPlugin`` (optional — remove if not needed)
29 """
30
31 from __future__ import annotations
32
33 import hashlib
34 import json
35 import os
36 import pathlib
37 import stat as _stat
38
39 from muse._version import __version__
40 from muse.core.crdts import ORSet, VectorClock
41 from muse.core.diff_algorithms import snapshot_diff
42 from muse.core.op_transform import merge_op_lists
43 from muse.core.stat_cache import load_cache
44 from muse.core.schema import (
45 CRDTDimensionSpec,
46 DimensionSpec,
47 DomainSchema,
48 SequenceSchema,
49 SetSchema,
50 )
51 from muse.domain import (
52 CRDTSnapshotManifest,
53 DomainOp,
54 DriftReport,
55 LiveState,
56 MergeResult,
57 SnapshotManifest,
58 StateDelta,
59 StateSnapshot,
60 StructuredDelta,
61 )
62
# ---------------------------------------------------------------------------
# TODO: replace with your domain name and the file extension(s) you version.
# ---------------------------------------------------------------------------
# Domain identifier: handed to ``resolve_patterns()`` when ignore rules are
# loaded and stamped on every manifest / schema this plugin returns.
_DOMAIN_NAME = "scaffold"
# File pattern for this domain.
# NOTE(review): nothing in this file reads ``_FILE_GLOB`` — ``snapshot()``
# records every non-hidden, non-ignored regular file regardless of extension.
# Confirm whether another module consumes it before relying on it.
_FILE_GLOB = "*.scaffold"  # e.g. "*.mid" for music, "*.fasta" for genomics
68
69
class ScaffoldPlugin:
    """Scaffold implementation — replace every ``TODO`` with real code.

    Every method ships with a working file-level default so the template is
    usable as-is; the ``TODO`` comments mark the places a real domain is
    expected to customise.

    This class satisfies all three optional protocol levels (Phases 2–4) via
    structural duck-typing — no explicit inheritance from the Protocol classes
    is needed or desired (see ``MidiPlugin`` for the reference example).

    If your domain only needs Phases 1–2, delete ``merge_ops`` and the four
    CRDT methods (``crdt_schema``, ``join``, ``to_crdt_state``,
    ``from_crdt_state``).

    See ``docs/guide/plugin-authoring-guide.md`` for detailed guidance.
    """

    # ------------------------------------------------------------------
    # MuseDomainPlugin — required core protocol
    # ------------------------------------------------------------------

    def snapshot(self, live_state: LiveState) -> StateSnapshot:
        """Capture the current working tree as a content-addressed manifest.

        When ``live_state`` is a directory path, the tree is walked with
        ``os.walk`` and every regular file is recorded under its
        workspace-relative POSIX path.  The digest for each file comes from
        the stat cache (``cache.get_cached``), which is keyed by the file's
        mtime and size.  The following entries are skipped:

        - files and directories whose name starts with ``.``
        - anything that is not a regular file (symlinks are not followed)
        - files that cannot be ``lstat``-ed
        - paths matched by the resolved ``.museignore`` patterns

        Args:
            live_state: Either a ``pathlib.Path`` pointing to the working tree
                directory, or a ``SnapshotManifest`` dict for in-memory use.

        Returns:
            A ``SnapshotManifest`` mapping workspace-relative POSIX paths to
            their content digests.  A non-path ``live_state`` is returned
            unchanged.

        Note:
            ``.museignore`` contract — ``.museignore`` lives in the repository
            root (the working tree root).  Global patterns and patterns
            under ``[domain.<name>]`` matching this plugin's domain are applied.

            Side effect: on the path branch the stat cache is pruned to the
            set of recorded files and saved before returning.
        """
        if not isinstance(live_state, pathlib.Path):
            # SnapshotManifest dict path — used by merge / diff in memory.
            return live_state

        # Imported lazily so the in-memory branch never touches ignore config.
        from muse.core.ignore import is_ignored, load_ignore_config, resolve_patterns

        workdir = live_state
        patterns = resolve_patterns(load_ignore_config(workdir), _DOMAIN_NAME)
        cache = load_cache(workdir)
        files: dict[str, str] = {}
        root_str = str(workdir)
        # Number of leading characters ("<root><sep>") to strip from each
        # walked path.  Computed with the same join rule os.walk uses so it is
        # also correct when the root already ends in a separator (e.g. "/").
        prefix_len = self._root_prefix_len(root_str)
        needs_sep_fix = os.sep != "/"

        for dirpath, dirnames, filenames in os.walk(root_str, followlinks=False):
            # In-place assignment prunes hidden directories from the descent
            # and fixes a deterministic traversal order.
            dirnames[:] = sorted(d for d in dirnames if not d.startswith("."))
            for fname in sorted(filenames):
                if fname.startswith("."):
                    continue
                abs_str = os.path.join(dirpath, fname)
                try:
                    st = os.lstat(abs_str)
                except OSError:
                    # File vanished or is unreadable — best effort, skip it.
                    continue
                if not _stat.S_ISREG(st.st_mode):
                    continue
                rel = abs_str[prefix_len:]
                if needs_sep_fix:
                    rel = rel.replace(os.sep, "/")
                if is_ignored(rel, patterns):
                    continue
                files[rel] = cache.get_cached(rel, abs_str, st.st_mtime, st.st_size)

        cache.prune(set(files))
        cache.save()
        return SnapshotManifest(files=files, domain=_DOMAIN_NAME)

    def diff(
        self,
        base: StateSnapshot,
        target: StateSnapshot,
        *,
        repo_root: pathlib.Path | None = None,
    ) -> StateDelta:
        """Compute the typed operation list between two snapshots.

        For a file-level implementation this is set algebra on the ``files``
        dict: paths in target but not base → ``InsertOp``, paths in base but
        not target → ``DeleteOp``, paths in both with different hashes →
        ``ReplaceOp``.

        For sub-file granularity (Phases 2–3), parse each file and diff its
        internal elements using ``diff_by_schema()`` from
        ``muse.core.diff_algorithms``.

        Args:
            base: Snapshot of the earlier state (e.g. HEAD).
            target: Snapshot of the later state (e.g. working tree).
            repo_root: Accepted for protocol compatibility; this default
                implementation does not use it.

        Returns:
            A ``StructuredDelta`` whose ``ops`` list describes every change.
        """
        # snapshot_diff provides the "auto diff" promised by Phase 2: any plugin
        # that declares a DomainSchema can call this instead of writing file-set
        # algebra from scratch.  For sub-file granularity, build PatchOps on top.
        return snapshot_diff(self.schema(), base, target)

    def merge(
        self,
        base: StateSnapshot,
        left: StateSnapshot,
        right: StateSnapshot,
        *,
        repo_root: pathlib.Path | None = None,
    ) -> MergeResult:
        """Three-way merge at file granularity (fallback for cherry-pick etc.).

        Implements standard three-way logic, per path:
        - left and right agree → use the consensus (including "both deleted")
        - only one side changed → take that side
        - both sides changed differently → conflict

        For a conflicting path the merged manifest keeps a placeholder: the
        left value, or the right value if left deleted the file, or the base
        value as a last resort.

        If you implement OT merge (``merge_ops``), this method is only called
        for ``muse cherry-pick`` and other non-OT operations.

        Args:
            base: Common ancestor snapshot.
            left: Snapshot from the current branch (ours).
            right: Snapshot from the incoming branch (theirs).
            repo_root: Path to the repository root for ``.museattributes``.
                ``None`` in tests and non-file-system contexts.  This default
                implementation does not read it.

        Returns:
            A ``MergeResult`` with the ``merged`` snapshot and the sorted
            ``conflicts`` path list.  ``applied_strategies`` and
            ``dimension_reports`` are not passed and keep whatever defaults
            ``MergeResult`` defines.
        """
        base_files = base["files"]
        left_files = left["files"]
        right_files = right["files"]

        # Start from base so untouched entries keep their original ordering.
        merged: dict[str, str] = dict(base_files)
        conflicts: list[str] = []

        for path in sorted(set(base_files) | set(left_files) | set(right_files)):
            b_val = base_files.get(path)
            l_val = left_files.get(path)
            r_val = right_files.get(path)

            if l_val == r_val or b_val == r_val:
                # Both sides agree, or only left changed — left's value wins.
                chosen = l_val
            elif b_val == l_val:
                # Only right changed.
                chosen = r_val
            else:
                # Both changed differently — conflict; keep left as placeholder.
                conflicts.append(path)
                chosen = l_val or r_val or b_val or ""

            # ``None`` means the winning side deleted the file.
            if chosen is None:
                merged.pop(path, None)
            else:
                merged[path] = chosen

        return MergeResult(
            merged=SnapshotManifest(files=merged, domain=_DOMAIN_NAME),
            conflicts=conflicts,
        )

    def drift(self, committed: StateSnapshot, live: LiveState) -> DriftReport:
        """Report how much the working tree has drifted from the last commit.

        Called by ``muse status``.  Snapshots ``live``, diffs it against
        ``committed`` and wraps the outcome in a ``DriftReport``.

        Args:
            committed: The last committed snapshot.
            live: Current live state (path or snapshot manifest).

        Returns:
            A ``DriftReport`` whose ``has_drift`` is true when the delta holds
            at least one op, with the delta's ``summary`` and the delta itself.
        """
        delta = self.diff(committed, self.snapshot(live))
        return DriftReport(
            has_drift=len(delta["ops"]) > 0,
            summary=delta["summary"],
            delta=delta,
        )

    def apply(self, delta: StateDelta, live_state: LiveState) -> LiveState:
        """Apply a delta to the working tree.

        Called by ``muse checkout`` after the core engine has already restored
        file-level objects from the object store.  Use this hook for any
        domain-level post-processing (e.g. recompiling derived artefacts,
        updating an index).

        For most domains this is a no-op — the core engine handles file
        restoration and nothing more is needed.

        Args:
            delta: The typed operation list to apply (ignored by this default).
            live_state: Current live state.

        Returns:
            ``live_state`` itself, unchanged.
        """
        # TODO: add domain-level post-processing if needed.
        return live_state

    # ------------------------------------------------------------------
    # Domain schema — required
    # ------------------------------------------------------------------

    def schema(self) -> DomainSchema:
        """Declare the structural shape of this domain's data.

        The schema drives diff algorithm selection, the ``muse domains``
        capability display, and routing between three-way and CRDT merge.

        Returns:
            A ``DomainSchema`` describing the top-level element type, semantic
            dimensions, merge mode, and schema version.
        """
        # TODO: replace with your domain's actual elements and dimensions.
        primary_dimension = DimensionSpec(
            name="primary",
            description=(
                "Primary data dimension. "
                "TODO: rename and describe what this dimension represents."
            ),
            schema=SequenceSchema(
                kind="sequence",
                element_type="record",  # TODO: rename
                identity="by_position",
                diff_algorithm="lcs",
                alphabet=None,
            ),
            independent_merge=True,
        )
        metadata_dimension = DimensionSpec(
            name="metadata",
            description=(
                "Metadata / annotation dimension. "
                "TODO: rename or remove if not applicable."
            ),
            schema=SetSchema(
                kind="set",
                element_type="label",  # TODO: rename
                identity="by_content",
            ),
            independent_merge=True,
        )
        return DomainSchema(
            domain=_DOMAIN_NAME,
            description=(
                "Scaffold domain — replace this description with your domain's purpose. "
                "TODO: update domain, description, top_level, and dimensions."
            ),
            top_level=SetSchema(
                kind="set",
                element_type="record",  # TODO: rename to your element type
                identity="by_content",
            ),
            dimensions=[primary_dimension, metadata_dimension],
            merge_mode="three_way",  # TODO: change to "crdt" if implementing CRDT convergent merge
            schema_version=__version__,
        )

    # ------------------------------------------------------------------
    # StructuredMergePlugin — optional OT merge extension
    # Remove this method if your domain does not need sub-file OT merge.
    # (Protocols are satisfied structurally, so there is no base class to
    # remove.)
    # ------------------------------------------------------------------

    def merge_ops(
        self,
        base: StateSnapshot,
        ours_snap: StateSnapshot,
        theirs_snap: StateSnapshot,
        ours_ops: list[DomainOp],
        theirs_ops: list[DomainOp],
        *,
        repo_root: pathlib.Path | None = None,
    ) -> MergeResult:
        """Operation-level three-way merge using Operational Transformation.

        The core engine calls this when both branches have a ``StructuredDelta``.
        ``merge_op_lists`` determines which ops commute (auto-mergeable) and
        which conflict (need human resolution).

        The merged snapshot itself still comes from the file-level ``merge``.
        Conflicts are the addresses of our side's conflicting ops; when OT
        reports none, the file-level conflict list is returned instead so a
        placeholder entry in ``merged`` is never reported as clean.

        Args:
            base: Common ancestor snapshot.
            ours_snap: Our branch's final snapshot.
            theirs_snap: Their branch's final snapshot.
            ours_ops: Our branch's typed operation list.
            theirs_ops: Their branch's typed operation list.
            repo_root: Repository root path for ``.museattributes`` loading;
                forwarded to ``merge``.

        Returns:
            A ``MergeResult`` whose ``conflicts`` list is empty only when
            neither the OT pass nor the file-level merge found a conflict.
        """
        result = merge_op_lists(
            base_ops=[],
            ours_ops=ours_ops,
            theirs_ops=theirs_ops,
        )

        # De-duplicate by address; sorted for deterministic output.
        conflicts: list[str] = (
            sorted({our_op["address"] for our_op, _their_op in result.conflict_ops})
            if result.conflict_ops
            else []
        )

        # TODO: reconstruct the merged snapshot from merged_ops for finer
        # granularity.  This fallback re-runs the file-level three-way merge
        # and prefers the OT conflict list whenever it is non-empty.
        fallback = self.merge(base, ours_snap, theirs_snap, repo_root=repo_root)
        return MergeResult(
            merged=fallback.merged,
            conflicts=conflicts if conflicts else fallback.conflicts,
            applied_strategies=fallback.applied_strategies,
            dimension_reports=fallback.dimension_reports,
        )

    # ------------------------------------------------------------------
    # CRDTPlugin — optional convergent merge extension
    # Remove these four methods if your domain does not need convergent
    # multi-agent join semantics.
    # ------------------------------------------------------------------

    def crdt_schema(self) -> list[CRDTDimensionSpec]:
        """Declare which dimensions use which CRDT primitive.

        Returns:
            One ``CRDTDimensionSpec`` per CRDT-enabled dimension.
        """
        # TODO: replace with your domain's CRDT dimensions.
        return [
            CRDTDimensionSpec(
                name="labels",
                description="Annotation labels — concurrent adds win.",
                crdt_type="or_set",
                independent_merge=True,
            ),
        ]

    def join(
        self,
        a: CRDTSnapshotManifest,
        b: CRDTSnapshotManifest,
    ) -> CRDTSnapshotManifest:
        """Convergent join of two CRDT snapshot manifests.

        ``join`` always succeeds — no conflict state ever exists.  The vector
        clocks are merged and the ``labels`` OR-Set states (stored as JSON
        strings under ``crdt_state``) are joined; a missing ``labels`` entry is
        treated as ``"{}"``.

        Args:
            a: First CRDT snapshot manifest.
            b: Second CRDT snapshot manifest.

        Returns:
            The joined manifest (least upper bound of ``a`` and ``b``).
        """
        # TODO: join each CRDT dimension declared in crdt_schema().
        merged_vc = VectorClock.from_dict(a["vclock"]).merge(
            VectorClock.from_dict(b["vclock"])
        )

        # ORSet stores per-label OR-Set state serialised as JSON strings.
        labels_a = ORSet.from_dict(json.loads(a["crdt_state"].get("labels", "{}")))
        labels_b = ORSet.from_dict(json.loads(b["crdt_state"].get("labels", "{}")))
        merged_labels = labels_a.join(labels_b)

        # NOTE(review): only ``a``'s file manifest is carried forward, so the
        # result depends on argument order whenever ``a["files"]`` and
        # ``b["files"]`` differ.  Confirm callers guarantee identical files or
        # add a file-level join here.
        return CRDTSnapshotManifest(
            files=a["files"],
            domain=_DOMAIN_NAME,
            vclock=merged_vc.to_dict(),
            crdt_state={"labels": json.dumps(merged_labels.to_dict())},
            schema_version=__version__,
        )

    def to_crdt_state(self, snapshot: StateSnapshot) -> CRDTSnapshotManifest:
        """Lift a plain snapshot into CRDT state.

        Called when merging a snapshot produced before CRDT mode was enabled,
        or when bootstrapping CRDT state for the first time.

        Args:
            snapshot: A plain ``SnapshotManifest``.

        Returns:
            A ``CRDTSnapshotManifest`` carrying the snapshot's files, a fresh
            vector clock and an empty ``labels`` OR-Set.
        """
        return CRDTSnapshotManifest(
            files=snapshot["files"],
            domain=_DOMAIN_NAME,
            vclock=VectorClock().to_dict(),
            crdt_state={"labels": json.dumps(ORSet().to_dict())},
            schema_version=__version__,
        )

    def from_crdt_state(self, crdt: CRDTSnapshotManifest) -> StateSnapshot:
        """Materialise a CRDT manifest back into a plain snapshot.

        Called after a CRDT join to produce the snapshot the core engine writes
        to the commit record.  Only ``files`` is kept; the vector clock and
        CRDT state are dropped.

        Args:
            crdt: A ``CRDTSnapshotManifest``.

        Returns:
            A plain ``SnapshotManifest``.
        """
        return SnapshotManifest(files=crdt["files"], domain=_DOMAIN_NAME)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _root_prefix_len(root_str: str) -> int:
        """Return how many leading characters to strip to relativise a path.

        ``os.path.join(root, "")`` appends a separator only when ``root`` does
        not already end in one — the same rule ``os.walk`` applies when it
        builds child paths — so the result is right for ``"/repo"`` (6) and
        for a root that is itself a separator, such as ``"/"`` (1).

        Args:
            root_str: The walk root exactly as passed to ``os.walk``.

        Returns:
            Length of ``root_str`` including exactly one trailing separator.
        """
        return len(os.path.join(root_str, ""))