plugin.py
python
| 1 | """Scaffold domain plugin — copy-paste template for a new Muse domain. |
| 2 | |
| 3 | How to use this file |
| 4 | -------------------- |
| 5 | 1. Copy this entire ``scaffold/`` directory: |
| 6 | cp -r muse/plugins/scaffold muse/plugins/<your_domain> |
| 7 | |
| 8 | 2. Rename ``ScaffoldPlugin`` to ``<YourDomain>Plugin`` throughout. |
| 9 | |
| 10 | 3. Replace every ``TODO`` placeholder with your domain's real implementation. |
| 11 | Each method carries a detailed docstring explaining the contract. |
| 12 | |
| 13 | 4. Register the plugin in ``muse/plugins/registry.py``: |
| 14 | from muse.plugins.<your_domain>.plugin import <YourDomain>Plugin |
| 15 | _REGISTRY["<your_domain>"] = <YourDomain>Plugin() |
| 16 | |
| 17 | 5. Run ``muse init --domain <your_domain>`` in a project directory. |
| 18 | |
| 19 | 6. All 14 ``muse`` CLI commands work immediately — no core changes needed. |
| 20 | |
| 21 | See ``docs/guide/plugin-authoring-guide.md`` for the full walkthrough including |
| 22 | Domain Schema, OT merge, and CRDT convergent merge extensions. |
| 23 | |
| 24 | Protocol capabilities implemented here |
| 25 | --------------------------------------- |
| 26 | - Core: ``MuseDomainPlugin`` (required — 6 methods including ``schema()``) |
| 27 | - OT merge: ``StructuredMergePlugin`` (optional — remove if not needed) |
| 28 | - CRDT: ``CRDTPlugin`` (optional — remove if not needed) |
| 29 | """ |
| 30 | |
| 31 | from __future__ import annotations |
| 32 | |
| 33 | import hashlib |
| 34 | import json |
| 35 | import os |
| 36 | import pathlib |
| 37 | import stat as _stat |
| 38 | |
| 39 | from muse._version import __version__ |
| 40 | from muse.core.crdts import ORSet, VectorClock |
| 41 | from muse.core.diff_algorithms import snapshot_diff |
| 42 | from muse.core.op_transform import merge_op_lists |
| 43 | from muse.core.stat_cache import load_cache |
| 44 | from muse.core.schema import ( |
| 45 | CRDTDimensionSpec, |
| 46 | DimensionSpec, |
| 47 | DomainSchema, |
| 48 | SequenceSchema, |
| 49 | SetSchema, |
| 50 | ) |
| 51 | from muse.domain import ( |
| 52 | CRDTSnapshotManifest, |
| 53 | DomainOp, |
| 54 | DriftReport, |
| 55 | LiveState, |
| 56 | MergeResult, |
| 57 | SnapshotManifest, |
| 58 | StateDelta, |
| 59 | StateSnapshot, |
| 60 | StructuredDelta, |
| 61 | ) |
| 62 | |
# ---------------------------------------------------------------------------
# TODO: replace with your domain name and the file extension(s) you version.
#
# _DOMAIN_NAME is the identifier ScaffoldPlugin stamps into the ``domain``
# field of every manifest and schema it builds, and the name it passes to the
# ignore-pattern resolver inside ``snapshot``.
#
# _FILE_GLOB documents the file pattern this domain is meant to version.
# NOTE(review): it is not referenced anywhere else in the visible part of this
# file — ``snapshot`` currently records every non-hidden, non-ignored regular
# file; confirm whether filtering by this glob is intended.
# ---------------------------------------------------------------------------
_DOMAIN_NAME = "scaffold"
_FILE_GLOB = "*.scaffold"  # e.g. "*.mid" for music, "*.fasta" for genomics
| 68 | |
| 69 | |
class ScaffoldPlugin:
    """Scaffold implementation — replace every ``TODO`` with real domain code.

    This class satisfies all three optional protocol levels (Phases 2–4) via
    structural duck-typing — no explicit inheritance from the Protocol classes
    is needed or desired (see ``MidiPlugin`` for the reference example).

    If your domain only needs Phases 1–2, delete ``merge_ops`` and the four
    CRDT methods (``crdt_schema``, ``join``, ``to_crdt_state``,
    ``from_crdt_state``).

    See ``docs/guide/plugin-authoring-guide.md`` for detailed guidance.
    """

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _relative_posix(abs_str: str, root_str: str) -> str:
        """Return ``abs_str`` relative to ``root_str`` with ``/`` separators.

        Args:
            abs_str: Absolute path of a file located under ``root_str``.
            root_str: Working tree root, as produced by ``str(pathlib.Path)``.

        Returns:
            The workspace-relative path using forward slashes.
        """
        # A filesystem root ("/" or "C:\\") already ends with the separator.
        # Skipping one extra character there would chop the first letter off
        # every relative path, so only skip the separator when it is not
        # already part of the root string.
        skip = len(root_str) if root_str.endswith(os.sep) else len(root_str) + 1
        rel = abs_str[skip:]
        if os.sep != "/":
            rel = rel.replace(os.sep, "/")
        return rel

    # ------------------------------------------------------------------
    # MuseDomainPlugin — required core protocol
    # ------------------------------------------------------------------

    def snapshot(self, live_state: LiveState) -> StateSnapshot:
        """Capture the current working tree as a content-addressed manifest.

        When ``live_state`` is a ``pathlib.Path`` the directory is walked
        (symlinked directories are not followed), hidden files and hidden
        directories are skipped, anything that is not a regular file is
        skipped, and paths matched by the resolved ``.museignore`` patterns
        are excluded. Each remaining file's digest is obtained from the stat
        cache (``cache.get_cached``), which is pruned to the surviving paths
        and saved before returning.

        Args:
            live_state: Either a ``pathlib.Path`` pointing to the working tree
                directory, or a ``SnapshotManifest`` dict for in-memory use.

        Returns:
            A ``SnapshotManifest`` mapping workspace-relative POSIX paths to
            their content digests (documented by the template as SHA-256).
            A non-``Path`` ``live_state`` is returned unchanged.

        Note:
            ``.museignore`` contract — the ignore configuration is loaded from
            the working tree root and resolved for this plugin's domain name,
            so global patterns and patterns under ``[domain.<name>]`` apply.
        """
        if not isinstance(live_state, pathlib.Path):
            # SnapshotManifest dict path — used by merge / diff in memory.
            return live_state

        from muse.core.ignore import is_ignored, load_ignore_config, resolve_patterns

        workdir = live_state
        patterns = resolve_patterns(load_ignore_config(workdir), _DOMAIN_NAME)
        cache = load_cache(workdir)
        files: dict[str, str] = {}
        root_str = str(workdir)

        for dirpath, dirnames, filenames in os.walk(root_str, followlinks=False):
            # Prune hidden directories in place so os.walk never descends into
            # them; sorting keeps the traversal order deterministic.
            dirnames[:] = sorted(d for d in dirnames if not d.startswith("."))
            for fname in sorted(filenames):
                if fname.startswith("."):
                    continue
                abs_str = os.path.join(dirpath, fname)
                try:
                    st = os.lstat(abs_str)
                except OSError:
                    # The entry could not be stat'ed (e.g. it vanished
                    # mid-walk); best-effort: leave it out of the snapshot.
                    continue
                if not _stat.S_ISREG(st.st_mode):
                    # lstat does not follow symlinks, so links and other
                    # special files are excluded here.
                    continue
                rel = self._relative_posix(abs_str, root_str)
                if is_ignored(rel, patterns):
                    continue
                files[rel] = cache.get_cached(rel, abs_str, st.st_mtime, st.st_size)

        cache.prune(set(files))
        cache.save()
        return SnapshotManifest(files=files, domain=_DOMAIN_NAME)

    def diff(
        self,
        base: StateSnapshot,
        target: StateSnapshot,
        *,
        repo_root: pathlib.Path | None = None,
    ) -> StateDelta:
        """Compute the typed operation list between two snapshots.

        For a file-level implementation this is set algebra on the ``files``
        dict: paths in target but not base → ``InsertOp``, paths in base but
        not target → ``DeleteOp``, paths in both with different hashes →
        ``ReplaceOp``.

        For sub-file granularity (Phases 2–3), parse each file and diff its
        internal elements using ``diff_by_schema()`` from
        ``muse.core.diff_algorithms``.

        Args:
            base: Snapshot of the earlier state (e.g. HEAD).
            target: Snapshot of the later state (e.g. working tree).
            repo_root: Accepted for protocol compatibility; not used by this
                scaffold implementation.

        Returns:
            A ``StructuredDelta`` whose ``ops`` list describes every change.
        """
        # snapshot_diff provides the "auto diff" promised by Phase 2: any plugin
        # that declares a DomainSchema can call this instead of writing file-set
        # algebra from scratch. For sub-file granularity, build PatchOps on top.
        return snapshot_diff(self.schema(), base, target)

    def merge(
        self,
        base: StateSnapshot,
        left: StateSnapshot,
        right: StateSnapshot,
        *,
        repo_root: pathlib.Path | None = None,
    ) -> MergeResult:
        """Three-way merge at file granularity (fallback for cherry-pick etc.).

        Implements standard three-way logic per path:
        - left and right agree → use the consensus (including "both deleted")
        - only one side changed → take that side
        - both sides changed differently → conflict

        If you implement OT merge (``merge_ops``), this method is only called
        for ``muse cherry-pick`` and other non-OT operations.

        Args:
            base: Common ancestor snapshot.
            left: Snapshot from the current branch (ours).
            right: Snapshot from the incoming branch (theirs).
            repo_root: Path to the repository root for ``.museattributes``.
                ``None`` in tests and non-file-system contexts. Not consulted
                by this scaffold implementation.

        Returns:
            A ``MergeResult`` carrying the ``merged`` snapshot and the sorted
            ``conflicts`` path list. ``applied_strategies`` and
            ``dimension_reports`` are not passed here and keep whatever
            defaults ``MergeResult`` defines.
        """
        base_files = base["files"]
        left_files = left["files"]
        right_files = right["files"]

        merged: dict[str, str] = dict(base_files)
        conflicts: list[str] = []

        # Sorted iteration keeps the conflict list deterministic.
        for path in sorted(set(base_files) | set(left_files) | set(right_files)):
            b_val = base_files.get(path)
            l_val = left_files.get(path)
            r_val = right_files.get(path)

            if l_val == r_val or b_val == r_val:
                # Both sides agree, or only left changed — left's value wins.
                winner = l_val
            elif b_val == l_val:
                # Only right changed.
                winner = r_val
            else:
                # Both changed differently — conflict. Keep left as the
                # placeholder, falling back to right/base when left deleted.
                conflicts.append(path)
                merged[path] = l_val or r_val or b_val or ""
                continue

            if winner is None:
                # The winning side deleted the file.
                merged.pop(path, None)
            else:
                merged[path] = winner

        return MergeResult(
            merged=SnapshotManifest(files=merged, domain=_DOMAIN_NAME),
            conflicts=conflicts,
        )

    def drift(self, committed: StateSnapshot, live: LiveState) -> DriftReport:
        """Report how much the working tree has drifted from the last commit.

        Called by ``muse status``. Snapshots ``live``, diffs it against
        ``committed``, and wraps the result in a ``DriftReport``.

        Args:
            committed: The last committed snapshot.
            live: Current live state (path or snapshot manifest).

        Returns:
            A ``DriftReport`` whose ``has_drift`` is true when the delta has at
            least one op, with the delta's ``summary`` and the delta itself.
        """
        current = self.snapshot(live)
        delta = self.diff(committed, current)
        return DriftReport(
            has_drift=len(delta["ops"]) > 0,
            summary=delta["summary"],
            delta=delta,
        )

    def apply(self, delta: StateDelta, live_state: LiveState) -> LiveState:
        """Apply a delta to the working tree.

        Called by ``muse checkout`` after the core engine has already restored
        file-level objects from the object store. Use this hook for any
        domain-level post-processing (e.g. recompiling derived artefacts,
        updating an index).

        For most domains this is a no-op — the core engine handles file
        restoration and nothing more is needed.

        Args:
            delta: The typed operation list to apply (ignored by the scaffold).
            live_state: Current live state.

        Returns:
            ``live_state`` unchanged.
        """
        # TODO: add domain-level post-processing if needed.
        return live_state

    # ------------------------------------------------------------------
    # Domain schema — required
    # ------------------------------------------------------------------

    def schema(self) -> DomainSchema:
        """Declare the structural shape of this domain's data.

        The schema drives diff algorithm selection, the ``muse domains``
        capability display, and routing between three-way and CRDT merge.

        Returns:
            A ``DomainSchema`` describing the top-level element type, semantic
            dimensions, merge mode, and schema version.
        """
        # TODO: replace with your domain's actual elements and dimensions.
        primary = DimensionSpec(
            name="primary",
            description=(
                "Primary data dimension. "
                "TODO: rename and describe what this dimension represents."
            ),
            schema=SequenceSchema(
                kind="sequence",
                element_type="record",  # TODO: rename
                identity="by_position",
                diff_algorithm="lcs",
                alphabet=None,
            ),
            independent_merge=True,
        )
        metadata = DimensionSpec(
            name="metadata",
            description=(
                "Metadata / annotation dimension. "
                "TODO: rename or remove if not applicable."
            ),
            schema=SetSchema(
                kind="set",
                element_type="label",  # TODO: rename
                identity="by_content",
            ),
            independent_merge=True,
        )
        return DomainSchema(
            domain=_DOMAIN_NAME,
            description=(
                "Scaffold domain — replace this description with your domain's purpose. "
                "TODO: update domain, description, top_level, and dimensions."
            ),
            top_level=SetSchema(
                kind="set",
                element_type="record",  # TODO: rename to your element type
                identity="by_content",
            ),
            dimensions=[primary, metadata],
            merge_mode="three_way",  # TODO: change to "crdt" if implementing CRDT convergent merge
            schema_version=__version__,
        )

    # ------------------------------------------------------------------
    # StructuredMergePlugin — optional OT merge extension
    # Delete this method if your domain does not need sub-file OT merge.
    # (The protocol is satisfied by duck-typing; there is no base class to
    # remove.)
    # ------------------------------------------------------------------

    def merge_ops(
        self,
        base: StateSnapshot,
        ours_snap: StateSnapshot,
        theirs_snap: StateSnapshot,
        ours_ops: list[DomainOp],
        theirs_ops: list[DomainOp],
        *,
        repo_root: pathlib.Path | None = None,
    ) -> MergeResult:
        """Operation-level three-way merge using Operational Transformation.

        The core engine calls this when both branches have a ``StructuredDelta``.
        ``merge_op_lists`` determines which ops commute (auto-mergeable) and
        which conflict (need human resolution).

        Args:
            base: Common ancestor snapshot.
            ours_snap: Our branch's final snapshot.
            theirs_snap: Their branch's final snapshot.
            ours_ops: Our branch's typed operation list.
            theirs_ops: Their branch's typed operation list.
            repo_root: Repository root path, forwarded to ``merge``.

        Returns:
            A ``MergeResult`` whose merged snapshot comes from the file-level
            ``merge``. Its ``conflicts`` are the sorted, de-duplicated
            addresses of our side's conflicting ops when OT reports any;
            otherwise the file-level merge's own conflict list is used.
        """
        result = merge_op_lists(
            base_ops=[],
            ours_ops=ours_ops,
            theirs_ops=theirs_ops,
        )

        # One entry per distinct address on our side of each conflicting pair.
        conflicts: list[str] = sorted(
            {our_op["address"] for our_op, _their_op in (result.conflict_ops or ())}
        )

        # TODO: reconstruct the merged snapshot from merged_ops for finer
        # granularity. This fallback re-runs the file-level three-way merge
        # and prefers the OT conflict list whenever it is non-empty.
        fallback = self.merge(base, ours_snap, theirs_snap, repo_root=repo_root)
        return MergeResult(
            merged=fallback.merged,
            conflicts=conflicts if conflicts else fallback.conflicts,
            applied_strategies=fallback.applied_strategies,
            dimension_reports=fallback.dimension_reports,
        )

    # ------------------------------------------------------------------
    # CRDTPlugin — optional convergent merge extension
    # Delete these four methods if your domain does not need convergent
    # multi-agent join semantics. (The protocol is satisfied by duck-typing;
    # there is no base class to remove.)
    # ------------------------------------------------------------------

    def crdt_schema(self) -> list[CRDTDimensionSpec]:
        """Declare which dimensions use which CRDT primitive.

        Returns:
            One ``CRDTDimensionSpec`` per CRDT-enabled dimension.
        """
        # TODO: replace with your domain's CRDT dimensions.
        return [
            CRDTDimensionSpec(
                name="labels",
                description="Annotation labels — concurrent adds win.",
                crdt_type="or_set",
                independent_merge=True,
            ),
        ]

    def join(
        self,
        a: CRDTSnapshotManifest,
        b: CRDTSnapshotManifest,
    ) -> CRDTSnapshotManifest:
        """Convergent join of two CRDT snapshot manifests.

        ``join`` always succeeds — no conflict state ever exists. The vector
        clocks are merged and the ``labels`` OR-Set states are joined.

        Args:
            a: First CRDT snapshot manifest.
            b: Second CRDT snapshot manifest.

        Returns:
            A manifest with the merged vector clock and joined ``labels``
            state, intended as the least upper bound of ``a`` and ``b``.
        """
        # TODO: join each CRDT dimension declared in crdt_schema().
        merged_vc = VectorClock.from_dict(a["vclock"]).merge(
            VectorClock.from_dict(b["vclock"])
        )

        # The "labels" OR-Set state is stored as a JSON string inside
        # crdt_state; a missing entry is decoded from "{}".
        labels_a = ORSet.from_dict(json.loads(a["crdt_state"].get("labels", "{}")))
        labels_b = ORSet.from_dict(json.loads(b["crdt_state"].get("labels", "{}")))
        merged_labels = labels_a.join(labels_b)

        # NOTE(review): ``files`` is taken from ``a`` only, so the file map of
        # the result depends on argument order — confirm this is intended
        # before relying on join(a, b) == join(b, a) for files.
        return CRDTSnapshotManifest(
            files=a["files"],
            domain=_DOMAIN_NAME,
            vclock=merged_vc.to_dict(),
            crdt_state={"labels": json.dumps(merged_labels.to_dict())},
            schema_version=__version__,
        )

    def to_crdt_state(self, snapshot: StateSnapshot) -> CRDTSnapshotManifest:
        """Lift a plain snapshot into CRDT state.

        Called when merging a snapshot produced before CRDT mode was enabled,
        or when bootstrapping CRDT state for the first time.

        Args:
            snapshot: A plain ``SnapshotManifest``.

        Returns:
            A ``CRDTSnapshotManifest`` carrying the snapshot's ``files`` plus a
            freshly constructed ``VectorClock`` and ``ORSet`` (serialised).
        """
        return CRDTSnapshotManifest(
            files=snapshot["files"],
            domain=_DOMAIN_NAME,
            vclock=VectorClock().to_dict(),
            crdt_state={"labels": json.dumps(ORSet().to_dict())},
            schema_version=__version__,
        )

    def from_crdt_state(self, crdt: CRDTSnapshotManifest) -> StateSnapshot:
        """Materialise a CRDT manifest back into a plain snapshot.

        Called after a CRDT join to produce the snapshot the core engine writes
        to the commit record. Only ``files`` is carried over; the vector clock
        and CRDT state are dropped.

        Args:
            crdt: A ``CRDTSnapshotManifest``.

        Returns:
            A plain ``SnapshotManifest``.
        """
        return SnapshotManifest(files=crdt["files"], domain=_DOMAIN_NAME)