muse/plugins/scaffold/plugin.py · cgcardona/muse

1

"""Scaffold domain plugin — copy-paste template for a new Muse domain.

2

3

How to use this file

4

--------------------

5

1. Copy this entire ``scaffold/`` directory:

6

cp -r muse/plugins/scaffold muse/plugins/<your_domain>

7

8

2. Rename ``ScaffoldPlugin`` to ``<YourDomain>Plugin`` throughout.

9

10

3. Work through every ``# TODO`` marker (and any remaining ``raise NotImplementedError(...)`` stub), replacing it with a real implementation.

11

Each method carries a detailed docstring explaining the contract.

12

13

4. Register the plugin in ``muse/plugins/registry.py``:

14

from muse.plugins.<your_domain>.plugin import <YourDomain>Plugin

15

_REGISTRY["<your_domain>"] = <YourDomain>Plugin()

16

17

5. Run ``muse init --domain <your_domain>`` in a project directory.

18

19

6. All 14 ``muse`` CLI commands work immediately — no core changes needed.

20

21

See ``docs/guide/plugin-authoring-guide.md`` for the full walkthrough including

22

Domain Schema, OT merge, and CRDT convergent merge extensions.

23

24

Protocol capabilities implemented here

25

---------------------------------------

26

- Core: ``MuseDomainPlugin`` (required — 6 methods including ``schema()``)

27

- OT merge: ``StructuredMergePlugin`` (optional — remove if not needed)

28

- CRDT: ``CRDTPlugin`` (optional — remove if not needed)

29

"""

30

from __future__ import annotations

import hashlib

import json

import pathlib

from muse.core.crdts import ORSet, VectorClock

37

from muse.core.diff_algorithms import snapshot_diff

38

from muse.core.op_transform import merge_op_lists

39

from muse.core.schema import (

CRDTDimensionSpec,

DimensionSpec,

DomainSchema,

SequenceSchema,

SetSchema,

)

from muse.domain import (

47

CRDTSnapshotManifest,

DomainOp,

DriftReport,

LiveState,

MergeResult,

SnapshotManifest,

StateDelta,

StateSnapshot,

StructuredDelta,

)

# ---------------------------------------------------------------------------

59

# TODO: replace with your domain name and the file extension(s) you version.

60

# ---------------------------------------------------------------------------

61

# Domain identifier passed as ``domain=`` to the manifests and the schema
# built in this file.
_DOMAIN_NAME = "scaffold"

62

# Recursive glob pattern that ``snapshot()`` uses to select the files to hash
# when it is given a working-tree path.
_FILE_GLOB = "*.scaffold" # e.g. "*.mid" for music, "*.fasta" for genomics

63

64

65

class ScaffoldPlugin:
    """Scaffold implementation — a working file-level template for a new domain.

    Every method below has a functioning default; work through the ``TODO``
    markers and replace the placeholder values with your domain's real ones.

    The class provides the core plugin methods plus the optional OT-merge and
    CRDT methods via structural duck-typing — no explicit inheritance from the
    Protocol classes is needed or desired (see ``MidiPlugin`` for the
    reference example).

    If your domain only needs Phases 1–2, delete ``merge_ops`` and the four
    CRDT methods (``crdt_schema``, ``join``, ``to_crdt_state``,
    ``from_crdt_state``).

    See ``docs/guide/plugin-authoring-guide.md`` for detailed guidance.
    """

    # ------------------------------------------------------------------
    # MuseDomainPlugin — required core protocol
    # ------------------------------------------------------------------

    @staticmethod
    def _hash_files(root: pathlib.Path, pattern: str) -> dict[str, str]:
        """Map every regular file under ``root`` matching ``pattern`` to its SHA-256.

        Args:
            root: Directory to walk recursively.
            pattern: Glob pattern handed to ``Path.rglob``.

        Returns:
            A dict keyed by the file's path relative to ``root`` in POSIX form
            (forward slashes on every platform), valued by the hex SHA-256
            digest of the file's raw bytes. Entries are inserted in sorted
            path order.
        """
        return {
            path.relative_to(root).as_posix(): hashlib.sha256(path.read_bytes()).hexdigest()
            for path in sorted(root.rglob(pattern))
            # rglob also yields directories whose *name* matches the pattern;
            # reading those would raise, so only regular files are hashed.
            if path.is_file()
        }

    def snapshot(self, live_state: LiveState) -> StateSnapshot:
        """Capture the current working tree as a content-addressed manifest.

        Args:
            live_state: Either a ``pathlib.Path`` pointing to the working tree
                directory, or an already-built manifest for in-memory use.

        Returns:
            For a ``pathlib.Path``: a ``SnapshotManifest`` mapping
            workspace-relative POSIX paths of files matching ``_FILE_GLOB`` to
            their SHA-256 content digests. For anything else: ``live_state``
            itself, returned unchanged.
        """
        if not isinstance(live_state, pathlib.Path):
            # Manifest path — used by merge / diff in memory; pass through.
            return live_state

        return SnapshotManifest(
            files=self._hash_files(live_state, _FILE_GLOB),
            domain=_DOMAIN_NAME,
        )

    def diff(
        self,
        base: StateSnapshot,
        target: StateSnapshot,
        *,
        repo_root: pathlib.Path | None = None,
    ) -> StateDelta:
        """Compute the typed operation list between two snapshots.

        For a file-level implementation this is set algebra on the ``files``
        dict: paths in target but not base → ``InsertOp``, paths in base but
        not target → ``DeleteOp``, paths in both with different hashes →
        ``ReplaceOp``.

        For sub-file granularity (Phases 2–3), parse each file and diff its
        internal elements using ``diff_by_schema()`` from
        ``muse.core.diff_algorithms``.

        Args:
            base: Snapshot of the earlier state (e.g. HEAD).
            target: Snapshot of the later state (e.g. working tree).
            repo_root: Accepted for interface compatibility; not used by this
                implementation.

        Returns:
            The delta produced by ``snapshot_diff`` for this plugin's schema.
        """
        # snapshot_diff provides the "auto diff" promised by Phase 2: any plugin
        # that declares a DomainSchema can call this instead of writing file-set
        # algebra from scratch. For sub-file granularity, build PatchOps on top.
        return snapshot_diff(self.schema(), base, target)

    def merge(
        self,
        base: StateSnapshot,
        left: StateSnapshot,
        right: StateSnapshot,
        *,
        repo_root: pathlib.Path | None = None,
    ) -> MergeResult:
        """Three-way merge at file granularity (fallback for cherry-pick etc.).

        Per path, comparing content hashes (a missing path counts as ``None``):

        - left and right agree → use the consensus (including "both deleted")
        - only one side differs from base → take that side
        - both sides differ from base and from each other → conflict

        If you implement OT merge (``merge_ops``), this method is only called
        for ``muse cherry-pick`` and other non-OT operations.

        Args:
            base: Common ancestor snapshot.
            left: Snapshot from the current branch (ours).
            right: Snapshot from the incoming branch (theirs).
            repo_root: Path to the repository root for ``.museattributes``.
                Not used by this scaffold implementation.

        Returns:
            A ``MergeResult`` carrying the ``merged`` manifest and the sorted
            list of conflicting paths; all other fields keep their defaults.
        """
        base_files = base["files"]
        left_files = left["files"]
        right_files = right["files"]

        merged: dict[str, str] = dict(base_files)
        conflicts: list[str] = []

        for path in sorted(set(base_files) | set(left_files) | set(right_files)):
            ancestor = base_files.get(path)
            ours = left_files.get(path)
            theirs = right_files.get(path)

            if ours == theirs or ancestor == theirs:
                # Both sides agree, or only ours changed — ours is the result.
                winner = ours
            elif ancestor == ours:
                # Only theirs changed.
                winner = theirs
            else:
                # Both changed differently — record the conflict and keep a
                # placeholder: ours when present, otherwise theirs, then base.
                conflicts.append(path)
                winner = ours or theirs or ancestor or ""

            if winner is None:
                # The winning side deleted the path.
                merged.pop(path, None)
            else:
                merged[path] = winner

        return MergeResult(
            merged=SnapshotManifest(files=merged, domain=_DOMAIN_NAME),
            conflicts=conflicts,
        )

    def drift(self, committed: StateSnapshot, live: LiveState) -> DriftReport:
        """Report how much the working tree has drifted from the last commit.

        Called by ``muse status``. Snapshots ``live``, diffs it against
        ``committed``, and wraps the outcome in a ``DriftReport``.

        Args:
            committed: The last committed snapshot.
            live: Current live state (path or snapshot manifest).

        Returns:
            A ``DriftReport`` whose ``has_drift`` is true when the delta has at
            least one op, with the delta's ``summary`` and the delta itself.
        """
        delta = self.diff(committed, self.snapshot(live))
        return DriftReport(
            has_drift=bool(delta["ops"]),
            summary=delta["summary"],
            delta=delta,
        )

    def apply(self, delta: StateDelta, live_state: LiveState) -> LiveState:
        """Apply a delta to the working tree.

        Called by ``muse checkout`` after the core engine has already restored
        file-level objects from the object store. Use this hook for any
        domain-level post-processing (e.g. recompiling derived artefacts,
        updating an index).

        This default is a no-op: ``delta`` is ignored.

        Args:
            delta: The typed operation list to apply.
            live_state: Current live state.

        Returns:
            ``live_state``, unchanged.
        """
        # TODO: add domain-level post-processing if needed.
        return live_state

    # ------------------------------------------------------------------
    # Domain schema — required
    # ------------------------------------------------------------------

    def schema(self) -> DomainSchema:
        """Declare the structural shape of this domain's data.

        The schema drives diff algorithm selection, the ``muse domains``
        capability display, and routing between three-way and CRDT merge.

        Returns:
            A ``DomainSchema`` describing the top-level element type, two
            placeholder dimensions (``primary`` and ``metadata``), the merge
            mode, and the schema version.
        """
        # TODO: replace with your domain's actual elements and dimensions.
        return DomainSchema(
            domain=_DOMAIN_NAME,
            description=(
                "Scaffold domain — replace this description with your domain's purpose. "
                "TODO: update domain, description, top_level, and dimensions."
            ),
            top_level=SetSchema(
                kind="set",
                element_type="record",  # TODO: rename to your element type
                identity="by_content",
            ),
            dimensions=[
                DimensionSpec(
                    name="primary",
                    description=(
                        "Primary data dimension. "
                        "TODO: rename and describe what this dimension represents."
                    ),
                    schema=SequenceSchema(
                        kind="sequence",
                        element_type="record",  # TODO: rename
                        identity="by_position",
                        diff_algorithm="lcs",
                        alphabet=None,
                    ),
                    independent_merge=True,
                ),
                DimensionSpec(
                    name="metadata",
                    description=(
                        "Metadata / annotation dimension. "
                        "TODO: rename or remove if not applicable."
                    ),
                    schema=SetSchema(
                        kind="set",
                        element_type="label",  # TODO: rename
                        identity="by_content",
                    ),
                    independent_merge=True,
                ),
            ],
            merge_mode="three_way",  # TODO: change to "crdt" if implementing CRDT convergent merge
            schema_version=1,
        )

    # ------------------------------------------------------------------
    # StructuredMergePlugin — optional OT merge extension
    # Remove this method if your domain does not need sub-file OT merge.
    # ------------------------------------------------------------------

    def merge_ops(
        self,
        base: StateSnapshot,
        ours_snap: StateSnapshot,
        theirs_snap: StateSnapshot,
        ours_ops: list[DomainOp],
        theirs_ops: list[DomainOp],
        *,
        repo_root: pathlib.Path | None = None,
    ) -> MergeResult:
        """Operation-level three-way merge using Operational Transformation.

        The core engine calls this when both branches have a ``StructuredDelta``.
        ``merge_op_lists`` determines which ops commute (auto-mergeable) and
        which conflict (need human resolution).

        Args:
            base: Common ancestor snapshot.
            ours_snap: Our branch's final snapshot.
            theirs_snap: Their branch's final snapshot.
            ours_ops: Our branch's typed operation list.
            theirs_ops: Their branch's typed operation list.
            repo_root: Repository root path; forwarded to ``merge``.

        Returns:
            A ``MergeResult`` whose ``merged`` snapshot, ``applied_strategies``
            and ``dimension_reports`` come from the file-level ``merge``. Its
            ``conflicts`` are the sorted, de-duplicated ``address`` values of
            our side of each conflicting op pair, or the file-level conflicts
            when OT reports none.
        """
        result = merge_op_lists(
            base_ops=[],
            ours_ops=ours_ops,
            theirs_ops=theirs_ops,
        )

        # One entry per distinct address touched by our side of a conflict.
        ot_conflicts: list[str] = sorted(
            {ours_op["address"] for ours_op, _theirs_op in (result.conflict_ops or ())}
        )

        # TODO: reconstruct the merged snapshot from merged_ops for finer
        # granularity. This fallback re-runs the file-level three-way merge;
        # the OT conflict list takes precedence when it is non-empty, otherwise
        # the file-level conflicts are reported.
        fallback = self.merge(base, ours_snap, theirs_snap, repo_root=repo_root)
        return MergeResult(
            merged=fallback.merged,
            conflicts=ot_conflicts or fallback.conflicts,
            applied_strategies=fallback.applied_strategies,
            dimension_reports=fallback.dimension_reports,
        )

    # ------------------------------------------------------------------
    # CRDTPlugin — optional convergent merge extension
    # Remove these four methods if your domain does not need convergent
    # multi-agent join semantics.
    # ------------------------------------------------------------------

    def crdt_schema(self) -> list[CRDTDimensionSpec]:
        """Declare which dimensions use which CRDT primitive.

        Returns:
            One ``CRDTDimensionSpec`` per CRDT-enabled dimension; this
            template declares a single ``labels`` dimension of type
            ``or_set``.
        """
        # TODO: replace with your domain's CRDT dimensions.
        return [
            CRDTDimensionSpec(
                name="labels",
                description="Annotation labels — concurrent adds win.",
                crdt_type="or_set",
                independent_merge=True,
            ),
        ]

    def join(
        self,
        a: CRDTSnapshotManifest,
        b: CRDTSnapshotManifest,
    ) -> CRDTSnapshotManifest:
        """Convergent join of two CRDT snapshot manifests.

        Merges the two vector clocks and joins the ``labels`` OR-Set state;
        no conflict list is produced.

        Args:
            a: First CRDT snapshot manifest.
            b: Second CRDT snapshot manifest.

        Returns:
            A new manifest with the merged vector clock and joined ``labels``
            state, ``files`` copied from ``a``, and ``schema_version`` 1.
        """
        # TODO: join each CRDT dimension declared in crdt_schema().
        clock_a = VectorClock.from_dict(a["vclock"])
        clock_b = VectorClock.from_dict(b["vclock"])
        joined_clock = clock_a.merge(clock_b)

        # The "labels" OR-Set state is stored as a JSON string; a manifest
        # without that key is treated as an empty JSON object.
        labels_a = ORSet.from_dict(json.loads(a["crdt_state"].get("labels", "{}")))
        labels_b = ORSet.from_dict(json.loads(b["crdt_state"].get("labels", "{}")))
        joined_labels = labels_a.join(labels_b)

        # NOTE(review): ``files`` is taken from ``a`` alone, so join(a, b) and
        # join(b, a) can differ in ``files`` — confirm this is intended, or
        # merge the file maps with an order-independent rule.
        return CRDTSnapshotManifest(
            files=a["files"],
            domain=_DOMAIN_NAME,
            vclock=joined_clock.to_dict(),
            crdt_state={"labels": json.dumps(joined_labels.to_dict())},
            schema_version=1,
        )

    def to_crdt_state(self, snapshot: StateSnapshot) -> CRDTSnapshotManifest:
        """Lift a plain snapshot into CRDT state.

        Called when merging a snapshot produced before CRDT mode was enabled,
        or when bootstrapping CRDT state for the first time.

        Args:
            snapshot: A plain ``SnapshotManifest``.

        Returns:
            A ``CRDTSnapshotManifest`` carrying the snapshot's ``files``, a
            fresh ``VectorClock`` and a fresh ``ORSet`` for ``labels``.
        """
        return CRDTSnapshotManifest(
            files=snapshot["files"],
            domain=_DOMAIN_NAME,
            vclock=VectorClock().to_dict(),
            crdt_state={"labels": json.dumps(ORSet().to_dict())},
            schema_version=1,
        )

    def from_crdt_state(self, crdt: CRDTSnapshotManifest) -> StateSnapshot:
        """Materialise a CRDT manifest back into a plain snapshot.

        Called after a CRDT join to produce the snapshot the core engine writes
        to the commit record. Only ``files`` is carried over; the vector clock
        and CRDT state are dropped.

        Args:
            crdt: A ``CRDTSnapshotManifest``.

        Returns:
            A plain ``SnapshotManifest``.
        """
        return SnapshotManifest(files=crdt["files"], domain=_DOMAIN_NAME)