muse/plugins/scaffold/plugin.py · gabriel/muse

1

"""Scaffold domain plugin — copy-paste template for a new Muse domain.

How to use this file
--------------------
1. Copy this entire ``scaffold/`` directory:

       cp -r muse/plugins/scaffold muse/plugins/<your_domain>

2. Rename ``ScaffoldPlugin`` to ``<YourDomain>Plugin`` throughout.

3. Replace every ``# TODO`` placeholder (and any ``raise NotImplementedError(...)``
   stub) with a real implementation.
   Each method carries a detailed docstring explaining the contract.

4. Register the plugin in ``muse/plugins/registry.py``:

       from muse.plugins.<your_domain>.plugin import <YourDomain>Plugin
       _REGISTRY["<your_domain>"] = <YourDomain>Plugin()

5. Run ``muse init --domain <your_domain>`` in a project directory.

6. All 14 ``muse`` CLI commands work immediately — no core changes needed.

See ``docs/guide/plugin-authoring-guide.md`` for the full walkthrough including
Domain Schema, OT merge, and CRDT convergent merge extensions.

Protocol capabilities implemented here
---------------------------------------
- Core: ``MuseDomainPlugin`` (required — 6 methods including ``schema()``)
- OT merge: ``StructuredMergePlugin`` (optional — remove if not needed)
- CRDT: ``CRDTPlugin`` (optional — remove if not needed)
"""

30

31

from __future__ import annotations

import hashlib

import json

import os

import pathlib

import stat as _stat

from muse._version import __version__

40

from muse.core.crdts import ORSet, VectorClock

41

from muse.core.diff_algorithms import snapshot_diff

42

from muse.core.op_transform import merge_op_lists

43

from muse.core.stat_cache import load_cache

44

from muse.core.schema import (

CRDTDimensionSpec,

DimensionSpec,

DomainSchema,

SequenceSchema,

SetSchema,

)

from muse.domain import (

52

CRDTSnapshotManifest,

DomainOp,

DriftReport,

LiveState,

MergeResult,

SnapshotManifest,

StateDelta,

StateSnapshot,

StructuredDelta,

)

# ---------------------------------------------------------------------------

64

# TODO: replace with your domain name and the file extension(s) you version.

65

# ---------------------------------------------------------------------------

66

# Domain identifier. It is stamped into every manifest and schema this plugin
# builds and is passed to ``resolve_patterns`` when loading ignore rules.
_DOMAIN_NAME = "scaffold"

67

# Glob for the file extension(s) this domain versions, e.g. "*.mid" for
# music, "*.fasta" for genomics.
# NOTE(review): no code visible in this module reads _FILE_GLOB — snapshot()
# walks every non-hidden regular file. Confirm whether filtering by this glob
# is intended.
_FILE_GLOB = "*.scaffold"

68

69

70

class ScaffoldPlugin:

71

"""Scaffold implementation — replace every NotImplementedError with real code.

72

73

This class satisfies all three optional protocol levels (Phases 2–4) via

74

structural duck-typing — no explicit inheritance from the Protocol classes

75

is needed or desired (see ``MidiPlugin`` for the reference example).

76

77

If your domain only needs Phases 1–2, delete ``merge_ops`` and the four

78

CRDT methods.

79

80

See ``docs/guide/plugin-authoring-guide.md`` for detailed guidance.

81

"""

82

83

# ------------------------------------------------------------------

84

# MuseDomainPlugin — required core protocol

85

# ------------------------------------------------------------------

86

87

def snapshot(self, live_state: LiveState) -> StateSnapshot:
    """Build a content-addressed manifest of the working tree.

    When ``live_state`` is a ``pathlib.Path`` the directory tree is walked
    without following symlinked directories.  Hidden directories and files
    (names starting with ``.``), entries that cannot be ``lstat``-ed, and
    anything that is not a regular file are skipped.  Paths matched by the
    ignore patterns resolved for this plugin's domain are excluded.  Each
    surviving path is mapped to the digest returned by the stat cache's
    ``get_cached``; the cache is then pruned to the surviving paths and
    saved.

    Any other ``live_state`` value is assumed to already be a manifest and
    is returned unchanged.

    Args:
        live_state: Either a ``pathlib.Path`` pointing to the working tree
            directory, or a ``SnapshotManifest`` dict for in-memory use.

    Returns:
        A ``SnapshotManifest`` mapping workspace-relative, ``/``-separated
        paths to their content digests, tagged with this plugin's domain.

    Note:
        ``.museignore`` contract — ``.museignore`` lives in the repository
        root (the working tree root).  Global patterns and patterns under
        ``[domain.<name>]`` matching this plugin's domain are applied.
    """
    if not isinstance(live_state, pathlib.Path):
        # SnapshotManifest dict path — used by merge / diff in memory.
        return live_state

    from muse.core.ignore import is_ignored, load_ignore_config, resolve_patterns

    tree_root = live_state
    ignore_patterns = resolve_patterns(load_ignore_config(tree_root), _DOMAIN_NAME)
    stat_cache = load_cache(tree_root)
    manifest_files: dict[str, str] = {}
    root_text = str(tree_root)
    # Number of leading characters to drop: the root plus one separator.
    strip_count = len(root_text) + 1
    # Manifest keys are always "/"-separated, whatever the host OS uses.
    needs_posix = os.sep != "/"

    for current_dir, subdirs, names in os.walk(root_text, followlinks=False):
        # Slice assignment mutates the list os.walk recurses into, so hidden
        # directories are pruned and traversal order is deterministic.
        subdirs[:] = sorted(name for name in subdirs if not name.startswith("."))
        for name in sorted(names):
            if name.startswith("."):
                continue
            full_path = os.path.join(current_dir, name)
            try:
                info = os.lstat(full_path)
            except OSError:
                # Entry vanished or is unreadable — leave it out.
                continue
            if not _stat.S_ISREG(info.st_mode):
                continue
            relative = full_path[strip_count:]
            if needs_posix:
                relative = relative.replace(os.sep, "/")
            if is_ignored(relative, ignore_patterns):
                continue
            manifest_files[relative] = stat_cache.get_cached(
                relative, full_path, info.st_mtime, info.st_size
            )

    stat_cache.prune(set(manifest_files))
    stat_cache.save()
    return SnapshotManifest(files=manifest_files, domain=_DOMAIN_NAME)

def diff(
    self,
    base: StateSnapshot,
    target: StateSnapshot,
    *,
    repo_root: pathlib.Path | None = None,
) -> StateDelta:
    """Compute the typed operation list between two snapshots.

    The work is delegated to ``snapshot_diff`` from
    ``muse.core.diff_algorithms``, driven by this plugin's ``schema()``, so
    no file-set algebra is written here.  For a file-level implementation
    the expected result is: paths in target but not base → ``InsertOp``,
    paths in base but not target → ``DeleteOp``, paths in both with
    different hashes → ``ReplaceOp``.

    For sub-file granularity (Phases 2–3), parse each file and diff its
    internal elements using ``diff_by_schema()`` from
    ``muse.core.diff_algorithms``, building ``PatchOp`` entries on top.

    Args:
        base: Snapshot of the earlier state (e.g. HEAD).
        target: Snapshot of the later state (e.g. working tree).
        repo_root: Accepted for protocol compatibility; not used by this
            implementation.

    Returns:
        A ``StructuredDelta`` whose ``ops`` list describes every change.
    """
    domain_schema = self.schema()
    return snapshot_diff(domain_schema, base, target)

def merge(
    self,
    base: StateSnapshot,
    left: StateSnapshot,
    right: StateSnapshot,
    *,
    repo_root: pathlib.Path | None = None,
) -> MergeResult:
    """Three-way merge at file granularity (fallback for cherry-pick etc.).

    For every path present in any of the three snapshots a winning digest
    is chosen:

    - left and right agree → use the consensus (including "both deleted")
    - only right differs from base → take right
    - only left differs from base → take left
    - both differ from base and from each other → record a conflict and
      keep the first non-empty digest of left, right, base as placeholder

    A winning value of ``None`` means the path is removed from the result.

    If you implement OT merge (``merge_ops``), this method is only called
    for ``muse cherry-pick`` and other non-OT operations.

    Args:
        base: Common ancestor snapshot.
        left: Snapshot from the current branch (ours).
        right: Snapshot from the incoming branch (theirs).
        repo_root: Path to the repository root for ``.museattributes``.
            ``None`` in tests and non-file-system contexts.  Not used by
            this implementation.

    Returns:
        A ``MergeResult`` carrying the ``merged`` snapshot and the sorted
        ``conflicts`` path list; the remaining fields are left at
        ``MergeResult``'s defaults.
    """
    ancestor = base["files"]
    ours = left["files"]
    theirs = right["files"]

    # Start from the ancestor so untouched paths carry over as-is.
    result_files: dict[str, str] = dict(ancestor)
    conflicted: list[str] = []

    for path in sorted(set(ancestor) | set(ours) | set(theirs)):
        in_base = ancestor.get(path)
        in_ours = ours.get(path)
        in_theirs = theirs.get(path)

        if in_ours == in_theirs:
            winner = in_ours
        elif in_base == in_ours:
            winner = in_theirs
        elif in_base == in_theirs:
            winner = in_ours
        else:
            conflicted.append(path)
            # The trailing "" guarantees a conflict placeholder is never None.
            winner = in_ours or in_theirs or in_base or ""

        if winner is None:
            result_files.pop(path, None)
        else:
            result_files[path] = winner

    return MergeResult(
        merged=SnapshotManifest(files=result_files, domain=_DOMAIN_NAME),
        conflicts=conflicted,
    )

def drift(self, committed: StateSnapshot, live: LiveState) -> DriftReport:
    """Report how far the live state has moved from the last commit.

    Called by ``muse status``.  The live state is snapshotted with
    ``snapshot()`` and compared against ``committed`` with ``diff()``;
    drift exists when the resulting delta contains at least one op.

    Args:
        committed: The last committed snapshot.
        live: Current live state (path or snapshot manifest).

    Returns:
        A ``DriftReport`` with ``has_drift``, the delta's ``summary``, and
        the ``delta`` itself.
    """
    live_snapshot = self.snapshot(live)
    changes = self.diff(committed, live_snapshot)
    op_count = len(changes["ops"])
    return DriftReport(
        has_drift=op_count > 0,
        summary=changes["summary"],
        delta=changes,
    )

def apply(self, delta: StateDelta, live_state: LiveState) -> LiveState:
    """Post-process the working tree after a delta has been applied.

    Called by ``muse checkout`` after the core engine has already restored
    file-level objects from the object store.  This hook is the place for
    domain-level follow-up work such as recompiling derived artefacts or
    updating an index.

    The scaffold implementation does nothing: ``delta`` is ignored and
    ``live_state`` is handed back untouched, which is sufficient for most
    domains.

    Args:
        delta: The typed operation list to apply.
        live_state: Current live state.

    Returns:
        The live state, unchanged.
    """
    # TODO: add domain-level post-processing if needed.
    return live_state

285

286

# ------------------------------------------------------------------

287

# Domain schema — required

288

# ------------------------------------------------------------------

289

290

def schema(self) -> DomainSchema:
    """Describe the structural shape of this domain's data.

    The schema drives diff algorithm selection, the ``muse domains``
    capability display, and routing between three-way and CRDT merge.

    The placeholder schema declares a content-identified set of
    ``"record"`` elements at the top level and two independently mergeable
    dimensions: ``"primary"`` (a position-identified sequence diffed with
    ``"lcs"``) and ``"metadata"`` (a content-identified set of
    ``"label"`` elements).

    Returns:
        A ``DomainSchema`` describing the top-level element type, semantic
        dimensions, merge mode, and schema version.
    """
    # TODO: replace with your domain's actual elements and dimensions.
    domain_description = (
        "Scaffold domain — replace this description with your domain's purpose. "
        "TODO: update domain, description, top_level, and dimensions."
    )

    top_level_schema = SetSchema(
        kind="set",
        element_type="record",  # TODO: rename to your element type
        identity="by_content",
    )

    primary_dimension = DimensionSpec(
        name="primary",
        description=(
            "Primary data dimension. "
            "TODO: rename and describe what this dimension represents."
        ),
        schema=SequenceSchema(
            kind="sequence",
            element_type="record",  # TODO: rename
            identity="by_position",
            diff_algorithm="lcs",
            alphabet=None,
        ),
        independent_merge=True,
    )

    metadata_dimension = DimensionSpec(
        name="metadata",
        description=(
            "Metadata / annotation dimension. "
            "TODO: rename or remove if not applicable."
        ),
        schema=SetSchema(
            kind="set",
            element_type="label",  # TODO: rename
            identity="by_content",
        ),
        independent_merge=True,
    )

    return DomainSchema(
        domain=_DOMAIN_NAME,
        description=domain_description,
        top_level=top_level_schema,
        dimensions=[primary_dimension, metadata_dimension],
        merge_mode="three_way",  # TODO: change to "crdt" if implementing CRDT convergent merge
        schema_version=__version__,
    )

345

346

# ------------------------------------------------------------------

347

# StructuredMergePlugin — optional OT merge extension

348

# Remove this method if your domain does not need sub-file OT merge. The
# class relies on structural duck-typing, so there is no base class to drop.

350

# ------------------------------------------------------------------

def merge_ops(
    self,
    base: StateSnapshot,
    ours_snap: StateSnapshot,
    theirs_snap: StateSnapshot,
    ours_ops: list[DomainOp],
    theirs_ops: list[DomainOp],
    *,
    repo_root: pathlib.Path | None = None,
) -> MergeResult:
    """Operation-level three-way merge using Operational Transformation.

    The core engine calls this when both branches have a
    ``StructuredDelta``.  ``merge_op_lists`` decides which ops commute
    (auto-mergeable) and which conflict (need human resolution).

    The merged snapshot itself still comes from the file-level ``merge()``.
    The reported conflicts are the sorted, de-duplicated ``address`` values
    of our side of each conflicting op pair; when OT reports no conflicting
    pairs, the file-level conflict list is reported instead.

    Args:
        base: Common ancestor snapshot.
        ours_snap: Our branch's final snapshot.
        theirs_snap: Their branch's final snapshot.
        ours_ops: Our branch's typed operation list.
        theirs_ops: Their branch's typed operation list.
        repo_root: Repository root path for ``.museattributes`` loading;
            forwarded to ``merge()``.

    Returns:
        A ``MergeResult`` with the file-level merged snapshot, the conflict
        list chosen as described above, and the file-level merge's
        ``applied_strategies`` and ``dimension_reports``.
    """
    ot_result = merge_op_lists(
        base_ops=[],
        ours_ops=ours_ops,
        theirs_ops=theirs_ops,
    )

    if ot_result.conflict_ops:
        ot_conflicts: list[str] = sorted(
            {ours_op["address"] for ours_op, _theirs_op in ot_result.conflict_ops}
        )
    else:
        ot_conflicts = []

    # TODO: reconstruct the merged snapshot from merged_ops for finer
    # granularity.  Until then the file-level three-way merge supplies the
    # snapshot.
    # NOTE(review): when OT reports conflicts, file-level conflicts on other
    # paths are not included in the result — confirm this is intended.
    file_level = self.merge(base, ours_snap, theirs_snap, repo_root=repo_root)
    return MergeResult(
        merged=file_level.merged,
        conflicts=ot_conflicts or file_level.conflicts,
        applied_strategies=file_level.applied_strategies,
        dimension_reports=file_level.dimension_reports,
    )

403

404

# ------------------------------------------------------------------

405

# CRDTPlugin — optional convergent merge extension

406

# Remove these methods if your domain does not need convergent multi-agent
# join semantics. The class relies on structural duck-typing, so there is no
# base class to drop.

408

# ------------------------------------------------------------------

409

410

def crdt_schema(self) -> list[CRDTDimensionSpec]:
    """Declare which dimensions use which CRDT primitive.

    The placeholder declares a single independently mergeable ``"labels"``
    dimension backed by the ``"or_set"`` CRDT type.

    Returns:
        One ``CRDTDimensionSpec`` per CRDT-enabled dimension.
    """
    # TODO: replace with your domain's CRDT dimensions.
    labels_dimension = CRDTDimensionSpec(
        name="labels",
        description="Annotation labels — concurrent adds win.",
        crdt_type="or_set",
        independent_merge=True,
    )
    return [labels_dimension]

def join(
    self,
    a: CRDTSnapshotManifest,
    b: CRDTSnapshotManifest,
) -> CRDTSnapshotManifest:
    """Convergent join of two CRDT snapshot manifests.

    ``join`` always succeeds — no conflict state ever exists.  The vector
    clocks of both manifests are merged, and the ``"labels"`` OR-Set
    states (stored as JSON strings under ``crdt_state``, defaulting to
    ``"{}"`` when absent) are decoded, joined, and re-encoded.

    Args:
        a: First CRDT snapshot manifest.
        b: Second CRDT snapshot manifest.

    Returns:
        The joined manifest, intended to be the least upper bound of ``a``
        and ``b``.
    """
    # TODO: join each CRDT dimension declared in crdt_schema().
    joined_clock = VectorClock.from_dict(a["vclock"]).merge(
        VectorClock.from_dict(b["vclock"])
    )

    # Each manifest keeps its OR-Set state serialised as a JSON string; the
    # generator decodes ``a`` completely before touching ``b``.
    labels_of_a, labels_of_b = (
        ORSet.from_dict(json.loads(manifest["crdt_state"].get("labels", "{}")))
        for manifest in (a, b)
    )
    joined_labels = labels_of_a.join(labels_of_b)

    # NOTE(review): ``files`` is taken from ``a`` only, so the result is not
    # symmetric in ``a`` and ``b`` for the file map — confirm the intended
    # file-level join rule.
    return CRDTSnapshotManifest(
        files=a["files"],
        domain=_DOMAIN_NAME,
        vclock=joined_clock.to_dict(),
        crdt_state={"labels": json.dumps(joined_labels.to_dict())},
        schema_version=__version__,
    )

459

460

def to_crdt_state(self, snapshot: StateSnapshot) -> CRDTSnapshotManifest:
    """Lift a plain snapshot into CRDT state.

    Called when merging a snapshot produced before CRDT mode was enabled,
    or when bootstrapping CRDT state for the first time.  The snapshot's
    ``files`` are carried over; the vector clock and the ``"labels"``
    OR-Set are default-constructed.

    Args:
        snapshot: A plain ``SnapshotManifest``.

    Returns:
        A ``CRDTSnapshotManifest`` with default-constructed CRDT state.
    """
    plain_files = snapshot["files"]
    fresh_clock = VectorClock().to_dict()
    # OR-Set state is stored as a JSON string, matching what join() reads.
    fresh_labels = json.dumps(ORSet().to_dict())
    return CRDTSnapshotManifest(
        files=plain_files,
        domain=_DOMAIN_NAME,
        vclock=fresh_clock,
        crdt_state={"labels": fresh_labels},
        schema_version=__version__,
    )

479

480

def from_crdt_state(self, crdt: CRDTSnapshotManifest) -> StateSnapshot:
    """Materialise a CRDT manifest back into a plain snapshot.

    Called after a CRDT join to produce the snapshot the core engine writes
    to the commit record.  Only the ``files`` mapping is kept; the vector
    clock and CRDT state are dropped.

    Args:
        crdt: A ``CRDTSnapshotManifest``.

    Returns:
        A plain ``SnapshotManifest`` tagged with this plugin's domain.
    """
    plain_files = crdt["files"]
    return SnapshotManifest(files=plain_files, domain=_DOMAIN_NAME)