cgcardona / muse public
plugin.py python
555 lines 20.0 KB
d7054e63 feat(phase-1): typed delta algebra — replace DeltaManifest with Structu… Gabriel Cardona <gabriel@tellurstori.com> 2d ago
1 """Music domain plugin — reference implementation of :class:`MuseDomainPlugin`.
2
3 This plugin implements the five Muse domain interfaces for MIDI state:
4 notes, velocities, controller events (CC), pitch bends, and aftertouch.
5
6 It is the domain that proved the abstraction. Every other domain — scientific
7 simulation, genomics, 3D spatial design — is a new plugin that implements
8 the same five interfaces.
9
10 Live State
11 ----------
12 For the music domain, ``LiveState`` is either:
13
14 1. A ``muse-work/`` directory path (``pathlib.Path``) — the CLI path where
15 MIDI files live on disk and are managed by ``muse commit / checkout``.
16 2. A dict snapshot previously captured by :meth:`snapshot` — used when
17 constructing merges and diffs in memory.
18
19 Both forms are supported. The plugin detects which form it received by
20 checking for ``pathlib.Path`` vs ``dict``.
21
22 Snapshot Format
23 ---------------
24 A music snapshot is a JSON-serialisable dict:
25
26 .. code-block:: json
27
28 {
29 "files": {
30 "tracks/drums.mid": "<sha256>",
31 "tracks/bass.mid": "<sha256>"
32 },
33 "domain": "music"
34 }
35
36 The ``files`` key maps POSIX paths (relative to ``muse-work/``) to their
37 SHA-256 content digests.
38
39 Delta Format (Phase 1)
40 ----------------------
41 ``diff()`` returns a ``StructuredDelta`` with typed ``DomainOp`` entries:
42
43 - ``InsertOp`` — a file was added (``content_id`` = its SHA-256 hash).
44 - ``DeleteOp`` — a file was removed.
45 - ``ReplaceOp`` — a non-MIDI file's content changed.
46 - ``PatchOp`` — a ``.mid`` file changed; ``child_ops`` contains note-level
47 ``InsertOp`` / ``DeleteOp`` entries from the Myers LCS diff.
48
49 When ``repo_root`` is available, MIDI files are loaded from the object store
50 and diffed at note level. Without it, modified ``.mid`` files fall back to
51 ``ReplaceOp``.
52 """
53 from __future__ import annotations
54
55 import hashlib
56 import json
57 import logging
58 import pathlib
59
60 from muse.domain import (
61 DeleteOp,
62 DomainOp,
63 DriftReport,
64 InsertOp,
65 LiveState,
66 MergeResult,
67 MuseDomainPlugin,
68 PatchOp,
69 ReplaceOp,
70 SnapshotManifest,
71 StateDelta,
72 StateSnapshot,
73 StructuredDelta,
74 )
75
76 logger = logging.getLogger(__name__)
77
78 _DOMAIN_TAG = "music"
79
80
class MusicPlugin:
    """Music domain plugin for the Muse VCS.

    Implements :class:`~muse.domain.MuseDomainPlugin` for MIDI state stored
    as files in ``muse-work/``. Use this plugin when running ``muse`` against
    a directory of MIDI, audio, or other music production files.

    This is the reference implementation. It demonstrates the five-interface
    contract (``snapshot``, ``diff``, ``merge``, ``drift``, ``apply``) that
    every other domain plugin must satisfy.
    """

    # ------------------------------------------------------------------
    # 1. snapshot — capture live state as a content-addressed dict
    # ------------------------------------------------------------------

    def snapshot(self, live_state: LiveState) -> StateSnapshot:
        """Capture the current ``muse-work/`` directory as a snapshot dict.

        Args:
            live_state: Either a ``pathlib.Path`` pointing to ``muse-work/``
                or an existing snapshot dict (returned as-is).

        Returns:
            A ``{"files": {path: sha256}, "domain": "music"}`` manifest when
            given a path; the very same object when given a snapshot dict.

        Ignore rules
        ------------
        When *live_state* is a ``pathlib.Path``, ignore patterns are loaded
        from the repository root (the parent of ``muse-work/``) and matching
        paths are excluded from the snapshot. Files whose *name* starts with
        a dot are always excluded regardless of the ignore patterns.
        """
        if not isinstance(live_state, pathlib.Path):
            # Already a snapshot — nothing to scan.
            return live_state

        # Imported lazily so the in-memory path never touches the ignore module.
        from muse.core.ignore import is_ignored, load_patterns

        workdir = live_state
        repo_root = workdir.parent
        patterns = load_patterns(repo_root)

        files: dict[str, str] = {}
        # Sorted walk keeps the manifest's insertion order deterministic.
        for file_path in sorted(workdir.rglob("*")):
            if not file_path.is_file():
                continue
            if file_path.name.startswith("."):
                continue
            rel = file_path.relative_to(workdir).as_posix()
            if is_ignored(rel, patterns):
                continue
            files[rel] = _hash_file(file_path)
        return SnapshotManifest(files=files, domain=_DOMAIN_TAG)

    # ------------------------------------------------------------------
    # 2. diff — compute the structured delta between two snapshots
    # ------------------------------------------------------------------

    def diff(
        self,
        base: StateSnapshot,
        target: StateSnapshot,
        *,
        repo_root: pathlib.Path | None = None,
    ) -> StateDelta:
        """Compute a ``StructuredDelta`` between two music snapshots.

        File additions and removals produce ``InsertOp`` and ``DeleteOp``
        entries respectively. Files present on both sides with different
        hashes are delegated to :func:`_diff_modified_file`, which yields a
        note-level ``PatchOp`` for ``.mid`` files when *repo_root* is given
        and a file-level ``ReplaceOp`` otherwise.

        Args:
            base: The ancestor snapshot.
            target: The later snapshot.
            repo_root: Repository root directory. When provided, modified
                MIDI files are read from the object store for note-level
                diffing.

        Returns:
            A ``StructuredDelta`` whose ``ops`` list is ordered as: inserts,
            deletes, then modifications (each group sorted by path), and
            whose ``summary`` is human-readable.
        """
        base_files = base["files"]
        target_files = target["files"]

        base_paths = set(base_files)
        target_paths = set(target_files)

        ops: list[DomainOp] = []

        # Added files → InsertOp
        for path in sorted(target_paths - base_paths):
            ops.append(
                InsertOp(
                    op="insert",
                    address=path,
                    position=None,
                    content_id=target_files[path],
                    content_summary=f"new file: {path}",
                )
            )

        # Removed files → DeleteOp
        for path in sorted(base_paths - target_paths):
            ops.append(
                DeleteOp(
                    op="delete",
                    address=path,
                    position=None,
                    content_id=base_files[path],
                    content_summary=f"deleted: {path}",
                )
            )

        # Modified files → PatchOp / ReplaceOp
        for path in sorted(
            p for p in base_paths & target_paths if base_files[p] != target_files[p]
        ):
            ops.append(
                _diff_modified_file(
                    path=path,
                    old_hash=base_files[path],
                    new_hash=target_files[path],
                    repo_root=repo_root,
                )
            )

        summary = _summarise_ops(ops)
        return StructuredDelta(domain=_DOMAIN_TAG, ops=ops, summary=summary)

    # ------------------------------------------------------------------
    # 3. merge — three-way reconciliation
    # ------------------------------------------------------------------

    def merge(
        self,
        base: StateSnapshot,
        left: StateSnapshot,
        right: StateSnapshot,
        *,
        repo_root: pathlib.Path | None = None,
    ) -> MergeResult:
        """Three-way merge two divergent music state lines against a common base.

        A file is auto-merged when only one side changed it, or when both
        sides arrived at the same result (both deleted it, or both now hold
        the same content hash). When both sides changed the same file in
        different ways, resolution proceeds as follows:

        1. **File-level strategy** — if the attributes loaded from
           *repo_root* yield an ``ours`` or ``theirs`` strategy for the path
           (dimension ``"*"``), that side is taken and the file is not a
           conflict.

        2. **Dimension-level merge** — for ``.mid`` files present in base,
           left and right (and only when *repo_root* is given), the three
           blobs are handed to ``merge_midi_dimensions``. If it returns a
           result, the merged bytes are written to the object store and the
           file is resolved; if it raises ``ValueError`` or any blob is
           missing, the file stays a conflict.

        3. **Manual override** — a ``manual`` strategy forces a path that
           was otherwise auto-resolved (single-side change) into the
           conflict list and resets it to its base state.

        Args:
            base: Common-ancestor snapshot.
            left: "Ours" snapshot.
            right: "Theirs" snapshot.
            repo_root: Repository root; enables attribute rules and
                MIDI dimension merging. Without it no rules are loaded.

        Returns:
            A ``MergeResult`` carrying the merged manifest, the sorted list
            of conflicting paths, the strategy applied per path, and the
            per-file dimension reports.
        """
        from muse.core.attributes import load_attributes, resolve_strategy
        from muse.core.object_store import read_object, write_object
        from muse.plugins.music.midi_merge import merge_midi_dimensions

        base_files = base["files"]
        left_files = left["files"]
        right_files = right["files"]

        attrs = load_attributes(repo_root) if repo_root is not None else []

        left_changed: set[str] = _changed_paths(base_files, left_files)
        right_changed: set[str] = _changed_paths(base_files, right_files)
        all_conflict_paths: set[str] = left_changed & right_changed

        merged: dict[str, str] = dict(base_files)

        def adopt(side_files: dict[str, str], path: str) -> None:
            """Make ``merged[path]`` mirror *side_files* (copy hash or drop)."""
            if path in side_files:
                merged[path] = side_files[path]
            else:
                merged.pop(path, None)

        # Apply clean single-side changes first.
        for path in left_changed - all_conflict_paths:
            adopt(left_files, path)
        for path in right_changed - all_conflict_paths:
            adopt(right_files, path)

        # Consensus deletions (both sides removed the same file) — not a conflict.
        consensus_deleted = {
            p for p in all_conflict_paths
            if p not in left_files and p not in right_files
        }
        for path in consensus_deleted:
            merged.pop(path, None)

        real_conflicts: set[str] = all_conflict_paths - consensus_deleted

        applied_strategies: dict[str, str] = {}
        dimension_reports: dict[str, dict[str, str]] = {}
        final_conflicts: list[str] = []

        for path in sorted(real_conflicts):
            file_strategy = resolve_strategy(attrs, path, "*")

            if file_strategy == "ours":
                adopt(left_files, path)
                applied_strategies[path] = "ours"
                continue

            if file_strategy == "theirs":
                adopt(right_files, path)
                applied_strategies[path] = "theirs"
                continue

            # Convergent change: both sides independently produced the same
            # content (identical edit, or the same new file). There is
            # nothing to reconcile, so take the shared hash. A ``manual``
            # rule is excluded here so an explicit request for human review
            # is never silently bypassed.
            if (
                file_strategy != "manual"
                and path in left_files
                and left_files[path] == right_files.get(path)
            ):
                merged[path] = left_files[path]
                continue

            if (
                repo_root is not None
                and path.lower().endswith(".mid")
                and path in left_files
                and path in right_files
                and path in base_files
            ):
                base_obj = read_object(repo_root, base_files[path])
                left_obj = read_object(repo_root, left_files[path])
                right_obj = read_object(repo_root, right_files[path])

                if base_obj is not None and left_obj is not None and right_obj is not None:
                    try:
                        dim_result = merge_midi_dimensions(
                            base_obj, left_obj, right_obj,
                            attrs,
                            path,
                        )
                    except ValueError:
                        # Treated the same as "could not merge": fall
                        # through to a file-level conflict.
                        dim_result = None

                    if dim_result is not None:
                        merged_bytes, dim_report = dim_result
                        # Content-address the merged file and persist it so
                        # the merged manifest points at a real object.
                        new_hash = hashlib.sha256(merged_bytes).hexdigest()
                        write_object(repo_root, new_hash, merged_bytes)
                        merged[path] = new_hash
                        applied_strategies[path] = "dimension-merge"
                        dimension_reports[path] = dim_report
                        continue

            # Unresolved: merged[path] keeps whatever base had (or stays absent).
            final_conflicts.append(path)

        # Manual override for paths that were auto-resolved above.
        for path in sorted((left_changed | right_changed) - real_conflicts):
            if path in consensus_deleted:
                continue
            if resolve_strategy(attrs, path, "*") == "manual":
                final_conflicts.append(path)
                applied_strategies[path] = "manual"
                # Undo the auto-applied change: restore the base state.
                adopt(base_files, path)

        return MergeResult(
            merged=SnapshotManifest(files=merged, domain=_DOMAIN_TAG),
            conflicts=sorted(final_conflicts),
            applied_strategies=applied_strategies,
            dimension_reports=dimension_reports,
        )

    # ------------------------------------------------------------------
    # 4. drift — compare committed state vs live state
    # ------------------------------------------------------------------

    def drift(
        self,
        committed: StateSnapshot,
        live: LiveState,
    ) -> DriftReport:
        """Detect uncommitted changes in ``muse-work/`` relative to *committed*.

        The comparison is a plain :meth:`diff` without ``repo_root``, so
        modified files are always reported at file level (``ReplaceOp``).

        Args:
            committed: The last committed snapshot.
            live: Either a ``pathlib.Path`` (``muse-work/``) or a snapshot
                dict representing current live state.

        Returns:
            A :class:`~muse.domain.DriftReport` with ``has_drift``, a
            summary such as ``"1 added, 2 modified"`` (or
            ``"working tree clean"``), and the underlying delta.
        """
        live_snapshot = self.snapshot(live)
        delta = self.diff(committed, live_snapshot)

        inserts = sum(1 for op in delta["ops"] if op["op"] == "insert")
        deletes = sum(1 for op in delta["ops"] if op["op"] == "delete")
        modified = sum(1 for op in delta["ops"] if op["op"] in ("replace", "patch"))
        has_drift = bool(inserts or deletes or modified)

        parts: list[str] = []
        if inserts:
            parts.append(f"{inserts} added")
        if deletes:
            parts.append(f"{deletes} removed")
        if modified:
            parts.append(f"{modified} modified")

        summary = ", ".join(parts) if parts else "working tree clean"
        return DriftReport(has_drift=has_drift, summary=summary, delta=delta)

    # ------------------------------------------------------------------
    # 5. apply — execute a delta against live state (checkout)
    # ------------------------------------------------------------------

    def apply(self, delta: StateDelta, live_state: LiveState) -> LiveState:
        """Apply a structured delta to produce a new live state.

        When ``live_state`` is a ``pathlib.Path`` the delta itself is not
        interpreted: the directory is simply rescanned via :meth:`snapshot`,
        on the expectation that the caller has already updated the files on
        disk.

        When ``live_state`` is a snapshot dict, the manifest is updated in
        memory at file level: ``delete`` removes the path, ``replace`` sets
        it to ``new_content_id`` and ``insert`` sets it to ``content_id``.
        Only hashes are recorded — no file content is created or checked.
        Any other op kind (e.g. ``patch``) is skipped, because the resulting
        content hash cannot be derived from the op alone; callers that need
        those applied should pass the workdir path instead.

        Args:
            delta: A ``StructuredDelta`` produced by :meth:`diff`.
            live_state: The workdir path (preferred) or a snapshot dict.

        Returns:
            The updated live state as a ``SnapshotManifest``. The input
            snapshot dict is not mutated.
        """
        if isinstance(live_state, pathlib.Path):
            return self.snapshot(live_state)

        # Work on a copy so the caller's snapshot is left untouched.
        current_files = dict(live_state["files"])

        for op in delta["ops"]:
            if op["op"] == "delete":
                current_files.pop(op["address"], None)
            elif op["op"] == "replace":
                current_files[op["address"]] = op["new_content_id"]
            elif op["op"] == "insert":
                current_files[op["address"]] = op["content_id"]
            # PatchOp and MoveOp: skip in-memory — caller must use workdir path.

        return SnapshotManifest(files=current_files, domain=_DOMAIN_TAG)
440
441
442 # ---------------------------------------------------------------------------
443 # Module-level helpers
444 # ---------------------------------------------------------------------------
445
446
def _diff_modified_file(
    *,
    path: str,
    old_hash: str,
    new_hash: str,
    repo_root: pathlib.Path | None,
) -> DomainOp:
    """Produce the best available op for a file whose content hash changed.

    For ``.mid`` paths (case-insensitive) with a *repo_root*, both versions
    are read from the object store and diffed at note level, yielding a
    ``PatchOp`` that wraps the child delta. In every other case — non-MIDI
    file, no *repo_root*, either object missing, or the deep diff raising —
    a file-level ``ReplaceOp`` is returned instead.

    Args:
        path: Snapshot-relative path of the modified file.
        old_hash: Content ID of the file in the base snapshot.
        new_hash: Content ID of the file in the target snapshot.
        repo_root: Repository root used to locate the object store, or
            ``None`` to skip the deep diff.

    Returns:
        A ``PatchOp`` when the note-level diff succeeds, else a ``ReplaceOp``.
    """
    if path.lower().endswith(".mid") and repo_root is not None:
        # Imported lazily: only needed on the deep-diff path.
        from muse.core.object_store import read_object
        from muse.plugins.music.midi_diff import diff_midi_notes

        base_bytes = read_object(repo_root, old_hash)
        target_bytes = read_object(repo_root, new_hash)

        if base_bytes is not None and target_bytes is not None:
            try:
                child_delta = diff_midi_notes(
                    base_bytes, target_bytes, file_path=path
                )
                return PatchOp(
                    op="patch",
                    address=path,
                    child_ops=child_delta["ops"],
                    child_domain=child_delta["domain"],
                    child_summary=child_delta["summary"],
                )
            except Exception as exc:
                # Deliberate best-effort: any failure in the deep diff must
                # degrade to a file-level replace rather than abort the
                # whole diff. (``Exception`` already covers ``ValueError``.)
                logger.debug("⚠️ MIDI deep diff failed for %r: %s", path, exc)

    return ReplaceOp(
        op="replace",
        address=path,
        position=None,
        old_content_id=old_hash,
        new_content_id=new_hash,
        old_summary=f"{path} (previous)",
        new_summary=f"{path} (updated)",
    )
489
490
491 def _summarise_ops(ops: list[DomainOp]) -> str:
492 """Build a human-readable summary string from a list of domain ops."""
493 inserts = 0
494 deletes = 0
495 replaces = 0
496 patches = 0
497
498 for op in ops:
499 kind = op["op"]
500 if kind == "insert":
501 inserts += 1
502 elif kind == "delete":
503 deletes += 1
504 elif kind == "replace":
505 replaces += 1
506 elif kind == "patch":
507 patches += 1
508
509 parts: list[str] = []
510 if inserts:
511 parts.append(f"{inserts} file{'s' if inserts != 1 else ''} added")
512 if deletes:
513 parts.append(f"{deletes} file{'s' if deletes != 1 else ''} removed")
514 if replaces:
515 parts.append(f"{replaces} file{'s' if replaces != 1 else ''} modified")
516 if patches:
517 parts.append(f"{patches} file{'s' if patches != 1 else ''} patched")
518
519 return ", ".join(parts) if parts else "no changes"
520
521
522 def _hash_file(path: pathlib.Path) -> str:
523 """Return the SHA-256 hex digest of a file's raw bytes."""
524 h = hashlib.sha256()
525 with path.open("rb") as fh:
526 for chunk in iter(lambda: fh.read(65536), b""):
527 h.update(chunk)
528 return h.hexdigest()
529
530
531 def _changed_paths(
532 base: dict[str, str], other: dict[str, str]
533 ) -> set[str]:
534 """Return paths that differ between *base* and *other*."""
535 base_p = set(base)
536 other_p = set(other)
537 added = other_p - base_p
538 deleted = base_p - other_p
539 common = base_p & other_p
540 modified = {p for p in common if base[p] != other[p]}
541 return added | deleted | modified
542
543
def content_hash(snapshot: StateSnapshot) -> str:
    """Return a stable SHA-256 hex digest identifying *snapshot*'s content.

    The snapshot is serialised to compact JSON with sorted keys, so two
    snapshots with equal content hash identically regardless of key order.
    """
    payload = json.dumps(
        snapshot,
        sort_keys=True,
        separators=(",", ":"),
    ).encode()
    return hashlib.sha256(payload).hexdigest()
548
549
#: Module-level singleton — import and use directly.
plugin = MusicPlugin()

# Import-time conformance check: fail fast if MusicPlugin no longer satisfies
# the MuseDomainPlugin interface. NOTE(review): this is a plain ``assert``, so
# it is skipped when Python runs with ``-O``.
assert isinstance(
    plugin, MuseDomainPlugin
), "MusicPlugin does not satisfy the MuseDomainPlugin protocol"