gabriel / muse public
plugin.py python
965 lines 36.7 KB
53d2d9ce feat(phase-3): operation-level merge engine — OT-based auto-merge for n… Gabriel Cardona <cgcardona@gmail.com> 6d ago
1 """Music domain plugin — reference implementation of :class:`MuseDomainPlugin`.
2
3 This plugin implements the six Muse domain interfaces for MIDI state:
4 notes, velocities, controller events (CC), pitch bends, and aftertouch.
5
6 It is the domain that proved the abstraction. Every other domain — scientific
7 simulation, genomics, 3D spatial design — is a new plugin that implements
8 the same six interfaces.
9
10 Live State
11 ----------
12 For the music domain, ``LiveState`` is either:
13
14 1. A ``muse-work/`` directory path (``pathlib.Path``) — the CLI path where
15 MIDI files live on disk and are managed by ``muse commit / checkout``.
16 2. A dict snapshot previously captured by :meth:`snapshot` — used when
17 constructing merges and diffs in memory.
18
19 Both forms are supported. The plugin detects which form it received by
20 checking for ``pathlib.Path`` vs ``dict``.
21
22 Snapshot Format
23 ---------------
24 A music snapshot is a JSON-serialisable dict:
25
26 .. code-block:: json
27
28 {
29 "files": {
30 "tracks/drums.mid": "<sha256>",
31 "tracks/bass.mid": "<sha256>"
32 },
33 "domain": "music"
34 }
35
36 The ``files`` key maps POSIX paths (relative to ``muse-work/``) to their
37 SHA-256 content digests.
38
39 Delta Format (Phase 1)
40 ----------------------
41 ``diff()`` returns a ``StructuredDelta`` with typed ``DomainOp`` entries:
42
43 - ``InsertOp`` — a file was added (``content_id`` = its SHA-256 hash).
44 - ``DeleteOp`` — a file was removed.
45 - ``ReplaceOp`` — a non-MIDI file's content changed.
46 - ``PatchOp`` — a ``.mid`` file changed; ``child_ops`` contains note-level
47 ``InsertOp`` / ``DeleteOp`` entries from the Myers LCS diff.
48
49 When ``repo_root`` is available, MIDI files are loaded from the object store
50 and diffed at note level. Without it, modified ``.mid`` files fall back to
51 ``ReplaceOp``.
52 """
53 from __future__ import annotations
54
55 import hashlib
56 import json
57 import logging
58 import pathlib
59
60 from muse.core.schema import (
61 DimensionSpec,
62 DomainSchema,
63 SequenceSchema,
64 SetSchema,
65 TensorSchema,
66 TreeSchema,
67 )
68 from muse.domain import (
69 DeleteOp,
70 DomainOp,
71 DriftReport,
72 InsertOp,
73 LiveState,
74 MergeResult,
75 MuseDomainPlugin,
76 PatchOp,
77 ReplaceOp,
78 SnapshotManifest,
79 StateDelta,
80 StateSnapshot,
81 StructuredDelta,
82 StructuredMergePlugin,
83 )
84 from muse.plugins.music.midi_diff import NoteKey
85
# Module-level logger used by the helper functions below for debug/info output.
logger = logging.getLogger(__name__)

# Value passed as ``domain=`` to every manifest, delta and schema built here.
_DOMAIN_TAG = "music"
89
90
class MusicPlugin:
    """Music domain plugin for the Muse VCS.

    Implements :class:`~muse.domain.MuseDomainPlugin` (six core interfaces)
    and :class:`~muse.domain.StructuredMergePlugin` (Phase 3 operation-level
    merge) for MIDI state stored as files in ``muse-work/``.

    This is the reference implementation. Every other domain plugin implements
    the same six core interfaces; the :class:`~muse.domain.StructuredMergePlugin`
    extension is optional but strongly recommended for domains that produce
    note-level (sub-file) diffs.
    """

    # ------------------------------------------------------------------
    # Internal helper
    # ------------------------------------------------------------------

    @staticmethod
    def _take_version(
        merged: dict[str, str],
        side_files: dict[str, str],
        path: str,
    ) -> None:
        """Make *merged* agree with *side_files* for *path*.

        Copies the side's content hash when the side still has the file and
        drops the path from *merged* when the side no longer has it.
        """
        if path in side_files:
            merged[path] = side_files[path]
        else:
            merged.pop(path, None)

    # ------------------------------------------------------------------
    # 1. snapshot — capture live state as a content-addressed dict
    # ------------------------------------------------------------------

    def snapshot(self, live_state: LiveState) -> StateSnapshot:
        """Capture the current ``muse-work/`` directory as a snapshot dict.

        Args:
            live_state: Either a ``pathlib.Path`` pointing to ``muse-work/``
                or an existing snapshot dict (returned as-is).

        Returns:
            A ``{"files": {path: sha256}, "domain": "music"}`` manifest when
            given a path; the very same object when given a snapshot dict.

        Ignore rules
        ------------
        When *live_state* is a ``pathlib.Path``, ignore patterns are loaded
        from the parent of that directory and any matching relative path is
        left out of the snapshot. Files whose *name* starts with ``"."`` are
        always left out, regardless of the ignore patterns.
        """
        if not isinstance(live_state, pathlib.Path):
            # Already a snapshot — nothing to scan.
            return live_state

        from muse.core.ignore import is_ignored, load_patterns

        workdir = live_state
        patterns = load_patterns(workdir.parent)

        files: dict[str, str] = {}
        # Sorted walk keeps the manifest insertion order deterministic.
        for file_path in sorted(workdir.rglob("*")):
            if not file_path.is_file() or file_path.name.startswith("."):
                continue
            rel = file_path.relative_to(workdir).as_posix()
            if is_ignored(rel, patterns):
                continue
            files[rel] = _hash_file(file_path)
        return SnapshotManifest(files=files, domain=_DOMAIN_TAG)

    # ------------------------------------------------------------------
    # 2. diff — compute the structured delta between two snapshots
    # ------------------------------------------------------------------

    def diff(
        self,
        base: StateSnapshot,
        target: StateSnapshot,
        *,
        repo_root: pathlib.Path | None = None,
    ) -> StateDelta:
        """Compute a ``StructuredDelta`` between two music snapshots.

        Ops are emitted in three groups, each sorted by path: additions
        (``InsertOp``), removals (``DeleteOp``), then modifications. A
        modified file is handed to :func:`_diff_modified_file`, which yields
        a note-level ``PatchOp`` for ``.mid`` files when it can and a
        file-level ``ReplaceOp`` otherwise.

        Args:
            base: The ancestor snapshot.
            target: The later snapshot.
            repo_root: Repository root directory. When provided, modified
                MIDI files are read from the object store for note-level
                diffing.

        Returns:
            A ``StructuredDelta`` whose ``ops`` list transforms *base* into
            *target* and whose ``summary`` is human-readable.
        """
        base_files = base["files"]
        target_files = target["files"]
        base_paths = set(base_files)
        target_paths = set(target_files)

        ops: list[DomainOp] = []

        # Added files → InsertOp
        for path in sorted(target_paths - base_paths):
            ops.append(
                InsertOp(
                    op="insert",
                    address=path,
                    position=None,
                    content_id=target_files[path],
                    content_summary=f"new file: {path}",
                )
            )

        # Removed files → DeleteOp
        for path in sorted(base_paths - target_paths):
            ops.append(
                DeleteOp(
                    op="delete",
                    address=path,
                    position=None,
                    content_id=base_files[path],
                    content_summary=f"deleted: {path}",
                )
            )

        # Files present on both sides whose content hash differs.
        for path in sorted(base_paths & target_paths):
            if base_files[path] == target_files[path]:
                continue
            ops.append(
                _diff_modified_file(
                    path=path,
                    old_hash=base_files[path],
                    new_hash=target_files[path],
                    repo_root=repo_root,
                )
            )

        return StructuredDelta(
            domain=_DOMAIN_TAG,
            ops=ops,
            summary=_summarise_ops(ops),
        )

    # ------------------------------------------------------------------
    # 3. merge — three-way reconciliation
    # ------------------------------------------------------------------

    def merge(
        self,
        base: StateSnapshot,
        left: StateSnapshot,
        right: StateSnapshot,
        *,
        repo_root: pathlib.Path | None = None,
    ) -> MergeResult:
        """Three-way merge two divergent music state lines against a common base.

        A path is merged cleanly when only one side changed it, or when both
        sides arrived at the same end state (both deleted it, or both hold
        the identical content hash). For every remaining path changed on
        both sides the merge proceeds in stages:

        1. **File-level strategy** — if the attributes resolve to ``ours`` or
           ``theirs`` for the path (dimension ``"*"``), that side's version is
           taken and recorded in ``applied_strategies``.

        2. **Dimension-level merge** — for ``.mid`` paths present in base,
           left and right, with *repo_root* available and all three objects
           readable, ``merge_midi_dimensions`` is attempted. On success the
           merged bytes are written to the object store and the path is
           recorded as ``"dimension-merge"``. A ``ValueError`` or a ``None``
           result leaves the path as a conflict.

        3. **Manual override** — any cleanly merged path (except one deleted
           on both sides) whose file-level strategy is ``manual`` is forced
           into the conflict list and reset to its base state.

        Args:
            base: Common ancestor snapshot.
            left: Snapshot of "our" line.
            right: Snapshot of "their" line.
            repo_root: Repository root; enables attribute rules and the
                dimension-level MIDI merge.

        Returns:
            A :class:`~muse.domain.MergeResult` with the merged manifest, the
            sorted conflict paths, the strategies applied per path and the
            per-file dimension reports.
        """
        from muse.core.attributes import load_attributes, resolve_strategy
        from muse.core.object_store import read_object, write_object
        from muse.plugins.music.midi_merge import merge_midi_dimensions

        base_files = base["files"]
        left_files = left["files"]
        right_files = right["files"]

        attrs = load_attributes(repo_root) if repo_root is not None else []

        left_changed: set[str] = _changed_paths(base_files, left_files)
        right_changed: set[str] = _changed_paths(base_files, right_files)
        both_changed: set[str] = left_changed & right_changed

        merged: dict[str, str] = dict(base_files)

        # Apply clean single-side changes first.
        for path in left_changed - both_changed:
            self._take_version(merged, left_files, path)
        for path in right_changed - both_changed:
            self._take_version(merged, right_files, path)

        # Consensus deletions (both sides removed the same file) — not a conflict.
        consensus_deleted = {
            p for p in both_changed
            if p not in left_files and p not in right_files
        }
        for path in consensus_deleted:
            merged.pop(path, None)

        # Convergent edits: both sides ended up with byte-identical content
        # (same hash), so there is nothing to reconcile — take that content.
        consensus_changed = {
            p for p in both_changed
            if p in left_files
            and p in right_files
            and left_files[p] == right_files[p]
        }
        for path in consensus_changed:
            merged[path] = left_files[path]

        real_conflicts: set[str] = both_changed - consensus_deleted - consensus_changed

        applied_strategies: dict[str, str] = {}
        dimension_reports: dict[str, dict[str, str]] = {}
        final_conflicts: list[str] = []

        for path in sorted(real_conflicts):
            file_strategy = resolve_strategy(attrs, path, "*")

            if file_strategy == "ours":
                self._take_version(merged, left_files, path)
                applied_strategies[path] = "ours"
                continue

            if file_strategy == "theirs":
                self._take_version(merged, right_files, path)
                applied_strategies[path] = "theirs"
                continue

            # Dimension-level merge needs all three versions of a MIDI file.
            if (
                repo_root is not None
                and path.lower().endswith(".mid")
                and path in left_files
                and path in right_files
                and path in base_files
            ):
                base_obj = read_object(repo_root, base_files[path])
                left_obj = read_object(repo_root, left_files[path])
                right_obj = read_object(repo_root, right_files[path])

                if base_obj is not None and left_obj is not None and right_obj is not None:
                    try:
                        dim_result = merge_midi_dimensions(
                            base_obj, left_obj, right_obj,
                            attrs,
                            path,
                        )
                    except ValueError:
                        dim_result = None

                    if dim_result is not None:
                        merged_bytes, dim_report = dim_result
                        new_hash = hashlib.sha256(merged_bytes).hexdigest()
                        write_object(repo_root, new_hash, merged_bytes)
                        merged[path] = new_hash
                        applied_strategies[path] = "dimension-merge"
                        dimension_reports[path] = dim_report
                        continue

            final_conflicts.append(path)

        # Manual override: force otherwise-clean paths back into conflict.
        for path in sorted((left_changed | right_changed) - real_conflicts):
            if path in consensus_deleted:
                continue
            if resolve_strategy(attrs, path, "*") == "manual":
                final_conflicts.append(path)
                applied_strategies[path] = "manual"
                # Undo the automatic resolution: fall back to the base state.
                self._take_version(merged, base_files, path)

        return MergeResult(
            merged=SnapshotManifest(files=merged, domain=_DOMAIN_TAG),
            conflicts=sorted(final_conflicts),
            applied_strategies=applied_strategies,
            dimension_reports=dimension_reports,
        )

    # ------------------------------------------------------------------
    # 4. drift — compare committed state vs live state
    # ------------------------------------------------------------------

    def drift(
        self,
        committed: StateSnapshot,
        live: LiveState,
    ) -> DriftReport:
        """Detect uncommitted changes in ``muse-work/`` relative to *committed*.

        The live state is snapshotted and diffed against *committed* without
        a ``repo_root``, so the resulting delta is file-level.

        Args:
            committed: The last committed snapshot.
            live: Either a ``pathlib.Path`` (``muse-work/``) or a snapshot
                dict representing current live state.

        Returns:
            A :class:`~muse.domain.DriftReport` carrying the delta, a
            ``has_drift`` flag and a summary such as ``"1 added, 2 modified"``
            or ``"working tree clean"``.
        """
        delta = self.diff(committed, self.snapshot(live))

        kinds = [op["op"] for op in delta["ops"]]
        inserts = kinds.count("insert")
        deletes = kinds.count("delete")
        modified = kinds.count("replace") + kinds.count("patch")

        parts: list[str] = []
        if inserts:
            parts.append(f"{inserts} added")
        if deletes:
            parts.append(f"{deletes} removed")
        if modified:
            parts.append(f"{modified} modified")

        return DriftReport(
            has_drift=bool(inserts or deletes or modified),
            summary=", ".join(parts) if parts else "working tree clean",
            delta=delta,
        )

    # ------------------------------------------------------------------
    # 5. apply — execute a delta against live state (checkout)
    # ------------------------------------------------------------------

    def apply(self, delta: StateDelta, live_state: LiveState) -> LiveState:
        """Apply a structured delta to produce a new live state.

        When ``live_state`` is a ``pathlib.Path`` the delta itself is not
        consulted: the directory is simply rescanned via :meth:`snapshot`,
        on the expectation that the caller has already updated the files on
        disk.

        When ``live_state`` is a snapshot dict, file-level ops are applied to
        a copy of its ``files`` mapping:

        - ``delete``  — the address is removed (missing addresses are ignored).
        - ``replace`` — the address is set to ``new_content_id``.
        - ``insert``  — the address is set to ``content_id``.

        Any other op kind (e.g. ``patch``) is skipped, so the address keeps
        its previous hash; callers that need those ops reflected should pass
        the workdir path instead.

        Args:
            delta: A ``StructuredDelta`` produced by :meth:`diff`.
            live_state: The workdir path (preferred) or a snapshot dict.

        Returns:
            The updated live state as a ``SnapshotManifest``.
        """
        if isinstance(live_state, pathlib.Path):
            return self.snapshot(live_state)

        current_files = dict(live_state["files"])

        for op in delta["ops"]:
            kind = op["op"]
            if kind == "delete":
                current_files.pop(op["address"], None)
            elif kind == "replace":
                current_files[op["address"]] = op["new_content_id"]
            elif kind == "insert":
                current_files[op["address"]] = op["content_id"]
            # PatchOp and MoveOp: skip in-memory — caller must use workdir path.

        return SnapshotManifest(files=current_files, domain=_DOMAIN_TAG)

    # ------------------------------------------------------------------
    # 6. schema — declare structural schema for the algorithm library
    # ------------------------------------------------------------------

    def schema(self) -> DomainSchema:
        """Return the full structural schema for the music domain.

        Declares four semantic dimensions — melodic, harmonic, dynamic, and
        structural — that the core diff algorithm library (Phase 2) and merge
        engine (Phase 3) use to drive per-dimension operations.

        Top level is a ``SetSchema``: the music workspace is an unordered
        collection of audio/MIDI files, identified by content.

        Dimensions:

        - **melodic** — the sequence of note events over time, diffed with
          LCS so that insertions and deletions of individual notes surface.
        - **harmonic** — the sequence of chord events and key-signature
          changes, LCS-diffed independently of the melodic dimension.
        - **dynamic** — velocity and expression curves as a rank-1 float32
          tensor with epsilon 1.0 and sparse diff mode.
        - **structural** — track layout, time signatures, and tempo map as a
          tree. It is the only dimension declared with
          ``independent_merge=False``.
        """
        return DomainSchema(
            domain=_DOMAIN_TAG,
            description="MIDI and audio file versioning with note-level diff",
            top_level=SetSchema(
                kind="set",
                element_type="audio_file",
                identity="by_content",
            ),
            dimensions=[
                DimensionSpec(
                    name="melodic",
                    description="Note pitches and durations over time",
                    schema=SequenceSchema(
                        kind="sequence",
                        element_type="note_event",
                        identity="by_position",
                        diff_algorithm="lcs",
                        alphabet=None,
                    ),
                    independent_merge=True,
                ),
                DimensionSpec(
                    name="harmonic",
                    description="Chord progressions and key signatures",
                    schema=SequenceSchema(
                        kind="sequence",
                        element_type="chord_event",
                        identity="by_position",
                        diff_algorithm="lcs",
                        alphabet=None,
                    ),
                    independent_merge=True,
                ),
                DimensionSpec(
                    name="dynamic",
                    description="Velocity and expression curves",
                    schema=TensorSchema(
                        kind="tensor",
                        dtype="float32",
                        rank=1,
                        epsilon=1.0,
                        diff_mode="sparse",
                    ),
                    independent_merge=True,
                ),
                DimensionSpec(
                    name="structural",
                    description="Track layout, time signatures, tempo map",
                    schema=TreeSchema(
                        kind="tree",
                        node_type="track_node",
                        diff_algorithm="zhang_shasha",
                    ),
                    independent_merge=False,
                ),
            ],
            merge_mode="three_way",
            schema_version=1,
        )

    # ------------------------------------------------------------------
    # 7. merge_ops — Phase 3 operation-level merge (StructuredMergePlugin)
    # ------------------------------------------------------------------

    def merge_ops(
        self,
        base: StateSnapshot,
        ours_snap: StateSnapshot,
        theirs_snap: StateSnapshot,
        ours_ops: list[DomainOp],
        theirs_ops: list[DomainOp],
        *,
        repo_root: pathlib.Path | None = None,
    ) -> MergeResult:
        """Operation-level three-way merge using the Phase 3 OT engine.

        Extends the file-level ``merge()`` method with sub-file granularity: two
        changes to non-overlapping notes in the same MIDI file no longer produce
        a conflict.

        Algorithm
        ---------
        1. Run :func:`~muse.core.op_transform.merge_op_lists` on the flat op
           lists to obtain the cleanly merged ops and the conflicting
           (ours, theirs) pairs.
        2. Build the merged manifest from *base* by applying every clean op
           whose address is not involved in a conflict. ``InsertOp`` and
           ``ReplaceOp`` supply ``content_id`` / ``new_content_id`` directly.
           For a ``PatchOp`` made by one side only, the final hash is taken
           from that side's snapshot. When both sides patched the same file,
           :func:`_merge_patch_ops` attempts a note-level reconstruction
           (once per file); on failure the file becomes a conflict.
        3. For conflicting pairs, resolve the file-level strategy. ``"ours"``
           and ``"theirs"`` are applied automatically; everything else enters
           ``MergeResult.conflicts``.

        Args:
            base: Common ancestor snapshot.
            ours_snap: Final snapshot of our branch.
            theirs_snap: Final snapshot of their branch.
            ours_ops: Operations from our branch delta (base → ours).
            theirs_ops: Operations from their branch delta (base → theirs).
            repo_root: Repository root for object store and attributes.

        Returns:
            A :class:`~muse.domain.MergeResult` with the reconciled snapshot,
            the de-duplicated sorted conflict addresses, the strategies
            applied, and the merged op list as ``op_log``.
        """
        from muse.core.attributes import load_attributes, resolve_strategy
        from muse.core.op_transform import merge_op_lists

        attrs = load_attributes(repo_root) if repo_root is not None else []

        # OT classification: find commuting and conflicting op pairs.
        ot_result = merge_op_lists([], ours_ops, theirs_ops)

        # Plain-dict copies of the three manifests; none of them is mutated.
        base_files: dict[str, str] = dict(base["files"])
        ours_files: dict[str, str] = dict(ours_snap["files"])
        theirs_files: dict[str, str] = dict(theirs_snap["files"])

        # Build the merged manifest starting from base.
        merged_files: dict[str, str] = dict(base_files)
        applied_strategies: dict[str, str] = {}
        final_conflicts: list[str] = []
        op_log: list[DomainOp] = list(ot_result.merged_ops)

        # Index PatchOps by address so same-file note merges can be detected.
        ours_patches: dict[str, PatchOp] = {
            op["address"]: op for op in ours_ops if op["op"] == "patch"
        }
        theirs_patches: dict[str, PatchOp] = {
            op["address"]: op for op in theirs_ops if op["op"] == "patch"
        }

        # Track which addresses are involved in a conflict.
        conflicting_addresses: set[str] = {
            our_op["address"] for our_op, _ in ot_result.conflict_ops
        }

        # Files already reconstructed from two PatchOps. Both ops may appear
        # in merged_ops, and the reconstruction only needs to run once.
        reconstructed: set[str] = set()

        # --- Apply clean merged ops ---
        for op in ot_result.merged_ops:
            addr = op["address"]
            if addr in conflicting_addresses:
                continue  # handled in conflict resolution below

            kind = op["op"]
            if kind == "insert":
                merged_files[addr] = op["content_id"]

            elif kind == "delete":
                merged_files.pop(addr, None)

            elif kind == "replace":
                merged_files[addr] = op["new_content_id"]

            elif kind == "patch":
                has_ours = addr in ours_patches
                has_theirs = addr in theirs_patches

                if has_ours and not has_theirs:
                    # Only our side changed this file — take our version.
                    self._take_version(merged_files, ours_files, addr)

                elif has_theirs and not has_ours:
                    # Only their side changed this file — take their version.
                    self._take_version(merged_files, theirs_files, addr)

                elif addr not in reconstructed:
                    # Both sides patched the same file with commuting note ops.
                    # Attempt note-level MIDI reconstruction.
                    reconstructed.add(addr)
                    merged_content_id = _merge_patch_ops(
                        addr=addr,
                        ours_patch=ours_patches[addr],
                        theirs_patch=theirs_patches[addr],
                        base_files=base_files,
                        ours_snap_files=ours_files,
                        theirs_snap_files=theirs_files,
                        repo_root=repo_root,
                    )
                    if merged_content_id is not None:
                        merged_files[addr] = merged_content_id
                    else:
                        # Reconstruction failed — treat as manual conflict.
                        final_conflicts.append(addr)

        # --- Resolve conflicts ---
        for our_op, _ in ot_result.conflict_ops:
            addr = our_op["address"]
            strategy = resolve_strategy(attrs, addr, "*")

            if strategy == "ours":
                self._take_version(merged_files, ours_files, addr)
                applied_strategies[addr] = "ours"

            elif strategy == "theirs":
                self._take_version(merged_files, theirs_files, addr)
                applied_strategies[addr] = "theirs"

            else:
                # Strategy "manual" or "auto" without a clear resolution.
                final_conflicts.append(addr)

        return MergeResult(
            merged=SnapshotManifest(files=merged_files, domain=_DOMAIN_TAG),
            conflicts=sorted(set(final_conflicts)),
            applied_strategies=applied_strategies,
            op_log=op_log,
        )
701
702
703 # ---------------------------------------------------------------------------
704 # Module-level helpers
705 # ---------------------------------------------------------------------------
706
707
708 def _merge_patch_ops(
709 *,
710 addr: str,
711 ours_patch: PatchOp,
712 theirs_patch: PatchOp,
713 base_files: dict[str, str],
714 ours_snap_files: dict[str, str],
715 theirs_snap_files: dict[str, str],
716 repo_root: pathlib.Path | None,
717 ) -> str | None:
718 """Attempt note-level MIDI merge for two ``PatchOp``\\s on the same file.
719
720 Runs OT on the child_ops of each PatchOp. If the note-level ops all
721 commute, reconstructs the merged MIDI by:
722
723 1. Loading base, ours, and theirs MIDI bytes from the object store.
724 2. Extracting note sequences from all three versions.
725 3. Building ``content_id → NoteKey`` look-ups for the ours and theirs
726 sequences (so that InsertOp content IDs can be resolved to real notes).
727 4. Applying the merged note ops (deletions then insertions) to the base
728 note sequence.
729 5. Calling :func:`~muse.plugins.music.midi_diff.reconstruct_midi` and
730 storing the resulting bytes.
731
732 Returns the SHA-256 hash of the reconstructed MIDI (ready to store in the
733 object store) on success, or ``None`` when:
734
735 - *repo_root* is ``None`` (cannot access object store).
736 - Base or branch bytes are not in the local object store.
737 - Note-level OT found conflicts.
738 - MIDI reconstruction raised any exception.
739
740 Args:
741 addr: Workspace-relative MIDI file path.
742 ours_patch: Our PatchOp for this file.
743 theirs_patch: Their PatchOp for this file.
744 base_files: Content-ID map for the common ancestor snapshot.
745 ours_snap_files: Content-ID map for our branch's final snapshot.
746 theirs_snap_files: Content-ID map for their branch's final snapshot.
747 repo_root: Repository root for object store access.
748
749 Returns:
750 Content-ID (SHA-256 hex) of the merged MIDI, or ``None`` on failure.
751 """
752 if repo_root is None or addr not in base_files:
753 return None
754
755 from muse.core.object_store import read_object, write_object
756 from muse.core.op_transform import merge_op_lists
757 from muse.plugins.music.midi_diff import NoteKey, extract_notes, reconstruct_midi
758
759 # Run OT on note-level ops to classify conflicts.
760 note_result = merge_op_lists([], ours_patch["child_ops"], theirs_patch["child_ops"])
761 if not note_result.is_clean:
762 logger.debug(
763 "⚠️ Note-level conflict in %r: %d pair(s) — falling back to file conflict",
764 addr,
765 len(note_result.conflict_ops),
766 )
767 return None
768
769 try:
770 base_bytes = read_object(repo_root, base_files[addr])
771 if base_bytes is None:
772 return None
773
774 ours_hash = ours_snap_files.get(addr)
775 theirs_hash = theirs_snap_files.get(addr)
776 ours_bytes = read_object(repo_root, ours_hash) if ours_hash else None
777 theirs_bytes = read_object(repo_root, theirs_hash) if theirs_hash else None
778
779 base_notes, ticks_per_beat = extract_notes(base_bytes)
780
781 # Build content_id → NoteKey lookups from ours and theirs versions.
782 ours_by_id: dict[str, NoteKey] = {}
783 if ours_bytes is not None:
784 ours_notes, _ = extract_notes(ours_bytes)
785 ours_by_id = {_note_content_id(n): n for n in ours_notes}
786
787 theirs_by_id: dict[str, NoteKey] = {}
788 if theirs_bytes is not None:
789 theirs_notes, _ = extract_notes(theirs_bytes)
790 theirs_by_id = {_note_content_id(n): n for n in theirs_notes}
791
792 # Collect content IDs to delete.
793 delete_ids: set[str] = {
794 op["content_id"] for op in note_result.merged_ops if op["op"] == "delete"
795 }
796
797 # Apply deletions to base note list.
798 base_note_by_id = {_note_content_id(n): n for n in base_notes}
799 surviving: list[NoteKey] = [
800 n for n in base_notes if _note_content_id(n) not in delete_ids
801 ]
802
803 # Collect insertions: resolve content_id → NoteKey via ours then theirs.
804 inserted: list[NoteKey] = []
805 for op in note_result.merged_ops:
806 if op["op"] == "insert":
807 cid = op["content_id"]
808 note = ours_by_id.get(cid) or theirs_by_id.get(cid)
809 if note is None:
810 # Fallback: base itself shouldn't have it, but check anyway.
811 note = base_note_by_id.get(cid)
812 if note is None:
813 logger.debug(
814 "⚠️ Cannot resolve note content_id %s for %r — skipping",
815 cid[:12],
816 addr,
817 )
818 continue
819 inserted.append(note)
820
821 merged_notes = surviving + inserted
822 merged_bytes = reconstruct_midi(merged_notes, ticks_per_beat=ticks_per_beat)
823
824 merged_hash = hashlib.sha256(merged_bytes).hexdigest()
825 write_object(repo_root, merged_hash, merged_bytes)
826
827 logger.info(
828 "✅ Note-level MIDI merge for %r: %d ops clean, %d notes in result",
829 addr,
830 len(note_result.merged_ops),
831 len(merged_notes),
832 )
833 return merged_hash
834
835 except Exception as exc: # noqa: BLE001 intentional broad catch
836 logger.debug("⚠️ MIDI note-level reconstruction failed for %r: %s", addr, exc)
837 return None
838
839
840 def _note_content_id(note: NoteKey) -> str:
841 """Return the SHA-256 content ID for a :class:`~muse.plugins.music.midi_diff.NoteKey`.
842
843 Delegates to the same algorithm used in :mod:`muse.plugins.music.midi_diff`
844 so that content IDs computed here are identical to those stored in
845 ``InsertOp`` / ``DeleteOp`` entries.
846 """
847 payload = (
848 f"{note['pitch']}:{note['velocity']}:"
849 f"{note['start_tick']}:{note['duration_ticks']}:{note['channel']}"
850 )
851 return hashlib.sha256(payload.encode()).hexdigest()
852
853
def _diff_modified_file(
    *,
    path: str,
    old_hash: str,
    new_hash: str,
    repo_root: pathlib.Path | None,
) -> DomainOp:
    """Produce the best available op for a modified file.

    For a ``.mid`` path (case-insensitive) with a *repo_root*, both versions
    are read from the object store and diffed at note level, yielding a
    ``PatchOp`` whose ``child_ops`` come from ``diff_midi_notes``. In every
    other case — non-MIDI path, no *repo_root*, either object missing, or the
    deep diff raising — the result is a file-level ``ReplaceOp``.

    Args:
        path: Workspace-relative path of the modified file.
        old_hash: Content ID of the file in the base snapshot.
        new_hash: Content ID of the file in the target snapshot.
        repo_root: Repository root for object store access, or ``None``.

    Returns:
        A ``PatchOp`` when the note-level diff succeeded, otherwise a
        ``ReplaceOp``.
    """
    if path.lower().endswith(".mid") and repo_root is not None:
        from muse.core.object_store import read_object
        from muse.plugins.music.midi_diff import diff_midi_notes

        base_bytes = read_object(repo_root, old_hash)
        target_bytes = read_object(repo_root, new_hash)

        if base_bytes is not None and target_bytes is not None:
            try:
                child_delta = diff_midi_notes(
                    base_bytes, target_bytes, file_path=path
                )
                return PatchOp(
                    op="patch",
                    address=path,
                    child_ops=child_delta["ops"],
                    child_domain=child_delta["domain"],
                    child_summary=child_delta["summary"],
                )
            except Exception as exc:  # noqa: BLE001 best-effort deep diff
                # Any failure degrades to the file-level ReplaceOp below.
                logger.debug("⚠️ MIDI deep diff failed for %r: %s", path, exc)

    return ReplaceOp(
        op="replace",
        address=path,
        position=None,
        old_content_id=old_hash,
        new_content_id=new_hash,
        old_summary=f"{path} (previous)",
        new_summary=f"{path} (updated)",
    )
896
897
898 def _summarise_ops(ops: list[DomainOp]) -> str:
899 """Build a human-readable summary string from a list of domain ops."""
900 inserts = 0
901 deletes = 0
902 replaces = 0
903 patches = 0
904
905 for op in ops:
906 kind = op["op"]
907 if kind == "insert":
908 inserts += 1
909 elif kind == "delete":
910 deletes += 1
911 elif kind == "replace":
912 replaces += 1
913 elif kind == "patch":
914 patches += 1
915
916 parts: list[str] = []
917 if inserts:
918 parts.append(f"{inserts} file{'s' if inserts != 1 else ''} added")
919 if deletes:
920 parts.append(f"{deletes} file{'s' if deletes != 1 else ''} removed")
921 if replaces:
922 parts.append(f"{replaces} file{'s' if replaces != 1 else ''} modified")
923 if patches:
924 parts.append(f"{patches} file{'s' if patches != 1 else ''} patched")
925
926 return ", ".join(parts) if parts else "no changes"
927
928
929 def _hash_file(path: pathlib.Path) -> str:
930 """Return the SHA-256 hex digest of a file's raw bytes."""
931 h = hashlib.sha256()
932 with path.open("rb") as fh:
933 for chunk in iter(lambda: fh.read(65536), b""):
934 h.update(chunk)
935 return h.hexdigest()
936
937
938 def _changed_paths(
939 base: dict[str, str], other: dict[str, str]
940 ) -> set[str]:
941 """Return paths that differ between *base* and *other*."""
942 base_p = set(base)
943 other_p = set(other)
944 added = other_p - base_p
945 deleted = base_p - other_p
946 common = base_p & other_p
947 modified = {p for p in common if base[p] != other[p]}
948 return added | deleted | modified
949
950
def content_hash(snapshot: StateSnapshot) -> str:
    """Return a stable SHA-256 digest of a snapshot for content-addressing.

    The snapshot is serialised as compact JSON with sorted keys, so two
    snapshots with equal content hash identically regardless of key order.
    """
    encoded = json.dumps(
        snapshot,
        sort_keys=True,
        separators=(",", ":"),
    ).encode()
    digest = hashlib.sha256(encoded)
    return digest.hexdigest()
955
956
#: Module-level singleton — import and use directly.
plugin = MusicPlugin()

# Import-time sanity checks: fail immediately if the singleton no longer
# passes ``isinstance`` against either plugin protocol. These are ``assert``
# statements, so they are skipped when Python runs with ``-O``.
assert isinstance(plugin, MuseDomainPlugin), (
    "MusicPlugin does not satisfy the MuseDomainPlugin protocol"
)
assert isinstance(plugin, StructuredMergePlugin), (
    "MusicPlugin does not satisfy the StructuredMergePlugin protocol"
)