gabriel / muse public
plugin.py python
1013 lines 39.2 KB
9e852bc1 fix: add rhythmic as 5th music domain dimension everywhere Gabriel Cardona <gabriel@tellurstori.com> 6d ago
1 """Music domain plugin — reference implementation of :class:`MuseDomainPlugin`.
2
3 This plugin implements the six Muse domain interfaces for MIDI state:
4 notes, velocities, controller events (CC), pitch bends, and aftertouch.
5
6 It is the domain that proved the abstraction. Every other domain — scientific
7 simulation, genomics, 3D spatial design — is a new plugin that implements
8 the same six interfaces.
9
10 Live State
11 ----------
12 For the music domain, ``LiveState`` is either:
13
14 1. A ``muse-work/`` directory path (``pathlib.Path``) — the CLI path where
15 MIDI files live on disk and are managed by ``muse commit / checkout``.
16 2. A dict snapshot previously captured by :meth:`snapshot` — used when
17 constructing merges and diffs in memory.
18
19 Both forms are supported. The plugin detects which form it received by
20 checking for ``pathlib.Path`` vs ``dict``.
21
22 Snapshot Format
23 ---------------
24 A music snapshot is a JSON-serialisable dict:
25
26 .. code-block:: json
27
28 {
29 "files": {
30 "tracks/drums.mid": "<sha256>",
31 "tracks/bass.mid": "<sha256>"
32 },
33 "domain": "music"
34 }
35
36 The ``files`` key maps POSIX paths (relative to ``muse-work/``) to their
37 SHA-256 content digests.
38
39 Delta Format
40 ----------------------
41 ``diff()`` returns a ``StructuredDelta`` with typed ``DomainOp`` entries:
42
43 - ``InsertOp`` — a file was added (``content_id`` = its SHA-256 hash).
44 - ``DeleteOp`` — a file was removed.
45 - ``ReplaceOp`` — a non-MIDI file's content changed.
46 - ``PatchOp`` — a ``.mid`` file changed; ``child_ops`` contains note-level
47 ``InsertOp`` / ``DeleteOp`` entries from the Myers LCS diff.
48
49 When ``repo_root`` is available, MIDI files are loaded from the object store
50 and diffed at note level. Without it, modified ``.mid`` files fall back to
51 ``ReplaceOp``.
52 """
53 from __future__ import annotations
54
55 import hashlib
56 import json
57 import logging
58 import pathlib
59
60 from muse.core.schema import (
61 DimensionSpec,
62 DomainSchema,
63 SequenceSchema,
64 SetSchema,
65 TensorSchema,
66 TreeSchema,
67 )
68 from muse.domain import (
69 DeleteOp,
70 DomainOp,
71 DriftReport,
72 InsertOp,
73 LiveState,
74 MergeResult,
75 MuseDomainPlugin,
76 PatchOp,
77 ReplaceOp,
78 SnapshotManifest,
79 StateDelta,
80 StateSnapshot,
81 StructuredDelta,
82 StructuredMergePlugin,
83 )
84 from muse.plugins.music.midi_diff import NoteKey
85
# Module-scoped logger; the helpers below report merge/diff fallbacks through it.
logger = logging.getLogger(__name__)

# Domain identifier passed as ``domain=`` to every snapshot manifest, delta,
# schema and attribute lookup built in this module.
_DOMAIN_TAG = "music"
89
90
class MusicPlugin:
    """Music domain plugin for the Muse VCS.

    Implements :class:`~muse.domain.MuseDomainPlugin` (six core interfaces)
    and :class:`~muse.domain.StructuredMergePlugin` (operation-level
    merge) for MIDI state stored as files in ``muse-work/``.

    This is the reference implementation. Every other domain plugin implements
    the same six core interfaces; the :class:`~muse.domain.StructuredMergePlugin`
    extension is optional but strongly recommended for domains that produce
    note-level (sub-file) diffs.
    """

    # ------------------------------------------------------------------
    # 1. snapshot — capture live state as a content-addressed dict
    # ------------------------------------------------------------------

    def snapshot(self, live_state: LiveState) -> StateSnapshot:
        """Capture the current ``muse-work/`` directory as a snapshot dict.

        Args:
            live_state: Either a ``pathlib.Path`` pointing to ``muse-work/``
                or an existing snapshot dict (returned as-is, same object).

        Returns:
            A ``{"files": {path: sha256}, "domain": "music"}`` manifest. The
            ``files`` mapping uses POSIX paths relative to the work directory.

        Ignore rules
        ------------
        When *live_state* is a ``pathlib.Path``, ignore patterns are loaded
        from the parent of that directory and any matching relative path is
        excluded. Files whose own name starts with ``"."`` are always
        excluded, regardless of the ignore patterns.
        """
        if isinstance(live_state, pathlib.Path):
            from muse.core.ignore import is_ignored, load_patterns

            workdir = live_state
            repo_root = workdir.parent
            patterns = load_patterns(repo_root)
            files: dict[str, str] = {}
            # Sorted walk keeps the manifest insertion order deterministic.
            for file_path in sorted(workdir.rglob("*")):
                if not file_path.is_file():
                    continue
                if file_path.name.startswith("."):
                    continue
                rel = file_path.relative_to(workdir).as_posix()
                if is_ignored(rel, patterns):
                    continue
                files[rel] = _hash_file(file_path)
            return SnapshotManifest(files=files, domain=_DOMAIN_TAG)

        # Already a snapshot: hand it back untouched.
        return live_state

    # ------------------------------------------------------------------
    # 2. diff — compute the structured delta between two snapshots
    # ------------------------------------------------------------------

    def diff(
        self,
        base: StateSnapshot,
        target: StateSnapshot,
        *,
        repo_root: pathlib.Path | None = None,
    ) -> StateDelta:
        """Compute a ``StructuredDelta`` between two music snapshots.

        File additions and removals produce ``InsertOp`` and ``DeleteOp``
        entries respectively. Files present in both snapshots with different
        content IDs are delegated to :func:`_diff_modified_file`, which
        yields a ``PatchOp`` (note-level) for readable ``.mid`` files when
        ``repo_root`` is given and a ``ReplaceOp`` otherwise.

        Args:
            base: The ancestor snapshot.
            target: The later snapshot.
            repo_root: Repository root directory. When provided, MIDI files
                are loaded from the object store for note-level diffing.

        Returns:
            A ``StructuredDelta`` whose ``ops`` list is ordered as: inserts,
            deletes, then modifications (each group sorted by path), and
            whose ``summary`` is human-readable.
        """
        base_files = base["files"]
        target_files = target["files"]

        base_paths = set(base_files)
        target_paths = set(target_files)

        ops: list[DomainOp] = []

        # Added files → InsertOp
        for path in sorted(target_paths - base_paths):
            ops.append(
                InsertOp(
                    op="insert",
                    address=path,
                    position=None,
                    content_id=target_files[path],
                    content_summary=f"new file: {path}",
                )
            )

        # Removed files → DeleteOp
        for path in sorted(base_paths - target_paths):
            ops.append(
                DeleteOp(
                    op="delete",
                    address=path,
                    position=None,
                    content_id=base_files[path],
                    content_summary=f"deleted: {path}",
                )
            )

        # Modified files → PatchOp / ReplaceOp
        for path in sorted(
            p for p in base_paths & target_paths if base_files[p] != target_files[p]
        ):
            ops.append(
                _diff_modified_file(
                    path=path,
                    old_hash=base_files[path],
                    new_hash=target_files[path],
                    repo_root=repo_root,
                )
            )

        summary = _summarise_ops(ops)
        return StructuredDelta(domain=_DOMAIN_TAG, ops=ops, summary=summary)

    # ------------------------------------------------------------------
    # 3. merge — three-way reconciliation
    # ------------------------------------------------------------------

    def merge(
        self,
        base: StateSnapshot,
        left: StateSnapshot,
        right: StateSnapshot,
        *,
        repo_root: pathlib.Path | None = None,
    ) -> MergeResult:
        """Three-way merge two divergent music state lines against a common base.

        A path changed on only one side is taken from that side. A path
        removed on both sides is dropped. For every other path changed on
        both sides, resolution proceeds in two stages:

        1. **File-level strategy** — if the loaded attributes resolve to
           ``ours`` or ``theirs`` for the path (dimension ``"*"``), that
           side's entry (or deletion) is applied.

        2. **Dimension-level merge** — for ``.mid`` paths present in base,
           left and right whose three objects are readable from the object
           store, :func:`merge_midi_dimensions` is attempted. On success the
           merged bytes are written to the object store and recorded under
           the ``"dimension-merge"`` strategy. A ``ValueError`` or a ``None``
           result leaves the path as a conflict.

        Independently, a ``manual`` strategy on a path that was *not* a
        two-sided conflict forces it into the conflict list and reverts the
        merged manifest entry to the base version.

        Args:
            base: Snapshot at the common ancestor commit.
            left: Snapshot for the *ours* (current) branch.
            right: Snapshot for the *theirs* (incoming) branch.
            repo_root: Repository root used to load attributes and access
                the object store. ``None`` disables attribute loading and
                MIDI dimension merging, so every two-sided change that is
                not a consensus deletion becomes a conflict.

        Returns:
            A :class:`~muse.domain.MergeResult` whose ``merged`` manifest
            holds the reconciled files (unresolved conflict paths keep their
            base entry, or are absent when base had none), ``conflicts``
            lists the unresolved paths in sorted order,
            ``applied_strategies`` records the rule used per path, and
            ``dimension_reports`` carries the per-file dimension report.
        """
        from muse.core.attributes import load_attributes, resolve_strategy
        from muse.core.object_store import read_object, write_object
        from muse.plugins.music.midi_merge import merge_midi_dimensions

        base_files = base["files"]
        left_files = left["files"]
        right_files = right["files"]

        attrs = load_attributes(repo_root, domain=_DOMAIN_TAG) if repo_root is not None else []

        left_changed: set[str] = _changed_paths(base_files, left_files)
        right_changed: set[str] = _changed_paths(base_files, right_files)
        all_conflict_paths: set[str] = left_changed & right_changed

        merged: dict[str, str] = dict(base_files)

        # Apply clean single-side changes first.
        for path in left_changed - all_conflict_paths:
            if path in left_files:
                merged[path] = left_files[path]
            else:
                merged.pop(path, None)

        for path in right_changed - all_conflict_paths:
            if path in right_files:
                merged[path] = right_files[path]
            else:
                merged.pop(path, None)

        # Consensus deletions (both sides removed the same file) — not a conflict.
        consensus_deleted = {
            p for p in all_conflict_paths
            if p not in left_files and p not in right_files
        }
        for path in consensus_deleted:
            merged.pop(path, None)

        real_conflicts: set[str] = all_conflict_paths - consensus_deleted

        applied_strategies: dict[str, str] = {}
        dimension_reports: dict[str, dict[str, str]] = {}
        final_conflicts: list[str] = []

        for path in sorted(real_conflicts):
            file_strategy = resolve_strategy(attrs, path, "*")

            if file_strategy == "ours":
                if path in left_files:
                    merged[path] = left_files[path]
                else:
                    merged.pop(path, None)
                applied_strategies[path] = "ours"
                continue

            if file_strategy == "theirs":
                if path in right_files:
                    merged[path] = right_files[path]
                else:
                    merged.pop(path, None)
                applied_strategies[path] = "theirs"
                continue

            # NOTE(review): a file-level "manual" strategy does not short-circuit
            # here; the dimension merge below is still attempted. Presumably
            # merge_midi_dimensions honours it via *attrs* — confirm.
            if (
                repo_root is not None
                and path.lower().endswith(".mid")
                and path in left_files
                and path in right_files
                and path in base_files
            ):
                base_obj = read_object(repo_root, base_files[path])
                left_obj = read_object(repo_root, left_files[path])
                right_obj = read_object(repo_root, right_files[path])

                if base_obj is not None and left_obj is not None and right_obj is not None:
                    try:
                        dim_result = merge_midi_dimensions(
                            base_obj, left_obj, right_obj,
                            attrs,
                            path,
                        )
                    except ValueError:
                        dim_result = None

                    if dim_result is not None:
                        merged_bytes, dim_report = dim_result
                        # The merged file is content-addressed like any other object.
                        new_hash = hashlib.sha256(merged_bytes).hexdigest()
                        write_object(repo_root, new_hash, merged_bytes)
                        merged[path] = new_hash
                        applied_strategies[path] = "dimension-merge"
                        dimension_reports[path] = dim_report
                        continue

            final_conflicts.append(path)

        # Manual override for paths that merged cleanly above: flag them as
        # conflicts and restore the base entry so nothing is silently applied.
        for path in sorted((left_changed | right_changed) - real_conflicts):
            if path in consensus_deleted:
                continue
            if resolve_strategy(attrs, path, "*") == "manual":
                final_conflicts.append(path)
                applied_strategies[path] = "manual"
                if path in base_files:
                    merged[path] = base_files[path]
                else:
                    merged.pop(path, None)

        return MergeResult(
            merged=SnapshotManifest(files=merged, domain=_DOMAIN_TAG),
            conflicts=sorted(final_conflicts),
            applied_strategies=applied_strategies,
            dimension_reports=dimension_reports,
        )

    # ------------------------------------------------------------------
    # 4. drift — compare committed state vs live state
    # ------------------------------------------------------------------

    def drift(
        self,
        committed: StateSnapshot,
        live: LiveState,
    ) -> DriftReport:
        """Detect uncommitted changes in ``muse-work/`` relative to *committed*.

        The live state is snapshotted and diffed against *committed* without
        a ``repo_root``, so the delta is computed from content IDs alone.

        Args:
            committed: The last committed snapshot.
            live: Either a ``pathlib.Path`` (``muse-work/``) or a snapshot
                dict representing current live state.

        Returns:
            A :class:`~muse.domain.DriftReport` whose ``summary`` counts
            added / removed / modified files (``"working tree clean"`` when
            there are none) and whose ``delta`` is the underlying diff.
        """
        live_snapshot = self.snapshot(live)
        delta = self.diff(committed, live_snapshot)

        inserts = sum(1 for op in delta["ops"] if op["op"] == "insert")
        deletes = sum(1 for op in delta["ops"] if op["op"] == "delete")
        modified = sum(1 for op in delta["ops"] if op["op"] in ("replace", "patch"))
        has_drift = bool(inserts or deletes or modified)

        parts: list[str] = []
        if inserts:
            parts.append(f"{inserts} added")
        if deletes:
            parts.append(f"{deletes} removed")
        if modified:
            parts.append(f"{modified} modified")

        summary = ", ".join(parts) if parts else "working tree clean"
        return DriftReport(has_drift=has_drift, summary=summary, delta=delta)

    # ------------------------------------------------------------------
    # 5. apply — execute a delta against live state (checkout)
    # ------------------------------------------------------------------

    def apply(self, delta: StateDelta, live_state: LiveState) -> LiveState:
        """Apply a structured delta to produce a new live state.

        When ``live_state`` is a ``pathlib.Path`` the delta itself is not
        replayed: the directory is simply re-snapshotted, on the expectation
        that the caller has already updated the files on disk.

        When ``live_state`` is a snapshot dict, file-level ``delete``,
        ``replace`` and ``insert`` ops are applied to a copy of its ``files``
        mapping using the content IDs carried by the ops. Ops of any other
        kind (e.g. ``patch``) are skipped, because the resulting content ID
        is not carried by the op; callers that need those should pass the
        workdir path instead.

        Args:
            delta: A ``StructuredDelta`` produced by :meth:`diff`.
            live_state: The workdir path (preferred) or a snapshot dict.

        Returns:
            The updated live state as a snapshot manifest. The input dict is
            not mutated.
        """
        if isinstance(live_state, pathlib.Path):
            return self.snapshot(live_state)

        current_files = dict(live_state["files"])

        for op in delta["ops"]:
            if op["op"] == "delete":
                current_files.pop(op["address"], None)
            elif op["op"] == "replace":
                current_files[op["address"]] = op["new_content_id"]
            elif op["op"] == "insert":
                current_files[op["address"]] = op["content_id"]
            # PatchOp and MoveOp: skip in-memory — caller must use workdir path.

        return SnapshotManifest(files=current_files, domain=_DOMAIN_TAG)

    # ------------------------------------------------------------------
    # 6. schema — declare structural schema for the algorithm library
    # ------------------------------------------------------------------

    def schema(self) -> DomainSchema:
        """Return the full structural schema for the music domain.

        Declares five semantic dimensions — melodic, rhythmic, harmonic,
        dynamic, and structural — that the core diff algorithm library and OT
        merge engine use to drive per-dimension operations.

        Top level is a ``SetSchema``: the music workspace is an unordered
        collection of audio/MIDI files, each identified by content.

        Dimensions:

        - **melodic** — the sequence of note events over time, LCS-diffed.
        - **rhythmic** — timing, groove, and quantisation. Declared with the
          same sequence schema as melodic (it shares the notes bucket).
        - **harmonic** — the sequence of chord events and key signatures,
          LCS-diffed.
        - **dynamic** — velocity and expression curves as a rank-1 float32
          tensor with epsilon 1.0 and sparse diff mode.
        - **structural** — track layout, time signatures, and tempo map as a
          tree. It is the only dimension declared with
          ``independent_merge=False``.
        """
        return DomainSchema(
            domain=_DOMAIN_TAG,
            description="MIDI and audio file versioning with note-level diff",
            top_level=SetSchema(
                kind="set",
                element_type="audio_file",
                identity="by_content",
            ),
            dimensions=[
                DimensionSpec(
                    name="melodic",
                    description="Note pitches and durations over time",
                    schema=SequenceSchema(
                        kind="sequence",
                        element_type="note_event",
                        identity="by_position",
                        diff_algorithm="lcs",
                        alphabet=None,
                    ),
                    independent_merge=True,
                ),
                DimensionSpec(
                    name="rhythmic",
                    description="Timing, groove, and quantisation (shares notes bucket with melodic)",
                    schema=SequenceSchema(
                        kind="sequence",
                        element_type="note_event",
                        identity="by_position",
                        diff_algorithm="lcs",
                        alphabet=None,
                    ),
                    independent_merge=True,
                ),
                DimensionSpec(
                    name="harmonic",
                    description="Chord progressions and key signatures",
                    schema=SequenceSchema(
                        kind="sequence",
                        element_type="chord_event",
                        identity="by_position",
                        diff_algorithm="lcs",
                        alphabet=None,
                    ),
                    independent_merge=True,
                ),
                DimensionSpec(
                    name="dynamic",
                    description="Velocity and expression curves",
                    schema=TensorSchema(
                        kind="tensor",
                        dtype="float32",
                        rank=1,
                        epsilon=1.0,
                        diff_mode="sparse",
                    ),
                    independent_merge=True,
                ),
                DimensionSpec(
                    name="structural",
                    description="Track layout, time signatures, tempo map",
                    schema=TreeSchema(
                        kind="tree",
                        node_type="track_node",
                        diff_algorithm="zhang_shasha",
                    ),
                    independent_merge=False,
                ),
            ],
            merge_mode="three_way",
            schema_version=1,
        )

    # ------------------------------------------------------------------
    # 7. merge_ops — operation-level OT merge (StructuredMergePlugin)
    # ------------------------------------------------------------------

    def merge_ops(
        self,
        base: StateSnapshot,
        ours_snap: StateSnapshot,
        theirs_snap: StateSnapshot,
        ours_ops: list[DomainOp],
        theirs_ops: list[DomainOp],
        *,
        repo_root: pathlib.Path | None = None,
    ) -> MergeResult:
        """Operation-level three-way merge using the OT engine.

        Extends the file-level ``merge()`` method with sub-file granularity: two
        changes to non-overlapping notes in the same MIDI file no longer produce
        a conflict.

        Algorithm
        ---------
        1. Run :func:`~muse.core.op_transform.merge_op_lists` on the flat op
           lists to obtain the cleanly merged ops and the conflicting pairs.
        2. Build the merged manifest from *base* by applying every merged op
           whose address is not part of a conflicting pair. ``insert`` /
           ``replace`` ops supply their content ID directly. For ``patch``
           ops the final hash is taken from the snapshot of the only side
           that patched the file; when both sides patched it,
           :func:`_merge_patch_ops` attempts a note-level reconstruction
           (once per address) and the file becomes a conflict on failure.
        3. For conflicting pairs, resolve the file-level strategy from the
           loaded attributes. ``"ours"`` and ``"theirs"`` are applied from
           the respective snapshot; everything else enters ``conflicts``.

        Args:
            base: Common ancestor snapshot.
            ours_snap: Final snapshot of our branch.
            theirs_snap: Final snapshot of their branch.
            ours_ops: Operations from our branch delta (base → ours).
            theirs_ops: Operations from their branch delta (base → theirs).
            repo_root: Repository root for object store and attributes.
                ``None`` disables attribute loading and note-level
                reconstruction.

        Returns:
            A :class:`~muse.domain.MergeResult` with the reconciled manifest,
            the sorted, de-duplicated conflict addresses, the strategies
            applied, and the merged op list as ``op_log``.
        """
        from muse.core.attributes import load_attributes, resolve_strategy
        from muse.core.op_transform import merge_op_lists

        attrs = load_attributes(repo_root, domain=_DOMAIN_TAG) if repo_root is not None else []

        # OT classification: find commuting and conflicting op pairs.
        ot_result = merge_op_lists([], ours_ops, theirs_ops)

        # Plain-dict copies taken once, instead of once per patched file.
        base_files: dict[str, str] = dict(base["files"])
        ours_files: dict[str, str] = dict(ours_snap["files"])
        theirs_files: dict[str, str] = dict(theirs_snap["files"])

        # Build the merged manifest starting from base.
        merged_files: dict[str, str] = dict(base_files)
        applied_strategies: dict[str, str] = {}
        final_conflicts: list[str] = []
        op_log: list[DomainOp] = list(ot_result.merged_ops)

        # Group PatchOps by address so we can detect same-file note merges.
        ours_patches: dict[str, PatchOp] = {}
        theirs_patches: dict[str, PatchOp] = {}
        for op in ours_ops:
            if op["op"] == "patch":
                ours_patches[op["address"]] = op
        for op in theirs_ops:
            if op["op"] == "patch":
                theirs_patches[op["address"]] = op

        # Track which addresses are involved in a conflict.
        conflicting_addresses: set[str] = {
            our_op["address"] for our_op, _ in ot_result.conflict_ops
        }

        # Addresses whose two-sided patch reconstruction has already run; the
        # outcome depends only on the address, so one attempt is enough even
        # if the merged op list carries a patch op from each side.
        reconstruction_attempted: set[str] = set()

        # --- Apply clean merged ops ---
        for op in ot_result.merged_ops:
            addr = op["address"]
            if addr in conflicting_addresses:
                continue  # handled in conflict resolution below

            if op["op"] == "insert":
                merged_files[addr] = op["content_id"]

            elif op["op"] == "delete":
                merged_files.pop(addr, None)

            elif op["op"] == "replace":
                merged_files[addr] = op["new_content_id"]

            elif op["op"] == "patch":
                # PatchOp: determine which side(s) patched this file.
                has_ours = addr in ours_patches
                has_theirs = addr in theirs_patches

                if has_ours and not has_theirs:
                    # Only our side changed this file — take our version.
                    if addr in ours_files:
                        merged_files[addr] = ours_files[addr]
                    else:
                        merged_files.pop(addr, None)

                elif has_theirs and not has_ours:
                    # Only their side changed this file — take their version.
                    if addr in theirs_files:
                        merged_files[addr] = theirs_files[addr]
                    else:
                        merged_files.pop(addr, None)

                else:
                    # Both sides patched the same file with commuting note ops.
                    if addr in reconstruction_attempted:
                        continue
                    reconstruction_attempted.add(addr)

                    # Attempt note-level MIDI reconstruction.
                    merged_content_id = _merge_patch_ops(
                        addr=addr,
                        ours_patch=ours_patches[addr],
                        theirs_patch=theirs_patches[addr],
                        base_files=base_files,
                        ours_snap_files=ours_files,
                        theirs_snap_files=theirs_files,
                        repo_root=repo_root,
                    )
                    if merged_content_id is not None:
                        merged_files[addr] = merged_content_id
                    else:
                        # Reconstruction failed — treat as manual conflict.
                        final_conflicts.append(addr)

        # --- Resolve conflicts ---
        for our_op, _their_op in ot_result.conflict_ops:
            addr = our_op["address"]
            strategy = resolve_strategy(attrs, addr, "*")

            if strategy == "ours":
                if addr in ours_files:
                    merged_files[addr] = ours_files[addr]
                else:
                    merged_files.pop(addr, None)
                applied_strategies[addr] = "ours"

            elif strategy == "theirs":
                if addr in theirs_files:
                    merged_files[addr] = theirs_files[addr]
                else:
                    merged_files.pop(addr, None)
                applied_strategies[addr] = "theirs"

            else:
                # Strategy "manual" or "auto" without a clear resolution.
                final_conflicts.append(addr)

        return MergeResult(
            merged=SnapshotManifest(files=merged_files, domain=_DOMAIN_TAG),
            conflicts=sorted(set(final_conflicts)),
            applied_strategies=applied_strategies,
            op_log=op_log,
        )
733
734
735 # ---------------------------------------------------------------------------
736 # Module-level helpers
737 # ---------------------------------------------------------------------------
738
739
def _merge_patch_ops(
    *,
    addr: str,
    ours_patch: PatchOp,
    theirs_patch: PatchOp,
    base_files: dict[str, str],
    ours_snap_files: dict[str, str],
    theirs_snap_files: dict[str, str],
    repo_root: pathlib.Path | None,
) -> str | None:
    """Attempt a note-level MIDI merge for two ``PatchOp`` entries on one file.

    Runs OT on the ``child_ops`` of each PatchOp. If the note-level ops all
    commute, reconstructs the merged MIDI by:

    1. Loading base, ours, and theirs MIDI bytes from the object store.
    2. Extracting note sequences from all three versions.
    3. Building ``content_id → NoteKey`` look-ups for the ours and theirs
       sequences (so that insert content IDs can be resolved to real notes).
    4. Applying the merged note ops to the base note sequence: deletions
       first, then insertions appended after the surviving notes.
    5. Calling ``reconstruct_midi`` and writing the resulting bytes to the
       object store under their SHA-256 digest.

    Returns ``None`` (so the caller falls back to a file-level conflict) when:

    - *repo_root* is ``None`` or *addr* is not in *base_files*.
    - Note-level OT found conflicts.
    - Base, ours, or theirs bytes are not available from the object store.
      Proceeding without a branch's notes would silently drop that branch's
      insertions from the merged file.
    - Any step of extraction or reconstruction raised an exception.

    An insert whose content ID cannot be resolved in ours, theirs, or base
    is logged at debug level and skipped rather than failing the merge.

    Args:
        addr: Workspace-relative MIDI file path.
        ours_patch: Our PatchOp for this file.
        theirs_patch: Their PatchOp for this file.
        base_files: Content-ID map for the common ancestor snapshot.
        ours_snap_files: Content-ID map for our branch's final snapshot.
        theirs_snap_files: Content-ID map for their branch's final snapshot.
        repo_root: Repository root for object store access.

    Returns:
        Content-ID (SHA-256 hex) of the merged MIDI, or ``None`` on failure.
    """
    if repo_root is None or addr not in base_files:
        return None

    from muse.core.object_store import read_object, write_object
    from muse.core.op_transform import merge_op_lists
    from muse.plugins.music.midi_diff import extract_notes, reconstruct_midi

    # Run OT on note-level ops to classify conflicts.
    note_result = merge_op_lists([], ours_patch["child_ops"], theirs_patch["child_ops"])
    if not note_result.is_clean:
        logger.debug(
            "⚠️ Note-level conflict in %r: %d pair(s) — falling back to file conflict",
            addr,
            len(note_result.conflict_ops),
        )
        return None

    try:
        base_bytes = read_object(repo_root, base_files[addr])
        if base_bytes is None:
            return None

        ours_hash = ours_snap_files.get(addr)
        theirs_hash = theirs_snap_files.get(addr)
        ours_bytes = read_object(repo_root, ours_hash) if ours_hash else None
        theirs_bytes = read_object(repo_root, theirs_hash) if theirs_hash else None
        if ours_bytes is None or theirs_bytes is None:
            # Without both branch versions, inserted notes cannot be resolved;
            # refuse to build a partial merge.
            logger.debug(
                "⚠️ Branch object missing for %r — falling back to file conflict",
                addr,
            )
            return None

        base_notes, ticks_per_beat = extract_notes(base_bytes)

        # Build content_id → NoteKey lookups from ours and theirs versions.
        ours_notes, _ = extract_notes(ours_bytes)
        ours_by_id: dict[str, NoteKey] = {_note_content_id(n): n for n in ours_notes}

        theirs_notes, _ = extract_notes(theirs_bytes)
        theirs_by_id: dict[str, NoteKey] = {_note_content_id(n): n for n in theirs_notes}

        # Collect content IDs to delete.
        delete_ids: set[str] = {
            op["content_id"] for op in note_result.merged_ops if op["op"] == "delete"
        }

        # Apply deletions to base note list.
        # NOTE(review): matching is by content ID, so every base note sharing
        # a deleted ID is removed — confirm this is intended for duplicates.
        base_note_by_id = {_note_content_id(n): n for n in base_notes}
        surviving: list[NoteKey] = [
            n for n in base_notes if _note_content_id(n) not in delete_ids
        ]

        # Collect insertions: resolve content_id → NoteKey via ours then theirs.
        inserted: list[NoteKey] = []
        for op in note_result.merged_ops:
            if op["op"] == "insert":
                cid = op["content_id"]
                note = ours_by_id.get(cid) or theirs_by_id.get(cid)
                if note is None:
                    # Fallback: base itself shouldn't have it, but check anyway.
                    note = base_note_by_id.get(cid)
                if note is None:
                    logger.debug(
                        "⚠️ Cannot resolve note content_id %s for %r — skipping",
                        cid[:12],
                        addr,
                    )
                    continue
                inserted.append(note)

        merged_notes = surviving + inserted
        merged_bytes = reconstruct_midi(merged_notes, ticks_per_beat=ticks_per_beat)

        merged_hash = hashlib.sha256(merged_bytes).hexdigest()
        write_object(repo_root, merged_hash, merged_bytes)

        logger.info(
            "✅ Note-level MIDI merge for %r: %d ops clean, %d notes in result",
            addr,
            len(note_result.merged_ops),
            len(merged_notes),
        )
        return merged_hash

    except Exception as exc:  # noqa: BLE001 intentional broad catch
        # Best-effort: any parse/reconstruct failure downgrades to a conflict.
        logger.debug("⚠️ MIDI note-level reconstruction failed for %r: %s", addr, exc)
        return None
870
871
def _note_content_id(note: NoteKey) -> str:
    """Compute the SHA-256 content ID of a single note.

    The digest covers the colon-joined ``pitch``, ``velocity``,
    ``start_tick``, ``duration_ticks`` and ``channel`` fields, in that
    order. It is intended to match the IDs carried by note-level
    ``InsertOp`` / ``DeleteOp`` entries (presumably produced by the same
    recipe in :mod:`muse.plugins.music.midi_diff` — keep the two in sync).
    """
    # format_map pulls each named field straight out of the note mapping.
    canonical = "{pitch}:{velocity}:{start_tick}:{duration_ticks}:{channel}".format_map(note)
    digest = hashlib.sha256(canonical.encode())
    return digest.hexdigest()
884
885
def _diff_modified_file(
    *,
    path: str,
    old_hash: str,
    new_hash: str,
    repo_root: pathlib.Path | None,
) -> DomainOp:
    """Produce the richest available operation for a modified file.

    For ``.mid`` paths (case-insensitive) where both content revisions are
    readable from the object store, runs the note-level MIDI diff and
    returns a ``PatchOp`` carrying its child ops, domain and summary.
    Falls back to a ``ReplaceOp`` (opaque before/after hash pair) when the
    file is not a MIDI file, ``repo_root`` is ``None``, either revision
    cannot be read, or the note-level diff raises.

    Args:
        path: Workspace-relative POSIX path of the modified file.
        old_hash: SHA-256 of the base content in the object store.
        new_hash: SHA-256 of the current content in the object store.
        repo_root: Repository root for object store access. ``None`` forces
            immediate fallback to ``ReplaceOp``.

    Returns:
        A ``PatchOp`` with note-level child ops when deep diff succeeds,
        otherwise a ``ReplaceOp`` with the opaque before/after content hashes.
    """
    if path.lower().endswith(".mid") and repo_root is not None:
        from muse.core.object_store import read_object
        from muse.plugins.music.midi_diff import diff_midi_notes

        base_bytes = read_object(repo_root, old_hash)
        target_bytes = read_object(repo_root, new_hash)

        if base_bytes is not None and target_bytes is not None:
            try:
                child_delta = diff_midi_notes(
                    base_bytes, target_bytes, file_path=path
                )
                return PatchOp(
                    op="patch",
                    address=path,
                    child_ops=child_delta["ops"],
                    child_domain=child_delta["domain"],
                    child_summary=child_delta["summary"],
                )
            except Exception as exc:  # noqa: BLE001 intentional broad catch
                # Deep diff is best-effort: a malformed MIDI file must not
                # break the file-level diff, so degrade to ReplaceOp below.
                logger.debug("⚠️ MIDI deep diff failed for %r: %s", path, exc)

    return ReplaceOp(
        op="replace",
        address=path,
        position=None,
        old_content_id=old_hash,
        new_content_id=new_hash,
        old_summary=f"{path} (previous)",
        new_summary=f"{path} (updated)",
    )
944
945
def _summarise_ops(ops: list[DomainOp]) -> str:
    """Render a one-line, human-readable tally of file-level ops.

    Counts ``insert``, ``delete``, ``replace`` and ``patch`` ops (any other
    kind is ignored) and joins the non-zero counts with ``", "`` in that
    fixed order. Returns ``"no changes"`` when nothing was counted.
    """
    tally = {"insert": 0, "delete": 0, "replace": 0, "patch": 0}
    for entry in ops:
        op_kind = entry["op"]
        if op_kind in tally:
            tally[op_kind] += 1

    # (op kind, verb) pairs in the order they appear in the summary.
    wording = (
        ("insert", "added"),
        ("delete", "removed"),
        ("replace", "modified"),
        ("patch", "patched"),
    )
    fragments = [
        f"{tally[op_kind]} file{'s' if tally[op_kind] != 1 else ''} {verb}"
        for op_kind, verb in wording
        if tally[op_kind]
    ]
    if not fragments:
        return "no changes"
    return ", ".join(fragments)
975
976
def _hash_file(path: pathlib.Path) -> str:
    """Compute the SHA-256 hex digest of the file at *path*.

    The file is read in binary mode in 64 KiB pieces so large files are
    never held in memory in full.
    """
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            piece = stream.read(65536)
            if not piece:
                # Empty read means end of file.
                break
            digest.update(piece)
    return digest.hexdigest()
984
985
def _changed_paths(
    base: dict[str, str], other: dict[str, str]
) -> set[str]:
    """Collect every path whose entry differs between *base* and *other*.

    A path counts as changed when it exists on only one side (added or
    deleted) or exists on both sides with different content IDs.
    """
    # Present on exactly one side: added or deleted.
    one_sided = base.keys() ^ other.keys()
    # Present on both sides: changed only if the content IDs disagree.
    rewritten = {p for p in base.keys() & other.keys() if base[p] != other[p]}
    return one_sided | rewritten
997
998
def content_hash(snapshot: StateSnapshot) -> str:
    """Derive a stable SHA-256 hex digest for *snapshot*.

    The snapshot is serialised as compact JSON with sorted keys, so two
    snapshots with equal content hash identically regardless of key order.
    """
    encoded = json.dumps(
        snapshot,
        separators=(",", ":"),
        sort_keys=True,
    ).encode()
    digest = hashlib.sha256(encoded)
    return digest.hexdigest()
1003
1004
#: Module-level singleton — import and use directly.
plugin = MusicPlugin()

# Import-time conformance checks: fail loudly if MusicPlugin drifts from either
# interface. These rely on isinstance() against the protocol classes
# (presumably runtime-checkable — verify in muse.domain). Being plain asserts,
# they are skipped entirely when Python runs with -O.
assert isinstance(plugin, MuseDomainPlugin), (
    "MusicPlugin does not satisfy the MuseDomainPlugin protocol"
)
assert isinstance(plugin, StructuredMergePlugin), (
    "MusicPlugin does not satisfy the StructuredMergePlugin protocol"
)