cgcardona / muse public
plugin.py python
1139 lines 44.2 KB
e6786943 feat: upgrade to Python 3.14, drop from __future__ import annotations Gabriel Cardona <cgcardona@gmail.com> 1d ago
1 """MIDI domain plugin — reference implementation of :class:`MuseDomainPlugin`.
2
3 This plugin implements the six Muse domain interfaces for MIDI state:
4 notes, velocities, controller events (CC), pitch bends, and aftertouch.
5
6 It is the domain that proved the abstraction. Every other domain — scientific
7 simulation, genomics, 3D spatial design — is a new plugin that implements
8 the same six interfaces.
9
10 Live State
11 ----------
12 For the MIDI domain, ``LiveState`` is either:
13
14 1. A ``muse-work/`` directory path (``pathlib.Path``) — the CLI path where
15 MIDI files live on disk and are managed by ``muse commit / checkout``.
16 2. A dict snapshot previously captured by :meth:`snapshot` — used when
17 constructing merges and diffs in memory.
18
19 Both forms are supported. The plugin detects which form it received by
20 checking for ``pathlib.Path`` vs ``dict``.
21
22 Snapshot Format
23 ---------------
24 A music snapshot is a JSON-serialisable dict:
25
26 .. code-block:: json
27
28 {
29 "files": {
30 "tracks/drums.mid": "<sha256>",
31 "tracks/bass.mid": "<sha256>"
32 },
33 "domain": "midi"
34 }
35
36 The ``files`` key maps POSIX paths (relative to ``muse-work/``) to their
37 SHA-256 content digests.
38
39 Delta Format
40 ----------------------
41 ``diff()`` returns a ``StructuredDelta`` with typed ``DomainOp`` entries:
42
43 - ``InsertOp`` — a file was added (``content_id`` = its SHA-256 hash).
44 - ``DeleteOp`` — a file was removed.
45 - ``ReplaceOp`` — a non-MIDI file's content changed.
46 - ``PatchOp`` — a ``.mid`` file changed; ``child_ops`` contains note-level
47 ``InsertOp`` / ``DeleteOp`` entries from the Myers LCS diff.
48
49 When ``repo_root`` is available, MIDI files are loaded from the object store
50 and diffed at note level. Without it, modified ``.mid`` files fall back to
51 ``ReplaceOp``.
52 """
53
54 import hashlib
55 import json
56 import logging
57 import pathlib
58
59 from muse.core.schema import (
60 DimensionSpec,
61 DomainSchema,
62 SequenceSchema,
63 SetSchema,
64 TensorSchema,
65 TreeSchema,
66 )
67 from muse.domain import (
68 DeleteOp,
69 DomainOp,
70 DriftReport,
71 InsertOp,
72 LiveState,
73 MergeResult,
74 MuseDomainPlugin,
75 PatchOp,
76 ReplaceOp,
77 SnapshotManifest,
78 StateDelta,
79 StateSnapshot,
80 StructuredDelta,
81 StructuredMergePlugin,
82 )
83 from muse.plugins.midi.midi_diff import NoteKey
84
85 logger = logging.getLogger(__name__)
86
87 _DOMAIN_TAG = "midi"
88
89
90 class MidiPlugin:
91 """MIDI domain plugin for the Muse VCS.
92
93 Implements :class:`~muse.domain.MuseDomainPlugin` (six core interfaces)
94 and :class:`~muse.domain.StructuredMergePlugin` (operation-level
95 merge) for MIDI state stored as files in ``muse-work/``.
96
97 This is the reference implementation. Every other domain plugin implements
98 the same six core interfaces; the :class:`~muse.domain.StructuredMergePlugin`
99 extension is optional but strongly recommended for domains that produce
100 note-level (sub-file) diffs.
101 """
102
103 # ------------------------------------------------------------------
104 # 1. snapshot — capture live state as a content-addressed dict
105 # ------------------------------------------------------------------
106
def snapshot(self, live_state: LiveState) -> StateSnapshot:
    """Build a content-addressed manifest of the ``muse-work/`` tree.

    Args:
        live_state: A ``pathlib.Path`` to the ``muse-work/`` directory, or an
            already-captured snapshot dict, which is handed back untouched.

    Returns:
        For a path: a ``SnapshotManifest`` with ``files`` mapping each
        POSIX-style path (relative to *live_state*) to the digest returned
        by ``_hash_file``, and ``domain`` set to ``"midi"``.
        For anything else: *live_state* itself.

    Ignore rules
    ------------
    Patterns are loaded from the parent directory of *live_state* via
    ``load_patterns`` and every relative path matching ``is_ignored`` is
    left out. Files whose own name starts with a dot are always left out,
    before the ignore patterns are even consulted.
    """
    # Anything that is not a directory path is treated as a ready snapshot.
    if not isinstance(live_state, pathlib.Path):
        return live_state

    from muse.core.ignore import is_ignored, load_patterns

    # Ignore patterns are looked up one level above the working directory.
    ignore_patterns = load_patterns(live_state.parent)

    manifest: dict[str, str] = {}
    # Sorted traversal keeps the manifest's insertion order deterministic.
    for entry in sorted(live_state.rglob("*")):
        if not entry.is_file() or entry.name.startswith("."):
            continue
        relative = entry.relative_to(live_state).as_posix()
        if not is_ignored(relative, ignore_patterns):
            manifest[relative] = _hash_file(entry)

    return SnapshotManifest(files=manifest, domain=_DOMAIN_TAG)
144
145 # ------------------------------------------------------------------
146 # 2. diff — compute the structured delta between two snapshots
147 # ------------------------------------------------------------------
148
def diff(
    self,
    base: StateSnapshot,
    target: StateSnapshot,
    *,
    repo_root: pathlib.Path | None = None,
) -> StateDelta:
    """Describe how *target* differs from *base* as a ``StructuredDelta``.

    Ops are emitted in three groups, each sorted by path:

    1. ``InsertOp`` for every path present only in *target*.
    2. ``DeleteOp`` for every path present only in *base*.
    3. One op per path present in both whose content hash differs; the op
       itself is produced by ``_diff_modified_file`` (per this module's
       documentation: a note-level ``PatchOp`` for ``.mid`` files when the
       object store is reachable, otherwise a ``ReplaceOp``).

    Args:
        base: The ancestor snapshot.
        target: The later snapshot.
        repo_root: Repository root, forwarded to ``_diff_modified_file``
            for every modified path. ``None`` is forwarded unchanged.

    Returns:
        A ``StructuredDelta`` for the ``"midi"`` domain carrying the ops
        and the summary string computed by ``_summarise_ops``.
    """
    old_files = base["files"]
    new_files = target["files"]
    old_paths = set(old_files)
    new_paths = set(new_files)

    # Paths that only exist in the target were added.
    additions: list[DomainOp] = [
        InsertOp(
            op="insert",
            address=added,
            position=None,
            content_id=new_files[added],
            content_summary=f"new file: {added}",
        )
        for added in sorted(new_paths - old_paths)
    ]

    # Paths that only exist in the base were removed.
    removals: list[DomainOp] = [
        DeleteOp(
            op="delete",
            address=removed,
            position=None,
            content_id=old_files[removed],
            content_summary=f"deleted: {removed}",
        )
        for removed in sorted(old_paths - new_paths)
    ]

    # Paths on both sides count as modified only when the hashes differ.
    modifications: list[DomainOp] = [
        _diff_modified_file(
            path=shared,
            old_hash=old_files[shared],
            new_hash=new_files[shared],
            repo_root=repo_root,
        )
        for shared in sorted(old_paths & new_paths)
        if old_files[shared] != new_files[shared]
    ]

    ops: list[DomainOp] = [*additions, *removals, *modifications]
    return StructuredDelta(
        domain=_DOMAIN_TAG,
        ops=ops,
        summary=_summarise_ops(ops),
    )
223
224 # ------------------------------------------------------------------
225 # 3. merge — three-way reconciliation
226 # ------------------------------------------------------------------
227
def merge(
    self,
    base: StateSnapshot,
    left: StateSnapshot,
    right: StateSnapshot,
    *,
    repo_root: pathlib.Path | None = None,
) -> MergeResult:
    """Three-way merge two divergent music state lines against a common base.

    A path changed on only one side is taken from that side. A path changed
    on both sides is handled as follows:

    * **Consensus** — both sides deleted the file, or both sides ended up
      with the same content hash. There is nothing to reconcile, so the
      agreed result is taken and no conflict is raised.
    * **File-level strategy** — if ``.museattributes`` resolves to ``ours``
      or ``theirs`` for the path (dimension ``"*"``), that side wins and the
      choice is recorded in ``applied_strategies``.
    * **Dimension-level merge** — for ``.mid`` files present in base, left
      and right, with all three objects readable from the object store,
      ``merge_midi_dimensions`` is asked to merge the file. On success the
      merged bytes are written to the object store and recorded under the
      ``"dimension-merge"`` strategy together with the per-dimension report.
    * Everything else is reported as a conflict.

    Finally, every cleanly resolved path (single-side change or convergent
    edit) whose strategy resolves to ``manual`` is forced into the conflict
    list and reset to its base version.

    Args:
        base: Snapshot at the common ancestor commit.
        left: Snapshot for the *ours* (current) branch.
        right: Snapshot for the *theirs* (incoming) branch.
        repo_root: Repository root used to load ``.museattributes`` and to
            reach the object store. ``None`` disables attribute loading and
            MIDI dimension merging.

    Returns:
        A :class:`~muse.domain.MergeResult` whose ``merged`` manifest holds
        the reconciled files (a conflicted path keeps its base entry, or is
        absent when the base had none), ``conflicts`` is the sorted list of
        unresolved paths, ``applied_strategies`` records the strategy used
        per path, and ``dimension_reports`` holds the report returned by
        ``merge_midi_dimensions`` for each dimension-merged file.
    """
    from muse.core.attributes import load_attributes, resolve_strategy
    from muse.core.object_store import read_object, write_object
    from muse.plugins.midi.midi_merge import merge_midi_dimensions

    base_files = base["files"]
    left_files = left["files"]
    right_files = right["files"]

    attrs = load_attributes(repo_root, domain=_DOMAIN_TAG) if repo_root is not None else []

    left_changed: set[str] = _changed_paths(base_files, left_files)
    right_changed: set[str] = _changed_paths(base_files, right_files)
    both_changed: set[str] = left_changed & right_changed

    merged: dict[str, str] = dict(base_files)

    def _take_side(side_files: dict[str, str], path: str) -> None:
        """Make *merged* agree with one side for *path* (copy or delete)."""
        if path in side_files:
            merged[path] = side_files[path]
        else:
            merged.pop(path, None)

    # Apply clean single-side changes first.
    for path in left_changed - both_changed:
        _take_side(left_files, path)
    for path in right_changed - both_changed:
        _take_side(right_files, path)

    # Consensus deletions (both sides removed the same file) — not a conflict.
    consensus_deleted: set[str] = {
        p for p in both_changed
        if p not in left_files and p not in right_files
    }
    for path in consensus_deleted:
        merged.pop(path, None)

    # Convergent edits (both sides produced identical content) — not a
    # conflict either: the two branches already agree on the result.
    convergent: set[str] = {
        p for p in both_changed
        if p in left_files and p in right_files and left_files[p] == right_files[p]
    }
    for path in convergent:
        merged[path] = left_files[path]

    real_conflicts: set[str] = both_changed - consensus_deleted - convergent

    applied_strategies: dict[str, str] = {}
    dimension_reports: dict[str, dict[str, str]] = {}
    final_conflicts: list[str] = []

    for path in sorted(real_conflicts):
        file_strategy = resolve_strategy(attrs, path, "*")

        if file_strategy == "ours":
            _take_side(left_files, path)
            applied_strategies[path] = "ours"
            continue

        if file_strategy == "theirs":
            _take_side(right_files, path)
            applied_strategies[path] = "theirs"
            continue

        # Dimension-level merge needs all three versions of a MIDI file.
        if (
            repo_root is not None
            and path.lower().endswith(".mid")
            and path in left_files
            and path in right_files
            and path in base_files
        ):
            base_obj = read_object(repo_root, base_files[path])
            left_obj = read_object(repo_root, left_files[path])
            right_obj = read_object(repo_root, right_files[path])

            if base_obj is not None and left_obj is not None and right_obj is not None:
                try:
                    dim_result = merge_midi_dimensions(
                        base_obj, left_obj, right_obj,
                        attrs,
                        path,
                    )
                except ValueError:
                    # A ValueError from the dimension merge is treated as
                    # "could not merge" and falls through to a conflict.
                    dim_result = None

                if dim_result is not None:
                    merged_bytes, dim_report = dim_result
                    new_hash = hashlib.sha256(merged_bytes).hexdigest()
                    write_object(repo_root, new_hash, merged_bytes)
                    merged[path] = new_hash
                    applied_strategies[path] = "dimension-merge"
                    dimension_reports[path] = dim_report
                    continue

        final_conflicts.append(path)

    # "manual" overrides any automatic resolution except consensus deletion:
    # the path is reported as a conflict and reset to the base version.
    for path in sorted((left_changed | right_changed) - real_conflicts):
        if path in consensus_deleted:
            continue
        if resolve_strategy(attrs, path, "*") == "manual":
            final_conflicts.append(path)
            applied_strategies[path] = "manual"
            if path in base_files:
                merged[path] = base_files[path]
            else:
                merged.pop(path, None)

    return MergeResult(
        merged=SnapshotManifest(files=merged, domain=_DOMAIN_TAG),
        conflicts=sorted(final_conflicts),
        applied_strategies=applied_strategies,
        dimension_reports=dimension_reports,
    )
386
387 # ------------------------------------------------------------------
388 # 4. drift — compare committed state vs live state
389 # ------------------------------------------------------------------
390
def drift(
    self,
    committed: StateSnapshot,
    live: LiveState,
) -> DriftReport:
    """Report how the live state has moved away from *committed*.

    The live state is first normalised with :meth:`snapshot`, then compared
    to *committed* with :meth:`diff` (no ``repo_root`` is passed). The ops
    of the resulting delta are tallied by kind.

    Args:
        committed: The last committed snapshot.
        live: A ``pathlib.Path`` (``muse-work/``) or a snapshot dict.

    Returns:
        A :class:`~muse.domain.DriftReport` with ``has_drift`` true when at
        least one insert, delete, replace or patch op exists, a summary such
        as ``"2 added, 1 modified"`` (or ``"working tree clean"``), and the
        full delta.
    """
    delta = self.diff(committed, self.snapshot(live))

    # Tally ops in one pass; "replace" and "patch" both count as modified.
    added = removed = changed = 0
    for op in delta["ops"]:
        kind = op["op"]
        if kind == "insert":
            added += 1
        elif kind == "delete":
            removed += 1
        elif kind in ("replace", "patch"):
            changed += 1

    # Only non-zero tallies appear in the summary, in this fixed order.
    tallies = ((added, "added"), (removed, "removed"), (changed, "modified"))
    fragments = [f"{count} {label}" for count, label in tallies if count]

    if fragments:
        summary = ", ".join(fragments)
    else:
        summary = "working tree clean"

    return DriftReport(has_drift=bool(fragments), summary=summary, delta=delta)
425
426 # ------------------------------------------------------------------
427 # 5. apply — execute a delta against live state (checkout)
428 # ------------------------------------------------------------------
429
430 def apply(self, delta: StateDelta, live_state: LiveState) -> LiveState:
431 """Apply a structured delta to produce a new live state.
432
433 When ``live_state`` is a ``pathlib.Path`` the physical files have
434 already been updated by the caller (``muse checkout`` restores objects
435 from the store before calling this). Rescanning the directory is the
436 cheapest correct way to reflect the new state.
437
438 When ``live_state`` is a snapshot dict, only ``DeleteOp`` and
439 ``ReplaceOp`` at the file level can be applied in-memory. ``InsertOp``
440 at the file level requires the new content to be on disk; callers that
441 need those should pass the workdir ``pathlib.Path`` instead.
442 ``PatchOp`` entries are skipped in-memory since reconstructing patched
443 file content requires both the original bytes and the object store.
444
445 Args:
446 delta: A ``StructuredDelta`` produced by :meth:`diff`.
447 live_state: The workdir path (preferred) or a snapshot dict.
448
449 Returns:
450 The updated live state as a ``SnapshotManifest``.
451 """
452 if isinstance(live_state, pathlib.Path):
453 return self.snapshot(live_state)
454
455 current_files = dict(live_state["files"])
456
457 for op in delta["ops"]:
458 if op["op"] == "delete":
459 current_files.pop(op["address"], None)
460 elif op["op"] == "replace":
461 current_files[op["address"]] = op["new_content_id"]
462 elif op["op"] == "insert":
463 current_files[op["address"]] = op["content_id"]
464 # PatchOp and MoveOp: skip in-memory — caller must use workdir path.
465
466 return SnapshotManifest(files=current_files, domain=_DOMAIN_TAG)
467
468 # ------------------------------------------------------------------
469 # 6. schema — declare structural schema for the algorithm library
470 # ------------------------------------------------------------------
471
def schema(self) -> DomainSchema:
    """Return the structural schema of the MIDI domain.

    The top level is a ``SetSchema`` of ``audio_file`` elements identified
    by content. Below it, 21 dimensions are declared, in this order:

    * ``notes`` — LCS-diffed sequence of ``note_event`` elements.
    * ``pitch_bend``, ``channel_pressure``, ``poly_pressure`` and the eleven
      ``cc_*`` dimensions — sparse rank-1 ``float32`` tensors (epsilon 0.5).
    * ``program_change``, ``tempo_map``, ``time_signatures``,
      ``key_signatures``, ``markers`` — LCS-diffed sequences of
      ``meta_event`` elements.
    * ``track_structure`` — a ``track_node`` tree diffed with
      ``zhang_shasha``.

    Every dimension is flagged ``independent_merge=True`` except
    ``tempo_map``, ``time_signatures`` and ``track_structure``, whose
    descriptions state that a conflict there affects all other dimensions.

    Returns:
        A ``DomainSchema`` with ``merge_mode="three_way"`` and
        ``schema_version=1``.
    """
    note_events = SequenceSchema(
        kind="sequence",
        element_type="note_event",
        identity="by_position",
        diff_algorithm="lcs",
        alphabet=None,
    )
    controller = TensorSchema(
        kind="tensor",
        dtype="float32",
        rank=1,
        epsilon=0.5,
        diff_mode="sparse",
    )
    track_tree = TreeSchema(
        kind="tree",
        node_type="track_node",
        diff_algorithm="zhang_shasha",
    )
    meta_events = SequenceSchema(
        kind="sequence",
        element_type="meta_event",
        identity="by_position",
        diff_algorithm="lcs",
        alphabet=None,
    )

    # One row per dimension: (name, description, schema, independent_merge).
    # Row order is the declaration order of the resulting dimension list.
    rows = [
        # --- Expressive note content ---
        ("notes", "Note pitches, durations, and timing (melodic + rhythmic)", note_events, True),
        ("pitch_bend", "Pitchwheel controller — expressive pitch deviation", controller, True),
        ("channel_pressure", "Monophonic aftertouch — channel-wide pressure", controller, True),
        ("poly_pressure", "Polyphonic aftertouch — per-note pressure", controller, True),
        # --- Named CC controllers ---
        ("cc_modulation", "CC 1 — modulation wheel", controller, True),
        ("cc_volume", "CC 7 — channel volume", controller, True),
        ("cc_pan", "CC 10 — stereo pan position", controller, True),
        ("cc_expression", "CC 11 — expression controller", controller, True),
        ("cc_sustain", "CC 64 — damper / sustain pedal", controller, True),
        ("cc_portamento", "CC 65 — portamento on/off", controller, True),
        ("cc_sostenuto", "CC 66 — sostenuto pedal", controller, True),
        ("cc_soft_pedal", "CC 67 — soft pedal (una corda)", controller, True),
        ("cc_reverb", "CC 91 — reverb send level", controller, True),
        ("cc_chorus", "CC 93 — chorus send level", controller, True),
        ("cc_other", "All other numbered CC controllers", controller, True),
        # --- Patch / program selection ---
        ("program_change", "Instrument / patch selection events", meta_events, True),
        # --- Non-independent timeline metadata ---
        (
            "tempo_map",
            "Tempo (BPM) changes — non-independent: a conflict "
            "blocks merging all other dimensions",
            meta_events,
            False,
        ),
        (
            "time_signatures",
            "Time signature changes — non-independent: affects "
            "bar structure for all other dimensions",
            meta_events,
            False,
        ),
        # --- Tonal and annotation metadata ---
        ("key_signatures", "Key signature events", meta_events, True),
        ("markers", "Section markers, cue points, text, lyrics, copyright", meta_events, True),
        # --- Track structure (non-independent) ---
        (
            "track_structure",
            "Track name, instrument name, sysex, unknown meta — "
            "non-independent: routing changes affect all tracks",
            track_tree,
            False,
        ),
    ]

    dimensions = [
        DimensionSpec(
            name=dim_name,
            description=dim_description,
            schema=dim_schema,
            independent_merge=is_independent,
        )
        for dim_name, dim_description, dim_schema, is_independent in rows
    ]

    return DomainSchema(
        domain=_DOMAIN_TAG,
        description=(
            "MIDI and audio file versioning with note-level diff and "
            "21-dimension independent merge"
        ),
        top_level=SetSchema(
            kind="set",
            element_type="audio_file",
            identity="by_content",
        ),
        dimensions=dimensions,
        merge_mode="three_way",
        schema_version=1,
    )
699
700 # ------------------------------------------------------------------
701 # 7. merge_ops — operation-level OT merge (StructuredMergePlugin)
702 # ------------------------------------------------------------------
703
def merge_ops(
    self,
    base: StateSnapshot,
    ours_snap: StateSnapshot,
    theirs_snap: StateSnapshot,
    ours_ops: list[DomainOp],
    theirs_ops: list[DomainOp],
    *,
    repo_root: pathlib.Path | None = None,
) -> MergeResult:
    """Operation-level three-way merge using the OT engine.

    Algorithm
    ---------
    1. Run :func:`~muse.core.op_transform.merge_op_lists` on the two op
       lists to obtain the cleanly merged ops and the conflicting
       (ours, theirs) pairs.
    2. Starting from a copy of the *base* manifest, apply every merged op
       whose address is not involved in a conflict:

       - ``insert`` / ``replace`` store the op's ``content_id`` /
         ``new_content_id``; ``delete`` removes the address.
       - ``patch`` on a file patched by one side only takes that side's
         final hash from its snapshot (or removes the entry if the
         snapshot no longer has the file).
       - ``patch`` on a file patched by both sides is handed to
         ``_merge_patch_ops`` for note-level reconstruction; if that
         returns ``None`` the address becomes a conflict. The result is
         cached per address so the reconstruction runs at most once per
         file.
    3. For each conflicting pair, consult ``.museattributes``: ``"ours"``
       and ``"theirs"`` take the corresponding snapshot's entry and are
       recorded in ``applied_strategies``; any other strategy leaves the
       address in ``conflicts``.

    Args:
        base: Common ancestor snapshot.
        ours_snap: Final snapshot of our branch.
        theirs_snap: Final snapshot of their branch.
        ours_ops: Operations from our branch delta (base → ours).
        theirs_ops: Operations from their branch delta (base → theirs).
        repo_root: Repository root for the object store and attributes.
            ``None`` disables attribute loading.

    Returns:
        A :class:`~muse.domain.MergeResult` with the reconciled manifest,
        the sorted, de-duplicated conflict addresses, the strategies that
        were applied, and ``op_log`` set to the OT engine's merged ops.
    """
    from muse.core.attributes import load_attributes, resolve_strategy
    from muse.core.op_transform import merge_op_lists

    attrs = load_attributes(repo_root, domain=_DOMAIN_TAG) if repo_root is not None else []

    # OT classification: find commuting and conflicting op pairs.
    ot_result = merge_op_lists([], ours_ops, theirs_ops)

    base_files = base["files"]
    ours_files = ours_snap["files"]
    theirs_files = theirs_snap["files"]

    # Build the merged manifest starting from base.
    merged_files: dict[str, str] = dict(base_files)
    applied_strategies: dict[str, str] = {}
    final_conflicts: list[str] = []
    op_log: list[DomainOp] = list(ot_result.merged_ops)

    # Index PatchOps by address so same-file note merges can be detected.
    # A later PatchOp for the same address replaces an earlier one.
    ours_patches: dict[str, PatchOp] = {
        op["address"]: op for op in ours_ops if op["op"] == "patch"
    }
    theirs_patches: dict[str, PatchOp] = {
        op["address"]: op for op in theirs_ops if op["op"] == "patch"
    }

    # Addresses involved in a conflict are resolved in the second pass.
    conflicting_addresses: set[str] = {
        our_op["address"] for our_op, _ in ot_result.conflict_ops
    }

    def _take_side(side_files: dict[str, str], addr: str) -> None:
        """Make *merged_files* agree with one side for *addr*."""
        if addr in side_files:
            merged_files[addr] = side_files[addr]
        else:
            merged_files.pop(addr, None)

    # Note-level reconstruction results, keyed by address. merged_ops may
    # carry a PatchOp from each side for the same file; without this cache
    # the reconstruction (object-store reads, MIDI rebuild, write) would be
    # repeated for every such op with an identical outcome.
    reconstructed: dict[str, str | None] = {}

    # --- Apply clean merged ops ---
    for op in ot_result.merged_ops:
        addr = op["address"]
        if addr in conflicting_addresses:
            continue  # handled in conflict resolution below

        kind = op["op"]
        if kind == "insert":
            merged_files[addr] = op["content_id"]

        elif kind == "delete":
            merged_files.pop(addr, None)

        elif kind == "replace":
            merged_files[addr] = op["new_content_id"]

        elif kind == "patch":
            # PatchOp: determine which side(s) patched this file.
            has_ours = addr in ours_patches
            has_theirs = addr in theirs_patches

            if has_ours and not has_theirs:
                # Only our side changed this file — take our version.
                _take_side(ours_files, addr)

            elif has_theirs and not has_ours:
                # Only their side changed this file — take their version.
                _take_side(theirs_files, addr)

            else:
                # Both sides patched the same file with commuting note ops.
                # Attempt note-level MIDI reconstruction (once per file).
                if addr not in reconstructed:
                    reconstructed[addr] = _merge_patch_ops(
                        addr=addr,
                        ours_patch=ours_patches[addr],
                        theirs_patch=theirs_patches[addr],
                        base_files=dict(base_files),
                        ours_snap_files=dict(ours_files),
                        theirs_snap_files=dict(theirs_files),
                        repo_root=repo_root,
                    )
                merged_content_id = reconstructed[addr]
                if merged_content_id is not None:
                    merged_files[addr] = merged_content_id
                else:
                    # Reconstruction failed — treat as manual conflict.
                    final_conflicts.append(addr)

    # --- Resolve conflicts ---
    for our_op, _their_op in ot_result.conflict_ops:
        addr = our_op["address"]
        strategy = resolve_strategy(attrs, addr, "*")

        if strategy == "ours":
            _take_side(ours_files, addr)
            applied_strategies[addr] = "ours"

        elif strategy == "theirs":
            _take_side(theirs_files, addr)
            applied_strategies[addr] = "theirs"

        else:
            # Strategy "manual" or "auto" without a clear resolution.
            final_conflicts.append(addr)

    return MergeResult(
        merged=SnapshotManifest(files=merged_files, domain=_DOMAIN_TAG),
        # The same address can be appended more than once; report it once.
        conflicts=sorted(set(final_conflicts)),
        applied_strategies=applied_strategies,
        op_log=op_log,
    )
859
860
861 # ---------------------------------------------------------------------------
862 # Module-level helpers
863 # ---------------------------------------------------------------------------
864
865
def _merge_patch_ops(
    *,
    addr: str,
    ours_patch: PatchOp,
    theirs_patch: PatchOp,
    base_files: dict[str, str],
    ours_snap_files: dict[str, str],
    theirs_snap_files: dict[str, str],
    repo_root: pathlib.Path | None,
) -> str | None:
    """Attempt a note-level MIDI merge for two ``PatchOp`` entries on one file.

    Runs OT on the ``child_ops`` of each PatchOp. If the note-level ops all
    commute, reconstructs the merged MIDI by:

    1. Loading base, ours, and theirs MIDI bytes from the object store.
    2. Extracting note sequences from all three versions.
    3. Building ``content_id → NoteKey`` look-ups for the ours and theirs
       sequences (so that InsertOp content IDs can be resolved to real notes).
    4. Applying the merged note ops (deletions then insertions) to the base
       note sequence.
    5. Calling :func:`~muse.plugins.midi.midi_diff.reconstruct_midi` and
       writing the resulting bytes to the object store.

    Returns the SHA-256 hash of the reconstructed MIDI on success, or
    ``None`` when:

    - *repo_root* is ``None`` (cannot access object store).
    - *addr* is not part of the base snapshot.
    - Base or branch bytes are not in the local object store.
    - Note-level OT found conflicts.
    - MIDI reconstruction raised any exception.

    Args:
        addr: Workspace-relative MIDI file path.
        ours_patch: Our PatchOp for this file.
        theirs_patch: Their PatchOp for this file.
        base_files: Content-ID map for the common ancestor snapshot.
        ours_snap_files: Content-ID map for our branch's final snapshot.
        theirs_snap_files: Content-ID map for their branch's final snapshot.
        repo_root: Repository root for object store access.

    Returns:
        Content-ID (SHA-256 hex) of the merged MIDI, or ``None`` on failure.
    """
    if repo_root is None or addr not in base_files:
        return None

    from muse.core.object_store import read_object, write_object
    from muse.core.op_transform import merge_op_lists
    from muse.plugins.midi.midi_diff import NoteKey, extract_notes, reconstruct_midi

    # Run OT on note-level ops to classify conflicts.
    note_result = merge_op_lists([], ours_patch["child_ops"], theirs_patch["child_ops"])
    if not note_result.is_clean:
        logger.debug(
            "⚠️ Note-level conflict in %r: %d pair(s) — falling back to file conflict",
            addr,
            len(note_result.conflict_ops),
        )
        return None

    try:
        base_bytes = read_object(repo_root, base_files[addr])
        if base_bytes is None:
            return None

        ours_hash = ours_snap_files.get(addr)
        theirs_hash = theirs_snap_files.get(addr)
        ours_bytes = read_object(repo_root, ours_hash) if ours_hash else None
        theirs_bytes = read_object(repo_root, theirs_hash) if theirs_hash else None

        # Without both branch versions the inserted notes cannot be resolved,
        # and a "merge" built from empty look-ups would silently drop them.
        # Report failure so the caller falls back to a file-level conflict.
        if ours_bytes is None or theirs_bytes is None:
            logger.debug(
                "⚠️ Branch object(s) for %r missing from object store — "
                "falling back to file conflict",
                addr,
            )
            return None

        base_notes, ticks_per_beat = extract_notes(base_bytes)
        ours_notes, _ = extract_notes(ours_bytes)
        theirs_notes, _ = extract_notes(theirs_bytes)

        # Build content_id → NoteKey lookups for every version.
        ours_by_id: dict[str, NoteKey] = {_note_content_id(n): n for n in ours_notes}
        theirs_by_id: dict[str, NoteKey] = {_note_content_id(n): n for n in theirs_notes}
        base_note_by_id: dict[str, NoteKey] = {_note_content_id(n): n for n in base_notes}

        # Collect content IDs to delete.
        delete_ids: set[str] = {
            op["content_id"] for op in note_result.merged_ops if op["op"] == "delete"
        }

        # Apply deletions to base note list.
        surviving: list[NoteKey] = [
            n for n in base_notes if _note_content_id(n) not in delete_ids
        ]

        # Collect insertions: resolve content_id → NoteKey via ours then theirs.
        inserted: list[NoteKey] = []
        for op in note_result.merged_ops:
            if op["op"] != "insert":
                continue
            cid = op["content_id"]
            note = ours_by_id.get(cid) or theirs_by_id.get(cid)
            if note is None:
                # Fallback: base itself shouldn't have it, but check anyway.
                note = base_note_by_id.get(cid)
            if note is None:
                # Best effort: an insert that resolves nowhere is skipped
                # rather than failing the whole reconstruction.
                logger.debug(
                    "⚠️ Cannot resolve note content_id %s for %r — skipping",
                    cid[:12],
                    addr,
                )
                continue
            inserted.append(note)

        merged_notes = surviving + inserted
        merged_bytes = reconstruct_midi(merged_notes, ticks_per_beat=ticks_per_beat)

        merged_hash = hashlib.sha256(merged_bytes).hexdigest()
        write_object(repo_root, merged_hash, merged_bytes)

        logger.info(
            "✅ Note-level MIDI merge for %r: %d ops clean, %d notes in result",
            addr,
            len(note_result.merged_ops),
            len(merged_notes),
        )
        return merged_hash

    except Exception as exc:  # noqa: BLE001 intentional broad catch
        # Any failure here degrades to a file-level conflict in the caller.
        logger.debug("⚠️ MIDI note-level reconstruction failed for %r: %s", addr, exc)
        return None
996
997
998 def _note_content_id(note: NoteKey) -> str:
999 """Return the SHA-256 content ID for a :class:`~muse.plugins.midi.midi_diff.NoteKey`.
1000
1001 Delegates to the same algorithm used in :mod:`muse.plugins.midi.midi_diff`
1002 so that content IDs computed here are identical to those stored in
1003 ``InsertOp`` / ``DeleteOp`` entries.
1004 """
1005 payload = (
1006 f"{note['pitch']}:{note['velocity']}:"
1007 f"{note['start_tick']}:{note['duration_ticks']}:{note['channel']}"
1008 )
1009 return hashlib.sha256(payload.encode()).hexdigest()
1010
1011
def _diff_modified_file(
    *,
    path: str,
    old_hash: str,
    new_hash: str,
    repo_root: pathlib.Path | None,
) -> DomainOp:
    """Describe a modified file with the most detailed op that can be built.

    A ``.mid`` file (case-insensitive suffix match) whose old and new
    contents can both be read from the object store is diffed at note level
    and reported as a ``PatchOp`` carrying the child ops, child domain and
    child summary returned by ``diff_midi_notes``.

    Every other situation yields an opaque ``ReplaceOp`` holding only the
    before/after content hashes:

    - the path does not end in ``.mid``;
    - *repo_root* is ``None``;
    - either revision is missing from the object store;
    - the note-level diff (or building the ``PatchOp`` from it) raises.

    Args:
        path: Workspace-relative POSIX path of the modified file.
        old_hash: SHA-256 of the previous content in the object store.
        new_hash: SHA-256 of the current content in the object store.
        repo_root: Repository root used to reach the object store, or
            ``None`` to skip the deep diff entirely.

    Returns:
        A ``PatchOp`` when the deep diff succeeds, otherwise a ``ReplaceOp``.
    """

    def _opaque_replace() -> DomainOp:
        # Coarse fallback: records that the content changed, not how.
        return ReplaceOp(
            op="replace",
            address=path,
            position=None,
            old_content_id=old_hash,
            new_content_id=new_hash,
            old_summary=f"{path} (previous)",
            new_summary=f"{path} (updated)",
        )

    if not path.lower().endswith(".mid") or repo_root is None:
        return _opaque_replace()

    # Imported at call time, and only on the deep-diff path.
    from muse.core.object_store import read_object
    from muse.plugins.midi.midi_diff import diff_midi_notes

    # Both revisions are fetched before either result is inspected.
    previous_blob = read_object(repo_root, old_hash)
    current_blob = read_object(repo_root, new_hash)
    if previous_blob is None or current_blob is None:
        return _opaque_replace()

    try:
        note_delta = diff_midi_notes(previous_blob, current_blob, file_path=path)
        return PatchOp(
            op="patch",
            address=path,
            child_ops=note_delta["ops"],
            child_domain=note_delta["domain"],
            child_summary=note_delta["summary"],
        )
    except Exception as exc:
        # Best effort: any failure in the deep diff degrades to a ReplaceOp.
        logger.debug("⚠️ MIDI deep diff failed for %r: %s", path, exc)
        return _opaque_replace()
1070
1071
1072 def _summarise_ops(ops: list[DomainOp]) -> str:
1073 """Build a human-readable summary string from a list of domain ops."""
1074 inserts = 0
1075 deletes = 0
1076 replaces = 0
1077 patches = 0
1078
1079 for op in ops:
1080 kind = op["op"]
1081 if kind == "insert":
1082 inserts += 1
1083 elif kind == "delete":
1084 deletes += 1
1085 elif kind == "replace":
1086 replaces += 1
1087 elif kind == "patch":
1088 patches += 1
1089
1090 parts: list[str] = []
1091 if inserts:
1092 parts.append(f"{inserts} file{'s' if inserts != 1 else ''} added")
1093 if deletes:
1094 parts.append(f"{deletes} file{'s' if deletes != 1 else ''} removed")
1095 if replaces:
1096 parts.append(f"{replaces} file{'s' if replaces != 1 else ''} modified")
1097 if patches:
1098 parts.append(f"{patches} file{'s' if patches != 1 else ''} patched")
1099
1100 return ", ".join(parts) if parts else "no changes"
1101
1102
1103 def _hash_file(path: pathlib.Path) -> str:
1104 """Return the SHA-256 hex digest of a file's raw bytes."""
1105 h = hashlib.sha256()
1106 with path.open("rb") as fh:
1107 for chunk in iter(lambda: fh.read(65536), b""):
1108 h.update(chunk)
1109 return h.hexdigest()
1110
1111
1112 def _changed_paths(
1113 base: dict[str, str], other: dict[str, str]
1114 ) -> set[str]:
1115 """Return paths that differ between *base* and *other*."""
1116 base_p = set(base)
1117 other_p = set(other)
1118 added = other_p - base_p
1119 deleted = base_p - other_p
1120 common = base_p & other_p
1121 modified = {p for p in common if base[p] != other[p]}
1122 return added | deleted | modified
1123
1124
def content_hash(snapshot: StateSnapshot) -> str:
    """Derive a content-addressing digest for a snapshot.

    The snapshot is serialised to JSON with sorted keys and no whitespace
    between separators, so key order in the input does not affect the
    result. The digest is the SHA-256 of that text.

    Args:
        snapshot: JSON-serialisable snapshot mapping.

    Returns:
        The lowercase hex SHA-256 digest of the canonical JSON form.
    """
    hasher = hashlib.sha256()
    hasher.update(
        json.dumps(snapshot, sort_keys=True, separators=(",", ":")).encode()
    )
    return hasher.hexdigest()
1129
1130
#: Module-level singleton — import and use directly.
plugin = MidiPlugin()

# Import-time conformance checks: each assert runs once when this module is
# loaded and raises AssertionError if the singleton fails the isinstance test.
# Asserts are removed when Python runs with -O, so these are development-time
# guards rather than runtime validation.
# NOTE(review): isinstance() against a typing.Protocol only works when the
# protocol is decorated with @runtime_checkable, and then only checks that the
# members exist, not their signatures — confirm both protocols are declared
# that way.
assert isinstance(plugin, MuseDomainPlugin), (
    "MidiPlugin does not satisfy the MuseDomainPlugin protocol"
)
assert isinstance(plugin, StructuredMergePlugin), (
    "MidiPlugin does not satisfy the StructuredMergePlugin protocol"
)