gabriel / muse public
plugin.py python
997 lines 38.4 KB
45fd2148 fix: config and versioning audit — TOML attributes, v0.1.1, no Phase N labels Gabriel Cardona <cgcardona@gmail.com> 6d ago
1 """Music domain plugin — reference implementation of :class:`MuseDomainPlugin`.
2
3 This plugin implements the six Muse domain interfaces for MIDI state:
4 notes, velocities, controller events (CC), pitch bends, and aftertouch.
5
6 It is the domain that proved the abstraction. Every other domain — scientific
7 simulation, genomics, 3D spatial design — is a new plugin that implements
8 the same six interfaces.
9
10 Live State
11 ----------
12 For the music domain, ``LiveState`` is either:
13
14 1. A ``muse-work/`` directory path (``pathlib.Path``) — the CLI path where
15 MIDI files live on disk and are managed by ``muse commit / checkout``.
16 2. A dict snapshot previously captured by :meth:`snapshot` — used when
17 constructing merges and diffs in memory.
18
19 Both forms are supported. The plugin detects which form it received by
20 checking for ``pathlib.Path`` vs ``dict``.
21
22 Snapshot Format
23 ---------------
24 A music snapshot is a JSON-serialisable dict:
25
26 .. code-block:: json
27
28 {
29 "files": {
30 "tracks/drums.mid": "<sha256>",
31 "tracks/bass.mid": "<sha256>"
32 },
33 "domain": "music"
34 }
35
36 The ``files`` key maps POSIX paths (relative to ``muse-work/``) to their
37 SHA-256 content digests.
38
39 Delta Format
40 ----------------------
41 ``diff()`` returns a ``StructuredDelta`` with typed ``DomainOp`` entries:
42
43 - ``InsertOp`` — a file was added (``content_id`` = its SHA-256 hash).
44 - ``DeleteOp`` — a file was removed.
45 - ``ReplaceOp`` — a non-MIDI file's content changed.
46 - ``PatchOp`` — a ``.mid`` file changed; ``child_ops`` contains note-level
47 ``InsertOp`` / ``DeleteOp`` entries from the Myers LCS diff.
48
49 When ``repo_root`` is available, MIDI files are loaded from the object store
50 and diffed at note level. Without it, modified ``.mid`` files fall back to
51 ``ReplaceOp``.
52 """
53 from __future__ import annotations
54
55 import hashlib
56 import json
57 import logging
58 import pathlib
59
60 from muse.core.schema import (
61 DimensionSpec,
62 DomainSchema,
63 SequenceSchema,
64 SetSchema,
65 TensorSchema,
66 TreeSchema,
67 )
68 from muse.domain import (
69 DeleteOp,
70 DomainOp,
71 DriftReport,
72 InsertOp,
73 LiveState,
74 MergeResult,
75 MuseDomainPlugin,
76 PatchOp,
77 ReplaceOp,
78 SnapshotManifest,
79 StateDelta,
80 StateSnapshot,
81 StructuredDelta,
82 StructuredMergePlugin,
83 )
84 from muse.plugins.music.midi_diff import NoteKey
85
# Module-scoped logger; the helpers below use it for debug/info diagnostics.
logger = logging.getLogger(__name__)

# Domain identifier stamped into every snapshot manifest and delta this plugin
# builds, and passed as ``domain=`` when loading ``.museattributes`` rules.
_DOMAIN_TAG = "music"
89
90
class MusicPlugin:
    """Music domain plugin for the Muse VCS.

    Implements :class:`~muse.domain.MuseDomainPlugin` (six core interfaces)
    and :class:`~muse.domain.StructuredMergePlugin` (operation-level
    merge) for MIDI state stored as files in ``muse-work/``.

    This is the reference implementation. Every other domain plugin implements
    the same six core interfaces; the :class:`~muse.domain.StructuredMergePlugin`
    extension is optional but strongly recommended for domains that produce
    note-level (sub-file) diffs.
    """

    # ------------------------------------------------------------------
    # 1. snapshot — capture live state as a content-addressed dict
    # ------------------------------------------------------------------

    def snapshot(self, live_state: LiveState) -> StateSnapshot:
        """Capture the current ``muse-work/`` directory as a snapshot dict.

        Args:
            live_state: Either a ``pathlib.Path`` pointing to ``muse-work/``
                or an existing snapshot dict (returned as-is).

        Returns:
            A JSON-serialisable ``{"files": {path: sha256}, "domain": "music"}``
            dict. The ``files`` mapping is the canonical snapshot manifest used
            by the core VCS engine for commit / checkout / diff.

        Ignore rules
        ------------
        When *live_state* is a ``pathlib.Path``, the plugin reads
        ``.museignore`` from the repository root (the parent of ``muse-work/``)
        and excludes any matching paths from the snapshot. Files whose own
        name starts with ``"."`` are always excluded regardless of
        ``.museignore``. Only the final path component is tested, so a
        regular file inside a dot-directory is still captured unless an
        ignore pattern matches it.
        """
        if isinstance(live_state, pathlib.Path):
            from muse.core.ignore import is_ignored, load_patterns

            workdir = live_state
            repo_root = workdir.parent
            patterns = load_patterns(repo_root)
            files: dict[str, str] = {}
            # Sorted walk keeps the manifest insertion order deterministic.
            for file_path in sorted(workdir.rglob("*")):
                if not file_path.is_file():
                    continue
                if file_path.name.startswith("."):
                    continue
                rel = file_path.relative_to(workdir).as_posix()
                if is_ignored(rel, patterns):
                    continue
                files[rel] = _hash_file(file_path)
            return SnapshotManifest(files=files, domain=_DOMAIN_TAG)

        # Anything that is not a Path is assumed to already be a snapshot.
        return live_state

    # ------------------------------------------------------------------
    # 2. diff — compute the structured delta between two snapshots
    # ------------------------------------------------------------------

    def diff(
        self,
        base: StateSnapshot,
        target: StateSnapshot,
        *,
        repo_root: pathlib.Path | None = None,
    ) -> StateDelta:
        """Compute a ``StructuredDelta`` between two music snapshots.

        File additions and removals produce ``InsertOp`` and ``DeleteOp``
        entries respectively. For modified files:

        - ``.mid`` files: when ``repo_root`` is provided, load the MIDI bytes
          from the object store and produce a ``PatchOp`` with note-level
          ``child_ops`` from the Myers LCS diff. Falls back to ``ReplaceOp``
          when the object store is unavailable or parsing fails.
        - All other files: ``ReplaceOp`` with file-level content IDs.

        Ops are emitted in a fixed order: all inserts (sorted by path), then
        all deletes (sorted), then all modifications (sorted).

        Args:
            base: The ancestor snapshot.
            target: The later snapshot.
            repo_root: Repository root directory. When provided, MIDI files are
                loaded from ``.muse/objects/`` for note-level diffing.

        Returns:
            A ``StructuredDelta`` whose ``ops`` list transforms *base* into
            *target* and whose ``summary`` is human-readable.
        """
        base_files = base["files"]
        target_files = target["files"]

        base_paths = set(base_files)
        target_paths = set(target_files)

        ops: list[DomainOp] = []

        # Added files → InsertOp
        for path in sorted(target_paths - base_paths):
            ops.append(
                InsertOp(
                    op="insert",
                    address=path,
                    position=None,
                    content_id=target_files[path],
                    content_summary=f"new file: {path}",
                )
            )

        # Removed files → DeleteOp
        for path in sorted(base_paths - target_paths):
            ops.append(
                DeleteOp(
                    op="delete",
                    address=path,
                    position=None,
                    content_id=base_files[path],
                    content_summary=f"deleted: {path}",
                )
            )

        # Modified files → PatchOp (note-level) or ReplaceOp (opaque)
        for path in sorted(
            p for p in base_paths & target_paths if base_files[p] != target_files[p]
        ):
            ops.append(
                _diff_modified_file(
                    path=path,
                    old_hash=base_files[path],
                    new_hash=target_files[path],
                    repo_root=repo_root,
                )
            )

        summary = _summarise_ops(ops)
        return StructuredDelta(domain=_DOMAIN_TAG, ops=ops, summary=summary)

    # ------------------------------------------------------------------
    # 3. merge — three-way reconciliation
    # ------------------------------------------------------------------

    def merge(
        self,
        base: StateSnapshot,
        left: StateSnapshot,
        right: StateSnapshot,
        *,
        repo_root: pathlib.Path | None = None,
    ) -> MergeResult:
        """Three-way merge two divergent music state lines against a common base.

        A file is auto-merged when only one side changed it, when both sides
        deleted it, or when both sides changed it to byte-identical content
        (same content hash). When both sides changed the same file to
        *different* content, the merge proceeds in stages:

        1. **File-level strategy** — if ``.museattributes`` contains an
           ``ours`` or ``theirs`` rule matching the path (dimension ``"*"``),
           the rule is applied and the file is removed from the conflict list.

        2. **Dimension-level merge** — for ``.mid`` files that survive the
           file-level check, the MIDI event stream is split into orthogonal
           dimension slices (notes/melodic, harmonic, dynamic, structural).
           Each dimension is merged independently. Dimension-specific
           ``ours``/``theirs`` rules in ``.museattributes`` are honoured.
           Only dimensions where *both* sides changed AND no resolvable rule
           exists cause a true file-level conflict.

        3. **Manual override** — ``manual`` strategy in ``.museattributes``
           forces a path into the conflict list even when the engine would
           normally auto-resolve it (consensus deletions excepted). The
           merged manifest keeps the *base* version of such a path.

        Args:
            base: Snapshot at the common ancestor commit.
            left: Snapshot for the *ours* (current) branch. The distinction
                between ``left`` and ``right`` only affects the ``applied_strategies``
                key in the result; the merge is symmetric for clean paths.
            right: Snapshot for the *theirs* (incoming) branch.
            repo_root: Path to the repository root so ``.museattributes`` and the
                object store can be located. ``None`` disables attribute
                loading and MIDI reconstruction (all conflicts become hard).

        Returns:
            A :class:`~muse.domain.MergeResult` whose ``merged`` field holds the
            merged manifest (unresolved conflict paths keep their base entry,
            or are absent when the base had none), ``conflicts`` lists the
            unresolvable paths, ``applied_strategies`` records which
            ``.museattributes`` rules were used, and ``dimension_reports``
            carries the per-file report of each dimension-level merge.
        """
        from muse.core.attributes import load_attributes, resolve_strategy
        from muse.core.object_store import read_object, write_object
        from muse.plugins.music.midi_merge import merge_midi_dimensions

        base_files = base["files"]
        left_files = left["files"]
        right_files = right["files"]

        attrs = load_attributes(repo_root, domain=_DOMAIN_TAG) if repo_root is not None else []

        left_changed: set[str] = _changed_paths(base_files, left_files)
        right_changed: set[str] = _changed_paths(base_files, right_files)
        all_conflict_paths: set[str] = left_changed & right_changed

        merged: dict[str, str] = dict(base_files)

        # Apply clean single-side changes first.
        for path in left_changed - all_conflict_paths:
            if path in left_files:
                merged[path] = left_files[path]
            else:
                merged.pop(path, None)

        for path in right_changed - all_conflict_paths:
            if path in right_files:
                merged[path] = right_files[path]
            else:
                merged.pop(path, None)

        # Consensus deletions (both sides removed the same file) — not a conflict.
        consensus_deleted = {
            p for p in all_conflict_paths
            if p not in left_files and p not in right_files
        }
        for path in consensus_deleted:
            merged.pop(path, None)

        # Convergent edits (both sides ended up with the same content hash) —
        # not a conflict either; both branches already agree on the result.
        convergent = {
            p for p in all_conflict_paths
            if p in left_files
            and p in right_files
            and left_files[p] == right_files[p]
        }
        for path in convergent:
            merged[path] = left_files[path]

        real_conflicts: set[str] = all_conflict_paths - consensus_deleted - convergent

        applied_strategies: dict[str, str] = {}
        dimension_reports: dict[str, dict[str, str]] = {}
        final_conflicts: list[str] = []

        for path in sorted(real_conflicts):
            file_strategy = resolve_strategy(attrs, path, "*")

            if file_strategy == "ours":
                if path in left_files:
                    merged[path] = left_files[path]
                else:
                    merged.pop(path, None)
                applied_strategies[path] = "ours"
                continue

            if file_strategy == "theirs":
                if path in right_files:
                    merged[path] = right_files[path]
                else:
                    merged.pop(path, None)
                applied_strategies[path] = "theirs"
                continue

            # Dimension-level merge needs all three revisions of a MIDI file
            # and access to the object store.
            if (
                repo_root is not None
                and path.lower().endswith(".mid")
                and path in left_files
                and path in right_files
                and path in base_files
            ):
                base_obj = read_object(repo_root, base_files[path])
                left_obj = read_object(repo_root, left_files[path])
                right_obj = read_object(repo_root, right_files[path])

                if base_obj is not None and left_obj is not None and right_obj is not None:
                    try:
                        dim_result = merge_midi_dimensions(
                            base_obj, left_obj, right_obj,
                            attrs,
                            path,
                        )
                    except ValueError:
                        dim_result = None

                    if dim_result is not None:
                        merged_bytes, dim_report = dim_result
                        new_hash = hashlib.sha256(merged_bytes).hexdigest()
                        write_object(repo_root, new_hash, merged_bytes)
                        merged[path] = new_hash
                        applied_strategies[path] = "dimension-merge"
                        dimension_reports[path] = dim_report
                        continue

            final_conflicts.append(path)

        # Manual override: force otherwise-clean paths back into conflict and
        # roll the manifest entry back to the base revision.
        for path in sorted((left_changed | right_changed) - real_conflicts):
            if path in consensus_deleted:
                continue
            if resolve_strategy(attrs, path, "*") == "manual":
                final_conflicts.append(path)
                applied_strategies[path] = "manual"
                if path in base_files:
                    merged[path] = base_files[path]
                else:
                    merged.pop(path, None)

        return MergeResult(
            merged=SnapshotManifest(files=merged, domain=_DOMAIN_TAG),
            conflicts=sorted(final_conflicts),
            applied_strategies=applied_strategies,
            dimension_reports=dimension_reports,
        )

    # ------------------------------------------------------------------
    # 4. drift — compare committed state vs live state
    # ------------------------------------------------------------------

    def drift(
        self,
        committed: StateSnapshot,
        live: LiveState,
    ) -> DriftReport:
        """Detect uncommitted changes in ``muse-work/`` relative to *committed*.

        The comparison is file-level only: :meth:`diff` is called without a
        ``repo_root``, so modified files are reported as ``ReplaceOp``.

        Args:
            committed: The last committed snapshot.
            live: Either a ``pathlib.Path`` (``muse-work/``) or a snapshot
                dict representing current live state.

        Returns:
            A :class:`~muse.domain.DriftReport` describing whether and how the
            live state differs from the committed snapshot.
        """
        live_snapshot = self.snapshot(live)
        delta = self.diff(committed, live_snapshot)

        inserts = sum(1 for op in delta["ops"] if op["op"] == "insert")
        deletes = sum(1 for op in delta["ops"] if op["op"] == "delete")
        modified = sum(1 for op in delta["ops"] if op["op"] in ("replace", "patch"))
        has_drift = bool(inserts or deletes or modified)

        parts: list[str] = []
        if inserts:
            parts.append(f"{inserts} added")
        if deletes:
            parts.append(f"{deletes} removed")
        if modified:
            parts.append(f"{modified} modified")

        summary = ", ".join(parts) if parts else "working tree clean"
        return DriftReport(has_drift=has_drift, summary=summary, delta=delta)

    # ------------------------------------------------------------------
    # 5. apply — execute a delta against live state (checkout)
    # ------------------------------------------------------------------

    def apply(self, delta: StateDelta, live_state: LiveState) -> LiveState:
        """Apply a structured delta to produce a new live state.

        When ``live_state`` is a ``pathlib.Path`` the physical files have
        already been updated by the caller (``muse checkout`` restores objects
        from the store before calling this). Rescanning the directory is the
        cheapest correct way to reflect the new state; *delta* is not
        consulted on this path.

        When ``live_state`` is a snapshot dict, file-level ``InsertOp``,
        ``DeleteOp`` and ``ReplaceOp`` entries are applied to a copy of its
        manifest (only content IDs are updated; no file bytes are touched).
        Every other op kind, including ``PatchOp``, is skipped in-memory since
        reconstructing patched file content requires both the original bytes
        and the object store; callers that need those should pass the workdir
        ``pathlib.Path`` instead.

        Args:
            delta: A ``StructuredDelta`` produced by :meth:`diff`.
            live_state: The workdir path (preferred) or a snapshot dict.

        Returns:
            The updated live state as a ``SnapshotManifest``.
        """
        if isinstance(live_state, pathlib.Path):
            return self.snapshot(live_state)

        # Work on a copy so the caller's snapshot is never mutated.
        current_files = dict(live_state["files"])

        for op in delta["ops"]:
            if op["op"] == "delete":
                current_files.pop(op["address"], None)
            elif op["op"] == "replace":
                current_files[op["address"]] = op["new_content_id"]
            elif op["op"] == "insert":
                current_files[op["address"]] = op["content_id"]
            # PatchOp and MoveOp: skip in-memory — caller must use workdir path.

        return SnapshotManifest(files=current_files, domain=_DOMAIN_TAG)

    # ------------------------------------------------------------------
    # 6. schema — declare structural schema for the algorithm library
    # ------------------------------------------------------------------

    def schema(self) -> DomainSchema:
        """Return the full structural schema for the music domain.

        Declares four semantic dimensions — melodic, harmonic, dynamic, and
        structural — that the core diff algorithm library and OT merge
        engine use to drive per-dimension operations.

        Top level is a ``SetSchema``: the music workspace is an unordered
        collection of audio/MIDI files, each identified by its SHA-256 content
        hash.

        Dimensions:

        - **melodic** — the sequence of note events over time. LCS-diffed so
          that insertions and deletions of individual notes are surfaced.
        - **harmonic** — the sequence of chord events and key-signature changes.
          LCS-diffed independently of the melodic dimension.
        - **dynamic** — velocity and expression curves as a 1-D float tensor.
          Epsilon of 1.0 ignores sub-1-velocity noise; sparse mode emits one
          ``ReplaceOp`` per changed event.
        - **structural** — track layout, time signatures, and tempo map as a
          labeled ordered tree. Structural changes are non-independent: they
          block merging all other dimensions until resolved, because a tempo
          change shifts the meaning of every subsequent note position.
        """
        return DomainSchema(
            domain=_DOMAIN_TAG,
            description="MIDI and audio file versioning with note-level diff",
            top_level=SetSchema(
                kind="set",
                element_type="audio_file",
                identity="by_content",
            ),
            dimensions=[
                DimensionSpec(
                    name="melodic",
                    description="Note pitches and durations over time",
                    schema=SequenceSchema(
                        kind="sequence",
                        element_type="note_event",
                        identity="by_position",
                        diff_algorithm="lcs",
                        alphabet=None,
                    ),
                    independent_merge=True,
                ),
                DimensionSpec(
                    name="harmonic",
                    description="Chord progressions and key signatures",
                    schema=SequenceSchema(
                        kind="sequence",
                        element_type="chord_event",
                        identity="by_position",
                        diff_algorithm="lcs",
                        alphabet=None,
                    ),
                    independent_merge=True,
                ),
                DimensionSpec(
                    name="dynamic",
                    description="Velocity and expression curves",
                    schema=TensorSchema(
                        kind="tensor",
                        dtype="float32",
                        rank=1,
                        epsilon=1.0,
                        diff_mode="sparse",
                    ),
                    independent_merge=True,
                ),
                DimensionSpec(
                    name="structural",
                    description="Track layout, time signatures, tempo map",
                    schema=TreeSchema(
                        kind="tree",
                        node_type="track_node",
                        diff_algorithm="zhang_shasha",
                    ),
                    independent_merge=False,
                ),
            ],
            merge_mode="three_way",
            schema_version=1,
        )

    # ------------------------------------------------------------------
    # 7. merge_ops — operation-level OT merge (StructuredMergePlugin)
    # ------------------------------------------------------------------

    def merge_ops(
        self,
        base: StateSnapshot,
        ours_snap: StateSnapshot,
        theirs_snap: StateSnapshot,
        ours_ops: list[DomainOp],
        theirs_ops: list[DomainOp],
        *,
        repo_root: pathlib.Path | None = None,
    ) -> MergeResult:
        """Operation-level three-way merge using the OT engine.

        Extends the file-level ``merge()`` method with sub-file granularity: two
        changes to non-overlapping notes in the same MIDI file no longer produce
        a conflict.

        Algorithm
        ---------
        1. Run :func:`~muse.core.op_transform.merge_op_lists` on the flat op
           lists to classify each (ours, theirs) pair as commuting or
           conflicting.
        2. Build the merged manifest from *base* by applying all clean merged
           ops. ``InsertOp`` and ``ReplaceOp`` entries supply a ``content_id``
           / ``new_content_id`` directly. For ``PatchOp`` entries (sub-file
           note changes), the final file hash is looked up from *ours_snap* or
           *theirs_snap*. When both sides produced a ``PatchOp`` for the same
           MIDI file and the note-level ops commute, an attempt is made to
           reconstruct the merged MIDI bytes; on failure the file falls back to
           a conflict. Reconstruction is attempted at most once per file.
        3. For conflicting pairs, consult ``.museattributes``. Strategies
           ``"ours"`` and ``"theirs"`` are applied automatically; everything
           else enters ``MergeResult.conflicts``.

        Args:
            base: Common ancestor snapshot.
            ours_snap: Final snapshot of our branch.
            theirs_snap: Final snapshot of their branch.
            ours_ops: Operations from our branch delta (base → ours).
            theirs_ops: Operations from their branch delta (base → theirs).
            repo_root: Repository root for object store and attributes.

        Returns:
            A :class:`~muse.domain.MergeResult` with the reconciled snapshot,
            any remaining unresolvable conflicts (sorted, de-duplicated), and
            the OT engine's merged ops as ``op_log``.
        """
        from muse.core.attributes import load_attributes, resolve_strategy
        from muse.core.op_transform import merge_op_lists

        attrs = load_attributes(repo_root, domain=_DOMAIN_TAG) if repo_root is not None else []

        # OT classification: find commuting and conflicting op pairs.
        ot_result = merge_op_lists([], ours_ops, theirs_ops)

        # Build the merged manifest starting from base.
        merged_files: dict[str, str] = dict(base["files"])
        applied_strategies: dict[str, str] = {}
        final_conflicts: list[str] = []
        op_log: list[DomainOp] = list(ot_result.merged_ops)

        # Group PatchOps by address so we can detect same-file note merges.
        ours_patches: dict[str, PatchOp] = {}
        theirs_patches: dict[str, PatchOp] = {}
        for op in ours_ops:
            if op["op"] == "patch":
                ours_patches[op["address"]] = op
        for op in theirs_ops:
            if op["op"] == "patch":
                theirs_patches[op["address"]] = op

        # Track which addresses are involved in a conflict.
        conflicting_addresses: set[str] = {
            our_op["address"] for our_op, _ in ot_result.conflict_ops
        }

        # Files already pushed through note-level reconstruction. The merged
        # op list may carry one PatchOp per side for the same address;
        # reconstructing again would only repeat identical object-store work.
        reconstructed: set[str] = set()

        # --- Apply clean merged ops ---
        for op in ot_result.merged_ops:
            addr = op["address"]
            if addr in conflicting_addresses:
                continue  # handled in conflict resolution below

            if op["op"] == "insert":
                merged_files[addr] = op["content_id"]

            elif op["op"] == "delete":
                merged_files.pop(addr, None)

            elif op["op"] == "replace":
                merged_files[addr] = op["new_content_id"]

            elif op["op"] == "patch":
                # PatchOp: determine which side(s) patched this file.
                has_ours = addr in ours_patches
                has_theirs = addr in theirs_patches

                if has_ours and not has_theirs:
                    # Only our side changed this file — take our version.
                    if addr in ours_snap["files"]:
                        merged_files[addr] = ours_snap["files"][addr]
                    else:
                        merged_files.pop(addr, None)

                elif has_theirs and not has_ours:
                    # Only their side changed this file — take their version.
                    if addr in theirs_snap["files"]:
                        merged_files[addr] = theirs_snap["files"][addr]
                    else:
                        merged_files.pop(addr, None)

                else:
                    # Both sides patched the same file with commuting note ops.
                    # Attempt note-level MIDI reconstruction (once per file).
                    if addr in reconstructed:
                        continue
                    reconstructed.add(addr)

                    merged_content_id = _merge_patch_ops(
                        addr=addr,
                        ours_patch=ours_patches[addr],
                        theirs_patch=theirs_patches[addr],
                        base_files=dict(base["files"]),
                        ours_snap_files=dict(ours_snap["files"]),
                        theirs_snap_files=dict(theirs_snap["files"]),
                        repo_root=repo_root,
                    )
                    if merged_content_id is not None:
                        merged_files[addr] = merged_content_id
                    else:
                        # Reconstruction failed — treat as manual conflict.
                        final_conflicts.append(addr)

        # --- Resolve conflicts ---
        for our_op, their_op in ot_result.conflict_ops:
            addr = our_op["address"]
            strategy = resolve_strategy(attrs, addr, "*")

            if strategy == "ours":
                if addr in ours_snap["files"]:
                    merged_files[addr] = ours_snap["files"][addr]
                else:
                    merged_files.pop(addr, None)
                applied_strategies[addr] = "ours"

            elif strategy == "theirs":
                if addr in theirs_snap["files"]:
                    merged_files[addr] = theirs_snap["files"][addr]
                else:
                    merged_files.pop(addr, None)
                applied_strategies[addr] = "theirs"

            else:
                # Strategy "manual" or "auto" without a clear resolution.
                final_conflicts.append(addr)

        return MergeResult(
            merged=SnapshotManifest(files=merged_files, domain=_DOMAIN_TAG),
            conflicts=sorted(set(final_conflicts)),
            applied_strategies=applied_strategies,
            op_log=op_log,
        )
717
718
719 # ---------------------------------------------------------------------------
720 # Module-level helpers
721 # ---------------------------------------------------------------------------
722
723
724 def _merge_patch_ops(
725 *,
726 addr: str,
727 ours_patch: PatchOp,
728 theirs_patch: PatchOp,
729 base_files: dict[str, str],
730 ours_snap_files: dict[str, str],
731 theirs_snap_files: dict[str, str],
732 repo_root: pathlib.Path | None,
733 ) -> str | None:
734 """Attempt note-level MIDI merge for two ``PatchOp``\\s on the same file.
735
736 Runs OT on the child_ops of each PatchOp. If the note-level ops all
737 commute, reconstructs the merged MIDI by:
738
739 1. Loading base, ours, and theirs MIDI bytes from the object store.
740 2. Extracting note sequences from all three versions.
741 3. Building ``content_id → NoteKey`` look-ups for the ours and theirs
742 sequences (so that InsertOp content IDs can be resolved to real notes).
743 4. Applying the merged note ops (deletions then insertions) to the base
744 note sequence.
745 5. Calling :func:`~muse.plugins.music.midi_diff.reconstruct_midi` and
746 storing the resulting bytes.
747
748 Returns the SHA-256 hash of the reconstructed MIDI (ready to store in the
749 object store) on success, or ``None`` when:
750
751 - *repo_root* is ``None`` (cannot access object store).
752 - Base or branch bytes are not in the local object store.
753 - Note-level OT found conflicts.
754 - MIDI reconstruction raised any exception.
755
756 Args:
757 addr: Workspace-relative MIDI file path.
758 ours_patch: Our PatchOp for this file.
759 theirs_patch: Their PatchOp for this file.
760 base_files: Content-ID map for the common ancestor snapshot.
761 ours_snap_files: Content-ID map for our branch's final snapshot.
762 theirs_snap_files: Content-ID map for their branch's final snapshot.
763 repo_root: Repository root for object store access.
764
765 Returns:
766 Content-ID (SHA-256 hex) of the merged MIDI, or ``None`` on failure.
767 """
768 if repo_root is None or addr not in base_files:
769 return None
770
771 from muse.core.object_store import read_object, write_object
772 from muse.core.op_transform import merge_op_lists
773 from muse.plugins.music.midi_diff import NoteKey, extract_notes, reconstruct_midi
774
775 # Run OT on note-level ops to classify conflicts.
776 note_result = merge_op_lists([], ours_patch["child_ops"], theirs_patch["child_ops"])
777 if not note_result.is_clean:
778 logger.debug(
779 "⚠️ Note-level conflict in %r: %d pair(s) — falling back to file conflict",
780 addr,
781 len(note_result.conflict_ops),
782 )
783 return None
784
785 try:
786 base_bytes = read_object(repo_root, base_files[addr])
787 if base_bytes is None:
788 return None
789
790 ours_hash = ours_snap_files.get(addr)
791 theirs_hash = theirs_snap_files.get(addr)
792 ours_bytes = read_object(repo_root, ours_hash) if ours_hash else None
793 theirs_bytes = read_object(repo_root, theirs_hash) if theirs_hash else None
794
795 base_notes, ticks_per_beat = extract_notes(base_bytes)
796
797 # Build content_id → NoteKey lookups from ours and theirs versions.
798 ours_by_id: dict[str, NoteKey] = {}
799 if ours_bytes is not None:
800 ours_notes, _ = extract_notes(ours_bytes)
801 ours_by_id = {_note_content_id(n): n for n in ours_notes}
802
803 theirs_by_id: dict[str, NoteKey] = {}
804 if theirs_bytes is not None:
805 theirs_notes, _ = extract_notes(theirs_bytes)
806 theirs_by_id = {_note_content_id(n): n for n in theirs_notes}
807
808 # Collect content IDs to delete.
809 delete_ids: set[str] = {
810 op["content_id"] for op in note_result.merged_ops if op["op"] == "delete"
811 }
812
813 # Apply deletions to base note list.
814 base_note_by_id = {_note_content_id(n): n for n in base_notes}
815 surviving: list[NoteKey] = [
816 n for n in base_notes if _note_content_id(n) not in delete_ids
817 ]
818
819 # Collect insertions: resolve content_id → NoteKey via ours then theirs.
820 inserted: list[NoteKey] = []
821 for op in note_result.merged_ops:
822 if op["op"] == "insert":
823 cid = op["content_id"]
824 note = ours_by_id.get(cid) or theirs_by_id.get(cid)
825 if note is None:
826 # Fallback: base itself shouldn't have it, but check anyway.
827 note = base_note_by_id.get(cid)
828 if note is None:
829 logger.debug(
830 "⚠️ Cannot resolve note content_id %s for %r — skipping",
831 cid[:12],
832 addr,
833 )
834 continue
835 inserted.append(note)
836
837 merged_notes = surviving + inserted
838 merged_bytes = reconstruct_midi(merged_notes, ticks_per_beat=ticks_per_beat)
839
840 merged_hash = hashlib.sha256(merged_bytes).hexdigest()
841 write_object(repo_root, merged_hash, merged_bytes)
842
843 logger.info(
844 "✅ Note-level MIDI merge for %r: %d ops clean, %d notes in result",
845 addr,
846 len(note_result.merged_ops),
847 len(merged_notes),
848 )
849 return merged_hash
850
851 except Exception as exc: # noqa: BLE001 intentional broad catch
852 logger.debug("⚠️ MIDI note-level reconstruction failed for %r: %s", addr, exc)
853 return None
854
855
856 def _note_content_id(note: NoteKey) -> str:
857 """Return the SHA-256 content ID for a :class:`~muse.plugins.music.midi_diff.NoteKey`.
858
859 Delegates to the same algorithm used in :mod:`muse.plugins.music.midi_diff`
860 so that content IDs computed here are identical to those stored in
861 ``InsertOp`` / ``DeleteOp`` entries.
862 """
863 payload = (
864 f"{note['pitch']}:{note['velocity']}:"
865 f"{note['start_tick']}:{note['duration_ticks']}:{note['channel']}"
866 )
867 return hashlib.sha256(payload.encode()).hexdigest()
868
869
def _diff_modified_file(
    *,
    path: str,
    old_hash: str,
    new_hash: str,
    repo_root: pathlib.Path | None,
) -> DomainOp:
    """Produce the richest available operation for a modified file.

    For ``.mid`` files (extension matched case-insensitively) where both
    content revisions are readable from the object store, performs a full
    note-level MIDI diff and returns a ``PatchOp`` carrying the individual
    child operations. Falls back to a ``ReplaceOp`` (opaque before/after hash
    pair) when the file is not a MIDI file, ``repo_root`` is ``None``, either
    content revision cannot be retrieved from the store, or the deep diff
    raises.

    Args:
        path: Workspace-relative POSIX path of the modified file.
        old_hash: SHA-256 of the base content in the object store.
        new_hash: SHA-256 of the current content in the object store.
        repo_root: Repository root for object store access. ``None`` forces
            immediate fallback to ``ReplaceOp``.

    Returns:
        A ``PatchOp`` with note-level child ops when deep diff succeeds,
        otherwise a ``ReplaceOp`` with the opaque before/after content hashes.
    """
    if path.lower().endswith(".mid") and repo_root is not None:
        from muse.core.object_store import read_object
        from muse.plugins.music.midi_diff import diff_midi_notes

        base_bytes = read_object(repo_root, old_hash)
        target_bytes = read_object(repo_root, new_hash)

        if base_bytes is not None and target_bytes is not None:
            try:
                child_delta = diff_midi_notes(
                    base_bytes, target_bytes, file_path=path
                )
                return PatchOp(
                    op="patch",
                    address=path,
                    child_ops=child_delta["ops"],
                    child_domain=child_delta["domain"],
                    child_summary=child_delta["summary"],
                )
            except Exception as exc:  # noqa: BLE001 intentional broad catch
                # Best-effort: a malformed MIDI file must not break the diff;
                # degrade to the opaque ReplaceOp below.
                logger.debug("⚠️ MIDI deep diff failed for %r: %s", path, exc)

    return ReplaceOp(
        op="replace",
        address=path,
        position=None,
        old_content_id=old_hash,
        new_content_id=new_hash,
        old_summary=f"{path} (previous)",
        new_summary=f"{path} (updated)",
    )
928
929
930 def _summarise_ops(ops: list[DomainOp]) -> str:
931 """Build a human-readable summary string from a list of domain ops."""
932 inserts = 0
933 deletes = 0
934 replaces = 0
935 patches = 0
936
937 for op in ops:
938 kind = op["op"]
939 if kind == "insert":
940 inserts += 1
941 elif kind == "delete":
942 deletes += 1
943 elif kind == "replace":
944 replaces += 1
945 elif kind == "patch":
946 patches += 1
947
948 parts: list[str] = []
949 if inserts:
950 parts.append(f"{inserts} file{'s' if inserts != 1 else ''} added")
951 if deletes:
952 parts.append(f"{deletes} file{'s' if deletes != 1 else ''} removed")
953 if replaces:
954 parts.append(f"{replaces} file{'s' if replaces != 1 else ''} modified")
955 if patches:
956 parts.append(f"{patches} file{'s' if patches != 1 else ''} patched")
957
958 return ", ".join(parts) if parts else "no changes"
959
960
961 def _hash_file(path: pathlib.Path) -> str:
962 """Return the SHA-256 hex digest of a file's raw bytes."""
963 h = hashlib.sha256()
964 with path.open("rb") as fh:
965 for chunk in iter(lambda: fh.read(65536), b""):
966 h.update(chunk)
967 return h.hexdigest()
968
969
970 def _changed_paths(
971 base: dict[str, str], other: dict[str, str]
972 ) -> set[str]:
973 """Return paths that differ between *base* and *other*."""
974 base_p = set(base)
975 other_p = set(other)
976 added = other_p - base_p
977 deleted = base_p - other_p
978 common = base_p & other_p
979 modified = {p for p in common if base[p] != other[p]}
980 return added | deleted | modified
981
982
def content_hash(snapshot: StateSnapshot) -> str:
    """Compute a stable SHA-256 hex digest that content-addresses *snapshot*.

    The snapshot is serialised as compact JSON with sorted keys, so two
    snapshots that are equal as dicts hash identically regardless of key
    insertion order.
    """
    encoded = json.dumps(
        snapshot,
        separators=(",", ":"),
        sort_keys=True,
    ).encode()
    hasher = hashlib.sha256(encoded)
    return hasher.hexdigest()
987
988
#: Module-level singleton — import and use directly.
plugin = MusicPlugin()

# Import-time sanity checks that the singleton satisfies both plugin
# interfaces. ``MusicPlugin`` does not subclass either of them, so these rely
# on ``isinstance`` working structurally — presumably both are
# runtime-checkable Protocols; confirm in ``muse.domain``.
# NOTE(review): ``assert`` statements are stripped under ``python -O``, so
# these checks only run in non-optimised mode.
assert isinstance(plugin, MuseDomainPlugin), (
    "MusicPlugin does not satisfy the MuseDomainPlugin protocol"
)
assert isinstance(plugin, StructuredMergePlugin), (
    "MusicPlugin does not satisfy the StructuredMergePlugin protocol"
)