plugin.py
python
| 1 | """Music domain plugin — reference implementation of :class:`MuseDomainPlugin`. |
| 2 | |
| 3 | This plugin implements the six Muse domain interfaces for MIDI state: |
| 4 | notes, velocities, controller events (CC), pitch bends, and aftertouch. |
| 5 | |
| 6 | It is the domain that proved the abstraction. Every other domain — scientific |
| 7 | simulation, genomics, 3D spatial design — is a new plugin that implements |
| 8 | the same six interfaces. |
| 9 | |
| 10 | Live State |
| 11 | ---------- |
| 12 | For the music domain, ``LiveState`` is either: |
| 13 | |
| 14 | 1. A ``muse-work/`` directory path (``pathlib.Path``) — the CLI path where |
| 15 | MIDI files live on disk and are managed by ``muse commit / checkout``. |
| 16 | 2. A dict snapshot previously captured by :meth:`snapshot` — used when |
| 17 | constructing merges and diffs in memory. |
| 18 | |
| 19 | Both forms are supported. The plugin detects which form it received by |
| 20 | checking for ``pathlib.Path`` vs ``dict``. |
| 21 | |
| 22 | Snapshot Format |
| 23 | --------------- |
| 24 | A music snapshot is a JSON-serialisable dict: |
| 25 | |
| 26 | .. code-block:: json |
| 27 | |
| 28 | { |
| 29 | "files": { |
| 30 | "tracks/drums.mid": "<sha256>", |
| 31 | "tracks/bass.mid": "<sha256>" |
| 32 | }, |
| 33 | "domain": "music" |
| 34 | } |
| 35 | |
| 36 | The ``files`` key maps POSIX paths (relative to ``muse-work/``) to their |
| 37 | SHA-256 content digests. |
| 38 | |
| 39 | Delta Format (Phase 1) |
| 40 | ---------------------- |
| 41 | ``diff()`` returns a ``StructuredDelta`` with typed ``DomainOp`` entries: |
| 42 | |
| 43 | - ``InsertOp`` — a file was added (``content_id`` = its SHA-256 hash). |
| 44 | - ``DeleteOp`` — a file was removed. |
| 45 | - ``ReplaceOp`` — a non-MIDI file's content changed. |
| 46 | - ``PatchOp`` — a ``.mid`` file changed; ``child_ops`` contains note-level |
| 47 | ``InsertOp`` / ``DeleteOp`` entries from the Myers LCS diff. |
| 48 | |
| 49 | When ``repo_root`` is available, MIDI files are loaded from the object store |
| 50 | and diffed at note level. Without it, modified ``.mid`` files fall back to |
| 51 | ``ReplaceOp``. |
| 52 | """ |
| 53 | from __future__ import annotations |
| 54 | |
| 55 | import hashlib |
| 56 | import json |
| 57 | import logging |
| 58 | import pathlib |
| 59 | |
| 60 | from muse.core.schema import ( |
| 61 | DimensionSpec, |
| 62 | DomainSchema, |
| 63 | SequenceSchema, |
| 64 | SetSchema, |
| 65 | TensorSchema, |
| 66 | TreeSchema, |
| 67 | ) |
| 68 | from muse.domain import ( |
| 69 | DeleteOp, |
| 70 | DomainOp, |
| 71 | DriftReport, |
| 72 | InsertOp, |
| 73 | LiveState, |
| 74 | MergeResult, |
| 75 | MuseDomainPlugin, |
| 76 | PatchOp, |
| 77 | ReplaceOp, |
| 78 | SnapshotManifest, |
| 79 | StateDelta, |
| 80 | StateSnapshot, |
| 81 | StructuredDelta, |
| 82 | StructuredMergePlugin, |
| 83 | ) |
| 84 | from muse.plugins.music.midi_diff import NoteKey |
| 85 | |
| 86 | logger = logging.getLogger(__name__) |
| 87 | |
| 88 | _DOMAIN_TAG = "music" |
| 89 | |
| 90 | |
| 91 | class MusicPlugin: |
| 92 | """Music domain plugin for the Muse VCS. |
| 93 | |
| 94 | Implements :class:`~muse.domain.MuseDomainPlugin` (six core interfaces) |
| 95 | and :class:`~muse.domain.StructuredMergePlugin` (Phase 3 operation-level |
| 96 | merge) for MIDI state stored as files in ``muse-work/``. |
| 97 | |
| 98 | This is the reference implementation. Every other domain plugin implements |
| 99 | the same six core interfaces; the :class:`~muse.domain.StructuredMergePlugin` |
| 100 | extension is optional but strongly recommended for domains that produce |
| 101 | note-level (sub-file) diffs. |
| 102 | """ |
| 103 | |
| 104 | # ------------------------------------------------------------------ |
| 105 | # 1. snapshot — capture live state as a content-addressed dict |
| 106 | # ------------------------------------------------------------------ |
| 107 | |
def snapshot(self, live_state: LiveState) -> StateSnapshot:
    """Return a content-addressed snapshot of *live_state*.

    Args:
        live_state: A ``pathlib.Path`` pointing at the ``muse-work/``
            directory, or an already-built snapshot dict. Anything that
            is not a ``pathlib.Path`` is handed back unchanged.

    Returns:
        For a path: a ``SnapshotManifest`` tagged with the music domain
        whose ``files`` mapping goes from POSIX paths (relative to the
        work directory) to the digest computed by ``_hash_file``.
        For any other input: the very object that was passed in.

    Ignore rules
    ------------
    Ignore patterns are loaded with ``load_patterns`` from the parent of
    the work directory, and every relative path matching them is left
    out. Files whose own name starts with ``"."`` are always left out,
    whatever the patterns say.
    """
    # Guard clause: a non-path live state is already a snapshot.
    if not isinstance(live_state, pathlib.Path):
        return live_state

    from muse.core.ignore import is_ignored, load_patterns

    work_root = live_state
    ignore_patterns = load_patterns(work_root.parent)

    manifest: dict[str, str] = {}
    # Sorted walk keeps the manifest's insertion order deterministic.
    for entry in sorted(work_root.rglob("*")):
        if not entry.is_file() or entry.name.startswith("."):
            continue
        relative = entry.relative_to(work_root).as_posix()
        if not is_ignored(relative, ignore_patterns):
            manifest[relative] = _hash_file(entry)

    return SnapshotManifest(files=manifest, domain=_DOMAIN_TAG)
| 145 | |
| 146 | # ------------------------------------------------------------------ |
| 147 | # 2. diff — compute the structured delta between two snapshots |
| 148 | # ------------------------------------------------------------------ |
| 149 | |
def diff(
    self,
    base: StateSnapshot,
    target: StateSnapshot,
    *,
    repo_root: pathlib.Path | None = None,
) -> StateDelta:
    """Build the ``StructuredDelta`` that turns *base* into *target*.

    The op list is assembled in three sorted groups, in this order:

    1. paths present only in *target* become ``InsertOp`` entries carrying
       the target content hash;
    2. paths present only in *base* become ``DeleteOp`` entries carrying
       the base content hash;
    3. paths present in both with differing hashes are delegated to
       ``_diff_modified_file``, which receives the old and new hashes
       together with *repo_root* and returns the op to record.

    Args:
        base: The ancestor snapshot; its ``"files"`` mapping is read.
        target: The later snapshot; its ``"files"`` mapping is read.
        repo_root: Repository root, forwarded untouched to
            ``_diff_modified_file`` for every modified path.

    Returns:
        A ``StructuredDelta`` for the music domain whose ``summary`` is
        produced by ``_summarise_ops`` from the assembled op list.
    """
    old_files = base["files"]
    new_files = target["files"]
    old_paths, new_paths = set(old_files), set(new_files)

    additions = [
        InsertOp(
            op="insert",
            address=path,
            position=None,
            content_id=new_files[path],
            content_summary=f"new file: {path}",
        )
        for path in sorted(new_paths - old_paths)
    ]

    removals = [
        DeleteOp(
            op="delete",
            address=path,
            position=None,
            content_id=old_files[path],
            content_summary=f"deleted: {path}",
        )
        for path in sorted(old_paths - new_paths)
    ]

    # Only paths whose content hash actually differs count as modified.
    modifications = [
        _diff_modified_file(
            path=path,
            old_hash=old_files[path],
            new_hash=new_files[path],
            repo_root=repo_root,
        )
        for path in sorted(old_paths & new_paths)
        if old_files[path] != new_files[path]
    ]

    ops: list[DomainOp] = [*additions, *removals, *modifications]
    return StructuredDelta(
        domain=_DOMAIN_TAG,
        ops=ops,
        summary=_summarise_ops(ops),
    )
| 224 | |
| 225 | # ------------------------------------------------------------------ |
| 226 | # 3. merge — three-way reconciliation |
| 227 | # ------------------------------------------------------------------ |
| 228 | |
def merge(
    self,
    base: StateSnapshot,
    left: StateSnapshot,
    right: StateSnapshot,
    *,
    repo_root: pathlib.Path | None = None,
) -> MergeResult:
    """Three-way merge two divergent music state lines against a common base.

    Resolution order for every path reported by ``_changed_paths``:

    1. **Single-side change** — only one side touched the path: that
       side's version (or deletion) is taken.
    2. **Consensus change** — both sides touched the path and ended up
       with the *same* outcome (both deleted it, or both now hold the
       identical content hash). The shared outcome is taken and the path
       is not a conflict, since either side's result is the same.
    3. **File-level strategy** — for the remaining both-sides paths, an
       ``ours`` / ``theirs`` rule from ``.museattributes`` (dimension
       ``"*"``) picks that side and records the strategy.
    4. **Dimension-level merge** — a ``.mid`` path present in base, left
       and right (with *repo_root* available and all three objects
       readable) is handed to ``merge_midi_dimensions``. On success the
       merged bytes are written to the object store and recorded with
       strategy ``"dimension-merge"``; a ``ValueError`` or ``None``
       result falls through to a conflict.
    5. **Conflict** — anything still unresolved is listed in
       ``conflicts`` and keeps its base entry in the merged manifest.
    6. **Manual override** — a single-side change whose ``"*"`` strategy
       is ``manual`` is forced into the conflict list and reset to its
       base entry.

    Args:
        base: Common ancestor snapshot.
        left: Snapshot of "our" line.
        right: Snapshot of "their" line.
        repo_root: Repository root used to load attributes and to access
            the object store. When ``None`` no attribute rules apply and
            no dimension-level merge is attempted.

    Returns:
        A ``MergeResult`` holding the merged manifest, the sorted list of
        conflicting paths, the strategies applied per path and the
        per-file dimension reports.
    """
    from muse.core.attributes import load_attributes, resolve_strategy
    from muse.core.object_store import read_object, write_object
    from muse.plugins.music.midi_merge import merge_midi_dimensions

    base_files = base["files"]
    left_files = left["files"]
    right_files = right["files"]

    attrs = load_attributes(repo_root) if repo_root is not None else []

    left_changed: set[str] = _changed_paths(base_files, left_files)
    right_changed: set[str] = _changed_paths(base_files, right_files)
    both_changed: set[str] = left_changed & right_changed

    merged: dict[str, str] = dict(base_files)

    def adopt(side_files: dict[str, str], path: str) -> None:
        """Make the merged manifest agree with *side_files* for *path*."""
        if path in side_files:
            merged[path] = side_files[path]
        else:
            merged.pop(path, None)

    # Clean single-side changes.
    for path in left_changed - both_changed:
        adopt(left_files, path)
    for path in right_changed - both_changed:
        adopt(right_files, path)

    # Consensus: both sides reached the same outcome. ``.get`` yields
    # ``None`` for a deleted path, so "both deleted" and "both hold the
    # same hash" are covered by one comparison.
    consensus: set[str] = {
        p for p in both_changed if left_files.get(p) == right_files.get(p)
    }
    for path in consensus:
        adopt(left_files, path)

    real_conflicts: set[str] = both_changed - consensus

    applied_strategies: dict[str, str] = {}
    dimension_reports: dict[str, dict[str, str]] = {}
    final_conflicts: list[str] = []

    for path in sorted(real_conflicts):
        file_strategy = resolve_strategy(attrs, path, "*")

        if file_strategy == "ours":
            adopt(left_files, path)
            applied_strategies[path] = "ours"
            continue

        if file_strategy == "theirs":
            adopt(right_files, path)
            applied_strategies[path] = "theirs"
            continue

        if (
            repo_root is not None
            and path.lower().endswith(".mid")
            and path in left_files
            and path in right_files
            and path in base_files
        ):
            base_obj = read_object(repo_root, base_files[path])
            left_obj = read_object(repo_root, left_files[path])
            right_obj = read_object(repo_root, right_files[path])

            if base_obj is not None and left_obj is not None and right_obj is not None:
                try:
                    dim_result = merge_midi_dimensions(
                        base_obj, left_obj, right_obj,
                        attrs,
                        path,
                    )
                except ValueError:
                    # Unmergeable MIDI input: fall through to a conflict.
                    dim_result = None

                if dim_result is not None:
                    merged_bytes, dim_report = dim_result
                    new_hash = hashlib.sha256(merged_bytes).hexdigest()
                    write_object(repo_root, new_hash, merged_bytes)
                    merged[path] = new_hash
                    applied_strategies[path] = "dimension-merge"
                    dimension_reports[path] = dim_report
                    continue

        final_conflicts.append(path)

    # Paths changed on exactly one side were auto-resolved above; a
    # ``manual`` rule overrides that and restores the base entry.
    for path in sorted(left_changed ^ right_changed):
        if resolve_strategy(attrs, path, "*") == "manual":
            final_conflicts.append(path)
            applied_strategies[path] = "manual"
            adopt(base_files, path)

    return MergeResult(
        merged=SnapshotManifest(files=merged, domain=_DOMAIN_TAG),
        conflicts=sorted(final_conflicts),
        applied_strategies=applied_strategies,
        dimension_reports=dimension_reports,
    )
| 371 | |
| 372 | # ------------------------------------------------------------------ |
| 373 | # 4. drift — compare committed state vs live state |
| 374 | # ------------------------------------------------------------------ |
| 375 | |
def drift(
    self,
    committed: StateSnapshot,
    live: LiveState,
) -> DriftReport:
    """Report how the live state differs from the *committed* snapshot.

    The live state is first normalised with :meth:`snapshot`, then
    compared against *committed* with :meth:`diff` (no ``repo_root`` is
    passed). Ops are tallied by kind: ``insert`` counts as added,
    ``delete`` as removed, and both ``replace`` and ``patch`` as
    modified. Any other op kind is not counted.

    Args:
        committed: The last committed snapshot.
        live: A ``pathlib.Path`` (``muse-work/``) or a snapshot dict
            describing the current live state.

    Returns:
        A :class:`~muse.domain.DriftReport` whose ``has_drift`` is true
        when any counted op exists, whose ``summary`` lists the non-zero
        counts (or ``"working tree clean"``), and whose ``delta`` is the
        computed delta.
    """
    current = self.snapshot(live)
    delta = self.diff(committed, current)

    # Insertion order of this dict fixes the order of the summary parts.
    tally: dict[str, int] = {"added": 0, "removed": 0, "modified": 0}
    label_for_kind = {
        "insert": "added",
        "delete": "removed",
        "replace": "modified",
        "patch": "modified",
    }
    for op in delta["ops"]:
        label = label_for_kind.get(op["op"])
        if label is not None:
            tally[label] += 1

    fragments: list[str] = [
        f"{count} {label}" for label, count in tally.items() if count
    ]
    summary = ", ".join(fragments) if fragments else "working tree clean"
    return DriftReport(has_drift=bool(fragments), summary=summary, delta=delta)
| 410 | |
| 411 | # ------------------------------------------------------------------ |
| 412 | # 5. apply — execute a delta against live state (checkout) |
| 413 | # ------------------------------------------------------------------ |
| 414 | |
def apply(self, delta: StateDelta, live_state: LiveState) -> LiveState:
    """Apply a structured delta and return the resulting live state.

    When ``live_state`` is a ``pathlib.Path`` the delta itself is not
    consulted: the directory is simply rescanned with :meth:`snapshot`.
    This presumes the caller has already updated the files on disk
    (e.g. a checkout restoring objects first) — confirm against callers.

    When ``live_state`` is a snapshot dict, a copy of its ``files``
    mapping is updated from the file-level ops in *delta*:

    - ``delete`` removes the address (a missing address is ignored);
    - ``replace`` stores the op's ``new_content_id`` under the address;
    - ``insert`` stores the op's ``content_id`` under the address.

    Every other op kind (such as ``patch``) is left unapplied in memory;
    callers that need those reflected should pass the workdir path.

    Args:
        delta: A ``StructuredDelta`` produced by :meth:`diff`.
        live_state: The workdir path or a snapshot dict.

    Returns:
        The updated live state as a ``SnapshotManifest`` (for a path,
        whatever :meth:`snapshot` returns for it).
    """
    if isinstance(live_state, pathlib.Path):
        return self.snapshot(live_state)

    # Work on a copy so the caller's snapshot is never mutated.
    files = dict(live_state["files"])

    for op in delta["ops"]:
        kind = op["op"]
        if kind == "delete":
            files.pop(op["address"], None)
        elif kind in ("replace", "insert"):
            id_field = "new_content_id" if kind == "replace" else "content_id"
            files[op["address"]] = op[id_field]
        # Remaining kinds are skipped in-memory — caller must use workdir path.

    return SnapshotManifest(files=files, domain=_DOMAIN_TAG)
| 452 | |
| 453 | # ------------------------------------------------------------------ |
| 454 | # 6. schema — declare structural schema for the algorithm library |
| 455 | # ------------------------------------------------------------------ |
| 456 | |
def schema(self) -> DomainSchema:
    """Return the structural schema declaration for the music domain.

    The top level is a ``SetSchema`` of ``audio_file`` elements
    identified by content. Four dimensions are declared, in this order:

    - **melodic** — a sequence of ``note_event`` elements, identified by
      position and diffed with ``lcs``; mergeable independently.
    - **harmonic** — a sequence of ``chord_event`` elements, identified
      by position and diffed with ``lcs``; mergeable independently.
    - **dynamic** — a rank-1 ``float32`` tensor with epsilon ``1.0`` and
      ``sparse`` diff mode; mergeable independently.
    - **structural** — a tree of ``track_node`` nodes diffed with
      ``zhang_shasha``; the only dimension flagged
      ``independent_merge=False``.

    The schema declares ``three_way`` merge mode and schema version 1.

    Returns:
        The ``DomainSchema`` describing the music domain.
    """
    workspace = SetSchema(
        kind="set",
        element_type="audio_file",
        identity="by_content",
    )

    melodic = DimensionSpec(
        name="melodic",
        description="Note pitches and durations over time",
        schema=SequenceSchema(
            kind="sequence",
            element_type="note_event",
            identity="by_position",
            diff_algorithm="lcs",
            alphabet=None,
        ),
        independent_merge=True,
    )

    harmonic = DimensionSpec(
        name="harmonic",
        description="Chord progressions and key signatures",
        schema=SequenceSchema(
            kind="sequence",
            element_type="chord_event",
            identity="by_position",
            diff_algorithm="lcs",
            alphabet=None,
        ),
        independent_merge=True,
    )

    dynamic = DimensionSpec(
        name="dynamic",
        description="Velocity and expression curves",
        schema=TensorSchema(
            kind="tensor",
            dtype="float32",
            rank=1,
            epsilon=1.0,
            diff_mode="sparse",
        ),
        independent_merge=True,
    )

    structural = DimensionSpec(
        name="structural",
        description="Track layout, time signatures, tempo map",
        schema=TreeSchema(
            kind="tree",
            node_type="track_node",
            diff_algorithm="zhang_shasha",
        ),
        independent_merge=False,
    )

    return DomainSchema(
        domain=_DOMAIN_TAG,
        description="MIDI and audio file versioning with note-level diff",
        top_level=workspace,
        dimensions=[melodic, harmonic, dynamic, structural],
        merge_mode="three_way",
        schema_version=1,
    )
| 541 | |
| 542 | # ------------------------------------------------------------------ |
| 543 | # 7. merge_ops — Phase 3 operation-level merge (StructuredMergePlugin) |
| 544 | # ------------------------------------------------------------------ |
| 545 | |
def merge_ops(
    self,
    base: StateSnapshot,
    ours_snap: StateSnapshot,
    theirs_snap: StateSnapshot,
    ours_ops: list[DomainOp],
    theirs_ops: list[DomainOp],
    *,
    repo_root: pathlib.Path | None = None,
) -> MergeResult:
    """Operation-level three-way merge built on ``merge_op_lists``.

    Algorithm
    ---------
    1. ``merge_op_lists`` classifies the two op lists into clean merged
       ops and conflicting (ours, theirs) pairs.
    2. Starting from the *base* manifest, every clean op whose address
       is not involved in a conflict is applied:

       - ``insert`` / ``replace`` store the op's ``content_id`` /
         ``new_content_id``;
       - ``delete`` removes the address;
       - ``patch`` is resolved **once per address**: if only one side
         patched the file, that side's final snapshot entry is taken;
         if both sides did, ``_merge_patch_ops`` attempts a note-level
         reconstruction and a ``None`` result turns the address into a
         conflict.
    3. For each conflicting pair, the ``"*"`` strategy from
       ``.museattributes`` is consulted. ``"ours"`` and ``"theirs"``
       take that side's final snapshot entry; anything else is reported
       as a conflict.

    Args:
        base: Common ancestor snapshot.
        ours_snap: Final snapshot of our branch.
        theirs_snap: Final snapshot of their branch.
        ours_ops: Operations from our branch delta (base → ours).
        theirs_ops: Operations from their branch delta (base → theirs).
        repo_root: Repository root for attributes and the object store;
            when ``None`` no attribute rules are loaded.

    Returns:
        A :class:`~muse.domain.MergeResult` with the reconciled manifest,
        the sorted de-duplicated conflict addresses, the strategies that
        were applied, and the clean merged ops as ``op_log``.
    """
    from muse.core.attributes import load_attributes, resolve_strategy
    from muse.core.op_transform import merge_op_lists

    attrs = load_attributes(repo_root) if repo_root is not None else []

    # OT classification: find commuting and conflicting op pairs.
    ot_result = merge_op_lists([], ours_ops, theirs_ops)

    merged_files: dict[str, str] = dict(base["files"])
    applied_strategies: dict[str, str] = {}
    final_conflicts: list[str] = []
    op_log: list[DomainOp] = list(ot_result.merged_ops)

    # Index PatchOps by address (last one wins) to detect same-file merges.
    ours_patches: dict[str, PatchOp] = {
        op["address"]: op for op in ours_ops if op["op"] == "patch"
    }
    theirs_patches: dict[str, PatchOp] = {
        op["address"]: op for op in theirs_ops if op["op"] == "patch"
    }

    conflicting_addresses: set[str] = {
        our_op["address"] for our_op, _ in ot_result.conflict_ops
    }

    def take_side(snap: StateSnapshot, addr: str) -> None:
        """Copy *addr* from *snap* into the merged manifest, or drop it."""
        side_files = snap["files"]
        if addr in side_files:
            merged_files[addr] = side_files[addr]
        else:
            merged_files.pop(addr, None)

    # The patch outcome depends only on the address, so a file patched by
    # both sides (two PatchOps in merged_ops) is resolved a single time
    # instead of being read, reconstructed and written twice.
    resolved_patch_addresses: set[str] = set()

    # --- Apply clean merged ops ---
    for op in ot_result.merged_ops:
        addr = op["address"]
        if addr in conflicting_addresses:
            continue  # handled in conflict resolution below

        kind = op["op"]
        if kind == "insert":
            merged_files[addr] = op["content_id"]

        elif kind == "delete":
            merged_files.pop(addr, None)

        elif kind == "replace":
            merged_files[addr] = op["new_content_id"]

        elif kind == "patch":
            if addr in resolved_patch_addresses:
                continue
            resolved_patch_addresses.add(addr)

            has_ours = addr in ours_patches
            has_theirs = addr in theirs_patches

            if has_ours and not has_theirs:
                # Only our side changed this file — take our version.
                take_side(ours_snap, addr)

            elif has_theirs and not has_ours:
                # Only their side changed this file — take their version.
                take_side(theirs_snap, addr)

            else:
                # Both sides patched the same file with commuting note ops.
                merged_content_id = _merge_patch_ops(
                    addr=addr,
                    ours_patch=ours_patches[addr],
                    theirs_patch=theirs_patches[addr],
                    base_files=dict(base["files"]),
                    ours_snap_files=dict(ours_snap["files"]),
                    theirs_snap_files=dict(theirs_snap["files"]),
                    repo_root=repo_root,
                )
                if merged_content_id is not None:
                    merged_files[addr] = merged_content_id
                else:
                    # Reconstruction failed — treat as manual conflict.
                    final_conflicts.append(addr)

    # --- Resolve conflicts ---
    for our_op, _their_op in ot_result.conflict_ops:
        addr = our_op["address"]
        strategy = resolve_strategy(attrs, addr, "*")

        if strategy == "ours":
            take_side(ours_snap, addr)
            applied_strategies[addr] = "ours"

        elif strategy == "theirs":
            take_side(theirs_snap, addr)
            applied_strategies[addr] = "theirs"

        else:
            # Any other strategy leaves the address unresolved.
            final_conflicts.append(addr)

    return MergeResult(
        merged=SnapshotManifest(files=merged_files, domain=_DOMAIN_TAG),
        conflicts=sorted(set(final_conflicts)),
        applied_strategies=applied_strategies,
        op_log=op_log,
    )
| 701 | |
| 702 | |
| 703 | # --------------------------------------------------------------------------- |
| 704 | # Module-level helpers |
| 705 | # --------------------------------------------------------------------------- |
| 706 | |
| 707 | |
def _merge_patch_ops(
    *,
    addr: str,
    ours_patch: PatchOp,
    theirs_patch: PatchOp,
    base_files: dict[str, str],
    ours_snap_files: dict[str, str],
    theirs_snap_files: dict[str, str],
    repo_root: pathlib.Path | None,
) -> str | None:
    """Attempt a note-level MIDI merge of two ``PatchOp`` entries for one file.

    Runs ``merge_op_lists`` on the ``child_ops`` of both patches. If the
    note-level ops are clean, the merged MIDI is rebuilt by:

    1. Loading base, ours, and theirs MIDI bytes from the object store.
    2. Extracting the note sequence from each version.
    3. Building ``content_id → NoteKey`` look-ups for ours and theirs so
       that insert ops can be resolved to concrete notes.
    4. Dropping every base note whose content ID is named by a merged
       delete op, then appending the notes named by merged insert ops.
    5. Calling ``reconstruct_midi`` with the base file's ticks-per-beat
       and writing the resulting bytes to the object store.

    An insert op whose content ID cannot be resolved in ours, theirs or
    base is logged at debug level and skipped.

    Returns ``None`` (so the caller can fall back to a file conflict) when:

    - *repo_root* is ``None`` or *addr* is not in *base_files*;
    - the note-level OT result is not clean;
    - the base, ours, or theirs bytes are unavailable (no snapshot entry
      for *addr*, or the object is missing from the store) — proceeding
      without a branch's bytes would silently lose its inserted notes;
    - any exception is raised while reading, parsing or reconstructing.

    Args:
        addr: Workspace-relative MIDI file path.
        ours_patch: Our PatchOp for this file.
        theirs_patch: Their PatchOp for this file.
        base_files: Content-ID map for the common ancestor snapshot.
        ours_snap_files: Content-ID map for our branch's final snapshot.
        theirs_snap_files: Content-ID map for their branch's final snapshot.
        repo_root: Repository root for object store access.

    Returns:
        Content-ID (SHA-256 hex) of the merged MIDI, or ``None`` on failure.
    """
    if repo_root is None or addr not in base_files:
        return None

    from muse.core.object_store import read_object, write_object
    from muse.core.op_transform import merge_op_lists
    from muse.plugins.music.midi_diff import NoteKey, extract_notes, reconstruct_midi

    # Run OT on note-level ops to classify conflicts.
    note_result = merge_op_lists([], ours_patch["child_ops"], theirs_patch["child_ops"])
    if not note_result.is_clean:
        logger.debug(
            "⚠️ Note-level conflict in %r: %d pair(s) — falling back to file conflict",
            addr,
            len(note_result.conflict_ops),
        )
        return None

    try:
        base_bytes = read_object(repo_root, base_files[addr])
        if base_bytes is None:
            return None

        ours_hash = ours_snap_files.get(addr)
        theirs_hash = theirs_snap_files.get(addr)
        ours_bytes = read_object(repo_root, ours_hash) if ours_hash else None
        theirs_bytes = read_object(repo_root, theirs_hash) if theirs_hash else None

        # Without both branch versions the inserted notes cannot be
        # resolved; bail out rather than emit a merge missing them.
        if ours_bytes is None or theirs_bytes is None:
            logger.debug(
                "Branch MIDI bytes unavailable for %r; falling back to file conflict",
                addr,
            )
            return None

        base_notes, ticks_per_beat = extract_notes(base_bytes)
        ours_notes, _ = extract_notes(ours_bytes)
        theirs_notes, _ = extract_notes(theirs_bytes)

        # content_id → NoteKey look-ups for each version.
        ours_by_id: dict[str, NoteKey] = {_note_content_id(n): n for n in ours_notes}
        theirs_by_id: dict[str, NoteKey] = {
            _note_content_id(n): n for n in theirs_notes
        }
        base_note_by_id: dict[str, NoteKey] = {
            _note_content_id(n): n for n in base_notes
        }

        # Apply deletions to the base note list.
        delete_ids: set[str] = {
            op["content_id"] for op in note_result.merged_ops if op["op"] == "delete"
        }
        surviving: list[NoteKey] = [
            n for n in base_notes if _note_content_id(n) not in delete_ids
        ]

        # Collect insertions: resolve content_id via ours, then theirs,
        # then base. Explicit ``None`` checks avoid depending on the
        # truthiness of a NoteKey value.
        inserted: list[NoteKey] = []
        for op in note_result.merged_ops:
            if op["op"] != "insert":
                continue
            cid = op["content_id"]
            note = ours_by_id.get(cid)
            if note is None:
                note = theirs_by_id.get(cid)
            if note is None:
                note = base_note_by_id.get(cid)
            if note is None:
                logger.debug(
                    "⚠️ Cannot resolve note content_id %s for %r — skipping",
                    cid[:12],
                    addr,
                )
                continue
            inserted.append(note)

        merged_notes = surviving + inserted
        merged_bytes = reconstruct_midi(merged_notes, ticks_per_beat=ticks_per_beat)

        merged_hash = hashlib.sha256(merged_bytes).hexdigest()
        write_object(repo_root, merged_hash, merged_bytes)

        logger.info(
            "✅ Note-level MIDI merge for %r: %d ops clean, %d notes in result",
            addr,
            len(note_result.merged_ops),
            len(merged_notes),
        )
        return merged_hash

    except Exception as exc:  # noqa: BLE001 intentional broad catch
        # Best-effort merge: any failure degrades to a file-level conflict.
        logger.debug("⚠️ MIDI note-level reconstruction failed for %r: %s", addr, exc)
        return None
| 838 | |
| 839 | |
| 840 | def _note_content_id(note: NoteKey) -> str: |
| 841 | """Return the SHA-256 content ID for a :class:`~muse.plugins.music.midi_diff.NoteKey`. |
| 842 | |
| 843 | Delegates to the same algorithm used in :mod:`muse.plugins.music.midi_diff` |
| 844 | so that content IDs computed here are identical to those stored in |
| 845 | ``InsertOp`` / ``DeleteOp`` entries. |
| 846 | """ |
| 847 | payload = ( |
| 848 | f"{note['pitch']}:{note['velocity']}:" |
| 849 | f"{note['start_tick']}:{note['duration_ticks']}:{note['channel']}" |
| 850 | ) |
| 851 | return hashlib.sha256(payload.encode()).hexdigest() |
| 852 | |
| 853 | |
def _diff_modified_file(
    *,
    path: str,
    old_hash: str,
    new_hash: str,
    repo_root: pathlib.Path | None,
) -> DomainOp:
    """Build the most detailed op available for a file whose content changed.

    For a ``.mid`` path (case-insensitive) with a known ``repo_root``, both
    versions are read from the object store and diffed at note level; on
    success the result is wrapped in a ``PatchOp``.  In every other case —
    non-MIDI path, no ``repo_root``, either blob unreadable, or the deep diff
    raising — a plain ``ReplaceOp`` is returned instead.

    Args:
        path: Address of the modified file; used as the op's ``address``.
        old_hash: Content ID of the previous version.
        new_hash: Content ID of the updated version.
        repo_root: Repository root for object store access, or ``None`` when
            no object store is available.

    Returns:
        A ``PatchOp`` carrying the note-level child ops, or a ``ReplaceOp``
        referencing the old and new content IDs.
    """
    if repo_root is not None and path.lower().endswith(".mid"):
        # Imported lazily so the object store and MIDI parser are only
        # loaded when a deep diff is actually attempted.
        from muse.core.object_store import read_object
        from muse.plugins.music.midi_diff import diff_midi_notes

        previous = read_object(repo_root, old_hash)
        updated = read_object(repo_root, new_hash)

        if not (previous is None or updated is None):
            try:
                note_delta = diff_midi_notes(previous, updated, file_path=path)
                return PatchOp(
                    op="patch",
                    address=path,
                    child_ops=note_delta["ops"],
                    child_domain=note_delta["domain"],
                    child_summary=note_delta["summary"],
                )
            except Exception as exc:
                # Best-effort: any failure in the deep diff degrades to the
                # coarse file-level replace below rather than propagating.
                logger.debug("⚠️ MIDI deep diff failed for %r: %s", path, exc)

    return ReplaceOp(
        op="replace",
        address=path,
        position=None,
        old_content_id=old_hash,
        new_content_id=new_hash,
        old_summary=f"{path} (previous)",
        new_summary=f"{path} (updated)",
    )
| 896 | |
| 897 | |
| 898 | def _summarise_ops(ops: list[DomainOp]) -> str: |
| 899 | """Build a human-readable summary string from a list of domain ops.""" |
| 900 | inserts = 0 |
| 901 | deletes = 0 |
| 902 | replaces = 0 |
| 903 | patches = 0 |
| 904 | |
| 905 | for op in ops: |
| 906 | kind = op["op"] |
| 907 | if kind == "insert": |
| 908 | inserts += 1 |
| 909 | elif kind == "delete": |
| 910 | deletes += 1 |
| 911 | elif kind == "replace": |
| 912 | replaces += 1 |
| 913 | elif kind == "patch": |
| 914 | patches += 1 |
| 915 | |
| 916 | parts: list[str] = [] |
| 917 | if inserts: |
| 918 | parts.append(f"{inserts} file{'s' if inserts != 1 else ''} added") |
| 919 | if deletes: |
| 920 | parts.append(f"{deletes} file{'s' if deletes != 1 else ''} removed") |
| 921 | if replaces: |
| 922 | parts.append(f"{replaces} file{'s' if replaces != 1 else ''} modified") |
| 923 | if patches: |
| 924 | parts.append(f"{patches} file{'s' if patches != 1 else ''} patched") |
| 925 | |
| 926 | return ", ".join(parts) if parts else "no changes" |
| 927 | |
| 928 | |
| 929 | def _hash_file(path: pathlib.Path) -> str: |
| 930 | """Return the SHA-256 hex digest of a file's raw bytes.""" |
| 931 | h = hashlib.sha256() |
| 932 | with path.open("rb") as fh: |
| 933 | for chunk in iter(lambda: fh.read(65536), b""): |
| 934 | h.update(chunk) |
| 935 | return h.hexdigest() |
| 936 | |
| 937 | |
| 938 | def _changed_paths( |
| 939 | base: dict[str, str], other: dict[str, str] |
| 940 | ) -> set[str]: |
| 941 | """Return paths that differ between *base* and *other*.""" |
| 942 | base_p = set(base) |
| 943 | other_p = set(other) |
| 944 | added = other_p - base_p |
| 945 | deleted = base_p - other_p |
| 946 | common = base_p & other_p |
| 947 | modified = {p for p in common if base[p] != other[p]} |
| 948 | return added | deleted | modified |
| 949 | |
| 950 | |
def content_hash(snapshot: StateSnapshot) -> str:
    """Derive a stable SHA-256 digest from *snapshot* for content-addressing.

    The snapshot is serialised to JSON with sorted keys and no whitespace
    between tokens, so two snapshots with equal content hash identically
    regardless of dict insertion order.

    Args:
        snapshot: A JSON-serialisable snapshot.

    Returns:
        The hex-encoded SHA-256 digest of the canonical JSON encoding.
    """
    # Canonical form: sorted keys plus the most compact separators.
    encoded = json.dumps(
        snapshot, separators=(",", ":"), sort_keys=True
    ).encode()
    hasher = hashlib.sha256()
    hasher.update(encoded)
    return hasher.hexdigest()
| 955 | |
| 956 | |
#: Shared module-level instance — import ``plugin`` instead of constructing
#: a new ``MusicPlugin``.
plugin = MusicPlugin()

# Import-time conformance checks: fail as soon as the module loads if the
# singleton stops satisfying either protocol.  These are ``assert``
# statements, so they are skipped when Python runs with ``-O``.
assert isinstance(
    plugin, MuseDomainPlugin
), "MusicPlugin does not satisfy the MuseDomainPlugin protocol"
assert isinstance(
    plugin, StructuredMergePlugin
), "MusicPlugin does not satisfy the StructuredMergePlugin protocol"