set_ops.py
python
| 1 | """Hash-set algebra diff for unordered collections. |
| 2 | |
| 3 | Computes the symmetric difference between two ``frozenset[str]`` collections |
| 4 | of content IDs. The result is a ``StructuredDelta`` containing: |
| 5 | |
| 6 | - ``InsertOp`` entries for content IDs present in *target* but not *base*. |
| 7 | - ``DeleteOp`` entries for content IDs present in *base* but not *target*. |
| 8 | |
| 9 | No ``MoveOp`` or ``ReplaceOp`` is ever produced: unordered sets have no |
| 10 | positional semantics, so every element is either added or removed. |
| 11 | |
| 12 | This is the algorithm the MIDI plugin uses for file-level diffing (which set |
| 13 | of POSIX paths changed). Plugins with a ``SetSchema`` in their ``DomainSchema`` |
| 14 | get this algorithm for free via :func:`~muse.core.diff_algorithms.diff_by_schema`. |
| 15 | |
| 16 | Public API |
| 17 | ---------- |
| 18 | - :func:`diff` — ``frozenset[str]`` × ``frozenset[str]`` → ``StructuredDelta``. |
| 19 | """ |
| 20 | |
| 21 | from __future__ import annotations |
| 22 | |
| 23 | import logging |
| 24 | |
| 25 | from muse.core.schema import SetSchema |
| 26 | from muse.domain import DeleteOp, DomainOp, InsertOp, StructuredDelta |
| 27 | |
| 28 | logger = logging.getLogger(__name__) |
| 29 | |
| 30 | |
| 31 | def diff( |
| 32 | schema: SetSchema, |
| 33 | base: frozenset[str], |
| 34 | target: frozenset[str], |
| 35 | *, |
| 36 | domain: str, |
| 37 | address: str = "", |
| 38 | ) -> StructuredDelta: |
| 39 | """Diff two unordered sets of content IDs under the given ``SetSchema``. |
| 40 | |
| 41 | All insertions and deletions have ``position=None`` because the collection |
| 42 | is unordered — position has no meaning for set elements. |
| 43 | |
| 44 | Args: |
| 45 | schema: The ``SetSchema`` declaring element type and identity. |
| 46 | base: Base (ancestor) set of content IDs. |
| 47 | target: Target (newer) set of content IDs. |
| 48 | domain: Domain tag for the returned ``StructuredDelta``. |
| 49 | address: Address prefix for generated op entries. |
| 50 | |
| 51 | Returns: |
| 52 | A ``StructuredDelta`` with ``InsertOp`` and ``DeleteOp`` entries. |
| 53 | """ |
| 54 | added = sorted(target - base) |
| 55 | removed = sorted(base - target) |
| 56 | elem = schema["element_type"] |
| 57 | |
| 58 | ops: list[DomainOp] = [] |
| 59 | |
| 60 | for cid in removed: |
| 61 | ops.append( |
| 62 | DeleteOp( |
| 63 | op="delete", |
| 64 | address=address, |
| 65 | position=None, |
| 66 | content_id=cid, |
| 67 | content_summary=f"{elem} removed: {cid[:12]}", |
| 68 | ) |
| 69 | ) |
| 70 | |
| 71 | for cid in added: |
| 72 | ops.append( |
| 73 | InsertOp( |
| 74 | op="insert", |
| 75 | address=address, |
| 76 | position=None, |
| 77 | content_id=cid, |
| 78 | content_summary=f"{elem} added: {cid[:12]}", |
| 79 | ) |
| 80 | ) |
| 81 | |
| 82 | n_add = len(added) |
| 83 | n_del = len(removed) |
| 84 | parts: list[str] = [] |
| 85 | if n_add: |
| 86 | parts.append(f"{n_add} {elem}{'s' if n_add != 1 else ''} added") |
| 87 | if n_del: |
| 88 | parts.append(f"{n_del} {elem}{'s' if n_del != 1 else ''} removed") |
| 89 | summary = ", ".join(parts) if parts else f"no {elem} changes" |
| 90 | |
| 91 | logger.debug( |
| 92 | "set_ops.diff %r: +%d -%d elements", |
| 93 | address, |
| 94 | n_add, |
| 95 | n_del, |
| 96 | ) |
| 97 | |
| 98 | return StructuredDelta(domain=domain, ops=ops, summary=summary) |