gabriel / muse public
set_ops.py python
98 lines 2.9 KB
bda49bdb feat: redesign .museignore as TOML with domain-scoped sections (#100) Gabriel Cardona <cgcardona@gmail.com> 5d ago
1 """Hash-set algebra diff for unordered collections.
2
3 Computes the symmetric difference between two ``frozenset[str]`` collections
4 of content IDs. The result is a ``StructuredDelta`` containing:
5
6 - ``InsertOp`` entries for content IDs present in *target* but not *base*.
7 - ``DeleteOp`` entries for content IDs present in *base* but not *target*.
8
9 No ``MoveOp`` or ``ReplaceOp`` is ever produced: unordered sets have no
10 positional semantics, so every element is either added or removed.
11
12 This is the algorithm the MIDI plugin uses for file-level diffing (which set
13 of POSIX paths changed). Plugins with a ``SetSchema`` in their ``DomainSchema``
14 get this algorithm for free via :func:`~muse.core.diff_algorithms.diff_by_schema`.
15
16 Public API
17 ----------
18 - :func:`diff` — ``frozenset[str]`` × ``frozenset[str]`` → ``StructuredDelta``.
19 """
20
21 from __future__ import annotations
22
23 import logging
24
25 from muse.core.schema import SetSchema
26 from muse.domain import DeleteOp, DomainOp, InsertOp, StructuredDelta
27
28 logger = logging.getLogger(__name__)
29
30
31 def diff(
32 schema: SetSchema,
33 base: frozenset[str],
34 target: frozenset[str],
35 *,
36 domain: str,
37 address: str = "",
38 ) -> StructuredDelta:
39 """Diff two unordered sets of content IDs under the given ``SetSchema``.
40
41 All insertions and deletions have ``position=None`` because the collection
42 is unordered — position has no meaning for set elements.
43
44 Args:
45 schema: The ``SetSchema`` declaring element type and identity.
46 base: Base (ancestor) set of content IDs.
47 target: Target (newer) set of content IDs.
48 domain: Domain tag for the returned ``StructuredDelta``.
49 address: Address prefix for generated op entries.
50
51 Returns:
52 A ``StructuredDelta`` with ``InsertOp`` and ``DeleteOp`` entries.
53 """
54 added = sorted(target - base)
55 removed = sorted(base - target)
56 elem = schema["element_type"]
57
58 ops: list[DomainOp] = []
59
60 for cid in removed:
61 ops.append(
62 DeleteOp(
63 op="delete",
64 address=address,
65 position=None,
66 content_id=cid,
67 content_summary=f"{elem} removed: {cid[:12]}",
68 )
69 )
70
71 for cid in added:
72 ops.append(
73 InsertOp(
74 op="insert",
75 address=address,
76 position=None,
77 content_id=cid,
78 content_summary=f"{elem} added: {cid[:12]}",
79 )
80 )
81
82 n_add = len(added)
83 n_del = len(removed)
84 parts: list[str] = []
85 if n_add:
86 parts.append(f"{n_add} {elem}{'s' if n_add != 1 else ''} added")
87 if n_del:
88 parts.append(f"{n_del} {elem}{'s' if n_del != 1 else ''} removed")
89 summary = ", ".join(parts) if parts else f"no {elem} changes"
90
91 logger.debug(
92 "set_ops.diff %r: +%d -%d elements",
93 address,
94 n_add,
95 n_del,
96 )
97
98 return StructuredDelta(domain=domain, ops=ops, summary=summary)