gabriel / muse public
schema.py python
225 lines 8.3 KB
bda49bdb feat: redesign .museignore as TOML with domain-scoped sections (#100) Gabriel Cardona <cgcardona@gmail.com> 5d ago
1 """Domain schema declaration types.
2
3 A plugin implements :meth:`~muse.domain.MuseDomainPlugin.schema` returning a
4 :class:`DomainSchema` to declare the structural shape of its data. The core
5 engine uses this declaration to:
6
7 1. Select the correct diff algorithm for each dimension via
8 :func:`~muse.core.diff_algorithms.diff_by_schema`.
9 2. Provide informed conflict messages (citing dimension names) during OT merge.
10 3. Route to CRDT convergent join when ``merge_mode`` is ``"crdt"``.
11
12 Every schema type is a ``TypedDict`` — JSON-serialisable, zero-``Any``, and
13 verifiable by mypy in strict mode.
14
15 CRDT dimension spec
16 -------------------
17 :class:`CRDTDimensionSpec` declares which CRDT primitive a dimension uses when
18 ``DomainSchema.merge_mode`` is ``"crdt"``. Plugins that mix three-way and
19 CRDT semantics per-dimension use :class:`CRDTDimensionSpec` for their CRDT
20 dimensions and :class:`DimensionSpec` for their three-way dimensions; both are
21 listed in :class:`DomainSchema`.
22
23 Design note on ``MapSchema.value_schema``
24 -----------------------------------------
25 ``MapSchema.value_schema`` carries the type ``ElementSchema``, which is
26 defined *after* ``MapSchema`` in this file. With ``from __future__ import
27 annotations`` all annotations are evaluated lazily, so this forward reference
28 is resolved correctly by both the Python runtime and mypy.
29 """
30
31 from __future__ import annotations
32
33 from typing import Literal, TypedDict
34
35
36 # ---------------------------------------------------------------------------
37 # Element schema types — one per structural primitive
38 # ---------------------------------------------------------------------------
39
40
class SequenceSchema(TypedDict):
    """Ordered sequence of homogeneous elements (LCS-diffable).

    Use for any domain data that is fundamentally a list: note events in a
    MIDI track, nucleotides in a DNA strand, frames in an animation.

    ``diff_algorithm`` selects the variant of LCS:

    - ``"lcs"`` — classic O(nm) LCS, minimal insertions and deletions.
    - ``"myers"`` — O(nd) Myers algorithm, same semantics, faster for low
      edit distance (this is what Git uses).
    - ``"patience"`` — patience-sort variant, produces more human-readable
      diffs for sequences with many repeated elements.
    """

    # Discriminator tag; the only legal value for this schema type.
    kind: Literal["sequence"]
    # Name of the element type stored in the sequence.
    # NOTE(review): looks like a free-form label — confirm against consumers.
    element_type: str
    # How two elements are recognised as "the same" element across versions.
    identity: Literal["by_id", "by_position", "by_content"]
    # Which LCS variant the diff engine should run (see docstring).
    diff_algorithm: Literal["lcs", "myers", "patience"]
    # NOTE(review): presumably the closed set of allowed element symbols, with
    # ``None`` meaning "unconstrained" — not documented here, TODO confirm.
    alphabet: list[str] | None
60
61
class TreeSchema(TypedDict):
    """Hierarchical labeled ordered tree (tree-edit-diffable).

    Use for domain data with parent-child relationships: scene graphs, XML /
    AST nodes, track hierarchies in a DAW.

    ``diff_algorithm`` selects the tree edit algorithm:

    - ``"zhang_shasha"`` — Zhang-Shasha 1989 O(n²m) minimum edit distance.
    - ``"gumtree"`` — GumTree heuristic, better for large ASTs.
    """

    # Discriminator tag; the only legal value for this schema type.
    kind: Literal["tree"]
    # Name of the node type that makes up the tree.
    # NOTE(review): looks like a free-form label — confirm against consumers.
    node_type: str
    # Which tree-edit algorithm the diff engine should run (see docstring).
    diff_algorithm: Literal["zhang_shasha", "gumtree"]
76
77
class TensorSchema(TypedDict):
    """N-dimensional numerical array (sparse-numerical-diffable).

    Use for simulation state, velocity curves, weight matrices, voxel grids.
    Floating-point drift below ``epsilon`` is *not* considered a change.

    ``diff_mode`` controls the output granularity:

    - ``"sparse"`` — one ``ReplaceOp`` per changed element.
    - ``"block"`` — groups adjacent changes into contiguous range ops.
    - ``"full"`` — one ``ReplaceOp`` for the entire array if anything changed.
    """

    # Discriminator tag; the only legal value for this schema type.
    kind: Literal["tensor"]
    # Element dtype, restricted to the listed float/int widths.
    dtype: Literal["float32", "float64", "int8", "int16", "int32", "int64"]
    # NOTE(review): presumably the number of array dimensions (the "N" in
    # "N-dimensional") — confirm.
    rank: int
    # Tolerance: differences smaller than this are not reported as changes.
    epsilon: float
    # Granularity of the emitted diff ops (see docstring).
    diff_mode: Literal["sparse", "block", "full"]
95
96
class SetSchema(TypedDict):
    """Unordered collection of unique elements (set-algebra-diffable).

    Use for collections where order is irrelevant: a set of files, a set of
    annotations, a set of material IDs in a 3D scene.

    ``identity`` determines what makes two elements "the same":

    - ``"by_content"`` — SHA-256 of content (structural equality).
    - ``"by_id"`` — stable element ID (e.g. UUID).
    """

    # Discriminator tag; the only legal value for this schema type.
    kind: Literal["set"]
    # Name of the element type stored in the set.
    # NOTE(review): looks like a free-form label — confirm against consumers.
    element_type: str
    # Equality rule used when computing added / removed members.
    identity: Literal["by_content", "by_id"]
111
112
class MapSchema(TypedDict):
    """Key-value map with known or dynamic keys.

    Use for dictionaries where both key and value structure matter: a map of
    chromosome name → nucleotide sequence, or annotation key → quality scores.

    ``value_schema`` is itself an ``ElementSchema``, allowing recursive
    declarations (e.g. a map of sequences, a map of trees).
    """

    # Discriminator tag; the only legal value for this schema type.
    kind: Literal["map"]
    # Name of the key type.
    # NOTE(review): looks like a free-form label — confirm against consumers.
    key_type: str
    # Forward reference: ``ElementSchema`` is assigned further down the
    # module, so this only works because annotations are evaluated lazily
    # (``from __future__ import annotations`` at the top of the file).
    value_schema: ElementSchema
    # Entries are matched by key; no other identity mode is offered for maps.
    identity: Literal["by_key"]
127
128
#: Union of all element schema types.
#: This is the type of ``DimensionSpec.schema`` and ``DomainSchema.top_level``.
#: Unlike the field annotations above, this ``|`` expression is an ordinary
#: assignment and is therefore evaluated eagerly when the module is imported.
ElementSchema = SequenceSchema | TreeSchema | TensorSchema | MapSchema | SetSchema
132
133
134 # ---------------------------------------------------------------------------
135 # Dimension spec — a named semantic sub-dimension
136 # ---------------------------------------------------------------------------
137
138
class DimensionSpec(TypedDict):
    """A named semantic sub-dimension of the domain's state.

    Domains are multi-dimensional. MIDI has notes, pitch_bend, cc_volume, and
    track_structure dimensions. Genomics has coding regions, regulatory
    elements, and metadata dimensions. 3D spatial design has geometry,
    materials, lighting, and animation dimensions.

    Each dimension can use a different element schema and diff algorithm.
    The OT merge engine merges independent dimensions in parallel
    without blocking on each other.

    ``independent_merge`` — when ``True``, a conflict in this dimension does
    not block merging other dimensions. When ``False`` (e.g. track_structure
    changes in a DAW session), all dimensions must wait for this one to
    resolve.
    """

    # Dimension name (e.g. "notes", "pitch_bend" in the MIDI example above).
    name: str
    # Human-readable description of the dimension.
    description: str
    # Structural shape of this dimension's data; selects the diff algorithm.
    schema: ElementSchema
    # Whether a conflict here leaves other dimensions free to merge.
    independent_merge: bool
160
161
162 # ---------------------------------------------------------------------------
163 # CRDT per-dimension schema
164 # ---------------------------------------------------------------------------
165
#: The CRDT primitive types available for a dimension.
CRDTPrimitive = Literal["lww_register", "or_set", "rga", "aw_map", "g_counter"]
168
169
class CRDTDimensionSpec(TypedDict):
    """Schema for a single dimension that uses CRDT convergent merge semantics.

    Plugins declare a ``CRDTDimensionSpec`` for each dimension they want the
    core engine to merge via :meth:`~muse.domain.CRDTPlugin.join` rather than
    the three-way merge path.

    ``crdt_type`` selects the primitive:

    - ``"lww_register"`` — scalar, last-write-wins (timestamps).
    - ``"or_set"`` — unordered set, adds win over concurrent removes.
    - ``"rga"`` — ordered sequence (collaborative text / note editing).
    - ``"aw_map"`` — key-value map, adds win.
    - ``"g_counter"`` — monotonically increasing integer counter.

    ``independent_merge`` mirrors :class:`DimensionSpec`: when ``True``,
    conflicts in other dimensions do not block this one.
    """

    # Dimension name.
    name: str
    # Human-readable description of the dimension.
    description: str
    # Which CRDT primitive backs this dimension (see docstring).
    crdt_type: CRDTPrimitive
    # Same flag as on ``DimensionSpec``.
    independent_merge: bool
193
194
195 # ---------------------------------------------------------------------------
196 # Top-level domain schema
197 # ---------------------------------------------------------------------------
198
199
class DomainSchema(TypedDict):
    """Complete structural declaration for a domain plugin.

    Returned by :meth:`~muse.domain.MuseDomainPlugin.schema`. The core engine
    reads this once at plugin registration time.

    ``top_level`` declares the primary collection structure (e.g. a set of
    files for music, a map of chromosome sequences for genomics).

    ``dimensions`` declares the semantic sub-dimensions. The OT merge engine
    uses these to determine which changes can be merged independently.

    ``merge_mode`` controls the merge strategy:

    - ``"three_way"`` — standard three-way merge (Phases 1–3).
    - ``"crdt"`` — convergent CRDT join.

    ``schema_version`` tracks the schema format for future migrations.
    It is always ``1``.
    """

    # Domain name.
    domain: str
    # Human-readable description of the domain.
    description: str
    # NOTE(review): the module docstring says CRDT dimension specs are also
    # listed in DomainSchema, but this field only admits ``DimensionSpec`` —
    # confirm where ``CRDTDimensionSpec`` entries are meant to be declared.
    dimensions: list[DimensionSpec]
    # Primary collection structure of the domain's state.
    top_level: ElementSchema
    # Merge strategy for the whole domain (see docstring).
    merge_mode: Literal["three_way", "crdt"]
    # Schema format version; pinned to 1 by the type.
    schema_version: Literal[1]