gabriel / muse public
attributes.py python
380 lines 12.3 KB
bda49bdb feat: redesign .museignore as TOML with domain-scoped sections (#100) Gabriel Cardona <cgcardona@gmail.com> 5d ago
1 """Muse attributes — ``.museattributes`` TOML parser and per-path strategy resolver.
2
3 ``.museattributes`` lives in the repository root (next to ``.muse/`` and
4 ``muse-work/``) and declares merge strategies for specific paths and
5 dimensions. It uses TOML syntax with an optional ``[meta]`` section for
6 domain declaration and an ordered ``[[rules]]`` array.
7
8 Format
9 ------
10
11 .. code-block:: toml
12
13 # .museattributes
14 # Merge strategy overrides for this repository.
15
16 [meta]
17 domain = "midi" # optional — validated against .muse/repo.json
18
19 [[rules]]
20 path = "drums/*" # fnmatch glob against workspace-relative POSIX paths
21 dimension = "*" # domain axis name, or "*" to match any dimension
22 strategy = "ours" # resolution strategy (see below)
23 comment = "Drums are always authored by branch A — always prefer ours."
24 priority = 10 # optional; higher priority rules are tried first
25
26 [[rules]]
27 path = "keys/*"
28 dimension = "pitch_bend"
29 strategy = "theirs"
30 comment = "Remote always has the better pitch-bend automation."
31
32 [[rules]]
33 path = "*"
34 dimension = "*"
35 strategy = "auto"
36
37 Strategies
38 ----------
39
40 ``ours``
41 Take the left / current-branch version; the path is removed from the
42 conflict list.
43
44 ``theirs``
45 Take the right / incoming-branch version; the path is removed from the
46 conflict list.
47
48 ``union``
49 Include **all** additions from both sides. Deletions are honoured only
50 when **both** sides agree. For independent element sets (MIDI notes,
51 code symbol additions, import sets) this produces a combined result with
52 no conflicts. For opaque binary blobs where full unification is
53 impossible, the left / current-branch blob is preferred and the path is
54 removed from the conflict list.
55
56 ``base``
57 Revert to the common merge-base version — discard changes from *both*
58 branches. Useful for generated files, lock files, or any path that
59 should always stay at a known-good state during a merge.
60
61 ``auto``
62 Default behaviour. Defer to the engine's three-way algorithm.
63
64 ``manual``
65 Force the path into the conflict list even if the engine would
66 auto-resolve it. Use this to guarantee human review on safety-critical
67 paths.
68
69 Rule fields
70 -----------
71
72 ``path`` (required) — ``fnmatch`` glob matched against workspace-relative
73 POSIX paths (e.g. ``"tracks/*.mid"``, ``"src/**/*.py"``).
74
75 ``dimension`` (required) — domain axis name (e.g. ``"notes"``,
76 ``"pitch_bend"``, ``"symbols"``) or ``"*"`` to match any
77 dimension.
78
79 ``strategy`` (required) — one of the six strategies listed above.
80
81 ``comment`` (optional) — free-form documentation string; ignored at
82 runtime. Use it to explain *why* the rule exists.
83
84 ``priority`` (optional, default 0) — integer used to order rules before
85 file order. Higher-priority rules are evaluated first. Rules
86 with equal priority preserve their declaration order.
87
88 **First matching rule wins** after sorting by priority (descending) then
89 file order (ascending).
90
91 ``[meta]`` is optional; its absence has no effect on merge correctness.
92 When both ``[meta] domain`` and a repo ``domain`` are known, a mismatch
93 logs a warning.
94
95 Public API
96 ----------
97
98 - :class:`AttributesMeta` — TypedDict for the ``[meta]`` section.
99 - :class:`AttributesRuleDict` — TypedDict for a single ``[[rules]]`` entry.
100 - :class:`MuseAttributesFile` — TypedDict for the full parsed file.
101 - :class:`AttributeRule` — a single resolved rule (dataclass).
102 - :func:`read_attributes_meta` — read only the ``[meta]`` section.
103 - :func:`load_attributes` — read ``.museattributes`` from a repo root.
104 - :func:`resolve_strategy` — first-match strategy lookup.
105 """
106
107 from __future__ import annotations
108
109 import fnmatch
110 import logging
111 import pathlib
112 import tomllib
113 from dataclasses import dataclass, field
114 from typing import TypedDict
115
# Module-level logger, named after this module.
_logger = logging.getLogger(__name__)

# Every strategy name a ``[[rules]]`` entry may declare.
VALID_STRATEGIES: frozenset[str] = frozenset(
    (
        "auto",
        "base",
        "manual",
        "ours",
        "theirs",
        "union",
    )
)

# Name of the attributes file, resolved relative to the repository root.
_FILENAME = ".museattributes"
123
124
class AttributesMeta(TypedDict, total=False):
    """Shape of the optional ``[meta]`` table in ``.museattributes``.

    Every key is optional (``total=False``), so an empty dict is a valid
    value.
    """

    # Domain name declared by the file (e.g. "midi").
    domain: str
129
130
class _AttributesRuleRequired(TypedDict):
    """Keys that every ``[[rules]]`` entry must carry."""

    path: str
    dimension: str
    strategy: str


class AttributesRuleDict(_AttributesRuleRequired, total=False):
    """Typed representation of a single ``[[rules]]`` entry.

    ``path``, ``dimension``, and ``strategy`` are required; they are inherited
    from a total base class so that type checkers (and
    ``__required_keys__``) treat them as mandatory.  ``comment`` and
    ``priority`` are optional and declared here under ``total=False``.
    """

    comment: str
    priority: int
143
144
class MuseAttributesFile(TypedDict, total=False):
    """Shape of a fully parsed ``.museattributes`` document.

    Both keys are optional (``total=False``).
    """

    # Contents of the ``[meta]`` table.
    meta: AttributesMeta
    # Entries of the ``[[rules]]`` array.
    rules: list[AttributesRuleDict]
150
151
@dataclass(frozen=True)
class AttributeRule:
    """One immutable rule taken from the ``[[rules]]`` array.

    Attributes:
        path_pattern: ``fnmatch`` glob tested against workspace-relative paths.
        dimension:    Name of a domain axis (for example ``"notes"``), or
                      ``"*"``.
        strategy:     Resolution strategy; one of ``ours | theirs | union |
                      base | auto | manual``.
        comment:      Free-form note describing why the rule exists.  Not
                      used at runtime.
        priority:     Ordering weight; rules with larger values are evaluated
                      before rules with smaller values, and equal values keep
                      declaration order.
        source_index: 0-based position of the rule within ``[[rules]]``.
    """

    path_pattern: str
    dimension: str
    strategy: str
    comment: str = ""
    priority: int = 0
    source_index: int = 0
175
176
def _parse_raw(root: pathlib.Path) -> MuseAttributesFile:
    """Read and TOML-parse ``.museattributes``, returning a typed file structure.

    Builds ``MuseAttributesFile`` from the raw TOML dict using explicit
    ``isinstance`` checks — no ``Any`` propagated into the return value.

    Only recognised keys are copied: ``[meta] domain`` when it is a string,
    and for each ``[[rules]]`` table the three required string fields plus
    the optional ``comment`` (string) and ``priority`` (integer).  Optional
    fields of the wrong type are dropped rather than rejected.

    Args:
        root: Directory that contains the ``.museattributes`` file.

    Returns:
        The parsed file.  ``"meta"`` is present only when the document has a
        ``[meta]`` table, ``"rules"`` only when it has a ``rules`` array.

    Raises:
        ValueError: On TOML syntax errors, on content that is not valid
            UTF-8, or when a rule lacks a string ``path``, ``dimension`` or
            ``strategy``.
    """
    raw_bytes = (root / _FILENAME).read_bytes()
    try:
        raw = tomllib.loads(raw_bytes.decode("utf-8"))
    except (UnicodeDecodeError, tomllib.TOMLDecodeError) as exc:
        # Undecodable bytes are reported with the same file-scoped message as
        # syntax errors instead of leaking a bare UnicodeDecodeError.
        raise ValueError(f"{_FILENAME}: TOML parse error — {exc}") from exc

    result: MuseAttributesFile = {}

    # [meta] section
    meta_raw = raw.get("meta")
    if isinstance(meta_raw, dict):
        meta: AttributesMeta = {}
        domain_val = meta_raw.get("domain")
        if isinstance(domain_val, str):
            meta["domain"] = domain_val
        result["meta"] = meta

    # [[rules]] array
    rules_raw = raw.get("rules")
    if isinstance(rules_raw, list):
        rules: list[AttributesRuleDict] = []
        for idx, entry in enumerate(rules_raw):
            if not isinstance(entry, dict):
                # ``rules = [1, "x"]`` is valid TOML; such entries are skipped.
                continue

            path_val = entry.get("path")
            dim_val = entry.get("dimension")
            strat_val = entry.get("strategy")

            if not (
                isinstance(path_val, str)
                and isinstance(dim_val, str)
                and isinstance(strat_val, str)
            ):
                missing = [
                    name
                    for name, value in (
                        ("path", path_val),
                        ("dimension", dim_val),
                        ("strategy", strat_val),
                    )
                    if not isinstance(value, str)
                ]
                raise ValueError(
                    f"{_FILENAME}: rule[{idx}] is missing required field(s): "
                    + ", ".join(missing)
                )

            rule: AttributesRuleDict = {
                "path": path_val,
                "dimension": dim_val,
                "strategy": strat_val,
            }
            comment_val = entry.get("comment")
            if isinstance(comment_val, str):
                rule["comment"] = comment_val
            priority_val = entry.get("priority")
            # ``bool`` is a subclass of ``int``; without the extra check
            # ``priority = true`` would be accepted as priority 1.
            if isinstance(priority_val, int) and not isinstance(priority_val, bool):
                rule["priority"] = priority_val
            rules.append(rule)
        result["rules"] = rules

    return result
248
249
def read_attributes_meta(root: pathlib.Path) -> AttributesMeta:
    """Return the ``[meta]`` section of ``.museattributes``, or an empty dict.

    Does not validate or resolve rules — use this to inspect metadata only.
    The ``[[rules]]`` array is never examined here, so a malformed rule does
    not hide an otherwise valid ``[meta]`` table.

    Args:
        root: Repository root directory.

    Returns:
        The ``[meta]`` TypedDict.  It is empty when the file does not exist,
        cannot be parsed as UTF-8 TOML, has no ``[meta]`` table, or has no
        string ``domain`` in that table.
    """
    attr_file = root / _FILENAME
    if not attr_file.exists():
        return {}

    try:
        raw = tomllib.loads(attr_file.read_bytes().decode("utf-8"))
    except ValueError:
        # Both tomllib.TOMLDecodeError and UnicodeDecodeError derive from
        # ValueError; an unparseable file yields no metadata.
        return {}

    meta: AttributesMeta = {}
    meta_raw = raw.get("meta")
    if isinstance(meta_raw, dict):
        domain_val = meta_raw.get("domain")
        if isinstance(domain_val, str):
            meta["domain"] = domain_val
    return meta
273
274
def load_attributes(
    root: pathlib.Path,
    *,
    domain: str | None = None,
) -> list[AttributeRule]:
    """Load ``.museattributes`` from *root* and return its rules in match order.

    The returned list is ordered by ``priority`` (largest first); rules that
    share a priority stay in the order they were declared.

    Args:
        root: Repository root directory, i.e. the directory in which
            ``.museattributes`` is looked up.
        domain: Domain name of the active repository, if known.  When both
            this value and the file's ``[meta] domain`` are non-empty and
            they differ, a warning is logged.  ``None`` disables the check.

    Returns:
        A list of :class:`AttributeRule`.  The list is empty when the file
        does not exist or declares no rules.

    Raises:
        ValueError: If the file cannot be parsed as TOML, a rule lacks a
            required field, or a rule names a strategy that is not in
            ``VALID_STRATEGIES``.
    """
    if not (root / _FILENAME).exists():
        return []

    parsed = _parse_raw(root)

    # Warn (never fail) when the file targets a different domain.
    declared_domain = parsed.get("meta", {}).get("domain")
    if declared_domain and domain and declared_domain != domain:
        _logger.warning(
            "⚠️ %s: [meta] domain %r does not match active repo domain %r — "
            "rules may target a different domain",
            _FILENAME,
            declared_domain,
            domain,
        )

    resolved: list[AttributeRule] = []
    for position, raw_rule in enumerate(parsed.get("rules", [])):
        strategy_name = raw_rule["strategy"]
        if strategy_name not in VALID_STRATEGIES:
            raise ValueError(
                f"{_FILENAME}: rule[{position}]: unknown strategy {strategy_name!r}. "
                f"Valid strategies: {sorted(VALID_STRATEGIES)}"
            )
        resolved.append(
            AttributeRule(
                path_pattern=raw_rule["path"],
                dimension=raw_rule["dimension"],
                strategy=strategy_name,
                comment=raw_rule.get("comment", ""),
                priority=raw_rule.get("priority", 0),
                source_index=position,
            )
        )

    # ``sorted`` is stable, so equal priorities keep their declaration order.
    return sorted(resolved, key=lambda rule: -rule.priority)
344
345
def resolve_strategy(
    rules: list[AttributeRule],
    path: str,
    dimension: str = "*",
) -> str:
    """Look up the strategy of the first rule that applies to *path*.

    Rules are tested in the order given; the first rule that satisfies both
    conditions below decides the result:

    - **path**: ``fnmatch.fnmatch(path, rule.path_pattern)`` is ``True``.
    - **dimension**: the rule's dimension is ``"*"``, or it equals
      *dimension*, or the caller passed ``"*"`` as *dimension* (which accepts
      a rule of any dimension).

    Args:
        rules: Rules to search, in evaluation order (for example the list
            returned by :func:`load_attributes`).
        path: Workspace-relative POSIX path (e.g. ``"tracks/drums.mid"``).
        dimension: Domain axis name, or ``"*"`` to accept any rule dimension.

    Returns:
        The ``strategy`` of the first applicable rule, or ``"auto"`` when no
        rule applies.
    """
    caller_accepts_any = dimension == "*"
    for candidate in rules:
        if not fnmatch.fnmatch(path, candidate.path_pattern):
            continue
        if caller_accepts_any or candidate.dimension in ("*", dimension):
            return candidate.strategy
    return "auto"