cgcardona / muse public
ignore.py python
225 lines 8.2 KB
bda49bdb feat: redesign .museignore as TOML with domain-scoped sections (#100) Gabriel Cardona <cgcardona@gmail.com> 1d ago
1 """Muse ignore — ``.museignore`` TOML parser and workspace path filter.
2
3 ``.museignore`` uses TOML with two kinds of sections:
4
5 ``[global]``
6 Patterns applied to every domain. Evaluated first, in array order.
7
8 ``[domain.<name>]``
9 Patterns applied only when the active domain is *<name>*. Appended
10 after global patterns and evaluated in array order.
11
12 Pattern syntax (gitignore-compatible):
13
14 - A trailing ``/`` marks a directory pattern; it is never matched against
15 individual files (Muse VCS tracks files, not directories).
16 - A leading ``/`` **anchors** the pattern to the repository root, so
17 ``/tmp/*.mid`` matches only ``tmp/drums.mid`` and not ``cache/tmp/drums.mid``.
18 - A leading ``!`` **negates** a pattern: a path previously matched by an ignore
19 rule is un-ignored when it matches a subsequent negation rule.
20 - ``*`` matches any sequence of characters **except** a path separator (``/``).
21 - ``**`` matches any sequence of characters **including** path separators.
22 - All other characters are matched literally.
23
24 Rule evaluation
25 ---------------
26 Patterns are evaluated in the order they appear (global first, then
27 domain-specific). The **last matching rule wins**, mirroring gitignore
28 behaviour. A later ``!important.tmp`` overrides an earlier ``*.tmp`` for
29 that specific path.
30
31 Public API
32 ----------
33 - :func:`load_ignore_config` — parse ``.museignore`` → :data:`MuseIgnoreConfig`
34 - :func:`resolve_patterns` — flatten config to ``list[str]`` for a domain
35 - :func:`is_ignored` — test a relative POSIX path against a pattern list
36 """
37
38 from __future__ import annotations
39
40 import fnmatch
41 import pathlib
42 import tomllib
43 from typing import TypedDict
44
# Name of the ignore file; it is looked up directly inside the repository root.
_FILENAME = ".museignore"
46
47
class DomainSection(TypedDict, total=False):
    """Patterns for one ignore section (global or a named domain).

    Declared with ``total=False``, so a section mapping may omit the
    ``patterns`` key entirely.
    """

    # Raw glob pattern strings, kept in the order they appear in the file.
    patterns: list[str]
52
53
# Parsed shape of a ``.museignore`` file.
#
# ``global`` is a Python keyword and therefore cannot be written as a field
# name in class syntax, so the functional TypedDict form is used instead.
# Both keys are optional (``total=False``).
MuseIgnoreConfig = TypedDict(
    "MuseIgnoreConfig",
    {
        # Patterns from the ``[global]`` table.
        "global": DomainSection,
        # Patterns from each ``[domain.<name>]`` table, keyed by domain name.
        "domain": dict[str, DomainSection],
    },
    total=False,
)
63
64
def load_ignore_config(root: pathlib.Path) -> MuseIgnoreConfig:
    """Read ``.museignore`` from *root* and return the parsed configuration.

    Builds :data:`MuseIgnoreConfig` from the raw TOML dict using explicit
    ``isinstance`` checks — no ``Any`` propagated into the return value.
    Values of an unexpected type (a non-table section, a non-list
    ``patterns`` value, a non-string pattern entry) are silently dropped
    rather than rejected.

    Args:
        root: Repository root directory (the directory that contains ``.muse/``
            and ``muse-work/``). The ``.museignore`` file, if present, lives
            directly inside *root*.

    Returns:
        A :data:`MuseIgnoreConfig` mapping. Both the ``"global"`` key and the
        ``"domain"`` key are optional; use :func:`resolve_patterns` which
        handles all missing-key cases. Returns an empty mapping when
        ``.museignore`` is absent.

    Raises:
        ValueError: When ``.museignore`` exists but is not valid UTF-8 or
            contains invalid TOML. The message is prefixed with the file name.
    """
    ignore_file = root / _FILENAME
    if not ignore_file.exists():
        return {}

    raw_bytes = ignore_file.read_bytes()
    try:
        raw = tomllib.loads(raw_bytes.decode("utf-8"))
    except UnicodeDecodeError as exc:
        # Undecodable bytes would otherwise surface as a bare
        # UnicodeDecodeError that never mentions which file is broken.
        raise ValueError(f"{_FILENAME}: not valid UTF-8 — {exc}") from exc
    except tomllib.TOMLDecodeError as exc:
        raise ValueError(f"{_FILENAME}: TOML parse error — {exc}") from exc

    result: MuseIgnoreConfig = {}

    # [global] section
    global_raw = raw.get("global")
    if isinstance(global_raw, dict):
        result["global"] = _parse_section(global_raw)

    # [domain.*] sections — each key under [domain] is a domain name.
    domain_raw = raw.get("domain")
    if isinstance(domain_raw, dict):
        result["domain"] = {
            name: _parse_section(value)
            for name, value in domain_raw.items()
            if isinstance(name, str) and isinstance(value, dict)
        }

    return result


def _parse_section(raw_section: dict[str, object]) -> DomainSection:
    """Build one :class:`DomainSection` from a raw TOML table.

    The ``patterns`` key is copied only when its value is a list, and only
    its string entries are kept; otherwise the returned section is empty.
    """
    section: DomainSection = {}
    patterns_val = raw_section.get("patterns")
    if isinstance(patterns_val, list):
        section["patterns"] = [p for p in patterns_val if isinstance(p, str)]
    return section
124
125
def resolve_patterns(config: MuseIgnoreConfig, domain: str) -> list[str]:
    """Return the ordered pattern list that applies to *domain*.

    The result is the ``[global]`` patterns followed by the patterns of the
    section named *domain*, each group in its original array order. Sections
    belonging to other domains contribute nothing.

    Args:
        config: Parsed ignore configuration from :func:`load_ignore_config`.
        domain: The active domain name, e.g. ``"music"`` or ``"code"``.

    Returns:
        A new ``list[str]`` of raw glob pattern strings. It is empty when
        *config* is empty or when neither applicable section has patterns.
    """
    # Every level is optional, so fall back to an empty mapping / list at
    # each step instead of probing for the keys one by one.
    shared = config.get("global", {}).get("patterns", [])
    scoped = config.get("domain", {}).get(domain, {}).get("patterns", [])
    return [*shared, *scoped]
155
156
def is_ignored(rel_posix: str, patterns: list[str]) -> bool:
    """Return ``True`` if *rel_posix* should be excluded from the snapshot.

    Patterns are evaluated in order and the last matching rule wins. A
    negation rule (``!pattern``) can un-ignore a path that was matched by an
    earlier rule.

    Two kinds of pattern are skipped without being matched:

    - Directory-only patterns (trailing ``/``), because Muse tracks files,
      not directories.
    - Empty patterns (``""`` or a bare ``"!"``), which cannot match any path.

    Args:
        rel_posix: Workspace-relative POSIX path, e.g. ``"tracks/drums.mid"``.
        patterns: Ordered pattern list from :func:`resolve_patterns`.

    Returns:
        ``True`` when the path is ignored, ``False`` otherwise. An empty
        *patterns* list means nothing is ignored.
    """
    p = pathlib.PurePosixPath(rel_posix)
    ignored = False
    for pattern in patterns:
        negate = pattern.startswith("!")
        pat = pattern[1:] if negate else pattern

        # An empty pattern matches nothing; skip it instead of handing it to
        # the matcher, where a stray "" entry could abort the whole scan.
        # Directory-only patterns never match files.
        if not pat or pat.endswith("/"):
            continue

        if _matches(p, pat):
            # Last match wins: a plain rule ignores, a negated rule restores.
            ignored = not negate
    return ignored
187
188
def _matches(p: pathlib.PurePosixPath, pattern: str) -> bool:
    """Test whether the path *p* matches *pattern*.

    The pattern is split on ``/`` and compared with the path one component at
    a time, so ``*`` can never cross a directory boundary. Matching is
    case-sensitive in every branch.

    - **Anchored** (leading ``/``): the pattern must match the whole path,
      starting at the root. ``/tmp/*.mid`` matches ``tmp/drums.mid`` but
      neither ``cache/tmp/drums.mid`` nor ``tmp/sub/drums.mid``.
    - **Not anchored**: the pattern is matched from the right, i.e. it must
      match some trailing run of components that ends at the file name.
      ``*.tmp`` therefore matches ``drums.tmp`` *and* ``tracks/drums.tmp``,
      and ``cache/*.dat`` matches ``a/cache/index.dat``.
    - A ``**`` component spans whole directories: zero or more when other
      components follow it, one or more when it is the last component.

    Args:
        p: Workspace-relative path of the file being tested.
        pattern: A single pattern with any ``!`` prefix already removed.

    Returns:
        ``True`` when *p* matches *pattern*. A pattern with no usable
        components (for example ``""`` or ``"/"``) matches nothing.
    """
    anchored = pattern.startswith("/")
    # Drop empty and "." components so "/a//b" and "./a/b" behave like "a/b".
    pat_parts = tuple(c for c in pattern.split("/") if c and c != ".")
    if not pat_parts:
        return False

    parts = p.parts
    if anchored:
        return _match_components(parts, pat_parts)

    # Match from the right: try the pattern against every trailing suffix.
    return any(
        _match_components(parts[start:], pat_parts)
        for start in range(len(parts))
    )


def _match_components(
    parts: tuple[str, ...], pat_parts: tuple[str, ...]
) -> bool:
    """Return ``True`` when *pat_parts* matches all of *parts*, left to right."""
    if not pat_parts:
        # Pattern exhausted: succeed only if the path is exhausted as well.
        return not parts

    head, rest = pat_parts[0], pat_parts[1:]
    if head == "**":
        if not rest:
            # A trailing ** must stand for at least one component.
            return bool(parts)
        # Let ** absorb 0..len(parts) leading components, then match the rest.
        return any(
            _match_components(parts[skip:], rest)
            for skip in range(len(parts) + 1)
        )

    # fnmatchcase keeps matching case-sensitive on every platform; a single
    # component contains no "/", so "*" stays within one path segment.
    if not parts or not fnmatch.fnmatchcase(parts[0], head):
        return False
    return _match_components(parts[1:], rest)