gabriel / muse public
ignore.py python
126 lines 4.9 KB
5f1a074d feat: implement .museignore — gitignore-style snapshot exclusion (#7) Gabriel Cardona <cgcardona@gmail.com> 7d ago
1 """Muse ignore — ``.museignore`` parser and workspace path filter.
2
3 ``.museignore`` uses the same syntax as ``.gitignore``:
4
5 - Lines beginning with ``#`` are comments and are ignored.
6 - Blank lines are ignored.
7 - A trailing ``/`` marks a directory pattern; it is never matched against
8 individual files (Muse VCS tracks files, not directories).
9 - A leading ``/`` **anchors** the pattern to the repository root, so
10 ``/tmp/*.mid`` matches only ``tmp/drums.mid`` and not ``cache/tmp/drums.mid``.
11 - A leading ``!`` **negates** a pattern: a path that was previously matched by
12 an ignore rule is un-ignored if it matches a subsequent negation rule.
13 - ``*`` matches any sequence of characters **except** a path separator (``/``).
14 - ``**`` matches any sequence of characters **including** path separators.
15 - All other characters are matched literally.
16
17 Rule evaluation
18 ---------------
19 Rules are evaluated top-to-bottom. The **last matching rule wins**. This
20 mirrors gitignore behaviour: a later ``!important.tmp`` overrides an earlier
21 ``*.tmp`` for that specific path.
22
23 Public API
24 ----------
25 - :func:`load_patterns` — parse ``.museignore`` → ``list[str]``
26 - :func:`is_ignored` — test a relative POSIX path against a pattern list
27 """
28 from __future__ import annotations
29
30 import fnmatch
31 import pathlib
32
33
def load_patterns(root: pathlib.Path) -> list[str]:
    """Read ``.museignore`` from *root* and return its pattern lines.

    Args:
        root: Repository root directory. The ``.museignore`` file, if
            present, lives directly inside *root*.

    Returns:
        The raw pattern strings in file order, each with surrounding
        whitespace stripped. Blank lines and lines starting with ``#`` are
        excluded. Returns an empty list when *root* contains no regular
        file named ``.museignore``.
    """
    ignore_file = root / ".museignore"
    # is_file() rather than exists(): a directory that happens to be called
    # ".museignore" is treated as "no ignore file" instead of making
    # read_text() raise.
    if not ignore_file.is_file():
        return []
    # "utf-8-sig" decodes ordinary UTF-8 and additionally drops a leading
    # byte-order mark, which some editors write. Without this the BOM would
    # stay glued to the first line, so a first-line comment would no longer
    # start with "#" and would be returned as a pattern.
    text = ignore_file.read_text(encoding="utf-8-sig")
    stripped_lines = (line.strip() for line in text.splitlines())
    return [line for line in stripped_lines if line and not line.startswith("#")]
56
57
def is_ignored(rel_posix: str, patterns: list[str]) -> bool:
    """Decide whether *rel_posix* is excluded from the snapshot.

    Rules are applied in list order and every rule is consulted; whichever
    rule matches last determines the result. A rule written as ``!pattern``
    is a negation: when it matches, the path is un-ignored.

    Rules whose pattern ends in ``/`` are directory-only and are skipped
    without being matched, since the paths tested here are files.

    Args:
        rel_posix: Workspace-relative POSIX path, e.g. ``"tracks/drums.mid"``.
        patterns: Pattern list as returned by :func:`load_patterns`.

    Returns:
        ``True`` if the path is ignored, ``False`` otherwise. With an empty
        *patterns* list nothing is ignored.
    """
    path = pathlib.PurePosixPath(rel_posix)
    verdict = False
    for rule in patterns:
        is_negation = rule[:1] == "!"
        body = rule[1:] if is_negation else rule

        # Directory-only rule: never applies to a file path.
        if body[-1:] == "/":
            continue

        if not _matches(path, body):
            continue
        # A later match overrides any earlier one (last matching rule wins).
        verdict = not is_negation
    return verdict
88
89
def _matches(p: pathlib.PurePosixPath, pattern: str) -> bool:
    """Test whether the path *p* matches *pattern*.

    Matching is done one path component at a time, so wildcards behave the
    same way for every pattern shape:

    - ``*``, ``?`` and ``[...]`` never match across a ``/``.
    - A ``**`` component spans whole directories: zero or more of them when
      further components follow, one or more when it is the last component.
    - Matching is always case-sensitive, independent of the host OS.

    Three pattern shapes are recognised:

    - **Anchored** (leading ``/``): the pattern must match the whole path
      starting at the root, so ``/tmp/*.mid`` matches ``tmp/drums.mid`` but
      neither ``cache/tmp/drums.mid`` nor ``tmp/sub/drums.mid``.
    - **Pattern with embedded ``/``**: matched from the right, i.e. against
      every trailing run of path components, so ``cache/*.dat`` matches
      ``a/cache/index.dat`` and ``**/cache/*.dat`` matches ``cache/index.dat``.
    - **Pattern without ``/``**: has a single component, so it is effectively
      matched against the filename; ``*.tmp`` matches ``drums.tmp`` *and*
      ``tracks/drums.tmp``.

    Args:
        p: Workspace-relative path to test.
        pattern: One ignore pattern with any leading ``!`` already removed.

    Returns:
        ``True`` if *p* matches *pattern*. A pattern with no components
        (for example the empty string left over from a bare ``!`` rule)
        matches nothing.
    """
    anchored = pattern.startswith("/")
    # Empty components (leading or doubled slashes) and "." carry no meaning.
    segments = tuple(seg for seg in pattern.split("/") if seg and seg != ".")
    if not segments:
        return False

    parts = p.parts
    if anchored:
        return _match_segments(parts, segments)
    # Unanchored: the pattern may start at any directory depth.
    return any(
        _match_segments(parts[start:], segments) for start in range(len(parts))
    )


def _match_segments(parts: tuple[str, ...], segments: tuple[str, ...]) -> bool:
    """Return ``True`` if *segments* match *parts* exactly, component by component."""
    if not segments:
        return not parts
    head, rest = segments[0], segments[1:]
    if head == "**":
        if not rest:
            # Trailing "**" means "everything inside", so at least one
            # component must remain for it to consume.
            return bool(parts)
        # Let "**" swallow 0..len(parts) leading components and try the rest.
        return any(
            _match_segments(parts[skip:], rest) for skip in range(len(parts) + 1)
        )
    if not parts:
        return False
    # fnmatchcase (not fnmatch) avoids OS-dependent case folding; applying it
    # per component is what keeps "*" from crossing a "/".
    return fnmatch.fnmatchcase(parts[0], head) and _match_segments(parts[1:], rest)