gabriel / muse public
test_cmd_content_grep.py python
245 lines 8.2 KB
86000da9 fix: replace typer CliRunner with argparse-compatible test helper Gabriel Cardona <gabriel@tellurstori.com> 1d ago
1 """Tests for ``muse content-grep``.
2
3 Covers: no match exit-1, pattern found, --files-only, --count, --ignore-case,
4 --format json, binary skip, multi-file, stress: 100 files.
5 """
6
7 from __future__ import annotations
8
9 import datetime
10 import hashlib
11 import json
12 import pathlib
13
14 import pytest
15 from tests.cli_test_helper import CliRunner
16
17 cli = None # argparse migration — CliRunner ignores this arg
18 from muse.core.object_store import write_object
19 from muse.core.snapshot import compute_snapshot_id
20 from muse.core.store import CommitRecord, SnapshotRecord, write_commit, write_snapshot
21
# Single runner instance shared by every test in this module to invoke the CLI.
runner = CliRunner()

# Repo id written into repo.json by _init_repo and stamped on each CommitRecord
# created by _commit_files.
_REPO_ID = "cgrep-test"
25
26
27 # ---------------------------------------------------------------------------
28 # Helpers
29 # ---------------------------------------------------------------------------
30
31
32 def _sha(data: bytes) -> str:
33 return hashlib.sha256(data).hexdigest()
34
35
def _init_repo(path: pathlib.Path) -> pathlib.Path:
    """Lay out a minimal ``.muse`` directory under *path* and return *path*.

    Creates the ``commits``, ``snapshots``, ``objects`` and ``refs/heads``
    directories, writes a ``HEAD`` file pointing at ``refs/heads/main``, and
    writes ``repo.json`` containing the module's repo id and the ``midi``
    domain.
    """
    dot_muse = path / ".muse"
    subdirs = ("commits", "snapshots", "objects", "refs/heads")
    for name in subdirs:
        target = dot_muse / name
        # exist_ok lets the helper run against an already-initialised tree.
        target.mkdir(parents=True, exist_ok=True)

    head_file = dot_muse / "HEAD"
    head_file.write_text("ref: refs/heads/main", encoding="utf-8")

    repo_meta = {"repo_id": _REPO_ID, "domain": "midi"}
    config_file = dot_muse / "repo.json"
    config_file.write_text(json.dumps(repo_meta), encoding="utf-8")
    return path
45
46
47 def _env(repo: pathlib.Path) -> dict[str, str]:
48 return {"MUSE_REPO_ROOT": str(repo)}
49
50
# Incremented on every _commit_files call; feeds the commit id and message.
_counter = 0


def _commit_files(root: pathlib.Path, files: dict[str, bytes]) -> str:
    """Write *files* as objects, snapshot them, and record a commit on ``main``.

    Each file's bytes are stored in the object store under their SHA-256 hex
    digest, and the path-to-digest manifest is saved as a snapshot. A commit
    record is then written whose id hashes the call counter, the snapshot id
    and the current UTC timestamp. The call does not pass a parent commit to
    ``CommitRecord``. Finally ``refs/heads/main`` is overwritten with the new
    commit id.

    Returns:
        The id of the commit that was written.
    """
    global _counter
    _counter += 1
    sequence = _counter

    # Build the manifest first, then persist objects in the same file order.
    manifest: dict[str, str] = {name: _sha(blob) for name, blob in files.items()}
    for name, blob in files.items():
        write_object(root, manifest[name], blob)

    snapshot_id = compute_snapshot_id(manifest)
    snapshot = SnapshotRecord(snapshot_id=snapshot_id, manifest=manifest)
    write_snapshot(root, snapshot)

    stamp = datetime.datetime.now(tz=datetime.timezone.utc)
    # The counter and timestamp keep ids distinct even for identical snapshots.
    seed = f"{sequence}:{snapshot_id}:{stamp.isoformat()}"
    new_commit_id = _sha(seed.encode())

    record = CommitRecord(
        commit_id=new_commit_id,
        repo_id=_REPO_ID,
        branch="main",
        snapshot_id=snapshot_id,
        message=f"commit {sequence}",
        committed_at=stamp,
    )
    write_commit(root, record)

    branch_ref = root / ".muse" / "refs" / "heads" / "main"
    branch_ref.write_text(new_commit_id, encoding="utf-8")
    return new_commit_id
76
77
78 # ---------------------------------------------------------------------------
79 # Unit: help
80 # ---------------------------------------------------------------------------
81
82
def test_content_grep_help() -> None:
    """``content-grep --help`` exits 0 and its output mentions the pattern flag."""
    argv = ["content-grep", "--help"]
    outcome = runner.invoke(cli, argv)
    assert outcome.exit_code == 0
    # Either the long or the short spelling of the option is accepted.
    assert any(flag in outcome.output for flag in ("--pattern", "-p"))
87
88
89 # ---------------------------------------------------------------------------
90 # Unit: no match → exit 1
91 # ---------------------------------------------------------------------------
92
93
def test_content_grep_no_match(tmp_path: pathlib.Path) -> None:
    """A pattern absent from the committed file gives a non-zero exit code."""
    repo = _init_repo(tmp_path)
    _commit_files(repo, {"song.txt": b"chord: Am\ntempo: 120\n"})
    argv = ["content-grep", "--pattern", "ZZZNOMATCH"]
    outcome = runner.invoke(cli, argv, env=_env(repo))
    assert outcome.exit_code != 0
99
100
101 # ---------------------------------------------------------------------------
102 # Unit: match found → exit 0
103 # ---------------------------------------------------------------------------
104
105
def test_content_grep_match_found(tmp_path: pathlib.Path) -> None:
    """A pattern present in a committed file exits 0 and names that file."""
    repo = _init_repo(tmp_path)
    _commit_files(repo, {"song.txt": b"chord: Cm7\ntempo: 120\n"})
    argv = ["content-grep", "--pattern", "Cm7"]
    outcome = runner.invoke(cli, argv, env=_env(repo))
    assert outcome.exit_code == 0
    assert "song.txt" in outcome.output
112
113
114 # ---------------------------------------------------------------------------
115 # Unit: --ignore-case
116 # ---------------------------------------------------------------------------
117
118
def test_content_grep_ignore_case(tmp_path: pathlib.Path) -> None:
    """With ``--ignore-case`` a lowercase pattern matches uppercase content."""
    repo = _init_repo(tmp_path)
    _commit_files(repo, {"notes.txt": b"VERSE: intro melody\n"})
    argv = ["content-grep", "--pattern", "verse", "--ignore-case"]
    outcome = runner.invoke(cli, argv, env=_env(repo))
    assert outcome.exit_code == 0
    assert "notes.txt" in outcome.output
127
128
def test_content_grep_case_sensitive_no_match(tmp_path: pathlib.Path) -> None:
    """Without ``--ignore-case`` a lowercase pattern misses uppercase content."""
    repo = _init_repo(tmp_path)
    _commit_files(repo, {"notes.txt": b"VERSE: intro melody\n"})
    argv = ["content-grep", "--pattern", "verse"]
    outcome = runner.invoke(cli, argv, env=_env(repo))
    # Matching is case-sensitive here: "verse" differs from "VERSE", so no hit.
    assert outcome.exit_code != 0
137
138
139 # ---------------------------------------------------------------------------
140 # Unit: --files-only
141 # ---------------------------------------------------------------------------
142
143
def test_content_grep_files_only(tmp_path: pathlib.Path) -> None:
    """``--files-only`` exits 0 and lists every file that contains the pattern.

    Two files containing ``match`` are committed. Both names must appear in
    the output, and any output line that contains a colon must start with one
    of those names.
    """
    expected_names = ("a.txt", "b.txt")
    _init_repo(tmp_path)
    _commit_files(tmp_path, {
        "a.txt": b"match here\n",
        "b.txt": b"match here too\n",
    })
    result = runner.invoke(
        cli, ["content-grep", "--pattern", "match", "--files-only"], env=_env(tmp_path)
    )
    assert result.exit_code == 0
    # Both files match, so both must be reported. Without this check the
    # per-line loop below would pass vacuously on empty output.
    for name in expected_names:
        assert name in result.output
    lines = [line.strip() for line in result.output.strip().split("\n") if line.strip()]
    for line in lines:
        assert ":" not in line or line.startswith(expected_names)
157
158
159 # ---------------------------------------------------------------------------
160 # Unit: --count
161 # ---------------------------------------------------------------------------
162
163
def test_content_grep_count(tmp_path: pathlib.Path) -> None:
    """``--count`` exits 0 and the output contains ``3`` for three hit lines."""
    repo = _init_repo(tmp_path)
    # Three lines contain "hit"; the fourth does not.
    _commit_files(repo, {"multi.txt": b"hit\nhit\nhit\nmiss\n"})
    argv = ["content-grep", "--pattern", "hit", "--count"]
    outcome = runner.invoke(cli, argv, env=_env(repo))
    assert outcome.exit_code == 0
    assert "3" in outcome.output
172
173
174 # ---------------------------------------------------------------------------
175 # Unit: --format json
176 # ---------------------------------------------------------------------------
177
178
def test_content_grep_json_output(tmp_path: pathlib.Path) -> None:
    """``--format json`` prints a JSON list whose first entry counts the matches."""
    repo = _init_repo(tmp_path)
    _commit_files(repo, {"song.midi.txt": b"note: C4\nnote: D4\n"})
    argv = ["content-grep", "--pattern", "note", "--format", "json"]
    outcome = runner.invoke(cli, argv, env=_env(repo))
    assert outcome.exit_code == 0

    payload = json.loads(outcome.output)
    assert isinstance(payload, list)
    assert len(payload) >= 1
    first_entry = payload[0]
    # Both committed lines contain "note".
    assert first_entry["match_count"] >= 2
190
191
192 # ---------------------------------------------------------------------------
193 # Unit: binary file skipped silently
194 # ---------------------------------------------------------------------------
195
196
def test_content_grep_binary_skipped(tmp_path: pathlib.Path) -> None:
    """A text match is still reported when a binary file sits beside it.

    NOTE(review): only the presence of ``text.txt`` is asserted; the test does
    not check that ``binary.bin`` is absent from the output.
    """
    repo = _init_repo(tmp_path)
    committed = {
        "binary.bin": b"\x00\x01\x02\x03" * 100,
        "text.txt": b"searchable text here\n",
    }
    _commit_files(repo, committed)
    argv = ["content-grep", "--pattern", "searchable"]
    outcome = runner.invoke(cli, argv, env=_env(repo))
    assert outcome.exit_code == 0
    assert "text.txt" in outcome.output
210
211
212 # ---------------------------------------------------------------------------
213 # Unit: short flags work
214 # ---------------------------------------------------------------------------
215
216
def test_content_grep_short_flags(tmp_path: pathlib.Path) -> None:
    """The short options ``-p``, ``-i`` and ``-f`` are accepted.

    Presumably these are the short forms of ``--pattern``, ``--ignore-case``
    and ``--format``; the test only checks exit code 0 and non-empty JSON.
    """
    repo = _init_repo(tmp_path)
    _commit_files(repo, {"f.txt": b"hello world\n"})
    argv = ["content-grep", "-p", "hello", "-i", "-f", "json"]
    outcome = runner.invoke(cli, argv, env=_env(repo))
    assert outcome.exit_code == 0
    payload = json.loads(outcome.output)
    assert len(payload) >= 1
226
227
228 # ---------------------------------------------------------------------------
229 # Stress: 100 files, pattern matches 50
230 # ---------------------------------------------------------------------------
231
232
def test_content_grep_stress_100_files(tmp_path: pathlib.Path) -> None:
    """Of 100 committed files, exactly the 50 containing the pattern are listed."""
    repo = _init_repo(tmp_path)
    # Even-numbered files carry the target line; odd-numbered ones do not.
    files: dict[str, bytes] = {
        f"file_{index:04d}.txt": (
            b"TARGET_LINE\n" if index % 2 == 0 else b"other content\n"
        )
        for index in range(100)
    }
    _commit_files(repo, files)
    argv = ["content-grep", "--pattern", "TARGET_LINE", "--format", "json"]
    outcome = runner.invoke(cli, argv, env=_env(repo))
    assert outcome.exit_code == 0
    payload = json.loads(outcome.output)
    assert len(payload) == 50