cgcardona / muse public
test_stress_query_engine.py python
353 lines 12.9 KB
e6786943 feat: upgrade to Python 3.14, drop from __future__ import annotations Gabriel Cardona <cgcardona@gmail.com> 1d ago
1 """Stress tests for the generic query engine and code query DSL.
2
3 Covers:
4 - walk_history on linear chains of 100+ commits.
5 - CommitEvaluator with correct 3-arg signature.
6 - format_matches output format.
7 - Code query DSL: all field types, all operators, AND/OR composition.
8 - Code query DSL: unknown field raises ValueError.
9 - Query against large history (200 commits).
10 - Branch-scoped queries.
11 """
12
13 import datetime
14 import pathlib
15
16 import pytest
17
18 from muse.core.query_engine import CommitEvaluator, QueryMatch, format_matches, walk_history
19 from muse.core.store import CommitRecord, write_commit
20 from muse.domain import SemVerBump
21 from muse.plugins.code._code_query import build_evaluator
22
23
24 # ---------------------------------------------------------------------------
25 # Helpers
26 # ---------------------------------------------------------------------------
27
28
def _now() -> datetime.datetime:
    """Return the current moment as a timezone-aware UTC ``datetime``."""
    utc = datetime.timezone.utc
    return datetime.datetime.now(tz=utc)
31
32
def _write(
    root: pathlib.Path,
    cid: str,
    branch: str = "main",
    parent: str | None = None,
    author: str = "alice",
    agent_id: str = "",
    model_id: str = "",
    sem_ver_bump: SemVerBump = "none",
    message: str = "",
) -> CommitRecord:
    """Store one commit under *root* and point *branch*'s ref file at it.

    Args:
        root: Repository root; the ref is written below ``root/.muse/refs/heads``.
        cid: Commit id. Also used to derive the snapshot id (``snap-<cid>``)
            and the default message.
        branch: Branch recorded on the commit and whose ref file is updated.
        parent: Parent commit id, or ``None`` for a commit without a parent.
        author: Author recorded on the commit.
        agent_id: Agent id recorded on the commit.
        model_id: Model id recorded on the commit.
        sem_ver_bump: Semantic-version bump recorded on the commit.
        message: Commit message; an empty value falls back to ``"commit <cid>"``.

    Returns:
        The ``CommitRecord`` that was handed to ``write_commit``.
    """
    c = CommitRecord(
        commit_id=cid,
        repo_id="repo",
        branch=branch,
        snapshot_id=f"snap-{cid}",
        message=message or f"commit {cid}",
        committed_at=_now(),
        parent_commit_id=parent,
        author=author,
        agent_id=agent_id,
        model_id=model_id,
        sem_ver_bump=sem_ver_bump,
    )
    write_commit(root, c)
    ref = root / ".muse" / "refs" / "heads" / branch
    # A hierarchical branch name such as "feature/x" maps to a nested ref
    # path; make sure its directory exists so write_text() cannot fail with
    # FileNotFoundError.
    ref.parent.mkdir(parents=True, exist_ok=True)
    ref.write_text(cid)
    return c
61
62
def _make_match(commit: CommitRecord) -> QueryMatch:
    """Build the ``QueryMatch`` that the test evaluators report for *commit*.

    The commit's id, author, branch and ISO-formatted commit time are copied
    over; ``detail`` is a fixed ``"matched commit <id>"`` string.
    """
    cid = commit.commit_id
    fields = {
        "commit_id": cid,
        "author": commit.author,
        "committed_at": commit.committed_at.isoformat(),
        "branch": commit.branch,
        "detail": f"matched commit {cid}",
    }
    return QueryMatch(**fields)
71
72
@pytest.fixture
def repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Create the ``.muse/commits`` and ``.muse/refs/heads`` directories and return the root.

    The directories are created inside pytest's per-test ``tmp_path``.
    """
    for parts in (("commits",), ("refs", "heads")):
        tmp_path.joinpath(".muse", *parts).mkdir(parents=True)
    return tmp_path
79
80
81 # ===========================================================================
82 # walk_history — basic
83 # ===========================================================================
84
85
class TestWalkHistoryBasic:
    """Exercise ``walk_history`` on empty, single-commit and short linear histories."""

    @staticmethod
    def _grow_chain(root: pathlib.Path, length: int) -> None:
        """Write *length* commits ``c000``, ``c001``, ... each parented on the previous one."""
        parent_id = None
        for cid in (f"c{n:03d}" for n in range(length)):
            _write(root, cid, parent=parent_id)
            parent_id = cid

    def test_empty_history_no_matches(self, repo: pathlib.Path) -> None:
        """A branch that was never written yields no matches, even with an accept-all evaluator."""
        def accept_all(commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path) -> list[QueryMatch]:
            return [_make_match(commit)]

        assert walk_history(repo, "nonexistent-branch", accept_all) == []

    def test_single_commit_matches(self, repo: pathlib.Path) -> None:
        """One commit plus an accept-all evaluator gives exactly that commit."""
        _write(repo, "only", branch="main")

        def accept_all(commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path) -> list[QueryMatch]:
            return [_make_match(commit)]

        found = walk_history(repo, "main", accept_all)
        assert len(found) == 1
        assert found[0]["commit_id"] == "only"

    def test_single_commit_no_match(self, repo: pathlib.Path) -> None:
        """An evaluator that never matches produces an empty result."""
        _write(repo, "only", branch="main")

        def reject_all(commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path) -> list[QueryMatch]:
            return []

        assert walk_history(repo, "main", reject_all) == []

    def test_linear_chain_all_match(self, repo: pathlib.Path) -> None:
        """Every commit of a 10-commit chain is reported by an accept-all evaluator."""
        self._grow_chain(repo, 10)

        def accept_all(commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path) -> list[QueryMatch]:
            return [_make_match(commit)]

        found = walk_history(repo, "main", accept_all)
        assert len(found) == 10

    def test_linear_chain_filtered(self, repo: pathlib.Path) -> None:
        """Authors alternate alice/bob over 10 commits; filtering on alice keeps 5."""
        parent_id = None
        for n in range(10):
            cid = f"c{n:03d}"
            _write(repo, cid, parent=parent_id, author="bob" if n % 2 else "alice")
            parent_id = cid

        def alice_only(commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path) -> list[QueryMatch]:
            return [_make_match(commit)] if commit.author == "alice" else []

        found = walk_history(repo, "main", alice_only)
        assert len(found) == 5

    def test_max_commits_limits_walk(self, repo: pathlib.Path) -> None:
        """``max_commits=10`` caps the matches from a 50-commit chain at 10."""
        self._grow_chain(repo, 50)

        def accept_all(commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path) -> list[QueryMatch]:
            return [_make_match(commit)]

        found = walk_history(repo, "main", accept_all, max_commits=10)
        assert len(found) == 10

    def test_matches_include_commit_id_and_branch(self, repo: pathlib.Path) -> None:
        """The returned match carries the commit id, branch and author of the commit."""
        _write(repo, "abc123", branch="main", author="alice")

        def accept_all(commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path) -> list[QueryMatch]:
            return [_make_match(commit)]

        first = walk_history(repo, "main", accept_all)[0]
        assert first["commit_id"] == "abc123"
        assert first["branch"] == "main"
        assert first["author"] == "alice"
154
155
156 # ===========================================================================
157 # walk_history — large history
158 # ===========================================================================
159
160
class TestWalkHistoryLarge:
    """Run ``walk_history`` with filtering evaluators over chains of 100 and 200 commits."""

    def test_200_commit_chain_full_scan(self, repo: pathlib.Path) -> None:
        """Every third commit of a 200-commit chain is tagged ``bot`` and must be found."""
        parent_id = None
        for n in range(200):
            cid = f"large-{n:04d}"
            tag = "" if n % 3 else "bot"
            _write(repo, cid, parent=parent_id, agent_id=tag)
            parent_id = cid

        def bot_only(commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path) -> list[QueryMatch]:
            return [_make_match(commit)] if commit.agent_id == "bot" else []

        found = walk_history(repo, "main", bot_only)
        # Bot commits sit at indices 0, 3, 6, ..., 198, which is 67 of the 200.
        assert len(found) == 67

    def test_query_by_agent_across_100_commits(self, repo: pathlib.Path) -> None:
        """Five agents take turns over 100 commits; one agent owns exactly 20 of them."""
        parent_id = None
        for n in range(100):
            cid = f"agent-test-{n:04d}"
            _write(repo, cid, parent=parent_id, agent_id=f"agent-{n % 5}")
            parent_id = cid

        def agent_0_only(commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path) -> list[QueryMatch]:
            return [_make_match(commit)] if commit.agent_id == "agent-0" else []

        found = walk_history(repo, "main", agent_0_only)
        assert len(found) == 20  # 100 commits / 5 agents
193
194
195 # ===========================================================================
196 # format_matches
197 # ===========================================================================
198
199
class TestFormatMatches:
    """Check that ``format_matches`` returns text containing each match's commit id."""

    def test_empty_matches_produces_output(self) -> None:
        """An empty match list still yields a string."""
        assert isinstance(format_matches([]), str)

    def test_single_match_includes_commit_id(self) -> None:
        """The commit id of a single match appears in the rendered text."""
        lone = QueryMatch(
            commit_id="abc12345",
            branch="main",
            author="alice",
            committed_at=_now().isoformat(),
            detail="test match",
        )
        rendered = format_matches([lone])
        assert "abc12345" in rendered

    def test_multiple_matches_all_present(self) -> None:
        """Each of five commit ids appears in the rendered text."""
        batch = []
        for n in range(5):
            batch.append(
                QueryMatch(
                    commit_id=f"id{n:04d}",
                    branch="main",
                    author="alice",
                    committed_at=_now().isoformat(),
                    detail="matched",
                )
            )
        rendered = format_matches(batch)
        for n in range(5):
            assert f"id{n:04d}" in rendered
230
231
232 # ===========================================================================
233 # Code query DSL — build_evaluator
234 # ===========================================================================
235
236
class TestCodeQueryDSL:
    """Run evaluators from ``build_evaluator`` through ``walk_history``.

    Each test writes a short chain of commits on ``main`` and checks both how
    many matches the query returns and, where exactly one commit should match,
    which commit it is. Pinning the count keeps ``all(...)`` assertions from
    passing vacuously on an empty result.
    """

    # --- author field ---

    def test_author_equals(self, repo: pathlib.Path) -> None:
        """``==`` on author keeps alice's commit and drops bob's."""
        _write(repo, "a1", author="alice")
        _write(repo, "a2", author="bob", parent="a1")
        evaluator = build_evaluator("author == 'alice'")
        result = walk_history(repo, "main", evaluator)
        assert any(m["commit_id"] == "a1" for m in result)
        assert not any(m["commit_id"] == "a2" for m in result)

    def test_author_not_equals(self, repo: pathlib.Path) -> None:
        """``!=`` on author keeps only bob's commit."""
        _write(repo, "b1", author="alice")
        _write(repo, "b2", author="bob", parent="b1")
        evaluator = build_evaluator("author != 'alice'")
        result = walk_history(repo, "main", evaluator)
        # Require the one expected match first; the all() check below would
        # pass on an empty result.
        assert len(result) == 1
        assert result[0]["commit_id"] == "b2"
        assert all(m["author"] != "alice" for m in result)

    def test_author_contains(self, repo: pathlib.Path) -> None:
        """``contains`` matches a substring of the author."""
        _write(repo, "c1", author="alice-smith")
        _write(repo, "c2", author="bob-jones", parent="c1")
        evaluator = build_evaluator("author contains 'alice'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert "alice" in result[0]["author"]

    def test_author_startswith(self, repo: pathlib.Path) -> None:
        """``startswith`` matches a prefix of the author."""
        _write(repo, "d1", author="agent-claude")
        _write(repo, "d2", author="human-alice", parent="d1")
        evaluator = build_evaluator("author startswith 'agent'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert result[0]["author"].startswith("agent")

    # --- agent_id field ---

    def test_agent_id_equals(self, repo: pathlib.Path) -> None:
        """``==`` on agent_id selects the single matching commit."""
        _write(repo, "e1", agent_id="claude-v4")
        _write(repo, "e2", agent_id="gpt-4o", parent="e1")
        evaluator = build_evaluator("agent_id == 'claude-v4'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert result[0]["commit_id"] == "e1"

    # --- sem_ver_bump field ---

    def test_sem_ver_bump_major(self, repo: pathlib.Path) -> None:
        """Only the commit with a major bump matches."""
        _write(repo, "f1", sem_ver_bump="major")
        _write(repo, "f2", sem_ver_bump="minor", parent="f1")
        _write(repo, "f3", sem_ver_bump="patch", parent="f2")
        evaluator = build_evaluator("sem_ver_bump == 'major'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert result[0]["commit_id"] == "f1"

    # --- model_id field ---

    def test_model_id_contains(self, repo: pathlib.Path) -> None:
        """``contains`` on model_id selects the claude commit only."""
        _write(repo, "g1", model_id="claude-3-5-sonnet-20241022")
        _write(repo, "g2", model_id="gpt-4o-2024-08-06", parent="g1")
        evaluator = build_evaluator("model_id contains 'claude'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert result[0]["commit_id"] == "g1"

    # --- AND composition ---

    def test_and_composition(self, repo: pathlib.Path) -> None:
        """Both sides of ``and`` must hold; only h1 is alice with bot-1."""
        _write(repo, "h1", author="alice", agent_id="bot-1")
        _write(repo, "h2", author="alice", agent_id="bot-2", parent="h1")
        _write(repo, "h3", author="bob", agent_id="bot-1", parent="h2")
        evaluator = build_evaluator("author == 'alice' and agent_id == 'bot-1'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert result[0]["commit_id"] == "h1"

    # --- OR composition ---

    def test_or_composition(self, repo: pathlib.Path) -> None:
        """Either side of ``or`` suffices; charlie's commit is excluded."""
        _write(repo, "i1", author="alice")
        _write(repo, "i2", author="bob", parent="i1")
        _write(repo, "i3", author="charlie", parent="i2")
        evaluator = build_evaluator("author == 'alice' or author == 'bob'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 2
        assert {m["commit_id"] for m in result} == {"i1", "i2"}

    # --- OR over sem_ver_bump with mixed authors ---

    def test_complex_and_or(self, repo: pathlib.Path) -> None:
        """An OR over two sem_ver_bump values selects the major and minor commits.

        NOTE(review): despite the test name, the query contains no ``and``.
        How the DSL orders mixed ``and``/``or`` is not visible from this file,
        so the query is left as it was rather than guessed at.
        """
        _write(repo, "j1", author="alice", sem_ver_bump="major")
        _write(repo, "j2", author="bob", sem_ver_bump="minor", parent="j1")
        _write(repo, "j3", author="alice", sem_ver_bump="patch", parent="j2")
        evaluator = build_evaluator(
            "sem_ver_bump == 'major' or sem_ver_bump == 'minor'"
        )
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 2
        assert {m["commit_id"] for m in result} == {"j1", "j2"}

    # --- error cases ---

    def test_unknown_field_raises_value_error(self) -> None:
        """A field name the DSL does not know is rejected with ValueError."""
        with pytest.raises(ValueError):
            build_evaluator("unknown_field == 'something'")

    def test_unknown_operator_raises_value_error(self) -> None:
        """An operator the DSL does not know is rejected with ValueError."""
        with pytest.raises(ValueError):
            build_evaluator("author REGEX 'alice'")

    def test_empty_query_raises(self) -> None:
        """An empty query is rejected; either exception type is accepted."""
        with pytest.raises((ValueError, IndexError)):
            build_evaluator("")

    # --- branch field ---

    def test_branch_field_matches_correctly(self, repo: pathlib.Path) -> None:
        """``branch == 'main'`` finds the single commit written on main."""
        _write(repo, "k1", branch="main", author="alice")
        evaluator = build_evaluator("branch == 'main'")
        result = walk_history(repo, "main", evaluator)
        # Require the one expected match first; the all() check below would
        # pass on an empty result.
        assert len(result) == 1
        assert result[0]["commit_id"] == "k1"
        assert all(m["branch"] == "main" for m in result)