tests/test_stress_query_engine.py · cgcardona/muse

test_stress_query_engine.py python

354 lines 12.9 KB

119290fc Add mission-critical stress test suite (9 new files, 1716 tests total) (#76) Gabriel Cardona <cgcardona@gmail.com> 1d ago

1	"""Stress tests for the generic query engine and code query DSL.
2
3	Covers:
4	- walk_history on linear chains of 100+ commits.
5	- CommitEvaluator with correct 3-arg signature.
6	- format_matches output format.
7	- Code query DSL: all field types, all operators, AND/OR composition.
8	- Code query DSL: unknown field raises ValueError.
9	- Query against large history (200 commits).
10	- Branch-scoped queries.
11	"""
12	from __future__ import annotations
13
14	import datetime
15	import pathlib
16
17	import pytest
18
19	from muse.core.query_engine import CommitEvaluator, QueryMatch, format_matches, walk_history
20	from muse.core.store import CommitRecord, write_commit
21	from muse.domain import SemVerBump
22	from muse.plugins.code._code_query import build_evaluator
23
24
25	# ---------------------------------------------------------------------------
26	# Helpers
27	# ---------------------------------------------------------------------------
28
29
def _now() -> datetime.datetime:
    """Return the current moment as a timezone-aware UTC ``datetime``."""
    utc = datetime.timezone.utc
    return datetime.datetime.now(tz=utc)
32
33
def _write(
    root: pathlib.Path,
    cid: str,
    branch: str = "main",
    parent: str | None = None,
    author: str = "alice",
    agent_id: str = "",
    model_id: str = "",
    sem_ver_bump: SemVerBump = "none",
    message: str = "",
) -> CommitRecord:
    """Store one commit under ``root`` and point ``branch`` at it.

    Args:
        root: Repository root containing the ``.muse`` directory.
        cid: Commit id; also used to derive the snapshot id (``snap-<cid>``).
        branch: Branch recorded on the commit and whose head ref is rewritten.
        parent: Id of the parent commit, or ``None`` for a root commit.
        author: Author recorded on the commit.
        agent_id: Agent id recorded on the commit.
        model_id: Model id recorded on the commit.
        sem_ver_bump: Semantic-version bump recorded on the commit.
        message: Commit message; falls back to ``"commit <cid>"`` when empty.

    Returns:
        The ``CommitRecord`` that was handed to ``write_commit``.
    """
    record = CommitRecord(
        commit_id=cid,
        repo_id="repo",
        branch=branch,
        snapshot_id=f"snap-{cid}",
        message=message or f"commit {cid}",
        committed_at=_now(),
        parent_commit_id=parent,
        author=author,
        agent_id=agent_id,
        model_id=model_id,
        sem_ver_bump=sem_ver_bump,
    )
    write_commit(root, record)

    head_ref = root / ".muse" / "refs" / "heads" / branch
    # A branch name such as "feature/x" nests the ref file one level deeper
    # than the directory the ``repo`` fixture creates, so make sure it exists.
    head_ref.parent.mkdir(parents=True, exist_ok=True)
    # Explicit encoding keeps the ref bytes independent of the platform locale.
    head_ref.write_text(cid, encoding="utf-8")
    return record
62
63
def _make_match(commit: CommitRecord) -> QueryMatch:
    """Build the ``QueryMatch`` that the test evaluators report for ``commit``.

    The match copies the commit's id, author and branch, stores the commit
    time as an ISO-8601 string, and uses ``"matched commit <id>"`` as detail.
    """
    cid = commit.commit_id
    stamp = commit.committed_at.isoformat()
    return QueryMatch(
        commit_id=cid,
        author=commit.author,
        committed_at=stamp,
        branch=commit.branch,
        detail=f"matched commit {cid}",
    )
72
73
@pytest.fixture
def repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Return ``tmp_path`` after creating the ``.muse`` directory skeleton.

    Creates ``.muse/commits`` and ``.muse/refs/heads`` so the helpers in this
    module can write commits and branch refs straight away.
    """
    store = tmp_path / ".muse"
    for subdir in (store / "commits", store / "refs" / "heads"):
        subdir.mkdir(parents=True)
    return tmp_path
80
81
82	# ===========================================================================
83	# walk_history — basic
84	# ===========================================================================
85
86
class TestWalkHistoryBasic:
    """Exercise ``walk_history`` on empty, single-commit and short linear histories."""

    @staticmethod
    def _accept_all(
        commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
    ) -> list[QueryMatch]:
        """Three-argument evaluator that reports one match for every commit."""
        return [_make_match(commit)]

    @staticmethod
    def _chain(
        repo: pathlib.Path, length: int, authors: tuple[str, ...] = ("alice",)
    ) -> None:
        """Write ``length`` commits ``c000``, ``c001``, ... on ``main``.

        Each commit is parented on the previous one; authors are drawn from
        ``authors`` round-robin by commit index.
        """
        parent: str | None = None
        for idx in range(length):
            cid = f"c{idx:03d}"
            _write(repo, cid, parent=parent, author=authors[idx % len(authors)])
            parent = cid

    def test_empty_history_no_matches(self, repo: pathlib.Path) -> None:
        # No ref exists for this branch, so nothing is expected back.
        found = walk_history(repo, "nonexistent-branch", self._accept_all)
        assert found == []

    def test_single_commit_matches(self, repo: pathlib.Path) -> None:
        _write(repo, "only", branch="main")
        found = walk_history(repo, "main", self._accept_all)
        assert len(found) == 1
        assert found[0]["commit_id"] == "only"

    def test_single_commit_no_match(self, repo: pathlib.Path) -> None:
        _write(repo, "only", branch="main")

        def reject_all(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return []

        found = walk_history(repo, "main", reject_all)
        assert found == []

    def test_linear_chain_all_match(self, repo: pathlib.Path) -> None:
        self._chain(repo, 10)
        found = walk_history(repo, "main", self._accept_all)
        assert len(found) == 10

    def test_linear_chain_filtered(self, repo: pathlib.Path) -> None:
        # Even indices are authored by alice, odd ones by bob.
        self._chain(repo, 10, authors=("alice", "bob"))

        def alice_only(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return [_make_match(commit)] if commit.author == "alice" else []

        found = walk_history(repo, "main", alice_only)
        assert len(found) == 5

    def test_max_commits_limits_walk(self, repo: pathlib.Path) -> None:
        self._chain(repo, 50)
        found = walk_history(repo, "main", self._accept_all, max_commits=10)
        assert len(found) == 10

    def test_matches_include_commit_id_and_branch(self, repo: pathlib.Path) -> None:
        _write(repo, "abc123", branch="main", author="alice")
        found = walk_history(repo, "main", self._accept_all)
        first = found[0]
        assert first["commit_id"] == "abc123"
        assert first["branch"] == "main"
        assert first["author"] == "alice"
155
156
157	# ===========================================================================
158	# walk_history — large history
159	# ===========================================================================
160
161
class TestWalkHistoryLarge:
    """Run filtering evaluators over linear histories of 100 and 200 commits."""

    def test_200_commit_chain_full_scan(self, repo: pathlib.Path) -> None:
        parent: str | None = None
        for idx in range(200):
            cid = f"large-{idx:04d}"
            agent = "" if idx % 3 else "bot"
            _write(repo, cid, parent=parent, agent_id=agent)
            parent = cid

        def bot_only(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return [_make_match(commit)] if commit.agent_id == "bot" else []

        hits = walk_history(repo, "main", bot_only)
        # Every third index (0, 3, 6, ..., 198) was written by "bot": 67 commits.
        assert len(hits) == 67

    def test_query_by_agent_across_100_commits(self, repo: pathlib.Path) -> None:
        parent: str | None = None
        for idx in range(100):
            cid = f"agent-test-{idx:04d}"
            _write(repo, cid, parent=parent, agent_id=f"agent-{idx % 5}")
            parent = cid

        def agent_0_only(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return [_make_match(commit)] if commit.agent_id == "agent-0" else []

        hits = walk_history(repo, "main", agent_0_only)
        # Five agents rotate over 100 commits, so each owns 100 / 5 = 20.
        assert len(hits) == 20
194
195
196	# ===========================================================================
197	# format_matches
198	# ===========================================================================
199
200
class TestFormatMatches:
    """Check that ``format_matches`` returns text mentioning every commit id."""

    def test_empty_matches_produces_output(self) -> None:
        rendered = format_matches([])
        assert isinstance(rendered, str)

    def test_single_match_includes_commit_id(self) -> None:
        only = QueryMatch(
            commit_id="abc12345",
            branch="main",
            author="alice",
            committed_at=_now().isoformat(),
            detail="test match",
        )
        rendered = format_matches([only])
        assert "abc12345" in rendered

    def test_multiple_matches_all_present(self) -> None:
        ids = [f"id{i:04d}" for i in range(5)]
        batch = []
        for cid in ids:
            batch.append(
                QueryMatch(
                    commit_id=cid,
                    branch="main",
                    author="alice",
                    committed_at=_now().isoformat(),
                    detail="matched",
                )
            )
        rendered = format_matches(batch)
        for cid in ids:
            assert cid in rendered
231
232
233	# ===========================================================================
234	# Code query DSL — build_evaluator
235	# ===========================================================================
236
237
class TestCodeQueryDSL:
    """Drive ``build_evaluator`` queries through ``walk_history``.

    Each test writes a short chain of commits on ``main``, builds an evaluator
    from a query string, and checks which commits are reported.
    """

    # --- author field ---

    def test_author_equals(self, repo: pathlib.Path) -> None:
        _write(repo, "a1", author="alice")
        _write(repo, "a2", author="bob", parent="a1")
        evaluator = build_evaluator("author == 'alice'")
        result = walk_history(repo, "main", evaluator)
        assert any(m["commit_id"] == "a1" for m in result)
        assert not any(m["commit_id"] == "a2" for m in result)

    def test_author_not_equals(self, repo: pathlib.Path) -> None:
        _write(repo, "b1", author="alice")
        _write(repo, "b2", author="bob", parent="b1")
        evaluator = build_evaluator("author != 'alice'")
        result = walk_history(repo, "main", evaluator)
        # Pin the exact match first: an ``all(...)`` check on its own would
        # also pass if the query wrongly returned nothing.
        assert [m["commit_id"] for m in result] == ["b2"]
        assert all(m["author"] != "alice" for m in result)

    def test_author_contains(self, repo: pathlib.Path) -> None:
        _write(repo, "c1", author="alice-smith")
        _write(repo, "c2", author="bob-jones", parent="c1")
        evaluator = build_evaluator("author contains 'alice'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert "alice" in result[0]["author"]

    def test_author_startswith(self, repo: pathlib.Path) -> None:
        _write(repo, "d1", author="agent-claude")
        _write(repo, "d2", author="human-alice", parent="d1")
        evaluator = build_evaluator("author startswith 'agent'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert result[0]["author"].startswith("agent")

    # --- agent_id field ---

    def test_agent_id_equals(self, repo: pathlib.Path) -> None:
        _write(repo, "e1", agent_id="claude-v4")
        _write(repo, "e2", agent_id="gpt-4o", parent="e1")
        evaluator = build_evaluator("agent_id == 'claude-v4'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert result[0]["commit_id"] == "e1"

    # --- sem_ver_bump field ---

    def test_sem_ver_bump_major(self, repo: pathlib.Path) -> None:
        _write(repo, "f1", sem_ver_bump="major")
        _write(repo, "f2", sem_ver_bump="minor", parent="f1")
        _write(repo, "f3", sem_ver_bump="patch", parent="f2")
        evaluator = build_evaluator("sem_ver_bump == 'major'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1

    # --- model_id field ---

    def test_model_id_contains(self, repo: pathlib.Path) -> None:
        _write(repo, "g1", model_id="claude-3-5-sonnet-20241022")
        _write(repo, "g2", model_id="gpt-4o-2024-08-06", parent="g1")
        evaluator = build_evaluator("model_id contains 'claude'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1

    # --- AND composition ---

    def test_and_composition(self, repo: pathlib.Path) -> None:
        _write(repo, "h1", author="alice", agent_id="bot-1")
        _write(repo, "h2", author="alice", agent_id="bot-2", parent="h1")
        _write(repo, "h3", author="bob", agent_id="bot-1", parent="h2")
        evaluator = build_evaluator("author == 'alice' and agent_id == 'bot-1'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert result[0]["commit_id"] == "h1"

    # --- OR composition ---

    def test_or_composition(self, repo: pathlib.Path) -> None:
        _write(repo, "i1", author="alice")
        _write(repo, "i2", author="bob", parent="i1")
        _write(repo, "i3", author="charlie", parent="i2")
        evaluator = build_evaluator("author == 'alice' or author == 'bob'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 2

    # --- complex nested AND OR ---

    def test_complex_and_or(self, repo: pathlib.Path) -> None:
        # NOTE(review): despite the name, this query only combines two
        # sem_ver_bump clauses with OR; a mixed AND/OR query is not covered
        # here -- confirm the DSL's precedence rules before adding one.
        _write(repo, "j1", author="alice", sem_ver_bump="major")
        _write(repo, "j2", author="bob", sem_ver_bump="minor", parent="j1")
        _write(repo, "j3", author="alice", sem_ver_bump="patch", parent="j2")
        evaluator = build_evaluator(
            "sem_ver_bump == 'major' or sem_ver_bump == 'minor'"
        )
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 2

    # --- error cases ---

    def test_unknown_field_raises_value_error(self) -> None:
        with pytest.raises(ValueError):
            build_evaluator("unknown_field == 'something'")

    def test_unknown_operator_raises_value_error(self) -> None:
        with pytest.raises(ValueError):
            build_evaluator("author REGEX 'alice'")

    def test_empty_query_raises(self) -> None:
        with pytest.raises((ValueError, IndexError)):
            build_evaluator("")

    # --- branch field ---

    def test_branch_field_matches_correctly(self, repo: pathlib.Path) -> None:
        _write(repo, "k1", branch="main", author="alice")
        evaluator = build_evaluator("branch == 'main'")
        result = walk_history(repo, "main", evaluator)
        # The single commit on ``main`` must be reported; ``all(...)`` alone
        # would be satisfied by an empty result.
        assert [m["commit_id"] for m in result] == ["k1"]
        assert all(m["branch"] == "main" for m in result)

Content Address

Object ID (SHA-256)

16642799b11454418eeca67428ee94716218306b67df32af871eba7c08ab2bda

This file is immutable and content-addressed. The same SHA always refers to the same bytes, across every clone and every time.

File Info

Path tests/test_stress_query_engine.py

Lines 354

Size 12.9 KB

Language python

Ref 119290fc

Snapshot 8978358fc058…

Last Modified

119290fc

Add mission-critical stress test suite (9 new files, 1716 tests total) (#76)

Gabriel Cardona <cgcardona@gmail.com> 1d ago

View commit →

Links

Browse tree at 119290fc All commits View raw