tests/test_stress_query_engine.py · cgcardona/muse

test_stress_query_engine.py python

353 lines 12.9 KB

e6786943 feat: upgrade to Python 3.14, drop from __future__ import annotations Gabriel Cardona <cgcardona@gmail.com> 1d ago

1	"""Stress tests for the generic query engine and code query DSL.
2
3	Covers:
4	- walk_history on linear chains of 100+ commits.
5	- CommitEvaluator with correct 3-arg signature.
6	- format_matches output format.
7	- Code query DSL: all field types, all operators, AND/OR composition.
8	- Code query DSL: unknown field raises ValueError.
9	- Query against large history (200 commits).
10	- Branch-scoped queries.
11	"""
12
13	import datetime
14	import pathlib
15
16	import pytest
17
18	from muse.core.query_engine import CommitEvaluator, QueryMatch, format_matches, walk_history
19	from muse.core.store import CommitRecord, write_commit
20	from muse.domain import SemVerBump
21	from muse.plugins.code._code_query import build_evaluator
22
23
24	# ---------------------------------------------------------------------------
25	# Helpers
26	# ---------------------------------------------------------------------------
27
28
def _now() -> datetime.datetime:
    """Return the current moment as a timezone-aware UTC datetime."""
    utc = datetime.timezone.utc
    return datetime.datetime.now(tz=utc)
31
32
def _write(
    root: pathlib.Path,
    cid: str,
    branch: str = "main",
    parent: str | None = None,
    author: str = "alice",
    agent_id: str = "",
    model_id: str = "",
    sem_ver_bump: SemVerBump = "none",
    message: str = "",
) -> CommitRecord:
    """Store a synthetic commit and point the branch head ref at it.

    Args:
        root: Repository root; the ref is written below ``root/.muse/refs/heads``.
        cid: Commit id. Also used to derive the snapshot id (``snap-<cid>``)
            and the default message.
        branch: Branch recorded on the commit and whose head ref is updated.
        parent: Parent commit id, or ``None`` for a root commit.
        author: Author recorded on the commit.
        agent_id: Agent id recorded on the commit.
        model_id: Model id recorded on the commit.
        sem_ver_bump: Semantic-version bump recorded on the commit.
        message: Commit message; an empty string falls back to ``"commit <cid>"``.

    Returns:
        The ``CommitRecord`` that was handed to ``write_commit``.
    """
    record = CommitRecord(
        commit_id=cid,
        repo_id="repo",
        branch=branch,
        snapshot_id=f"snap-{cid}",
        message=message or f"commit {cid}",
        committed_at=_now(),
        parent_commit_id=parent,
        author=author,
        agent_id=agent_id,
        model_id=model_id,
        sem_ver_bump=sem_ver_bump,
    )
    write_commit(root, record)

    ref = root / ".muse" / "refs" / "heads" / branch
    # A branch name containing "/" (e.g. "feature/x") places the ref file in a
    # sub-directory of refs/heads; create it so write_text cannot fail with
    # FileNotFoundError.
    ref.parent.mkdir(parents=True, exist_ok=True)
    ref.write_text(cid)
    return record
61
62
def _make_match(commit: CommitRecord) -> QueryMatch:
    """Build the ``QueryMatch`` the test evaluators emit for *commit*.

    The id, author and branch are copied from the commit, the timestamp is
    rendered with ``isoformat()``, and ``detail`` is ``"matched commit <id>"``.
    """
    cid = commit.commit_id
    stamp = commit.committed_at.isoformat()
    return QueryMatch(
        commit_id=cid,
        author=commit.author,
        committed_at=stamp,
        branch=commit.branch,
        detail=f"matched commit {cid}",
    )
71
72
@pytest.fixture
def repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Provide an empty repository skeleton rooted at ``tmp_path``.

    Creates ``.muse/commits`` and ``.muse/refs/heads`` and returns the root
    directory itself (not the ``.muse`` directory).
    """
    store = tmp_path / ".muse"
    for parts in (("commits",), ("refs", "heads")):
        store.joinpath(*parts).mkdir(parents=True)
    return tmp_path
79
80
81	# ===========================================================================
82	# walk_history — basic
83	# ===========================================================================
84
85
class TestWalkHistoryBasic:
    """walk_history on an unknown branch, a single commit and short linear chains."""

    @staticmethod
    def _accept_all(
        commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
    ) -> list[QueryMatch]:
        """Evaluator that reports exactly one match for every commit it receives."""
        return [_make_match(commit)]

    def test_empty_history_no_matches(self, repo: pathlib.Path) -> None:
        # No ref was ever written for this branch name.
        matches = walk_history(repo, "nonexistent-branch", self._accept_all)
        assert matches == []

    def test_single_commit_matches(self, repo: pathlib.Path) -> None:
        _write(repo, "only", branch="main")
        matches = walk_history(repo, "main", self._accept_all)
        assert len(matches) == 1
        assert matches[0]["commit_id"] == "only"

    def test_single_commit_no_match(self, repo: pathlib.Path) -> None:
        _write(repo, "only", branch="main")

        def reject(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return []

        matches = walk_history(repo, "main", reject)
        assert matches == []

    def test_linear_chain_all_match(self, repo: pathlib.Path) -> None:
        ids = [f"c{i:03d}" for i in range(10)]
        # Pair every commit with its predecessor; the first one has no parent.
        for parent_id, cid in zip([None, *ids], ids):
            _write(repo, cid, parent=parent_id)

        matches = walk_history(repo, "main", self._accept_all)
        assert len(matches) == 10

    def test_linear_chain_filtered(self, repo: pathlib.Path) -> None:
        ids = [f"c{i:03d}" for i in range(10)]
        for index, (parent_id, cid) in enumerate(zip([None, *ids], ids)):
            # Even positions are authored by alice, odd ones by bob.
            who = "bob" if index % 2 else "alice"
            _write(repo, cid, parent=parent_id, author=who)

        def alice_only(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return [_make_match(commit)] if commit.author == "alice" else []

        matches = walk_history(repo, "main", alice_only)
        assert len(matches) == 5

    def test_max_commits_limits_walk(self, repo: pathlib.Path) -> None:
        ids = [f"c{i:03d}" for i in range(50)]
        for parent_id, cid in zip([None, *ids], ids):
            _write(repo, cid, parent=parent_id)

        matches = walk_history(repo, "main", self._accept_all, max_commits=10)
        assert len(matches) == 10

    def test_matches_include_commit_id_and_branch(self, repo: pathlib.Path) -> None:
        _write(repo, "abc123", branch="main", author="alice")
        first = walk_history(repo, "main", self._accept_all)[0]
        assert first["commit_id"] == "abc123"
        assert first["branch"] == "main"
        assert first["author"] == "alice"
154
155
156	# ===========================================================================
157	# walk_history — large history
158	# ===========================================================================
159
160
class TestWalkHistoryLarge:
    """walk_history over linear chains of 100 and 200 commits."""

    def test_200_commit_chain_full_scan(self, repo: pathlib.Path) -> None:
        ids = [f"large-{i:04d}" for i in range(200)]
        for index, (parent_id, cid) in enumerate(zip([None, *ids], ids)):
            # Every third commit (positions 0, 3, 6, ...) is attributed to the bot.
            tag = "" if index % 3 else "bot"
            _write(repo, cid, parent=parent_id, agent_id=tag)

        def bot_only(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return [_make_match(commit)] if commit.agent_id == "bot" else []

        matches = walk_history(repo, "main", bot_only)
        # Positions 0, 3, ..., 198 out of 200 commits -> 67 bot commits.
        assert len(matches) == 67

    def test_query_by_agent_across_100_commits(self, repo: pathlib.Path) -> None:
        ids = [f"agent-test-{i:04d}" for i in range(100)]
        for index, (parent_id, cid) in enumerate(zip([None, *ids], ids)):
            # Agents rotate through agent-0 .. agent-4.
            _write(repo, cid, parent=parent_id, agent_id=f"agent-{index % 5}")

        def agent_0_only(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return [_make_match(commit)] if commit.agent_id == "agent-0" else []

        matches = walk_history(repo, "main", agent_0_only)
        assert len(matches) == 20  # 100 / 5 = 20
193
194
195	# ===========================================================================
196	# format_matches
197	# ===========================================================================
198
199
class TestFormatMatches:
    """format_matches returns a string that mentions every commit id it was given."""

    def test_empty_matches_produces_output(self) -> None:
        rendered = format_matches([])
        assert isinstance(rendered, str)

    def test_single_match_includes_commit_id(self) -> None:
        only = QueryMatch(
            commit_id="abc12345",
            branch="main",
            author="alice",
            committed_at=_now().isoformat(),
            detail="test match",
        )
        rendered = format_matches([only])
        assert "abc12345" in rendered

    def test_multiple_matches_all_present(self) -> None:
        ids = [f"id{i:04d}" for i in range(5)]
        batch = [
            QueryMatch(
                commit_id=cid,
                branch="main",
                author="alice",
                committed_at=_now().isoformat(),
                detail="matched",
            )
            for cid in ids
        ]
        rendered = format_matches(batch)
        for cid in ids:
            assert cid in rendered
230
231
232	# ===========================================================================
233	# Code query DSL — build_evaluator
234	# ===========================================================================
235
236
class TestCodeQueryDSL:
    """Evaluators produced by build_evaluator, run through walk_history.

    Each test writes a short chain on ``main``, builds an evaluator from a
    query string and asserts which commits are reported. Where the original
    checks could pass on an empty result, the expected commit ids are pinned
    explicitly so a query that matches nothing fails the test.
    """

    # --- author field ---

    def test_author_equals(self, repo: pathlib.Path) -> None:
        _write(repo, "a1", author="alice")
        _write(repo, "a2", author="bob", parent="a1")
        evaluator = build_evaluator("author == 'alice'")
        result = walk_history(repo, "main", evaluator)
        assert any(m["commit_id"] == "a1" for m in result)
        assert not any(m["commit_id"] == "a2" for m in result)

    def test_author_not_equals(self, repo: pathlib.Path) -> None:
        _write(repo, "b1", author="alice")
        _write(repo, "b2", author="bob", parent="b1")
        evaluator = build_evaluator("author != 'alice'")
        result = walk_history(repo, "main", evaluator)
        # all() alone is vacuously true on an empty result; require bob's commit.
        assert {m["commit_id"] for m in result} == {"b2"}
        assert all(m["author"] != "alice" for m in result)

    def test_author_contains(self, repo: pathlib.Path) -> None:
        _write(repo, "c1", author="alice-smith")
        _write(repo, "c2", author="bob-jones", parent="c1")
        evaluator = build_evaluator("author contains 'alice'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert "alice" in result[0]["author"]

    def test_author_startswith(self, repo: pathlib.Path) -> None:
        _write(repo, "d1", author="agent-claude")
        _write(repo, "d2", author="human-alice", parent="d1")
        evaluator = build_evaluator("author startswith 'agent'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert result[0]["author"].startswith("agent")

    # --- agent_id field ---

    def test_agent_id_equals(self, repo: pathlib.Path) -> None:
        _write(repo, "e1", agent_id="claude-v4")
        _write(repo, "e2", agent_id="gpt-4o", parent="e1")
        evaluator = build_evaluator("agent_id == 'claude-v4'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert result[0]["commit_id"] == "e1"

    # --- sem_ver_bump field ---

    def test_sem_ver_bump_major(self, repo: pathlib.Path) -> None:
        _write(repo, "f1", sem_ver_bump="major")
        _write(repo, "f2", sem_ver_bump="minor", parent="f1")
        _write(repo, "f3", sem_ver_bump="patch", parent="f2")
        evaluator = build_evaluator("sem_ver_bump == 'major'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        # A count of one is not enough: it must be the major commit.
        assert result[0]["commit_id"] == "f1"

    # --- model_id field ---

    def test_model_id_contains(self, repo: pathlib.Path) -> None:
        _write(repo, "g1", model_id="claude-3-5-sonnet-20241022")
        _write(repo, "g2", model_id="gpt-4o-2024-08-06", parent="g1")
        evaluator = build_evaluator("model_id contains 'claude'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert result[0]["commit_id"] == "g1"

    # --- AND composition ---

    def test_and_composition(self, repo: pathlib.Path) -> None:
        _write(repo, "h1", author="alice", agent_id="bot-1")
        _write(repo, "h2", author="alice", agent_id="bot-2", parent="h1")
        _write(repo, "h3", author="bob", agent_id="bot-1", parent="h2")
        evaluator = build_evaluator("author == 'alice' and agent_id == 'bot-1'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert result[0]["commit_id"] == "h1"

    # --- OR composition ---

    def test_or_composition(self, repo: pathlib.Path) -> None:
        _write(repo, "i1", author="alice")
        _write(repo, "i2", author="bob", parent="i1")
        _write(repo, "i3", author="charlie", parent="i2")
        evaluator = build_evaluator("author == 'alice' or author == 'bob'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 2
        assert {m["commit_id"] for m in result} == {"i1", "i2"}

    # --- OR over sem_ver_bump values ---

    def test_complex_and_or(self, repo: pathlib.Path) -> None:
        # NOTE(review): despite the name, this query only combines two clauses
        # with `or`; mixing `and` with `or` is not exercised here. Extend it
        # once the DSL's precedence rules have been confirmed.
        _write(repo, "j1", author="alice", sem_ver_bump="major")
        _write(repo, "j2", author="bob", sem_ver_bump="minor", parent="j1")
        _write(repo, "j3", author="alice", sem_ver_bump="patch", parent="j2")
        evaluator = build_evaluator(
            "sem_ver_bump == 'major' or sem_ver_bump == 'minor'"
        )
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 2
        assert {m["commit_id"] for m in result} == {"j1", "j2"}

    # --- error cases ---

    def test_unknown_field_raises_value_error(self) -> None:
        with pytest.raises(ValueError):
            build_evaluator("unknown_field == 'something'")

    def test_unknown_operator_raises_value_error(self) -> None:
        with pytest.raises(ValueError):
            build_evaluator("author REGEX 'alice'")

    def test_empty_query_raises(self) -> None:
        # Either exception type is accepted for an empty query string.
        with pytest.raises((ValueError, IndexError)):
            build_evaluator("")

    # --- branch field ---

    def test_branch_field_matches_correctly(self, repo: pathlib.Path) -> None:
        _write(repo, "k1", branch="main", author="alice")
        evaluator = build_evaluator("branch == 'main'")
        result = walk_history(repo, "main", evaluator)
        # Guard against a vacuous pass: the only commit on main must be reported.
        assert {m["commit_id"] for m in result} == {"k1"}
        assert all(m["branch"] == "main" for m in result)

Content Address

Object ID (SHA-256)

06b12a94c3045ce6e2bdd0aa050e056ccd6e180f5e29dba2cddbd40d2335f003

This file is immutable and content-addressed. The same SHA always refers to the same bytes, across every clone and every time.

File Info

Path tests/test_stress_query_engine.py

Lines 353

Size 12.9 KB

Language python

Ref e6786943

Snapshot 6aec3b5c9ccb…

Last Modified

e6786943

feat: upgrade to Python 3.14, drop from __future__ import annotations

Gabriel Cardona <cgcardona@gmail.com> 1d ago

View commit →

Links

Browse tree at e6786943 All commits View raw