test_stress_query_engine.py
python
| 1 | """Stress tests for the generic query engine and code query DSL. |
| 2 | |
| 3 | Covers: |
| 4 | - walk_history on linear chains of 100+ commits. |
| 5 | - CommitEvaluator with correct 3-arg signature. |
| 6 | - format_matches output format. |
| 7 | - Code query DSL: all field types, all operators, AND/OR composition. |
| 8 | - Code query DSL: unknown field raises ValueError. |
| 9 | - Query against large history (200 commits). |
| 10 | - Branch-scoped queries. |
| 11 | """ |
| 12 | from __future__ import annotations |
| 13 | |
| 14 | import datetime |
| 15 | import pathlib |
| 16 | |
| 17 | import pytest |
| 18 | |
| 19 | from muse.core.query_engine import CommitEvaluator, QueryMatch, format_matches, walk_history |
| 20 | from muse.core.store import CommitRecord, write_commit |
| 21 | from muse.domain import SemVerBump |
| 22 | from muse.plugins.code._code_query import build_evaluator |
| 23 | |
| 24 | |
| 25 | # --------------------------------------------------------------------------- |
| 26 | # Helpers |
| 27 | # --------------------------------------------------------------------------- |
| 28 | |
| 29 | |
def _now() -> datetime.datetime:
    """Return the current time as a timezone-aware UTC ``datetime``."""
    utc = datetime.timezone.utc
    return datetime.datetime.now(tz=utc)
| 32 | |
| 33 | |
def _write(
    root: pathlib.Path,
    cid: str,
    branch: str = "main",
    parent: str | None = None,
    author: str = "alice",
    agent_id: str = "",
    model_id: str = "",
    sem_ver_bump: SemVerBump = "none",
    message: str = "",
) -> CommitRecord:
    """Persist one commit under *root* and point *branch*'s ref file at it.

    Args:
        root: Repository root; the ref file is written below
            ``root/.muse/refs/heads``.
        cid: Commit id. Also used to derive the snapshot id (``snap-<cid>``)
            and the default message.
        branch: Branch name recorded on the commit and used as the ref path.
        parent: Parent commit id, or ``None`` for a root commit.
        author: Author recorded on the commit.
        agent_id: Agent id recorded on the commit.
        model_id: Model id recorded on the commit.
        sem_ver_bump: Semantic-version bump recorded on the commit.
        message: Commit message; falls back to ``"commit <cid>"`` when empty.

    Returns:
        The ``CommitRecord`` that was handed to ``write_commit``.
    """
    record = CommitRecord(
        commit_id=cid,
        repo_id="repo",
        branch=branch,
        snapshot_id=f"snap-{cid}",
        message=message or f"commit {cid}",
        committed_at=_now(),
        parent_commit_id=parent,
        author=author,
        agent_id=agent_id,
        model_id=model_id,
        sem_ver_bump=sem_ver_bump,
    )
    write_commit(root, record)

    ref = root / ".muse" / "refs" / "heads" / branch
    # A branch name containing "/" (e.g. "feature/x") maps to a nested path
    # whose parent directory the ``repo`` fixture does not create; make sure
    # it exists so write_text does not fail with FileNotFoundError.
    ref.parent.mkdir(parents=True, exist_ok=True)
    # Overwrite the ref so it names the commit written most recently.
    ref.write_text(cid)
    return record
| 62 | |
| 63 | |
def _make_match(commit: CommitRecord) -> QueryMatch:
    """Build a ``QueryMatch`` that mirrors the identifying fields of *commit*.

    The timestamp is stored as the ISO-8601 string of ``commit.committed_at``
    and ``detail`` is a fixed ``"matched commit <id>"`` message.
    """
    fields = {
        "commit_id": commit.commit_id,
        "author": commit.author,
        "committed_at": commit.committed_at.isoformat(),
        "branch": commit.branch,
        "detail": f"matched commit {commit.commit_id}",
    }
    return QueryMatch(**fields)
| 72 | |
| 73 | |
@pytest.fixture
def repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Create the ``.muse`` directory skeleton in *tmp_path* and return it.

    Two directories are created: ``.muse/commits`` and ``.muse/refs/heads``.
    """
    store_dir = tmp_path / ".muse"
    for parts in (("commits",), ("refs", "heads")):
        store_dir.joinpath(*parts).mkdir(parents=True)
    return tmp_path
| 80 | |
| 81 | |
| 82 | # =========================================================================== |
| 83 | # walk_history — basic |
| 84 | # =========================================================================== |
| 85 | |
| 86 | |
class TestWalkHistoryBasic:
    """walk_history on missing, single-commit and short linear histories."""

    def test_empty_history_no_matches(self, repo: pathlib.Path) -> None:
        """Walking a branch that was never written is expected to yield ``[]``."""

        def accept_all(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return [_make_match(commit)]

        assert walk_history(repo, "nonexistent-branch", accept_all) == []

    def test_single_commit_matches(self, repo: pathlib.Path) -> None:
        """An accept-all evaluator reports the only commit on the branch."""
        _write(repo, "only", branch="main")

        def accept_all(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return [_make_match(commit)]

        found = walk_history(repo, "main", accept_all)
        # Exactly one match, and it is the commit written above.
        assert [m["commit_id"] for m in found] == ["only"]

    def test_single_commit_no_match(self, repo: pathlib.Path) -> None:
        """An evaluator that never matches produces an empty result."""
        _write(repo, "only", branch="main")

        def reject_all(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return []

        assert walk_history(repo, "main", reject_all) == []

    def test_linear_chain_all_match(self, repo: pathlib.Path) -> None:
        """Every commit of a 10-commit chain is reported by accept-all."""
        ids = [f"c{index:03d}" for index in range(10)]
        # Pair each commit with its predecessor (None for the first one).
        for parent_id, cid in zip([None, *ids], ids):
            _write(repo, cid, parent=parent_id)

        def accept_all(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return [_make_match(commit)]

        found = walk_history(repo, "main", accept_all)
        assert len(found) == 10

    def test_linear_chain_filtered(self, repo: pathlib.Path) -> None:
        """Only the alice-authored half of an alternating chain is reported."""
        parent_id = None
        for index in range(10):
            cid = f"c{index:03d}"
            # Even positions are alice, odd positions are bob.
            _write(repo, cid, parent=parent_id, author=("alice", "bob")[index % 2])
            parent_id = cid

        def alice_only(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return [_make_match(commit)] if commit.author == "alice" else []

        found = walk_history(repo, "main", alice_only)
        assert len(found) == 5

    def test_max_commits_limits_walk(self, repo: pathlib.Path) -> None:
        """``max_commits=10`` caps the matches from a 50-commit chain at 10."""
        ids = [f"c{index:03d}" for index in range(50)]
        for parent_id, cid in zip([None, *ids], ids):
            _write(repo, cid, parent=parent_id)

        def accept_all(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return [_make_match(commit)]

        found = walk_history(repo, "main", accept_all, max_commits=10)
        assert len(found) == 10

    def test_matches_include_commit_id_and_branch(self, repo: pathlib.Path) -> None:
        """The first match carries the commit's id, branch and author."""
        _write(repo, "abc123", branch="main", author="alice")

        def accept_all(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return [_make_match(commit)]

        first = walk_history(repo, "main", accept_all)[0]
        observed = (first["commit_id"], first["branch"], first["author"])
        assert observed == ("abc123", "main", "alice")
| 155 | |
| 156 | |
| 157 | # =========================================================================== |
| 158 | # walk_history — large history |
| 159 | # =========================================================================== |
| 160 | |
| 161 | |
class TestWalkHistoryLarge:
    """walk_history over linear chains of 100 and 200 commits."""

    def test_200_commit_chain_full_scan(self, repo: pathlib.Path) -> None:
        """A 200-commit chain is scanned fully when filtering by agent id."""
        parent_id = None
        for index in range(200):
            cid = f"large-{index:04d}"
            # Every third commit (index 0, 3, 6, ...) is tagged as "bot".
            tag = "" if index % 3 else "bot"
            _write(repo, cid, parent=parent_id, agent_id=tag)
            parent_id = cid

        def from_bot(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return [_make_match(commit)] if commit.agent_id == "bot" else []

        found = walk_history(repo, "main", from_bot)
        # Multiples of 3 in [0, 200): 0, 3, ..., 198 -> 67 commits.
        assert len(found) == 67

    def test_query_by_agent_across_100_commits(self, repo: pathlib.Path) -> None:
        """One of five round-robin agents owns a fifth of 100 commits."""
        ids = [f"agent-test-{index:04d}" for index in range(100)]
        for index, (parent_id, cid) in enumerate(zip([None, *ids], ids)):
            _write(repo, cid, parent=parent_id, agent_id=f"agent-{index % 5}")

        def from_agent_zero(
            commit: CommitRecord, manifest: dict[str, str], root: pathlib.Path
        ) -> list[QueryMatch]:
            return [_make_match(commit)] if commit.agent_id == "agent-0" else []

        found = walk_history(repo, "main", from_agent_zero)
        assert len(found) == 20  # 100 commits / 5 agents
| 194 | |
| 195 | |
| 196 | # =========================================================================== |
| 197 | # format_matches |
| 198 | # =========================================================================== |
| 199 | |
| 200 | |
class TestFormatMatches:
    """Checks on the string rendered by format_matches."""

    def test_empty_matches_produces_output(self) -> None:
        """An empty match list still renders to a ``str``."""
        assert isinstance(format_matches([]), str)

    def test_single_match_includes_commit_id(self) -> None:
        """The commit id of a single match appears in the rendered text."""
        only = QueryMatch(
            commit_id="abc12345",
            branch="main",
            author="alice",
            committed_at=_now().isoformat(),
            detail="test match",
        )
        rendered = format_matches([only])
        assert "abc12345" in rendered

    def test_multiple_matches_all_present(self) -> None:
        """Each of five commit ids appears in the rendered text."""
        commit_ids = [f"id{index:04d}" for index in range(5)]
        batch = [
            QueryMatch(
                commit_id=cid,
                branch="main",
                author="alice",
                committed_at=_now().isoformat(),
                detail="matched",
            )
            for cid in commit_ids
        ]
        rendered = format_matches(batch)
        assert all(cid in rendered for cid in commit_ids)
| 231 | |
| 232 | |
| 233 | # =========================================================================== |
| 234 | # Code query DSL — build_evaluator |
| 235 | # =========================================================================== |
| 236 | |
| 237 | |
class TestCodeQueryDSL:
    """Evaluators produced by build_evaluator, exercised through walk_history.

    Each test writes a short commit chain on ``main``, compiles a query
    string with ``build_evaluator`` and checks which commits are reported.
    Results are compared by commit id (as a set when more than one commit
    is expected) so the assertions do not depend on walk order and cannot
    pass vacuously on an empty result.
    """

    # --- author field ---

    def test_author_equals(self, repo: pathlib.Path) -> None:
        """``author == 'alice'`` reports alice's commit and not bob's."""
        _write(repo, "a1", author="alice")
        _write(repo, "a2", author="bob", parent="a1")
        evaluator = build_evaluator("author == 'alice'")
        result = walk_history(repo, "main", evaluator)
        assert any(m["commit_id"] == "a1" for m in result)
        assert not any(m["commit_id"] == "a2" for m in result)

    def test_author_not_equals(self, repo: pathlib.Path) -> None:
        """``author != 'alice'`` reports bob's commit only."""
        _write(repo, "b1", author="alice")
        _write(repo, "b2", author="bob", parent="b1")
        evaluator = build_evaluator("author != 'alice'")
        result = walk_history(repo, "main", evaluator)
        # Pin the positive case too: all() alone is true for an empty result.
        assert [m["commit_id"] for m in result] == ["b2"]
        assert all(m["author"] != "alice" for m in result)

    def test_author_contains(self, repo: pathlib.Path) -> None:
        """``contains`` matches a substring of the author."""
        _write(repo, "c1", author="alice-smith")
        _write(repo, "c2", author="bob-jones", parent="c1")
        evaluator = build_evaluator("author contains 'alice'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert "alice" in result[0]["author"]

    def test_author_startswith(self, repo: pathlib.Path) -> None:
        """``startswith`` matches a prefix of the author."""
        _write(repo, "d1", author="agent-claude")
        _write(repo, "d2", author="human-alice", parent="d1")
        evaluator = build_evaluator("author startswith 'agent'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert result[0]["author"].startswith("agent")

    # --- agent_id field ---

    def test_agent_id_equals(self, repo: pathlib.Path) -> None:
        """``agent_id ==`` selects the commit written by that agent."""
        _write(repo, "e1", agent_id="claude-v4")
        _write(repo, "e2", agent_id="gpt-4o", parent="e1")
        evaluator = build_evaluator("agent_id == 'claude-v4'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert result[0]["commit_id"] == "e1"

    # --- sem_ver_bump field ---

    def test_sem_ver_bump_major(self, repo: pathlib.Path) -> None:
        """Only the ``major`` commit of a major/minor/patch chain matches."""
        _write(repo, "f1", sem_ver_bump="major")
        _write(repo, "f2", sem_ver_bump="minor", parent="f1")
        _write(repo, "f3", sem_ver_bump="patch", parent="f2")
        evaluator = build_evaluator("sem_ver_bump == 'major'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert result[0]["commit_id"] == "f1"

    # --- model_id field ---

    def test_model_id_contains(self, repo: pathlib.Path) -> None:
        """``model_id contains 'claude'`` selects the claude-model commit."""
        _write(repo, "g1", model_id="claude-3-5-sonnet-20241022")
        _write(repo, "g2", model_id="gpt-4o-2024-08-06", parent="g1")
        evaluator = build_evaluator("model_id contains 'claude'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert result[0]["commit_id"] == "g1"

    # --- AND composition ---

    def test_and_composition(self, repo: pathlib.Path) -> None:
        """``and`` requires both clauses; only h1 is alice *and* bot-1."""
        _write(repo, "h1", author="alice", agent_id="bot-1")
        _write(repo, "h2", author="alice", agent_id="bot-2", parent="h1")
        _write(repo, "h3", author="bob", agent_id="bot-1", parent="h2")
        evaluator = build_evaluator("author == 'alice' and agent_id == 'bot-1'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert result[0]["commit_id"] == "h1"

    # --- OR composition ---

    def test_or_composition(self, repo: pathlib.Path) -> None:
        """``or`` accepts either clause; charlie's commit is excluded."""
        _write(repo, "i1", author="alice")
        _write(repo, "i2", author="bob", parent="i1")
        _write(repo, "i3", author="charlie", parent="i2")
        evaluator = build_evaluator("author == 'alice' or author == 'bob'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 2
        assert {m["commit_id"] for m in result} == {"i1", "i2"}

    # --- OR across sem_ver_bump values ---

    def test_complex_and_or(self, repo: pathlib.Path) -> None:
        """``or`` over two sem_ver_bump values selects major and minor commits.

        NOTE(review): despite the test name, the query contains no ``and``
        clause, so mixed AND/OR precedence is not exercised here. Extending
        the query needs the DSL's precedence rules, which are not visible
        from this file.
        """
        _write(repo, "j1", author="alice", sem_ver_bump="major")
        _write(repo, "j2", author="bob", sem_ver_bump="minor", parent="j1")
        _write(repo, "j3", author="alice", sem_ver_bump="patch", parent="j2")
        evaluator = build_evaluator(
            "sem_ver_bump == 'major' or sem_ver_bump == 'minor'"
        )
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 2
        assert {m["commit_id"] for m in result} == {"j1", "j2"}

    # --- error cases ---

    def test_unknown_field_raises_value_error(self) -> None:
        """A field name the DSL does not know is rejected with ValueError."""
        with pytest.raises(ValueError):
            build_evaluator("unknown_field == 'something'")

    def test_unknown_operator_raises_value_error(self) -> None:
        """An unsupported operator token is rejected with ValueError."""
        with pytest.raises(ValueError):
            build_evaluator("author REGEX 'alice'")

    def test_empty_query_raises(self) -> None:
        """An empty query string raises ValueError or IndexError."""
        with pytest.raises((ValueError, IndexError)):
            build_evaluator("")

    # --- branch field ---

    def test_branch_field_matches_correctly(self, repo: pathlib.Path) -> None:
        """``branch == 'main'`` reports the commit written on main."""
        _write(repo, "k1", branch="main", author="alice")
        evaluator = build_evaluator("branch == 'main'")
        result = walk_history(repo, "main", evaluator)
        # Require the commit to be found; all() alone passes on an empty list.
        assert [m["commit_id"] for m in result] == ["k1"]
        assert all(m["branch"] == "main" for m in result)