gabriel / musehub public
test_musehub_search.py python
892 lines 29.6 KB
c0f0b481 release: merge dev → main (#5) Gabriel Cardona <cgcardona@gmail.com> 5d ago
1 """Tests for MuseHub search endpoints.
2
3 Covers cross-repo global search:
4 - test_global_search_page_renders — GET /search returns 200 HTML
5 - test_global_search_results_grouped — JSON results are grouped by repo
6 - test_global_search_public_only — private repos are excluded
7 - test_global_search_json — JSON content-type returned
8 - test_global_search_empty_query_handled — graceful response for empty result set
9 - test_global_search_accessible_without_auth — 200 without JWT (public endpoint)
10 - test_global_search_keyword_mode — keyword mode matches across message terms
11 - test_global_search_pattern_mode — pattern mode uses SQL LIKE
12 - test_global_search_pagination — page/page_size params respected
13
14 Covers in-repo search:
15 - test_search_page_renders — GET /{owner}/{repo_slug}/search → 200 HTML
16 - test_search_keyword_mode — keyword search returns matching commits
17 - test_search_keyword_empty_query — empty keyword query returns empty matches
18 - test_search_musical_property — musical property filter works
19 - test_search_natural_language — ask mode returns matching commits
20 - test_search_pattern_message — pattern matches commit message
21 - test_search_pattern_branch — pattern matches branch name
22 - test_search_json_response — JSON search endpoint returns SearchResponse shape
23 - test_search_date_range_since — since filter excludes old commits
24 - test_search_date_range_until — until filter excludes future commits
25 - test_search_invalid_mode — invalid mode returns 422
26 - test_search_unknown_repo — unknown repo_id returns 404
27 - test_search_requires_auth — unauthenticated request returns 401
28 - test_search_limit_respected — limit caps result count
29
30 All tests use the shared ``client`` and ``auth_headers`` fixtures from conftest.py.
31 """
32 from __future__ import annotations
33
34 import uuid
35 from datetime import datetime, timezone
36
37 import pytest
38 from httpx import AsyncClient
39 from sqlalchemy.ext.asyncio import AsyncSession
40
41 from musehub.db.musehub_models import MusehubCommit, MusehubObject, MusehubRepo
42 from musehub.muse_cli.models import MuseCliCommit, MuseCliSnapshot
43
44
45 # ---------------------------------------------------------------------------
46 # Helpers — global search (uses MusehubCommit / MusehubRepo directly)
47 # ---------------------------------------------------------------------------
48
49
async def _make_repo(
    db_session: AsyncSession,
    *,
    name: str = "test-repo",
    visibility: str = "public",
    owner: str = "test-owner",
) -> str:
    """Insert a MusehubRepo row and hand back its ``repo_id`` as a string.

    The slug is derived from ``name``: lower-cased, every run of characters
    outside ``[a-z0-9]`` collapsed to a single ``-``, stripped of leading and
    trailing dashes, capped at 64 characters (and stripped again), falling
    back to ``"repo"`` when nothing is left.

    Note that ``owner`` is stored as ``owner_user_id``; the repo's ``owner``
    field is always ``"testuser"``.
    """
    import re as _re

    dashed = _re.sub(r"[^a-z0-9]+", "-", name.lower())
    trimmed = dashed.strip("-")[:64].strip("-")
    slug = trimmed if trimmed else "repo"

    record = MusehubRepo(
        name=name,
        owner="testuser",
        slug=slug,
        visibility=visibility,
        owner_user_id=owner,
    )
    db_session.add(record)
    # Commit then refresh so the generated repo_id can be read from the instance.
    await db_session.commit()
    await db_session.refresh(record)
    return str(record.repo_id)
65
66
async def _make_commit(
    db_session: AsyncSession,
    repo_id: str,
    *,
    commit_id: str,
    message: str,
    author: str = "alice",
    branch: str = "main",
) -> None:
    """Persist one MusehubCommit (used by the global search tests).

    The commit has no parents and is timestamped with the current UTC time.
    The row is committed before returning.
    """
    row = MusehubCommit(
        commit_id=commit_id,
        repo_id=repo_id,
        branch=branch,
        parent_ids=[],
        message=message,
        author=author,
        timestamp=datetime.now(tz=timezone.utc),
    )
    db_session.add(row)
    await db_session.commit()
88
89
90 # ---------------------------------------------------------------------------
91 # Helpers — in-repo search (uses MuseCliCommit / MuseCliSnapshot)
92 # ---------------------------------------------------------------------------
93
94
async def _make_search_repo(db: AsyncSession) -> str:
    """Create the fixed private repo used by in-repo search tests.

    The repo is always ``testuser/search-test-repo``; its ``repo_id`` is
    returned as a string after the row has been committed and refreshed.
    """
    fields = {
        "name": "search-test-repo",
        "owner": "testuser",
        "slug": "search-test-repo",
        "visibility": "private",
        "owner_user_id": "test-owner",
    }
    record = MusehubRepo(**fields)
    db.add(record)
    await db.commit()
    await db.refresh(record)
    return str(record.repo_id)
108
109
async def _make_snapshot(db: AsyncSession, snapshot_id: str) -> None:
    """Add an empty-manifest MuseCliSnapshot so commits can reference it.

    Only flushes the session; committing is left to the caller.
    """
    db.add(MuseCliSnapshot(snapshot_id=snapshot_id, manifest={}))
    await db.flush()
115
116
async def _make_search_commit(
    db: AsyncSession,
    *,
    repo_id: str,
    message: str,
    branch: str = "main",
    author: str = "test-author",
    committed_at: datetime | None = None,
) -> MuseCliCommit:
    """Create a MuseCliCommit (plus its backing snapshot) for in-repo search tests.

    A fresh snapshot with a random ``snap-<16 hex chars>`` id is seeded first,
    then a commit with a random 32-hex-char id pointing at it.  When
    ``committed_at`` is not supplied the current UTC time is used.  The session
    is flushed, not committed; the new commit instance is returned.
    """
    snapshot_key = f"snap-{uuid.uuid4().hex[:16]}"
    await _make_snapshot(db, snapshot_key)

    new_commit_id = uuid.uuid4().hex
    when = committed_at if committed_at else datetime.now(timezone.utc)

    record = MuseCliCommit(
        commit_id=new_commit_id,
        repo_id=repo_id,
        branch=branch,
        snapshot_id=snapshot_key,
        message=message,
        author=author,
        committed_at=when,
    )
    db.add(record)
    await db.flush()
    return record
141
142
143 # ---------------------------------------------------------------------------
144 # Global search — UI page
145 # ---------------------------------------------------------------------------
146
147
@pytest.mark.anyio
async def test_global_search_page_renders(
    client: AsyncClient,
    db_session: AsyncSession,
) -> None:
    """The /search page is served as HTML without any auth headers.

    The markup must contain the "Global Search" and "MuseHub" labels as well
    as the ``q`` and ``mode`` form fields.
    """
    page = await client.get("/search")

    assert page.status_code == 200
    assert "text/html" in page.headers["content-type"]

    html = page.text
    for fragment in ("Global Search", "MuseHub", 'name="q"', 'name="mode"'):
        assert fragment in html
162
163
@pytest.mark.anyio
async def test_global_search_page_pre_fills_query(
    client: AsyncClient,
    db_session: AsyncSession,
) -> None:
    """Requesting /search?q=jazz echoes the query term 'jazz' in the rendered page."""
    url = "/search?q=jazz&mode=keyword"
    page = await client.get(url)

    assert page.status_code == 200
    assert "jazz" in page.text
174
175
176 # ---------------------------------------------------------------------------
177 # Global search — JSON API
178 # ---------------------------------------------------------------------------
179
180
@pytest.mark.anyio
async def test_global_search_accessible_without_auth(
    client: AsyncClient,
    db_session: AsyncSession,
) -> None:
    """The global search API answers 200 when no JWT is supplied.

    Global search is treated as a public endpoint, so the request is sent
    without any auth headers and must still succeed.
    """
    anonymous = await client.get("/api/v1/search?q=jazz")
    assert anonymous.status_code == 200
193
194
@pytest.mark.anyio
async def test_global_search_json(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """The global search API replies with a JSON body that echoes the query.

    Checks the content-type header and the presence of the ``groups`` and
    ``query`` keys.
    """
    reply = await client.get("/api/v1/search?q=jazz", headers=auth_headers)

    assert reply.status_code == 200
    assert "application/json" in reply.headers["content-type"]

    payload = reply.json()
    for key in ("groups", "query"):
        assert key in payload
    assert payload["query"] == "jazz"
212
213
@pytest.mark.anyio
async def test_global_search_public_only(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Global search surfaces public repos and hides private ones.

    Both a public and a private repo get a commit matching "jazz"; only the
    public repo's id may show up among the result groups.
    """
    visible_repo = await _make_repo(db_session, name="public-beats", visibility="public")
    hidden_repo = await _make_repo(db_session, name="secret-beats", visibility="private")

    seeds = (
        (visible_repo, "pub001abc", "jazz groove session"),
        (hidden_repo, "priv001abc", "jazz private session"),
    )
    for target, cid, text in seeds:
        await _make_commit(db_session, target, commit_id=cid, message=text)

    reply = await client.get("/api/v1/search?q=jazz", headers=auth_headers)
    assert reply.status_code == 200

    returned_ids = {entry["repoId"] for entry in reply.json()["groups"]}
    assert visible_repo in returned_ids
    assert hidden_repo not in returned_ids
240
241
@pytest.mark.anyio
async def test_global_search_results_grouped(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Matches come back bucketed per repo with the full group metadata.

    Two commits are seeded in one repo and one in another; both repos must
    appear as groups, every group must expose the expected keys, and the
    first repo's group must report exactly two matches.
    """
    alpha_id = await _make_repo(db_session, name="repo-alpha", visibility="public")
    beta_id = await _make_repo(db_session, name="repo-beta", visibility="public")

    seeds = (
        (alpha_id, "a001abc123", "bossa nova rhythm"),
        (alpha_id, "a002abc123", "bossa nova variation"),
        (beta_id, "b001abc123", "bossa nova groove"),
    )
    for target, cid, text in seeds:
        await _make_commit(db_session, target, commit_id=cid, message=text)

    reply = await client.get("/api/v1/search?q=bossa+nova", headers=auth_headers)
    assert reply.status_code == 200
    groups = reply.json()["groups"]

    seen_repo_ids = {entry["repoId"] for entry in groups}
    assert alpha_id in seen_repo_ids
    assert beta_id in seen_repo_ids

    # repoSlug (PR #282) is required for UI link construction.
    expected_keys = (
        "repoId",
        "repoName",
        "repoOwner",
        "repoSlug",
        "repoVisibility",
        "matches",
        "totalMatches",
    )
    for entry in groups:
        for key in expected_keys:
            assert key in entry
        assert isinstance(entry["matches"], list)
        assert isinstance(entry["repoSlug"], str)
        assert entry["repoSlug"] != ""

    alpha_group = next(entry for entry in groups if entry["repoId"] == alpha_id)
    assert alpha_group["totalMatches"] == 2
    assert len(alpha_group["matches"]) == 2
289
290
@pytest.mark.anyio
async def test_global_search_empty_query_handled(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """A query with no hits yields an empty ``groups`` list plus paging metadata."""
    await _make_repo(db_session, name="silent-repo", visibility="public")

    url = "/api/v1/search?q=zyxqwvutsr_no_match"
    reply = await client.get(url, headers=auth_headers)
    assert reply.status_code == 200

    payload = reply.json()
    assert payload["groups"] == []
    assert payload["page"] == 1
    assert "totalReposSearched" in payload
309
310
@pytest.mark.anyio
async def test_global_search_keyword_mode(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Keyword mode finds a commit whose message contains the term in another case.

    The lower-case query ``blues`` must surface the commit titled
    "Blues Shuffle in E" within the seeded repo's group.
    """
    repo_id = await _make_repo(db_session, name="jazz-lab", visibility="public")
    seeds = (
        ("kw001abcde", "Blues Shuffle in E"),
        ("kw002abcde", "Jazz Waltz Trio"),
    )
    for cid, text in seeds:
        await _make_commit(db_session, repo_id, commit_id=cid, message=text)

    reply = await client.get(
        "/api/v1/search?q=blues&mode=keyword",
        headers=auth_headers,
    )
    assert reply.status_code == 200

    candidates = (g for g in reply.json()["groups"] if g["repoId"] == repo_id)
    repo_group = next(candidates, None)
    assert repo_group is not None

    matched_messages = [hit["message"] for hit in repo_group["matches"]]
    assert any("Blues" in text for text in matched_messages)
336
337
@pytest.mark.anyio
async def test_global_search_pattern_mode(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Pattern mode treats the query as a LIKE pattern over commit messages.

    The URL-encoded query ``%minor%`` must match exactly one of the two
    seeded commits — the one mentioning "minor".
    """
    repo_id = await _make_repo(db_session, name="pattern-lab", visibility="public")
    seeds = (
        ("pt001abcde", "minor pentatonic run"),
        ("pt002abcde", "major scale exercise"),
    )
    for cid, text in seeds:
        await _make_commit(db_session, repo_id, commit_id=cid, message=text)

    # %25 is the percent-encoded form of the LIKE wildcard '%'.
    reply = await client.get(
        "/api/v1/search?q=%25minor%25&mode=pattern",
        headers=auth_headers,
    )
    assert reply.status_code == 200

    candidates = (g for g in reply.json()["groups"] if g["repoId"] == repo_id)
    repo_group = next(candidates, None)
    assert repo_group is not None
    assert repo_group["totalMatches"] == 1

    first_hit = repo_group["matches"][0]
    assert "minor" in first_hit["message"]
363
364
@pytest.mark.anyio
async def test_global_search_pagination(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """page and page_size parameters control repo-group pagination.

    Three public repos each receive one commit matching "paginate".  With
    ``page_size=2`` no page may contain more than two groups, and each response
    must echo back the requested ``page`` and ``pageSize``.
    """
    for i in range(3):
        rid = await _make_repo(
            db_session, name=f"paged-repo-{i}", visibility="public", owner=f"owner-{i}"
        )
        await _make_commit(
            db_session, rid, commit_id=f"pg{i:03d}abcde", message="paginate funk groove"
        )

    response = await client.get(
        "/api/v1/search?q=paginate&page=1&page_size=2",
        headers=auth_headers,
    )
    assert response.status_code == 200
    data = response.json()
    assert len(data["groups"]) <= 2
    assert data["page"] == 1
    assert data["pageSize"] == 2

    response2 = await client.get(
        "/api/v1/search?q=paginate&page=2&page_size=2",
        headers=auth_headers,
    )
    assert response2.status_code == 200
    data2 = response2.json()
    # The page-size cap and echo must hold on every page, not just the first.
    assert len(data2["groups"]) <= 2
    assert data2["page"] == 2
    assert data2["pageSize"] == 2
399
400
@pytest.mark.anyio
async def test_global_search_match_contains_required_fields(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """A match entry carries commitId, message, author, branch, timestamp and repoId.

    The values must mirror the seeded commit exactly; ``timestamp`` only has
    to be present.
    """
    repo_id = await _make_repo(db_session, name="fields-check", visibility="public")
    await _make_commit(
        db_session,
        repo_id,
        commit_id="fc001abcde",
        message="swing feel experiment",
        author="charlie",
        branch="main",
    )

    reply = await client.get("/api/v1/search?q=swing", headers=auth_headers)
    assert reply.status_code == 200

    candidates = (g for g in reply.json()["groups"] if g["repoId"] == repo_id)
    repo_group = next(candidates, None)
    assert repo_group is not None

    hit = repo_group["matches"][0]
    assert hit["commitId"] == "fc001abcde"
    assert hit["message"] == "swing feel experiment"
    assert hit["author"] == "charlie"
    assert hit["branch"] == "main"
    assert "timestamp" in hit
    assert hit["repoId"] == repo_id
433
434
435 # ---------------------------------------------------------------------------
436 # Global search — audio preview batching
437 # ---------------------------------------------------------------------------
438
439
@pytest.mark.anyio
async def test_global_search_audio_preview_populated_for_multiple_repos(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Each repo's first match reports that repo's own audio preview object id.

    Two repos are seeded with one matching commit and one audio object apiece
    (an ``.mp3`` and an ``.ogg``).  The search response must pair each repo
    with its own object id.

    Regression test for the per-repo N+1 audio lookup that was replaced by a
    single batched query.
    """
    alpha_id = await _make_repo(db_session, name="audio-repo-alpha", visibility="public")
    beta_id = await _make_repo(db_session, name="audio-repo-beta", visibility="public")

    await _make_commit(
        db_session, alpha_id, commit_id="ap001abcde", message="funky groove jam"
    )
    await _make_commit(
        db_session, beta_id, commit_id="ap002abcde", message="funky bass session"
    )

    alpha_audio = MusehubObject(
        object_id="sha256:audio-preview-alpha",
        repo_id=alpha_id,
        path="preview.mp3",
        size_bytes=1024,
        disk_path="/tmp/preview-alpha.mp3",
    )
    beta_audio = MusehubObject(
        object_id="sha256:audio-preview-beta",
        repo_id=beta_id,
        path="preview.ogg",
        size_bytes=2048,
        disk_path="/tmp/preview-beta.ogg",
    )
    for audio in (alpha_audio, beta_audio):
        db_session.add(audio)
    await db_session.commit()

    reply = await client.get("/api/v1/search?q=funky", headers=auth_headers)
    assert reply.status_code == 200

    by_repo = {entry["repoId"]: entry for entry in reply.json()["groups"]}
    assert alpha_id in by_repo
    assert beta_id in by_repo

    alpha_first = by_repo[alpha_id]["matches"][0]
    beta_first = by_repo[beta_id]["matches"][0]
    assert alpha_first["audioObjectId"] == "sha256:audio-preview-alpha"
    assert beta_first["audioObjectId"] == "sha256:audio-preview-beta"
495
496
@pytest.mark.anyio
async def test_global_search_audio_preview_absent_when_no_audio_objects(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """A repo with no stored objects reports ``audioObjectId`` as null on its matches."""
    repo_id = await _make_repo(db_session, name="no-audio-repo", visibility="public")
    await _make_commit(
        db_session, repo_id, commit_id="na001abcde", message="silent ambient piece"
    )

    reply = await client.get("/api/v1/search?q=silent", headers=auth_headers)
    assert reply.status_code == 200

    candidates = (g for g in reply.json()["groups"] if g["repoId"] == repo_id)
    repo_group = next(candidates, None)
    assert repo_group is not None

    first_hit = repo_group["matches"][0]
    assert first_hit["audioObjectId"] is None
518
519
520 # ---------------------------------------------------------------------------
521 # In-repo search — UI page
522 # ---------------------------------------------------------------------------
523
524
@pytest.mark.anyio
async def test_search_page_renders(
    client: AsyncClient,
    db_session: AsyncSession,
) -> None:
    """GET /{owner}/{repo_slug}/search returns 200 HTML with mode dropdown.

    The page is addressed by owner and slug ("testuser/search-test-repo"), not
    by repo_id, so the seeded repo's id is not needed here.
    """
    # Seed the repo for its side effect only; the returned id is unused.
    await _make_search_repo(db_session)
    response = await client.get("/testuser/search-test-repo/search")
    assert response.status_code == 200
    assert "text/html" in response.headers["content-type"]
    body = response.text
    assert "MuseHub" in body
    assert "Search Commits" in body
    assert 'name="q"' in body
    assert 'name="mode"' in body
    assert "keyword" in body
541
542
@pytest.mark.anyio
async def test_search_page_no_auth_required(
    client: AsyncClient,
    db_session: AsyncSession,
) -> None:
    """Search UI page is accessible without a JWT (HTML shell, JS handles auth).

    The request deliberately carries no auth headers and must still return 200.
    """
    # Seed the repo for its side effect only; the page is fetched by owner/slug.
    await _make_search_repo(db_session)
    response = await client.get("/testuser/search-test-repo/search")
    assert response.status_code == 200
552
553
554 # ---------------------------------------------------------------------------
555 # In-repo search — authentication
556 # ---------------------------------------------------------------------------
557
558
@pytest.mark.anyio
async def test_search_requires_auth(
    client: AsyncClient,
    db_session: AsyncSession,
) -> None:
    """The in-repo search API rejects a request that carries no token with 401."""
    repo_id = await _make_search_repo(db_session)
    url = f"/api/v1/repos/{repo_id}/search?mode=keyword&q=jazz"

    anonymous = await client.get(url)
    assert anonymous.status_code == 401
568
569
@pytest.mark.anyio
async def test_search_unknown_repo(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Searching a repo id that was never created yields 404."""
    url = "/api/v1/repos/does-not-exist/search?mode=keyword&q=test"
    reply = await client.get(url, headers=auth_headers)
    assert reply.status_code == 404
582
583
@pytest.mark.anyio
async def test_search_invalid_mode(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """An unrecognised ``mode`` value is rejected with 422."""
    repo_id = await _make_search_repo(db_session)
    url = f"/api/v1/repos/{repo_id}/search?mode=badmode&q=x"

    reply = await client.get(url, headers=auth_headers)
    assert reply.status_code == 422
597
598
599 # ---------------------------------------------------------------------------
600 # In-repo search — keyword mode
601 # ---------------------------------------------------------------------------
602
603
@pytest.mark.anyio
async def test_search_keyword_mode(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Keyword mode surfaces commits whose messages share terms with the query.

    Of three seeded commits only one mentions "jazz"; the response must echo
    the mode and decoded query and include at least one jazz match.
    """
    repo_id = await _make_search_repo(db_session)
    await db_session.commit()

    seeded_messages = (
        "dark jazz bassline in Dm",
        "classical piano intro section",
        "hip hop drum fill pattern",
    )
    for text in seeded_messages:
        await _make_search_commit(db_session, repo_id=repo_id, message=text)
    await db_session.commit()

    reply = await client.get(
        f"/api/v1/repos/{repo_id}/search?mode=keyword&q=jazz+bassline",
        headers=auth_headers,
    )
    assert reply.status_code == 200

    payload = reply.json()
    assert payload["mode"] == "keyword"
    assert payload["query"] == "jazz bassline"
    assert any("jazz" in hit["message"].lower() for hit in payload["matches"])
628
629
@pytest.mark.anyio
async def test_search_keyword_empty_query(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """A blank keyword query succeeds and produces no matches."""
    repo_id = await _make_search_repo(db_session)
    await db_session.commit()
    await _make_search_commit(db_session, repo_id=repo_id, message="some commit")
    await db_session.commit()

    url = f"/api/v1/repos/{repo_id}/search?mode=keyword&q="
    reply = await client.get(url, headers=auth_headers)
    assert reply.status_code == 200

    payload = reply.json()
    assert payload["mode"] == "keyword"
    assert payload["matches"] == []
650
651
@pytest.mark.anyio
async def test_search_json_response(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """The in-repo search payload follows the SearchResponse JSON shape.

    Top-level keys are always checked; per-match keys are checked only when
    the response actually contains at least one match.
    """
    repo_id = await _make_search_repo(db_session)
    await db_session.commit()
    await _make_search_commit(
        db_session, repo_id=repo_id, message="piano chord progression F Bb Eb"
    )
    await db_session.commit()

    url = f"/api/v1/repos/{repo_id}/search?mode=keyword&q=piano"
    reply = await client.get(url, headers=auth_headers)
    assert reply.status_code == 200
    payload = reply.json()

    for top_level_key in ("mode", "query", "matches", "totalScanned", "limit"):
        assert top_level_key in payload

    hits = payload["matches"]
    if hits:
        first_hit = hits[0]
        match_keys = (
            "commitId",
            "branch",
            "message",
            "author",
            "timestamp",
            "score",
            "matchSource",
        )
        for key in match_keys:
            assert key in first_hit
686
687
688 # ---------------------------------------------------------------------------
689 # In-repo search — musical property mode
690 # ---------------------------------------------------------------------------
691
692
@pytest.mark.anyio
async def test_search_musical_property(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Property mode answers with a well-formed response.

    Only the response shape is checked (mode echo and a ``matches`` list),
    since muse-extraction may be unavailable in the test environment.
    """
    repo_id = await _make_search_repo(db_session)
    await db_session.commit()

    for text in ("add harmony=Eb bridge section", "drum groove tweak no harmony"):
        await _make_search_commit(db_session, repo_id=repo_id, message=text)
    await db_session.commit()

    url = f"/api/v1/repos/{repo_id}/search?mode=property&harmony=Eb"
    reply = await client.get(url, headers=auth_headers)
    assert reply.status_code == 200

    payload = reply.json()
    assert payload["mode"] == "property"
    assert "matches" in payload
    assert isinstance(payload["matches"], list)
716
717
718 # ---------------------------------------------------------------------------
719 # In-repo search — natural language (ask) mode
720 # ---------------------------------------------------------------------------
721
722
@pytest.mark.anyio
async def test_search_natural_language(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Ask mode answers a natural-language question with relevant commits.

    The question "what tempo changes did I make" must surface at least one
    commit whose message mentions "tempo".
    """
    repo_id = await _make_search_repo(db_session)
    await db_session.commit()

    seeded_messages = (
        "switched tempo to 140bpm for drop",
        "piano melody in minor key",
    )
    for text in seeded_messages:
        await _make_search_commit(db_session, repo_id=repo_id, message=text)
    await db_session.commit()

    reply = await client.get(
        f"/api/v1/repos/{repo_id}/search?mode=ask&q=what+tempo+changes+did+I+make",
        headers=auth_headers,
    )
    assert reply.status_code == 200

    payload = reply.json()
    assert payload["mode"] == "ask"
    assert any("tempo" in hit["message"].lower() for hit in payload["matches"])
745
746
747 # ---------------------------------------------------------------------------
748 # In-repo search — pattern mode
749 # ---------------------------------------------------------------------------
750
751
@pytest.mark.anyio
async def test_search_pattern_message(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Pattern mode finds a substring inside a commit message.

    Exactly one of the two seeded commits contains "Cm7", and its match must
    be attributed to the ``message`` source.
    """
    repo_id = await _make_search_repo(db_session)
    await db_session.commit()

    seeded_messages = (
        "add Cm7 chord voicing in bridge",
        "fix timing on verse drums",
    )
    for text in seeded_messages:
        await _make_search_commit(db_session, repo_id=repo_id, message=text)
    await db_session.commit()

    url = f"/api/v1/repos/{repo_id}/search?mode=pattern&q=Cm7"
    reply = await client.get(url, headers=auth_headers)
    assert reply.status_code == 200

    payload = reply.json()
    assert payload["mode"] == "pattern"

    hits = payload["matches"]
    assert len(hits) == 1
    assert "Cm7" in hits[0]["message"]
    assert hits[0]["matchSource"] == "message"
776
777
@pytest.mark.anyio
async def test_search_pattern_branch(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Pattern mode falls back to the branch name when the message has no hit.

    The only commit has message "rough cut" on branch
    ``feature/hip-hop-session``; searching "hip-hop" must return it with
    ``matchSource`` set to ``branch``.
    """
    repo_id = await _make_search_repo(db_session)
    await db_session.commit()

    await _make_search_commit(
        db_session,
        repo_id=repo_id,
        message="rough cut",
        branch="feature/hip-hop-session",
    )
    await db_session.commit()

    url = f"/api/v1/repos/{repo_id}/search?mode=pattern&q=hip-hop"
    reply = await client.get(url, headers=auth_headers)
    assert reply.status_code == 200

    payload = reply.json()
    assert payload["mode"] == "pattern"

    hits = payload["matches"]
    assert len(hits) == 1
    assert hits[0]["matchSource"] == "branch"
805
806
807 # ---------------------------------------------------------------------------
808 # In-repo search — date range filters
809 # ---------------------------------------------------------------------------
810
811
@pytest.mark.anyio
async def test_search_date_range_since(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """``since`` drops commits dated before the cutoff and keeps later ones.

    Commits dated 2024-01-01 and 2026-01-01 are seeded; with
    ``since=2025-06-01`` only the 2026 commit may be returned.
    """
    repo_id = await _make_search_repo(db_session)
    await db_session.commit()

    dated_commits = (
        ("old jazz commit", datetime(2024, 1, 1, tzinfo=timezone.utc)),
        ("new jazz commit", datetime(2026, 1, 1, tzinfo=timezone.utc)),
    )
    for text, when in dated_commits:
        await _make_search_commit(
            db_session, repo_id=repo_id, message=text, committed_at=when
        )
    await db_session.commit()

    reply = await client.get(
        f"/api/v1/repos/{repo_id}/search?mode=keyword&q=jazz&since=2025-06-01T00:00:00Z",
        headers=auth_headers,
    )
    assert reply.status_code == 200

    returned_messages = [hit["message"] for hit in reply.json()["matches"]]
    assert "old jazz commit" not in returned_messages
    assert "new jazz commit" in returned_messages
837
838
@pytest.mark.anyio
async def test_search_date_range_until(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """``until`` drops commits dated after the cutoff and keeps earlier ones.

    Commits dated 2024-01-01 and 2026-01-01 are seeded; with
    ``until=2025-06-01`` only the 2024 commit may be returned.
    """
    repo_id = await _make_search_repo(db_session)
    await db_session.commit()

    dated_commits = (
        ("old piano commit", datetime(2024, 1, 1, tzinfo=timezone.utc)),
        ("new piano commit", datetime(2026, 1, 1, tzinfo=timezone.utc)),
    )
    for text, when in dated_commits:
        await _make_search_commit(
            db_session, repo_id=repo_id, message=text, committed_at=when
        )
    await db_session.commit()

    reply = await client.get(
        f"/api/v1/repos/{repo_id}/search?mode=keyword&q=piano&until=2025-06-01T00:00:00Z",
        headers=auth_headers,
    )
    assert reply.status_code == 200

    returned_messages = [hit["message"] for hit in reply.json()["matches"]]
    assert "old piano commit" in returned_messages
    assert "new piano commit" not in returned_messages
864
865
866 # ---------------------------------------------------------------------------
867 # In-repo search — limit
868 # ---------------------------------------------------------------------------
869
870
@pytest.mark.anyio
async def test_search_limit_respected(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """``limit`` bounds how many matches are returned and is echoed back.

    Ten matching commits are seeded; with ``limit=3`` at most three matches
    may come back.
    """
    repo_id = await _make_search_repo(db_session)
    await db_session.commit()

    iteration = 0
    while iteration < 10:
        await _make_search_commit(
            db_session,
            repo_id=repo_id,
            message=f"bass groove iteration {iteration}",
        )
        iteration += 1
    await db_session.commit()

    url = f"/api/v1/repos/{repo_id}/search?mode=keyword&q=bass&limit=3"
    reply = await client.get(url, headers=auth_headers)
    assert reply.status_code == 200

    payload = reply.json()
    assert len(payload["matches"]) <= 3
    assert payload["limit"] == 3