gabriel / musehub public
test_musehub_search.py python
895 lines 30.0 KB
e893a97c Remove LLM, Storpheus, HuggingFace, and Qdrant from codebase Gabriel Cardona <gabriel@tellurstori.com> 6d ago
1 """Tests for MuseHub search endpoints.
2
3 Covers cross-repo global search:
4 - test_global_search_page_renders — GET /musehub/ui/search returns 200 HTML
5 - test_global_search_results_grouped — JSON results are grouped by repo
6 - test_global_search_public_only — private repos are excluded
7 - test_global_search_json — JSON content-type returned
8 - test_global_search_empty_query_handled — graceful response for empty result set
9 - test_global_search_accessible_without_auth — 200 without JWT (public endpoint)
10 - test_global_search_keyword_mode — keyword mode matches across message terms
11 - test_global_search_pattern_mode — pattern mode uses SQL LIKE
12 - test_global_search_pagination — page/page_size params respected
13
14 Covers in-repo search:
15 - test_search_page_renders — GET /musehub/ui/{owner}/{slug}/search → 200 HTML
16 - test_search_keyword_mode — keyword search returns matching commits
17 - test_search_keyword_empty_query — empty keyword query returns empty matches
18 - test_search_musical_property — musical property filter works
19 - test_search_natural_language — ask mode returns matching commits
20 - test_search_pattern_message — pattern matches commit message
21 - test_search_pattern_branch — pattern matches branch name
22 - test_search_json_response — JSON search endpoint returns SearchResponse shape
23 - test_search_date_range_since — since filter excludes old commits
24 - test_search_date_range_until — until filter excludes future commits
25 - test_search_invalid_mode — invalid mode returns 422
26 - test_search_unknown_repo — unknown repo_id returns 404
27 - test_search_requires_auth — unauthenticated request returns 401
28 - test_search_limit_respected — limit caps result count
29
30 All tests use the shared ``client`` and ``auth_headers`` fixtures from conftest.py.
31 """
32 from __future__ import annotations
33
34 import uuid
35 from datetime import datetime, timezone
36
37 import pytest
38 from httpx import AsyncClient
39 from sqlalchemy.ext.asyncio import AsyncSession
40
41 from musehub.db.musehub_models import MusehubCommit, MusehubObject, MusehubRepo
42 from musehub.muse_cli.models import MuseCliCommit, MuseCliSnapshot
43
44
45 # ---------------------------------------------------------------------------
46 # Helpers — global search (uses MusehubCommit / MusehubRepo directly)
47 # ---------------------------------------------------------------------------
48
49
async def _make_repo(
    db_session: AsyncSession,
    *,
    name: str = "test-repo",
    visibility: str = "public",
    owner: str = "test-owner",
) -> str:
    """Insert a ``MusehubRepo`` row and return its ``repo_id`` as a string.

    The slug is derived from ``name``: lower-cased, every run of characters
    outside ``[a-z0-9]`` collapsed to a single ``-``, stripped of ``-`` at both
    ends, truncated to 64 characters (and stripped again), falling back to
    ``"repo"`` when nothing remains.

    Note that ``owner`` is stored in ``owner_user_id``; the ``owner`` column
    is always set to ``"testuser"``.
    """
    import re

    hyphenated = re.sub(r"[^a-z0-9]+", "-", name.lower())
    slug = hyphenated.strip("-")[:64].strip("-")
    if not slug:
        slug = "repo"

    row = MusehubRepo(
        name=name,
        owner="testuser",
        slug=slug,
        visibility=visibility,
        owner_user_id=owner,
    )
    db_session.add(row)
    await db_session.commit()
    # Reload the row after commit before reading repo_id.
    await db_session.refresh(row)
    return str(row.repo_id)
65
66
async def _make_commit(
    db_session: AsyncSession,
    repo_id: str,
    *,
    commit_id: str,
    message: str,
    author: str = "alice",
    branch: str = "main",
) -> None:
    """Persist one parentless ``MusehubCommit`` in ``repo_id`` (global search fixtures).

    The commit is timestamped with the current UTC time and committed
    immediately.
    """
    now_utc = datetime.now(tz=timezone.utc)
    fields = {
        "commit_id": commit_id,
        "repo_id": repo_id,
        "branch": branch,
        "parent_ids": [],
        "message": message,
        "author": author,
        "timestamp": now_utc,
    }
    db_session.add(MusehubCommit(**fields))
    await db_session.commit()
88
89
90 # ---------------------------------------------------------------------------
91 # Helpers — in-repo search (uses MuseCliCommit / MuseCliSnapshot)
92 # ---------------------------------------------------------------------------
93
94
async def _make_search_repo(db: AsyncSession) -> str:
    """Create the fixed private repo used by the in-repo search tests.

    The repo is always ``testuser/search-test-repo``; its ``repo_id`` is
    returned as a string after the row has been committed and refreshed.
    """
    fields = {
        "name": "search-test-repo",
        "owner": "testuser",
        "slug": "search-test-repo",
        "visibility": "private",
        "owner_user_id": "test-owner",
    }
    row = MusehubRepo(**fields)
    db.add(row)
    await db.commit()
    await db.refresh(row)
    return str(row.repo_id)
108
109
async def _make_snapshot(db: AsyncSession, snapshot_id: str) -> None:
    """Add an empty-manifest ``MuseCliSnapshot`` and flush it (no commit).

    Commits created by ``_make_search_commit`` reference a snapshot id, so one
    is flushed first.
    """
    db.add(MuseCliSnapshot(snapshot_id=snapshot_id, manifest={}))
    await db.flush()
115
116
async def _make_search_commit(
    db: AsyncSession,
    *,
    repo_id: str,
    message: str,
    branch: str = "main",
    author: str = "test-author",
    committed_at: datetime | None = None,
) -> MuseCliCommit:
    """Flush a ``MuseCliCommit`` (plus its backing snapshot) and return it.

    Both the snapshot id and the commit id are random: the snapshot id is
    ``"snap-"`` followed by 16 hex characters, the commit id is 32 hex
    characters.  When ``committed_at`` is not supplied the current UTC time is
    used.  Nothing is committed here; callers commit the session themselves.
    """
    snapshot_id = "snap-" + uuid.uuid4().hex[:16]
    await _make_snapshot(db, snapshot_id)

    when = committed_at
    if when is None:
        when = datetime.now(timezone.utc)

    record = MuseCliCommit(
        commit_id=uuid.uuid4().hex,
        repo_id=repo_id,
        branch=branch,
        snapshot_id=snapshot_id,
        message=message,
        author=author,
        committed_at=when,
    )
    db.add(record)
    await db.flush()
    return record
141
142
143 # ---------------------------------------------------------------------------
144 # Global search — UI page
145 # ---------------------------------------------------------------------------
146
147
@pytest.mark.anyio
async def test_global_search_page_renders(
    client: AsyncClient,
    db_session: AsyncSession,
) -> None:
    """The global search UI page is served as HTML without credentials.

    Checks the status code, the content type, and that the page contains its
    title strings and the ids of the query input and mode selector.
    """
    resp = await client.get("/musehub/ui/search")

    assert resp.status_code == 200
    assert "text/html" in resp.headers["content-type"]

    html = resp.text
    for marker in ("Global Search", "Muse Hub", "q-input", "mode-sel"):
        assert marker in html
162
163
@pytest.mark.anyio
async def test_global_search_page_pre_fills_query(
    client: AsyncClient,
    db_session: AsyncSession,
) -> None:
    """A ``q=jazz`` query string is echoed somewhere in the rendered search page."""
    resp = await client.get("/musehub/ui/search?q=jazz&mode=keyword")

    assert resp.status_code == 200
    assert "jazz" in resp.text
174
175
176 # ---------------------------------------------------------------------------
177 # Global search — JSON API
178 # ---------------------------------------------------------------------------
179
180
@pytest.mark.anyio
async def test_global_search_accessible_without_auth(
    client: AsyncClient,
    db_session: AsyncSession,
) -> None:
    """The global search API answers 200 when no auth headers are sent.

    The request deliberately omits ``auth_headers``: global search is expected
    to be usable anonymously.
    """
    anonymous = await client.get("/api/v1/musehub/search?q=jazz")
    assert anonymous.status_code == 200
193
194
@pytest.mark.anyio
async def test_global_search_json(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """The global search API replies with a JSON body that echoes the query.

    The payload must expose ``groups`` and ``query``, and ``query`` must equal
    the submitted search term.
    """
    resp = await client.get("/api/v1/musehub/search?q=jazz", headers=auth_headers)

    assert resp.status_code == 200
    assert "application/json" in resp.headers["content-type"]

    payload = resp.json()
    for key in ("groups", "query"):
        assert key in payload
    assert payload["query"] == "jazz"
212
213
@pytest.mark.anyio
async def test_global_search_public_only(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Only the public repo shows up when both a public and a private repo match."""
    visible_repo = await _make_repo(db_session, name="public-beats", visibility="public")
    hidden_repo = await _make_repo(db_session, name="secret-beats", visibility="private")

    # Both commits contain "jazz", so visibility is the only differentiator.
    seeds = (
        (visible_repo, "pub001abc", "jazz groove session"),
        (hidden_repo, "priv001abc", "jazz private session"),
    )
    for target_repo, cid, msg in seeds:
        await _make_commit(db_session, target_repo, commit_id=cid, message=msg)

    resp = await client.get("/api/v1/musehub/search?q=jazz", headers=auth_headers)
    assert resp.status_code == 200

    returned_repos = {group["repoId"] for group in resp.json()["groups"]}
    assert visible_repo in returned_repos
    assert hidden_repo not in returned_repos
240
241
@pytest.mark.anyio
async def test_global_search_results_grouped(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Matches come back bucketed per repo, each bucket carrying repo metadata.

    Two commits are seeded in the first repo and one in the second; the first
    repo's group must report exactly two matches.
    """
    repo_a = await _make_repo(db_session, name="repo-alpha", visibility="public")
    repo_b = await _make_repo(db_session, name="repo-beta", visibility="public")

    seeds = (
        (repo_a, "a001abc123", "bossa nova rhythm"),
        (repo_a, "a002abc123", "bossa nova variation"),
        (repo_b, "b001abc123", "bossa nova groove"),
    )
    for target_repo, cid, msg in seeds:
        await _make_commit(db_session, target_repo, commit_id=cid, message=msg)

    resp = await client.get("/api/v1/musehub/search?q=bossa+nova", headers=auth_headers)
    assert resp.status_code == 200
    groups = resp.json()["groups"]

    seen_repo_ids = {entry["repoId"] for entry in groups}
    assert repo_a in seen_repo_ids
    assert repo_b in seen_repo_ids

    # repoSlug is included because the UI builds links from it.
    required_keys = (
        "repoId",
        "repoName",
        "repoOwner",
        "repoSlug",
        "repoVisibility",
        "matches",
        "totalMatches",
    )
    for entry in groups:
        for key in required_keys:
            assert key in entry
        assert isinstance(entry["matches"], list)
        assert isinstance(entry["repoSlug"], str)
        assert entry["repoSlug"] != ""

    alpha_group = next(entry for entry in groups if entry["repoId"] == repo_a)
    assert alpha_group["totalMatches"] == 2
    assert len(alpha_group["matches"]) == 2
289
290
@pytest.mark.anyio
async def test_global_search_empty_query_handled(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """A search term that hits nothing yields no groups but intact paging fields.

    Despite the test name, the query itself is non-empty; it is the *result
    set* that is empty.
    """
    await _make_repo(db_session, name="silent-repo", visibility="public")

    resp = await client.get(
        "/api/v1/musehub/search?q=zyxqwvutsr_no_match", headers=auth_headers
    )
    assert resp.status_code == 200

    payload = resp.json()
    assert payload["groups"] == []
    assert payload["page"] == 1
    assert "totalReposSearched" in payload
309
310
@pytest.mark.anyio
async def test_global_search_keyword_mode(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Keyword mode finds a commit regardless of the letter case of the term.

    The lower-case query ``blues`` must surface the commit whose message
    contains ``Blues``.
    """
    repo_id = await _make_repo(db_session, name="jazz-lab", visibility="public")
    for cid, msg in (
        ("kw001abcde", "Blues Shuffle in E"),
        ("kw002abcde", "Jazz Waltz Trio"),
    ):
        await _make_commit(db_session, repo_id, commit_id=cid, message=msg)

    resp = await client.get(
        "/api/v1/musehub/search?q=blues&mode=keyword", headers=auth_headers
    )
    assert resp.status_code == 200

    candidates = [g for g in resp.json()["groups"] if g["repoId"] == repo_id]
    group = candidates[0] if candidates else None
    assert group is not None

    found_messages = [hit["message"] for hit in group["matches"]]
    assert any("Blues" in text for text in found_messages)
336
337
@pytest.mark.anyio
async def test_global_search_pattern_mode(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Pattern mode with ``%minor%`` (URL-encoded) hits only the "minor" commit."""
    repo_id = await _make_repo(db_session, name="pattern-lab", visibility="public")
    for cid, msg in (
        ("pt001abcde", "minor pentatonic run"),
        ("pt002abcde", "major scale exercise"),
    ):
        await _make_commit(db_session, repo_id, commit_id=cid, message=msg)

    # %25 is the URL-encoded percent sign, so the server receives "%minor%".
    resp = await client.get(
        "/api/v1/musehub/search?q=%25minor%25&mode=pattern", headers=auth_headers
    )
    assert resp.status_code == 200

    candidates = [g for g in resp.json()["groups"] if g["repoId"] == repo_id]
    group = candidates[0] if candidates else None
    assert group is not None
    assert group["totalMatches"] == 1
    assert "minor" in group["matches"][0]["message"]
363
364
@pytest.mark.anyio
async def test_global_search_pagination(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """``page`` and ``page_size`` are honoured and echoed back by global search.

    Three public repos are seeded, each with one commit matching "paginate".
    Page 1 with ``page_size=2`` must return at most two groups and echo the
    paging parameters; page 2 must echo ``page == 2``.
    """
    # The created repo ids are not needed by any assertion below, so they are
    # not collected.
    for i in range(3):
        rid = await _make_repo(
            db_session, name=f"paged-repo-{i}", visibility="public", owner=f"owner-{i}"
        )
        await _make_commit(
            db_session, rid, commit_id=f"pg{i:03d}abcde", message="paginate funk groove"
        )

    first = await client.get(
        "/api/v1/musehub/search?q=paginate&page=1&page_size=2",
        headers=auth_headers,
    )
    assert first.status_code == 200
    first_page = first.json()
    assert len(first_page["groups"]) <= 2
    assert first_page["page"] == 1
    assert first_page["pageSize"] == 2

    second = await client.get(
        "/api/v1/musehub/search?q=paginate&page=2&page_size=2",
        headers=auth_headers,
    )
    assert second.status_code == 200
    assert second.json()["page"] == 2
399
400
@pytest.mark.anyio
async def test_global_search_match_contains_required_fields(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """A single match carries the seeded commit's id, message, author, branch and repo.

    A ``timestamp`` key must also be present; its value is not inspected.
    """
    repo_id = await _make_repo(db_session, name="fields-check", visibility="public")
    await _make_commit(
        db_session,
        repo_id,
        commit_id="fc001abcde",
        message="swing feel experiment",
        author="charlie",
        branch="main",
    )

    resp = await client.get("/api/v1/musehub/search?q=swing", headers=auth_headers)
    assert resp.status_code == 200

    candidates = [g for g in resp.json()["groups"] if g["repoId"] == repo_id]
    group = candidates[0] if candidates else None
    assert group is not None

    hit = group["matches"][0]
    assert hit["commitId"] == "fc001abcde"
    assert hit["message"] == "swing feel experiment"
    assert hit["author"] == "charlie"
    assert hit["branch"] == "main"
    assert "timestamp" in hit
    assert hit["repoId"] == repo_id
433
434
435 # ---------------------------------------------------------------------------
436 # Global search — audio preview batching
437 # ---------------------------------------------------------------------------
438
439
@pytest.mark.anyio
async def test_global_search_audio_preview_populated_for_multiple_repos(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Each repo's first match reports that repo's own audio object id.

    Two public repos each get one matching commit and one audio object
    (``.mp3`` and ``.ogg``).  The search response must attach the right
    ``audioObjectId`` to each repo's first match.

    This is an end-to-end regression check for audio-preview lookup across
    several repos at once; it verifies the resulting ids, not how many queries
    the server issues.
    """
    repo_a = await _make_repo(db_session, name="audio-repo-alpha", visibility="public")
    repo_b = await _make_repo(db_session, name="audio-repo-beta", visibility="public")

    for target_repo, cid, msg in (
        (repo_a, "ap001abcde", "funky groove jam"),
        (repo_b, "ap002abcde", "funky bass session"),
    ):
        await _make_commit(db_session, target_repo, commit_id=cid, message=msg)

    alpha_audio = MusehubObject(
        object_id="sha256:audio-preview-alpha",
        repo_id=repo_a,
        path="preview.mp3",
        size_bytes=1024,
        disk_path="/tmp/preview-alpha.mp3",
    )
    beta_audio = MusehubObject(
        object_id="sha256:audio-preview-beta",
        repo_id=repo_b,
        path="preview.ogg",
        size_bytes=2048,
        disk_path="/tmp/preview-beta.ogg",
    )
    for audio in (alpha_audio, beta_audio):
        db_session.add(audio)
    await db_session.commit()

    resp = await client.get("/api/v1/musehub/search?q=funky", headers=auth_headers)
    assert resp.status_code == 200

    by_repo = {group["repoId"]: group for group in resp.json()["groups"]}
    assert repo_a in by_repo
    assert repo_b in by_repo

    first_hit_a = by_repo[repo_a]["matches"][0]
    first_hit_b = by_repo[repo_b]["matches"][0]
    assert first_hit_a["audioObjectId"] == "sha256:audio-preview-alpha"
    assert first_hit_b["audioObjectId"] == "sha256:audio-preview-beta"
495
496
@pytest.mark.anyio
async def test_global_search_audio_preview_absent_when_no_audio_objects(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """With no audio objects seeded, a match's ``audioObjectId`` is ``null``."""
    repo_id = await _make_repo(db_session, name="no-audio-repo", visibility="public")
    await _make_commit(
        db_session, repo_id, commit_id="na001abcde", message="silent ambient piece"
    )

    resp = await client.get("/api/v1/musehub/search?q=silent", headers=auth_headers)
    assert resp.status_code == 200

    candidates = [g for g in resp.json()["groups"] if g["repoId"] == repo_id]
    group = candidates[0] if candidates else None
    assert group is not None
    assert group["matches"][0]["audioObjectId"] is None
518
519
520 # ---------------------------------------------------------------------------
521 # In-repo search — UI page
522 # ---------------------------------------------------------------------------
523
524
@pytest.mark.anyio
async def test_search_page_renders(
    client: AsyncClient,
    db_session: AsyncSession,
) -> None:
    """GET /musehub/ui/{owner}/{slug}/search returns 200 HTML with mode tabs.

    The page is addressed by owner and slug (``testuser/search-test-repo``, as
    seeded by ``_make_search_repo``), not by repo id, so the returned id is
    not needed here.  The body must contain the page titles, the four search
    mode labels and the ids of the since/until date inputs.
    """
    await _make_search_repo(db_session)

    response = await client.get("/musehub/ui/testuser/search-test-repo/search")
    assert response.status_code == 200
    assert "text/html" in response.headers["content-type"]

    body = response.text
    expected_fragments = (
        "Muse Hub",
        "Search Commits",
        "Keyword",
        "Natural Language",
        "Pattern",
        "Musical Properties",
        "inp-since",
        "inp-until",
    )
    for fragment in expected_fragments:
        assert fragment in body
544
545
@pytest.mark.anyio
async def test_search_page_no_auth_required(
    client: AsyncClient,
    db_session: AsyncSession,
) -> None:
    """The in-repo search UI page answers 200 when no auth headers are sent.

    The repo is seeded only so the owner/slug path resolves; its id is not
    used by the request.
    """
    await _make_search_repo(db_session)

    response = await client.get("/musehub/ui/testuser/search-test-repo/search")
    assert response.status_code == 200
555
556
557 # ---------------------------------------------------------------------------
558 # In-repo search — authentication
559 # ---------------------------------------------------------------------------
560
561
@pytest.mark.anyio
async def test_search_requires_auth(
    client: AsyncClient,
    db_session: AsyncSession,
) -> None:
    """The in-repo search API rejects a request without auth headers with 401."""
    repo_id = await _make_search_repo(db_session)

    url = f"/api/v1/musehub/repos/{repo_id}/search?mode=keyword&q=jazz"
    anonymous = await client.get(url)

    assert anonymous.status_code == 401
571
572
@pytest.mark.anyio
async def test_search_unknown_repo(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Searching inside a repo id that was never created yields 404."""
    url = "/api/v1/musehub/repos/does-not-exist/search?mode=keyword&q=test"
    resp = await client.get(url, headers=auth_headers)

    assert resp.status_code == 404
585
586
@pytest.mark.anyio
async def test_search_invalid_mode(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """An unrecognised ``mode`` value is rejected with 422."""
    repo_id = await _make_search_repo(db_session)

    url = f"/api/v1/musehub/repos/{repo_id}/search?mode=badmode&q=x"
    resp = await client.get(url, headers=auth_headers)

    assert resp.status_code == 422
600
601
602 # ---------------------------------------------------------------------------
603 # In-repo search — keyword mode
604 # ---------------------------------------------------------------------------
605
606
@pytest.mark.anyio
async def test_search_keyword_mode(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """A two-word keyword query surfaces the commit that mentions those words.

    The response must echo the mode and the decoded query (``+`` becomes a
    space) and contain at least one match mentioning "jazz".
    """
    repo_id = await _make_search_repo(db_session)
    await db_session.commit()

    for msg in (
        "dark jazz bassline in Dm",
        "classical piano intro section",
        "hip hop drum fill pattern",
    ):
        await _make_search_commit(db_session, repo_id=repo_id, message=msg)
    # _make_search_commit only flushes; commit the session before querying.
    await db_session.commit()

    resp = await client.get(
        f"/api/v1/musehub/repos/{repo_id}/search?mode=keyword&q=jazz+bassline",
        headers=auth_headers,
    )
    assert resp.status_code == 200

    payload = resp.json()
    assert payload["mode"] == "keyword"
    assert payload["query"] == "jazz bassline"
    assert any("jazz" in hit["message"].lower() for hit in payload["matches"])
631
632
@pytest.mark.anyio
async def test_search_keyword_empty_query(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """An empty ``q`` in keyword mode succeeds and matches nothing."""
    repo_id = await _make_search_repo(db_session)
    await db_session.commit()
    await _make_search_commit(db_session, repo_id=repo_id, message="some commit")
    await db_session.commit()

    resp = await client.get(
        f"/api/v1/musehub/repos/{repo_id}/search?mode=keyword&q=",
        headers=auth_headers,
    )
    assert resp.status_code == 200

    payload = resp.json()
    assert payload["mode"] == "keyword"
    assert payload["matches"] == []
653
654
@pytest.mark.anyio
async def test_search_json_response(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Search response has the expected SearchResponse JSON shape.

    Checks the top-level keys and then the keys of the first match.  A commit
    mentioning "piano" is seeded and the query is "piano", so at least one
    match is required; without that requirement the per-match checks would be
    skipped silently whenever the endpoint returned nothing.
    """
    repo_id = await _make_search_repo(db_session)
    await db_session.commit()
    await _make_search_commit(
        db_session, repo_id=repo_id, message="piano chord progression F Bb Eb"
    )
    await db_session.commit()

    response = await client.get(
        f"/api/v1/musehub/repos/{repo_id}/search?mode=keyword&q=piano",
        headers=auth_headers,
    )
    assert response.status_code == 200
    data = response.json()

    for key in ("mode", "query", "matches", "totalScanned", "limit"):
        assert key in data

    # Guard against a vacuous pass: the seeded commit must be found.
    assert data["matches"], "expected the seeded 'piano' commit to match"

    first_match = data["matches"][0]
    match_keys = (
        "commitId",
        "branch",
        "message",
        "author",
        "timestamp",
        "score",
        "matchSource",
    )
    for key in match_keys:
        assert key in first_match
689
690
691 # ---------------------------------------------------------------------------
692 # In-repo search — musical property mode
693 # ---------------------------------------------------------------------------
694
695
@pytest.mark.anyio
async def test_search_musical_property(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Property mode with ``harmony=Eb`` returns only commits mentioning "Eb".

    At least one match is required and every returned message must contain
    the harmony value.
    """
    repo_id = await _make_search_repo(db_session)
    await db_session.commit()

    for msg in ("add harmony=Eb bridge section", "drum groove tweak no harmony"):
        await _make_search_commit(db_session, repo_id=repo_id, message=msg)
    await db_session.commit()

    resp = await client.get(
        f"/api/v1/musehub/repos/{repo_id}/search?mode=property&harmony=Eb",
        headers=auth_headers,
    )
    assert resp.status_code == 200

    payload = resp.json()
    hits = payload["matches"]
    assert payload["mode"] == "property"
    assert len(hits) >= 1
    assert all("Eb" in hit["message"] for hit in hits)
719
720
721 # ---------------------------------------------------------------------------
722 # In-repo search — natural language (ask) mode
723 # ---------------------------------------------------------------------------
724
725
@pytest.mark.anyio
async def test_search_natural_language(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """A free-form question in ``ask`` mode finds the commit about tempo."""
    repo_id = await _make_search_repo(db_session)
    await db_session.commit()

    for msg in ("switched tempo to 140bpm for drop", "piano melody in minor key"):
        await _make_search_commit(db_session, repo_id=repo_id, message=msg)
    await db_session.commit()

    resp = await client.get(
        f"/api/v1/musehub/repos/{repo_id}/search?mode=ask&q=what+tempo+changes+did+I+make",
        headers=auth_headers,
    )
    assert resp.status_code == 200

    payload = resp.json()
    assert payload["mode"] == "ask"
    assert any("tempo" in hit["message"].lower() for hit in payload["matches"])
748
749
750 # ---------------------------------------------------------------------------
751 # In-repo search — pattern mode
752 # ---------------------------------------------------------------------------
753
754
@pytest.mark.anyio
async def test_search_pattern_message(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Pattern mode finds exactly the one commit whose message contains ``Cm7``.

    The match must be attributed to the message via ``matchSource``.
    """
    repo_id = await _make_search_repo(db_session)
    await db_session.commit()

    for msg in ("add Cm7 chord voicing in bridge", "fix timing on verse drums"):
        await _make_search_commit(db_session, repo_id=repo_id, message=msg)
    await db_session.commit()

    resp = await client.get(
        f"/api/v1/musehub/repos/{repo_id}/search?mode=pattern&q=Cm7",
        headers=auth_headers,
    )
    assert resp.status_code == 200

    payload = resp.json()
    hits = payload["matches"]
    assert payload["mode"] == "pattern"
    assert len(hits) == 1
    assert "Cm7" in hits[0]["message"]
    assert hits[0]["matchSource"] == "message"
779
780
@pytest.mark.anyio
async def test_search_pattern_branch(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Pattern mode falls back to the branch name when the message has no hit.

    The only commit has the message "rough cut" on a branch containing
    "hip-hop"; the single match must be attributed to the branch.
    """
    repo_id = await _make_search_repo(db_session)
    await db_session.commit()

    await _make_search_commit(
        db_session,
        repo_id=repo_id,
        message="rough cut",
        branch="feature/hip-hop-session",
    )
    await db_session.commit()

    resp = await client.get(
        f"/api/v1/musehub/repos/{repo_id}/search?mode=pattern&q=hip-hop",
        headers=auth_headers,
    )
    assert resp.status_code == 200

    payload = resp.json()
    hits = payload["matches"]
    assert payload["mode"] == "pattern"
    assert len(hits) == 1
    assert hits[0]["matchSource"] == "branch"
808
809
810 # ---------------------------------------------------------------------------
811 # In-repo search — date range filters
812 # ---------------------------------------------------------------------------
813
814
@pytest.mark.anyio
async def test_search_date_range_since(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """``since`` drops commits older than the cutoff and keeps newer ones.

    Commits dated 2024-01-01 and 2026-01-01 are seeded; with a cutoff of
    2025-06-01 only the later one may be returned.
    """
    repo_id = await _make_search_repo(db_session)
    await db_session.commit()

    dated_commits = (
        ("old jazz commit", datetime(2024, 1, 1, tzinfo=timezone.utc)),
        ("new jazz commit", datetime(2026, 1, 1, tzinfo=timezone.utc)),
    )
    for msg, when in dated_commits:
        await _make_search_commit(
            db_session, repo_id=repo_id, message=msg, committed_at=when
        )
    await db_session.commit()

    resp = await client.get(
        f"/api/v1/musehub/repos/{repo_id}/search?mode=keyword&q=jazz&since=2025-06-01T00:00:00Z",
        headers=auth_headers,
    )
    assert resp.status_code == 200

    returned_messages = [hit["message"] for hit in resp.json()["matches"]]
    assert all(text != "old jazz commit" for text in returned_messages)
    assert any(text == "new jazz commit" for text in returned_messages)
839 assert any(m["message"] == "new jazz commit" for m in data["matches"])
840
841
@pytest.mark.anyio
async def test_search_date_range_until(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """``until`` drops commits newer than the cutoff and keeps older ones.

    Commits dated 2024-01-01 and 2026-01-01 are seeded; with a cutoff of
    2025-06-01 only the earlier one may be returned.
    """
    repo_id = await _make_search_repo(db_session)
    await db_session.commit()

    dated_commits = (
        ("old piano commit", datetime(2024, 1, 1, tzinfo=timezone.utc)),
        ("new piano commit", datetime(2026, 1, 1, tzinfo=timezone.utc)),
    )
    for msg, when in dated_commits:
        await _make_search_commit(
            db_session, repo_id=repo_id, message=msg, committed_at=when
        )
    await db_session.commit()

    resp = await client.get(
        f"/api/v1/musehub/repos/{repo_id}/search?mode=keyword&q=piano&until=2025-06-01T00:00:00Z",
        headers=auth_headers,
    )
    assert resp.status_code == 200

    returned_messages = [hit["message"] for hit in resp.json()["matches"]]
    assert any(text == "old piano commit" for text in returned_messages)
    assert all(text != "new piano commit" for text in returned_messages)
867
868
869 # ---------------------------------------------------------------------------
870 # In-repo search — limit
871 # ---------------------------------------------------------------------------
872
873
@pytest.mark.anyio
async def test_search_limit_respected(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """``limit=3`` caps the match list at three and is echoed in the response.

    Ten commits mentioning "bass" are seeded so the cap is actually exercised.
    """
    repo_id = await _make_search_repo(db_session)
    await db_session.commit()

    seeded = 0
    while seeded < 10:
        i = seeded
        await _make_search_commit(
            db_session, repo_id=repo_id, message=f"bass groove iteration {i}"
        )
        seeded += 1
    await db_session.commit()

    resp = await client.get(
        f"/api/v1/musehub/repos/{repo_id}/search?mode=keyword&q=bass&limit=3",
        headers=auth_headers,
    )
    assert resp.status_code == 200

    payload = resp.json()
    assert len(payload["matches"]) <= 3
    assert payload["limit"] == 3