gabriel / musehub public
test_musehub_search.py python
1287 lines 43.1 KB
cd448303 Initial extraction of MuseHub from maestro monorepo. Gabriel Cardona <gabriel@tellurstori.com> 7d ago
1 """Tests for MuseHub search endpoints.
2
3 Covers semantic similarity search:
4 - GET /musehub/search/similar?commit={sha} returns ranked results
5 - Private repos are excluded from results (public_only enforced)
6 - 404 when commit SHA is not found
7 - 503 when Qdrant is unavailable
8 - Results are sorted by descending score
9 - Unauthenticated requests are allowed (optional token); an unknown commit still returns 404
10
11 Covers cross-repo global search:
12 - test_global_search_page_renders — GET /musehub/ui/search returns 200 HTML
13 - test_global_search_results_grouped — JSON results are grouped by repo
14 - test_global_search_public_only — private repos are excluded
15 - test_global_search_json — JSON content-type returned
16 - test_global_search_empty_query_handled — graceful response for empty result set
17 - test_global_search_accessible_without_auth — 200 without a JWT (public endpoint)
18 - test_global_search_keyword_mode — keyword mode matches across message terms
19 - test_global_search_pattern_mode — pattern mode uses SQL LIKE
20 - test_global_search_pagination — page/page_size params respected
21
22 Covers in-repo search:
23 - test_search_page_renders — GET /musehub/ui/{owner}/{repo_slug}/search → 200 HTML
24 - test_search_keyword_mode — keyword search returns matching commits
25 - test_search_keyword_empty_query — empty keyword query returns empty matches
26 - test_search_musical_property — musical property filter works
27 - test_search_natural_language — ask mode returns matching commits
28 - test_search_pattern_message — pattern matches commit message
29 - test_search_pattern_branch — pattern matches branch name
30 - test_search_json_response — JSON search endpoint returns SearchResponse shape
31 - test_search_date_range_since — since filter excludes old commits
32 - test_search_date_range_until — until filter excludes future commits
33 - test_search_invalid_mode — invalid mode returns 422
34 - test_search_unknown_repo — unknown repo_id returns 404
35 - test_search_requires_auth — unauthenticated request returns 401
36 - test_search_limit_respected — limit caps result count
37
38 All tests use the shared ``client`` and ``auth_headers`` fixtures from
39 conftest.py. Qdrant calls are mocked — no live vector database required.
40 """
41 from __future__ import annotations
42
43 import uuid
44 from datetime import datetime, timezone
45 from unittest.mock import MagicMock, patch
46
47 import pytest
48 from httpx import AsyncClient
49 from sqlalchemy.ext.asyncio import AsyncSession
50
51 from musehub.db.musehub_models import MusehubCommit, MusehubObject, MusehubRepo
52 from musehub.main import app
53 from musehub.muse_cli.models import MuseCliCommit, MuseCliSnapshot
54 from musehub.services.musehub_qdrant import SimilarCommitResult, get_qdrant_client
55
56
57 # ---------------------------------------------------------------------------
58 # Helpers — similarity search
59 # ---------------------------------------------------------------------------
60
61
def _make_similar_result(
    commit_id: str,
    repo_id: str = "repo-pub",
    score: float = 0.9,
    branch: str = "main",
    author: str = "composer",
) -> SimilarCommitResult:
    """Build a ``SimilarCommitResult`` to use as a canned Qdrant hit.

    Only ``commit_id`` is mandatory; the remaining fields fall back to
    defaults so individual tests override just what they care about.
    """
    fields = {
        "commit_id": commit_id,
        "repo_id": repo_id,
        "score": score,
        "branch": branch,
        "author": author,
    }
    return SimilarCommitResult(**fields)
76
77
78 # ---------------------------------------------------------------------------
79 # Helpers — global search (uses MusehubCommit / MusehubRepo directly)
80 # ---------------------------------------------------------------------------
81
82
async def _make_repo(
    db_session: AsyncSession,
    *,
    name: str = "test-repo",
    visibility: str = "public",
    owner: str = "test-owner",
) -> str:
    """Insert a ``MusehubRepo`` row and return its ``repo_id`` as a string.

    The slug is derived from ``name``: lower-cased, every run of characters
    outside ``[a-z0-9]`` collapsed to a single hyphen, trimmed of hyphens,
    capped at 64 characters, and defaulting to ``"repo"`` when nothing is left.

    Note that ``owner`` is stored in the ``owner_user_id`` column; the row's
    ``owner`` column is always ``"testuser"``.
    """
    import re as _re

    hyphenated = _re.sub(r"[^a-z0-9]+", "-", name.lower())
    # Trim again after truncating: the 64-char cut may land on a hyphen.
    trimmed = hyphenated.strip("-")[:64].strip("-")
    slug = trimmed if trimmed else "repo"

    record = MusehubRepo(
        name=name,
        owner="testuser",
        slug=slug,
        visibility=visibility,
        owner_user_id=owner,
    )
    db_session.add(record)
    await db_session.commit()
    # Refresh so the generated repo_id is loaded onto the instance.
    await db_session.refresh(record)
    return str(record.repo_id)
98
99
async def _make_commit(
    db_session: AsyncSession,
    repo_id: str,
    *,
    commit_id: str,
    message: str,
    author: str = "alice",
    branch: str = "main",
) -> None:
    """Insert a parentless ``MusehubCommit`` into ``repo_id`` and commit it.

    The commit is timestamped with the current UTC time. Used by the
    global-search tests to give repos searchable commit messages.
    """
    now_utc = datetime.now(tz=timezone.utc)
    row = MusehubCommit(
        commit_id=commit_id,
        repo_id=repo_id,
        branch=branch,
        parent_ids=[],
        message=message,
        author=author,
        timestamp=now_utc,
    )
    db_session.add(row)
    await db_session.commit()
121
122
123 # ---------------------------------------------------------------------------
124 # Helpers — in-repo search (uses MuseCliCommit / MuseCliSnapshot)
125 # ---------------------------------------------------------------------------
126
127
async def _make_search_repo(db: AsyncSession) -> str:
    """Insert the fixed private repo used by in-repo search tests.

    The row is always ``testuser/search-test-repo`` with ``private``
    visibility; the generated ``repo_id`` is returned as a string.
    """
    attrs = {
        "name": "search-test-repo",
        "owner": "testuser",
        "slug": "search-test-repo",
        "visibility": "private",
        "owner_user_id": "test-owner",
    }
    record = MusehubRepo(**attrs)
    db.add(record)
    await db.commit()
    await db.refresh(record)
    return str(record.repo_id)
141
142
async def _make_snapshot(db: AsyncSession, snapshot_id: str) -> None:
    """Add an empty-manifest ``MuseCliSnapshot`` and flush it.

    Exists so a ``MuseCliCommit`` referencing ``snapshot_id`` satisfies its
    foreign-key constraint. Only flushes; the caller decides when to commit.
    """
    snapshot = MuseCliSnapshot(snapshot_id=snapshot_id, manifest={})
    db.add(snapshot)
    await db.flush()
148
149
async def _make_search_commit(
    db: AsyncSession,
    *,
    repo_id: str,
    message: str,
    branch: str = "main",
    author: str = "test-author",
    committed_at: datetime | None = None,
) -> MuseCliCommit:
    """Add a ``MuseCliCommit`` (plus its backing snapshot) and flush it.

    A fresh snapshot with a random ``snap-<16 hex chars>`` id is created
    first, then a commit with a random 32-hex-char id pointing at it.
    ``committed_at`` defaults to the current UTC time. Nothing is committed
    here; the caller is expected to commit the session.
    """
    snap_id = "snap-" + uuid.uuid4().hex[:16]
    await _make_snapshot(db, snap_id)

    when = committed_at or datetime.now(timezone.utc)
    record = MuseCliCommit(
        commit_id=uuid.uuid4().hex,
        repo_id=repo_id,
        branch=branch,
        snapshot_id=snap_id,
        message=message,
        author=author,
        committed_at=when,
    )
    db.add(record)
    await db.flush()
    return record
174
175
176 # ---------------------------------------------------------------------------
177 # Similarity search — authentication
178 # ---------------------------------------------------------------------------
179
180
@pytest.mark.anyio
async def test_similar_search_unknown_commit_returns_404_without_auth(
    client: AsyncClient,
) -> None:
    """An anonymous similarity search for an unknown commit yields 404.

    No auth headers are sent: the endpoint uses an optional token, so the
    request reaches the commit lookup and fails with 404 rather than 401.
    """
    url = "/api/v1/musehub/search/similar?commit=non-existent-commit-id"
    anonymous_resp = await client.get(url)
    assert anonymous_resp.status_code == 404
191
192
193 # ---------------------------------------------------------------------------
194 # Similarity search — 404 for unknown commit
195 # ---------------------------------------------------------------------------
196
197
@pytest.mark.anyio
async def test_similar_search_returns_404_for_unknown_commit(
    client: AsyncClient,
    auth_headers: dict[str, str],
) -> None:
    """An authenticated search for a missing commit SHA returns 404.

    The error ``detail`` must mention the SHA that was looked up.
    """
    url = "/api/v1/musehub/search/similar?commit=nonexistent-sha"
    not_found = await client.get(url, headers=auth_headers)
    assert not_found.status_code == 404
    detail = not_found.json()["detail"]
    assert "nonexistent-sha" in detail
210
211
212 # ---------------------------------------------------------------------------
213 # Similarity search — successful search
214 # ---------------------------------------------------------------------------
215
216
@pytest.mark.anyio
async def test_similar_search_returns_results(
    client: AsyncClient,
    auth_headers: dict[str, str],
) -> None:
    """A similarity search for a pushed commit returns the mocked hits, ranked.

    Flow: create a public repo, push one commit (embedding is patched out),
    override the Qdrant dependency with a mock returning two hits, then
    check the response echoes the query commit and lists both hits in
    descending score order.
    """
    repo_payload = {"name": "jazz-ballad", "owner": "testuser", "visibility": "public"}
    create_resp = await client.post(
        "/api/v1/musehub/repos",
        json=repo_payload,
        headers=auth_headers,
    )
    assert create_resp.status_code == 201
    repo_id = create_resp.json()["repoId"]

    commit_id = "abc-jazz-001"
    push_body = {
        "branch": "main",
        "headCommitId": commit_id,
        "commits": [
            {
                "commitId": commit_id,
                "parentIds": [],
                "message": "Jazz ballad in Db major at 72 BPM",
                "timestamp": "2024-01-01T00:00:00Z",
            }
        ],
        "objects": [],
        "force": False,
    }
    # Embedding on push is patched out so no vector backend is needed.
    with patch("musehub.api.routes.musehub.sync.embed_push_commits"):
        push_resp = await client.post(
            f"/api/v1/musehub/repos/{repo_id}/push",
            json=push_body,
            headers=auth_headers,
        )
    assert push_resp.status_code == 200

    fake_qdrant = MagicMock()
    fake_qdrant.search_similar.return_value = [
        _make_similar_result("similar-001", score=0.95),
        _make_similar_result("similar-002", score=0.87),
    ]
    app.dependency_overrides[get_qdrant_client] = lambda: fake_qdrant
    try:
        search_resp = await client.get(
            f"/api/v1/musehub/search/similar?commit={commit_id}&limit=5",
            headers=auth_headers,
        )
    finally:
        # Always drop the override so it cannot leak into other tests.
        app.dependency_overrides.pop(get_qdrant_client, None)

    assert search_resp.status_code == 200
    payload = search_resp.json()
    assert payload["queryCommit"] == commit_id
    hits = payload["results"]
    assert len(hits) == 2
    first, second = hits[0], hits[1]
    assert first["score"] >= second["score"]
273
274
@pytest.mark.anyio
async def test_similar_search_public_only_enforced(
    client: AsyncClient,
    auth_headers: dict[str, str],
) -> None:
    """The endpoint calls ``search_similar`` with ``public_only=True``.

    The Qdrant client is mocked, so the exclusion of private commits is
    verified by inspecting the keyword arguments passed to the mock.
    """
    create_resp = await client.post(
        "/api/v1/musehub/repos",
        json={"name": "public-jazz", "owner": "testuser", "visibility": "public"},
        headers=auth_headers,
    )
    assert create_resp.status_code == 201
    repo_id = create_resp.json()["repoId"]
    commit_id = "pub-commit-001"

    # Embedding on push is patched out so no vector backend is needed.
    with patch("musehub.api.routes.musehub.sync.embed_push_commits"):
        push_resp = await client.post(
            f"/api/v1/musehub/repos/{repo_id}/push",
            json={
                "branch": "main",
                "headCommitId": commit_id,
                "commits": [
                    {
                        "commitId": commit_id,
                        "parentIds": [],
                        "message": "C major 120 BPM",
                        "timestamp": "2024-01-01T00:00:00Z",
                    }
                ],
                "objects": [],
                "force": False,
            },
            headers=auth_headers,
        )
    # Fail here, not later with a confusing 404, if the commit was never stored.
    assert push_resp.status_code == 200

    mock_qdrant = MagicMock()
    mock_qdrant.search_similar.return_value = []
    app.dependency_overrides[get_qdrant_client] = lambda: mock_qdrant
    try:
        resp = await client.get(
            f"/api/v1/musehub/search/similar?commit={commit_id}",
            headers=auth_headers,
        )
    finally:
        # Always drop the override so it cannot leak into other tests.
        app.dependency_overrides.pop(get_qdrant_client, None)

    assert resp.status_code == 200
    # call_args is None when the mock was never invoked; check that first.
    assert mock_qdrant.search_similar.called
    call_kwargs = mock_qdrant.search_similar.call_args.kwargs
    assert call_kwargs.get("public_only") is True
324
325
@pytest.mark.anyio
async def test_similar_search_excludes_query_commit(
    client: AsyncClient,
    auth_headers: dict[str, str],
) -> None:
    """The query commit is passed as ``exclude_commit_id`` to avoid a self-match.

    Every setup step is asserted so that a failure in repo creation, push or
    the search request itself is reported directly instead of surfacing as an
    ``AttributeError`` on an uncalled mock.
    """
    create_resp = await client.post(
        "/api/v1/musehub/repos",
        json={"name": "self-exclude-test", "owner": "testuser", "visibility": "public"},
        headers=auth_headers,
    )
    assert create_resp.status_code == 201
    repo_id = create_resp.json()["repoId"]
    commit_id = "self-excl-001"

    # Embedding on push is patched out so no vector backend is needed.
    with patch("musehub.api.routes.musehub.sync.embed_push_commits"):
        push_resp = await client.post(
            f"/api/v1/musehub/repos/{repo_id}/push",
            json={
                "branch": "main",
                "headCommitId": commit_id,
                "commits": [
                    {
                        "commitId": commit_id,
                        "parentIds": [],
                        "message": "G minor 85 BPM",
                        "timestamp": "2024-01-01T00:00:00Z",
                    }
                ],
                "objects": [],
                "force": False,
            },
            headers=auth_headers,
        )
    assert push_resp.status_code == 200

    mock_qdrant = MagicMock()
    mock_qdrant.search_similar.return_value = []
    app.dependency_overrides[get_qdrant_client] = lambda: mock_qdrant
    try:
        resp = await client.get(
            f"/api/v1/musehub/search/similar?commit={commit_id}",
            headers=auth_headers,
        )
    finally:
        # Always drop the override so it cannot leak into other tests.
        app.dependency_overrides.pop(get_qdrant_client, None)

    assert resp.status_code == 200
    # call_args is None when the mock was never invoked; check that first.
    assert mock_qdrant.search_similar.called
    call_kwargs = mock_qdrant.search_similar.call_args.kwargs
    assert call_kwargs.get("exclude_commit_id") == commit_id
373
374
@pytest.mark.anyio
async def test_similar_search_503_when_qdrant_unavailable(
    client: AsyncClient,
    auth_headers: dict[str, str],
) -> None:
    """503 is returned when the Qdrant client raises during the search.

    The mocked ``search_similar`` raises ``ConnectionError``. Setup responses
    are asserted so that a failed repo creation or push is reported as such
    rather than as an unrelated status-code mismatch at the end.
    """
    create_resp = await client.post(
        "/api/v1/musehub/repos",
        json={"name": "qdrant-fail-test", "owner": "testuser", "visibility": "public"},
        headers=auth_headers,
    )
    assert create_resp.status_code == 201
    repo_id = create_resp.json()["repoId"]
    commit_id = "qdrant-fail-001"

    # Embedding on push is patched out so no vector backend is needed.
    with patch("musehub.api.routes.musehub.sync.embed_push_commits"):
        push_resp = await client.post(
            f"/api/v1/musehub/repos/{repo_id}/push",
            json={
                "branch": "main",
                "headCommitId": commit_id,
                "commits": [
                    {
                        "commitId": commit_id,
                        "parentIds": [],
                        "message": "A minor 95 BPM",
                        "timestamp": "2024-01-01T00:00:00Z",
                    }
                ],
                "objects": [],
                "force": False,
            },
            headers=auth_headers,
        )
    assert push_resp.status_code == 200

    mock_qdrant = MagicMock()
    mock_qdrant.search_similar.side_effect = ConnectionError("Qdrant down")
    app.dependency_overrides[get_qdrant_client] = lambda: mock_qdrant
    try:
        resp = await client.get(
            f"/api/v1/musehub/search/similar?commit={commit_id}",
            headers=auth_headers,
        )
    finally:
        # Always drop the override so it cannot leak into other tests.
        app.dependency_overrides.pop(get_qdrant_client, None)

    assert resp.status_code == 503
421
422
@pytest.mark.anyio
async def test_similar_search_limit_respected(
    client: AsyncClient,
    auth_headers: dict[str, str],
) -> None:
    """The ``limit`` query parameter is forwarded to ``search_similar``.

    Setup and search responses are asserted so a failure is reported at the
    step that broke rather than as an ``AttributeError`` on an uncalled mock.
    """
    create_resp = await client.post(
        "/api/v1/musehub/repos",
        json={"name": "limit-test", "owner": "testuser", "visibility": "public"},
        headers=auth_headers,
    )
    assert create_resp.status_code == 201
    repo_id = create_resp.json()["repoId"]
    commit_id = "limit-test-001"

    # Embedding on push is patched out so no vector backend is needed.
    with patch("musehub.api.routes.musehub.sync.embed_push_commits"):
        push_resp = await client.post(
            f"/api/v1/musehub/repos/{repo_id}/push",
            json={
                "branch": "main",
                "headCommitId": commit_id,
                "commits": [
                    {
                        "commitId": commit_id,
                        "parentIds": [],
                        "message": "E major 140 BPM",
                        "timestamp": "2024-01-01T00:00:00Z",
                    }
                ],
                "objects": [],
                "force": False,
            },
            headers=auth_headers,
        )
    assert push_resp.status_code == 200

    mock_qdrant = MagicMock()
    mock_qdrant.search_similar.return_value = []
    app.dependency_overrides[get_qdrant_client] = lambda: mock_qdrant
    try:
        resp = await client.get(
            f"/api/v1/musehub/search/similar?commit={commit_id}&limit=3",
            headers=auth_headers,
        )
    finally:
        # Always drop the override so it cannot leak into other tests.
        app.dependency_overrides.pop(get_qdrant_client, None)

    assert resp.status_code == 200
    # call_args is None when the mock was never invoked; check that first.
    assert mock_qdrant.search_similar.called
    call_kwargs = mock_qdrant.search_similar.call_args.kwargs
    assert call_kwargs.get("limit") == 3
470
471
472 # ---------------------------------------------------------------------------
473 # Similarity search — DI override (regression)
474 # ---------------------------------------------------------------------------
475
476
@pytest.mark.anyio
async def test_similar_search_qdrant_injected_via_dependency_override(
    client: AsyncClient,
    auth_headers: dict[str, str],
) -> None:
    """Qdrant client is injected via FastAPI dependency_overrides, not module patching.

    Regression test: confirms that ``get_qdrant_client`` is a proper FastAPI
    dependency, so a test can swap it through ``app.dependency_overrides``
    and see its mock's results come back from the endpoint.
    """
    create_resp = await client.post(
        "/api/v1/musehub/repos",
        json={"name": "di-test-repo", "owner": "testuser", "visibility": "public"},
        headers=auth_headers,
    )
    assert create_resp.status_code == 201
    repo_id = create_resp.json()["repoId"]
    commit_id = "di-test-commit-001"

    # Embedding on push is patched out so no vector backend is needed.
    with patch("musehub.api.routes.musehub.sync.embed_push_commits"):
        push_resp = await client.post(
            f"/api/v1/musehub/repos/{repo_id}/push",
            json={
                "branch": "main",
                "headCommitId": commit_id,
                "commits": [
                    {
                        "commitId": commit_id,
                        "parentIds": [],
                        "message": "DI refactor test commit in Cmaj",
                        "timestamp": "2024-01-01T00:00:00Z",
                    }
                ],
                "objects": [],
                "force": False,
            },
            headers=auth_headers,
        )
    # Fail here, not later with a confusing 404, if the commit was never stored.
    assert push_resp.status_code == 200

    mock_qdrant = MagicMock()
    mock_qdrant.search_similar.return_value = [_make_similar_result("di-result-001", score=0.88)]
    app.dependency_overrides[get_qdrant_client] = lambda: mock_qdrant
    try:
        resp = await client.get(
            f"/api/v1/musehub/search/similar?commit={commit_id}&limit=5",
            headers=auth_headers,
        )
    finally:
        # Always drop the override so it cannot leak into other tests.
        app.dependency_overrides.pop(get_qdrant_client, None)

    assert resp.status_code == 200
    data = resp.json()
    assert data["queryCommit"] == commit_id
    assert len(data["results"]) == 1
    assert data["results"][0]["commitId"] == "di-result-001"
    assert mock_qdrant.search_similar.called
533
534
535 # ---------------------------------------------------------------------------
536 # Global search — UI page
537 # ---------------------------------------------------------------------------
538
539
@pytest.mark.anyio
async def test_global_search_page_renders(
    client: AsyncClient,
    db_session: AsyncSession,
) -> None:
    """The global search UI page is served as HTML without credentials.

    Checks status 200, an HTML content type, and the presence of the page
    title text plus the ``q-input`` and ``mode-sel`` form markers.
    """
    page = await client.get("/musehub/ui/search")
    assert page.status_code == 200
    assert "text/html" in page.headers["content-type"]
    html = page.text
    for marker in ("Global Search", "Muse Hub", "q-input", "mode-sel"):
        assert marker in html
554
555
@pytest.mark.anyio
async def test_global_search_page_pre_fills_query(
    client: AsyncClient,
    db_session: AsyncSession,
) -> None:
    """Requesting the search page with ``q=jazz`` echoes 'jazz' in the HTML."""
    page = await client.get("/musehub/ui/search?q=jazz&mode=keyword")
    assert page.status_code == 200
    html = page.text
    assert "jazz" in html
566
567
568 # ---------------------------------------------------------------------------
569 # Global search — JSON API
570 # ---------------------------------------------------------------------------
571
572
@pytest.mark.anyio
async def test_global_search_accessible_without_auth(
    client: AsyncClient,
    db_session: AsyncSession,
) -> None:
    """The global search API answers 200 when no JWT is supplied.

    Global search is a public endpoint (optional token), so an anonymous
    request must succeed instead of being rejected.
    """
    anonymous_resp = await client.get("/api/v1/musehub/search?q=jazz")
    assert anonymous_resp.status_code == 200
585
586
@pytest.mark.anyio
async def test_global_search_json(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """The global search API responds with JSON that echoes the query.

    The body must carry ``groups`` and ``query`` keys, with ``query`` equal
    to the submitted search term.
    """
    resp = await client.get("/api/v1/musehub/search?q=jazz", headers=auth_headers)
    assert resp.status_code == 200
    assert "application/json" in resp.headers["content-type"]
    payload = resp.json()
    for key in ("groups", "query"):
        assert key in payload
    assert payload["query"] == "jazz"
604
605
@pytest.mark.anyio
async def test_global_search_public_only(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Global search lists matching public repos and hides private ones.

    Both a public and a private repo get a commit containing "jazz"; only
    the public repo's id may appear among the result groups.
    """
    public_id = await _make_repo(db_session, name="public-beats", visibility="public")
    private_id = await _make_repo(db_session, name="secret-beats", visibility="private")

    await _make_commit(
        db_session,
        public_id,
        commit_id="pub001abc",
        message="jazz groove session",
    )
    await _make_commit(
        db_session,
        private_id,
        commit_id="priv001abc",
        message="jazz private session",
    )

    resp = await client.get("/api/v1/musehub/search?q=jazz", headers=auth_headers)
    assert resp.status_code == 200
    payload = resp.json()
    seen_repo_ids = {group["repoId"] for group in payload["groups"]}
    assert public_id in seen_repo_ids
    assert private_id not in seen_repo_ids
632
633
@pytest.mark.anyio
async def test_global_search_results_grouped(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Search hits come back grouped per repo with the expected group shape.

    Two public repos are seeded (two matching commits in the first, one in
    the second). Every returned group must expose the repo metadata keys,
    a ``matches`` list and a non-empty string ``repoSlug``; the first repo's
    group must report exactly two matches.
    """
    repo_a = await _make_repo(db_session, name="repo-alpha", visibility="public")
    repo_b = await _make_repo(db_session, name="repo-beta", visibility="public")

    seed_commits = (
        (repo_a, "a001abc123", "bossa nova rhythm"),
        (repo_a, "a002abc123", "bossa nova variation"),
        (repo_b, "b001abc123", "bossa nova groove"),
    )
    for target_repo, cid, msg in seed_commits:
        await _make_commit(db_session, target_repo, commit_id=cid, message=msg)

    resp = await client.get(
        "/api/v1/musehub/search?q=bossa+nova",
        headers=auth_headers,
    )
    assert resp.status_code == 200
    groups = resp.json()["groups"]

    group_repo_ids = {entry["repoId"] for entry in groups}
    assert repo_a in group_repo_ids
    assert repo_b in group_repo_ids

    required_keys = (
        "repoId",
        "repoName",
        "repoOwner",
        "repoSlug",  # PR #282: slug required for UI link construction
        "repoVisibility",
        "matches",
        "totalMatches",
    )
    for entry in groups:
        for key in required_keys:
            assert key in entry
        assert isinstance(entry["matches"], list)
        assert isinstance(entry["repoSlug"], str)
        assert entry["repoSlug"] != ""

    group_a = next(entry for entry in groups if entry["repoId"] == repo_a)
    assert group_a["totalMatches"] == 2
    assert len(group_a["matches"]) == 2
681
682
@pytest.mark.anyio
async def test_global_search_empty_query_handled(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """A search with no hits returns an empty group list and page metadata.

    A public repo exists but holds no commits, and the query term matches
    nothing; the response must still be 200 with ``groups == []``,
    ``page == 1`` and a ``totalReposSearched`` field.
    """
    await _make_repo(db_session, name="silent-repo", visibility="public")

    resp = await client.get(
        "/api/v1/musehub/search?q=zyxqwvutsr_no_match",
        headers=auth_headers,
    )
    assert resp.status_code == 200
    payload = resp.json()
    assert payload["groups"] == []
    assert payload["page"] == 1
    assert "totalReposSearched" in payload
701
702
@pytest.mark.anyio
async def test_global_search_keyword_mode(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Keyword mode finds a commit regardless of the term's letter case.

    The lower-case query ``blues`` must surface the repo whose commit
    message contains "Blues".
    """
    repo_id = await _make_repo(db_session, name="jazz-lab", visibility="public")
    for cid, msg in (
        ("kw001abcde", "Blues Shuffle in E"),
        ("kw002abcde", "Jazz Waltz Trio"),
    ):
        await _make_commit(db_session, repo_id, commit_id=cid, message=msg)

    resp = await client.get(
        "/api/v1/musehub/search?q=blues&mode=keyword",
        headers=auth_headers,
    )
    assert resp.status_code == 200
    payload = resp.json()
    group = next(
        (entry for entry in payload["groups"] if entry["repoId"] == repo_id),
        None,
    )
    assert group is not None
    matched_messages = [hit["message"] for hit in group["matches"]]
    assert any("Blues" in text for text in matched_messages)
728
729
@pytest.mark.anyio
async def test_global_search_pattern_mode(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Pattern mode treats the query as a SQL LIKE pattern over messages.

    The URL-encoded pattern ``%minor%`` must match exactly one of the two
    seeded commits — the one whose message contains "minor".
    """
    repo_id = await _make_repo(db_session, name="pattern-lab", visibility="public")
    for cid, msg in (
        ("pt001abcde", "minor pentatonic run"),
        ("pt002abcde", "major scale exercise"),
    ):
        await _make_commit(db_session, repo_id, commit_id=cid, message=msg)

    # %25 is the URL-encoded form of the LIKE wildcard "%".
    resp = await client.get(
        "/api/v1/musehub/search?q=%25minor%25&mode=pattern",
        headers=auth_headers,
    )
    assert resp.status_code == 200
    payload = resp.json()
    group = next(
        (entry for entry in payload["groups"] if entry["repoId"] == repo_id),
        None,
    )
    assert group is not None
    assert group["totalMatches"] == 1
    only_match = group["matches"][0]
    assert "minor" in only_match["message"]
755
756
@pytest.mark.anyio
async def test_global_search_pagination(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """``page`` and ``page_size`` parameters control repo-group pagination.

    Three public repos each get one commit matching "paginate". Page 1 with
    ``page_size=2`` must return at most two groups and echo the paging
    parameters; page 2 must be served and echo ``page == 2``.
    """
    for i in range(3):
        rid = await _make_repo(
            db_session,
            name=f"paged-repo-{i}",
            visibility="public",
            owner=f"owner-{i}",
        )
        await _make_commit(
            db_session,
            rid,
            commit_id=f"pg{i:03d}abcde",
            message="paginate funk groove",
        )

    response = await client.get(
        "/api/v1/musehub/search?q=paginate&page=1&page_size=2",
        headers=auth_headers,
    )
    assert response.status_code == 200
    data = response.json()
    assert len(data["groups"]) <= 2
    assert data["page"] == 1
    assert data["pageSize"] == 2

    response2 = await client.get(
        "/api/v1/musehub/search?q=paginate&page=2&page_size=2",
        headers=auth_headers,
    )
    assert response2.status_code == 200
    data2 = response2.json()
    assert data2["page"] == 2
791
792
@pytest.mark.anyio
async def test_global_search_match_contains_required_fields(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """A match entry carries the commit's identifying fields.

    The first match for the seeded commit must report its ``commitId``,
    ``message``, ``author``, ``branch`` and ``repoId``, and include a
    ``timestamp`` key.
    """
    repo_id = await _make_repo(db_session, name="fields-check", visibility="public")
    await _make_commit(
        db_session,
        repo_id,
        commit_id="fc001abcde",
        message="swing feel experiment",
        author="charlie",
        branch="main",
    )

    resp = await client.get("/api/v1/musehub/search?q=swing", headers=auth_headers)
    assert resp.status_code == 200
    payload = resp.json()
    group = next(
        (entry for entry in payload["groups"] if entry["repoId"] == repo_id),
        None,
    )
    assert group is not None

    first_match = group["matches"][0]
    assert first_match["commitId"] == "fc001abcde"
    assert first_match["message"] == "swing feel experiment"
    assert first_match["author"] == "charlie"
    assert first_match["branch"] == "main"
    assert "timestamp" in first_match
    assert first_match["repoId"] == repo_id
825
826
827 # ---------------------------------------------------------------------------
828 # Global search — audio preview batching
829 # ---------------------------------------------------------------------------
830
831
@pytest.mark.anyio
async def test_global_search_audio_preview_populated_for_multiple_repos(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Each repo's first match reports that repo's own audio object id.

    Two public repos each get one matching commit and one audio object
    (``preview.mp3`` and ``preview.ogg``). The search response must attach
    the right ``audioObjectId`` to each repo's first match.

    Regression test for the N+1 audio-preview lookup, which was replaced by
    a batched query; this checks the end-to-end result for several repos.
    """
    repo_a = await _make_repo(db_session, name="audio-repo-alpha", visibility="public")
    repo_b = await _make_repo(db_session, name="audio-repo-beta", visibility="public")

    await _make_commit(
        db_session,
        repo_a,
        commit_id="ap001abcde",
        message="funky groove jam",
    )
    await _make_commit(
        db_session,
        repo_b,
        commit_id="ap002abcde",
        message="funky bass session",
    )

    audio_objects = (
        MusehubObject(
            object_id="sha256:audio-preview-alpha",
            repo_id=repo_a,
            path="preview.mp3",
            size_bytes=1024,
            disk_path="/tmp/preview-alpha.mp3",
        ),
        MusehubObject(
            object_id="sha256:audio-preview-beta",
            repo_id=repo_b,
            path="preview.ogg",
            size_bytes=2048,
            disk_path="/tmp/preview-beta.ogg",
        ),
    )
    for audio in audio_objects:
        db_session.add(audio)
    await db_session.commit()

    resp = await client.get("/api/v1/musehub/search?q=funky", headers=auth_headers)
    assert resp.status_code == 200
    payload = resp.json()

    groups_by_id = {entry["repoId"]: entry for entry in payload["groups"]}
    assert repo_a in groups_by_id
    assert repo_b in groups_by_id

    first_a = groups_by_id[repo_a]["matches"][0]
    assert first_a["audioObjectId"] == "sha256:audio-preview-alpha"
    first_b = groups_by_id[repo_b]["matches"][0]
    assert first_b["audioObjectId"] == "sha256:audio-preview-beta"
887
888
@pytest.mark.anyio
async def test_global_search_audio_preview_absent_when_no_audio_objects(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """A repo with no stored objects yields a null ``audioObjectId``."""
    repo_id = await _make_repo(db_session, name="no-audio-repo", visibility="public")
    await _make_commit(
        db_session,
        repo_id,
        commit_id="na001abcde",
        message="silent ambient piece",
    )

    resp = await client.get("/api/v1/musehub/search?q=silent", headers=auth_headers)
    assert resp.status_code == 200
    payload = resp.json()
    group = next(
        (entry for entry in payload["groups"] if entry["repoId"] == repo_id),
        None,
    )
    assert group is not None
    first_match = group["matches"][0]
    assert first_match["audioObjectId"] is None
910
911
912 # ---------------------------------------------------------------------------
913 # In-repo search — UI page
914 # ---------------------------------------------------------------------------
915
916
@pytest.mark.anyio
async def test_search_page_renders(
    client: AsyncClient,
    db_session: AsyncSession,
) -> None:
    """GET /musehub/ui/{owner}/{repo_slug}/search returns 200 HTML with mode tabs.

    The page is addressed by owner and slug (``testuser/search-test-repo``),
    so the seeded repo's id is not needed. The HTML must contain the four
    search-mode labels and the ``inp-since`` / ``inp-until`` date inputs.
    """
    # Seed the repo the URL below refers to; its returned id is not used.
    await _make_search_repo(db_session)
    response = await client.get("/musehub/ui/testuser/search-test-repo/search")
    assert response.status_code == 200
    assert "text/html" in response.headers["content-type"]
    body = response.text
    assert "Muse Hub" in body
    assert "Search Commits" in body
    assert "Keyword" in body
    assert "Natural Language" in body
    assert "Pattern" in body
    assert "Musical Properties" in body
    assert "inp-since" in body
    assert "inp-until" in body
936
937
@pytest.mark.anyio
async def test_search_page_no_auth_required(
    client: AsyncClient,
    db_session: AsyncSession,
) -> None:
    """Search UI page is accessible without a JWT (HTML shell, JS handles auth).

    The request carries no auth headers and must still return 200.
    """
    # Seed the repo the URL below refers to; its returned id is not used.
    await _make_search_repo(db_session)
    response = await client.get("/musehub/ui/testuser/search-test-repo/search")
    assert response.status_code == 200
947
948
949 # ---------------------------------------------------------------------------
950 # In-repo search — authentication
951 # ---------------------------------------------------------------------------
952
953
@pytest.mark.anyio
async def test_search_requires_auth(
    client: AsyncClient,
    db_session: AsyncSession,
) -> None:
    """The in-repo search API rejects a request without a token with 401."""
    repo_id = await _make_search_repo(db_session)
    anonymous_resp = await client.get(
        f"/api/v1/musehub/repos/{repo_id}/search?mode=keyword&q=jazz"
    )
    assert anonymous_resp.status_code == 401
963
964
@pytest.mark.anyio
async def test_search_unknown_repo(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Authenticated search against a repo id that was never created returns 404."""
    missing_repo_url = "/api/v1/musehub/repos/does-not-exist/search?mode=keyword&q=test"

    resp = await client.get(missing_repo_url, headers=auth_headers)

    assert resp.status_code == 404
977
978
@pytest.mark.anyio
async def test_search_invalid_mode(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """An unrecognised ``mode`` query value is rejected with 422."""
    seeded_repo = await _make_search_repo(db_session)
    bad_mode_url = f"/api/v1/musehub/repos/{seeded_repo}/search?mode=badmode&q=x"

    resp = await client.get(bad_mode_url, headers=auth_headers)

    assert resp.status_code == 422
992
993
994 # ---------------------------------------------------------------------------
995 # In-repo search — keyword mode
996 # ---------------------------------------------------------------------------
997
998
@pytest.mark.anyio
async def test_search_keyword_mode(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Keyword mode echoes the query and returns at least one 'jazz' commit."""
    seeded_repo = await _make_search_repo(db_session)
    await db_session.commit()

    # Three commits, only the first of which mentions the query terms.
    commit_messages = (
        "dark jazz bassline in Dm",
        "classical piano intro section",
        "hip hop drum fill pattern",
    )
    for text in commit_messages:
        await _make_search_commit(db_session, repo_id=seeded_repo, message=text)
    await db_session.commit()

    search_url = f"/api/v1/musehub/repos/{seeded_repo}/search?mode=keyword&q=jazz+bassline"
    resp = await client.get(search_url, headers=auth_headers)
    assert resp.status_code == 200

    payload = resp.json()
    assert payload["mode"] == "keyword"
    assert payload["query"] == "jazz bassline"
    lowered_messages = (hit["message"].lower() for hit in payload["matches"])
    assert any("jazz" in lowered for lowered in lowered_messages)
1023
1024
@pytest.mark.anyio
async def test_search_keyword_empty_query(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Keyword mode with ``q=`` left blank succeeds and yields no matches."""
    seeded_repo = await _make_search_repo(db_session)
    await db_session.commit()
    await _make_search_commit(db_session, repo_id=seeded_repo, message="some commit")
    await db_session.commit()

    blank_query_url = f"/api/v1/musehub/repos/{seeded_repo}/search?mode=keyword&q="
    resp = await client.get(blank_query_url, headers=auth_headers)
    assert resp.status_code == 200

    payload = resp.json()
    assert payload["mode"] == "keyword"
    assert payload["matches"] == []
1045
1046
@pytest.mark.anyio
async def test_search_json_response(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Search response has the expected SearchResponse JSON shape.

    Checks the top-level keys of the payload and the keys of the first match.
    A commit containing the query term is seeded, so an empty ``matches`` list
    is treated as a failure rather than silently skipping the per-match checks.
    """
    repo_id = await _make_search_repo(db_session)
    await db_session.commit()
    await _make_search_commit(db_session, repo_id=repo_id, message="piano chord progression F Bb Eb")
    await db_session.commit()

    response = await client.get(
        f"/api/v1/musehub/repos/{repo_id}/search?mode=keyword&q=piano",
        headers=auth_headers,
    )
    assert response.status_code == 200
    data = response.json()

    # Top-level envelope keys.
    for key in ("mode", "query", "matches", "totalScanned", "limit"):
        assert key in data, f"response is missing top-level key {key!r}"

    # Guard against a vacuous pass: without at least one match the per-item
    # shape below would never be verified.
    assert data["matches"], "expected at least one match for the seeded 'piano' commit"

    first_match = data["matches"][0]
    match_keys = (
        "commitId",
        "branch",
        "message",
        "author",
        "timestamp",
        "score",
        "matchSource",
    )
    for key in match_keys:
        assert key in first_match, f"match entry is missing key {key!r}"
1081
1082
1083 # ---------------------------------------------------------------------------
1084 # In-repo search — musical property mode
1085 # ---------------------------------------------------------------------------
1086
1087
@pytest.mark.anyio
async def test_search_musical_property(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Property mode with ``harmony=Eb`` returns only commits mentioning 'Eb'."""
    seeded_repo = await _make_search_repo(db_session)
    await db_session.commit()

    # One commit carries the harmony tag, the other does not.
    for text in ("add harmony=Eb bridge section", "drum groove tweak no harmony"):
        await _make_search_commit(db_session, repo_id=seeded_repo, message=text)
    await db_session.commit()

    property_url = f"/api/v1/musehub/repos/{seeded_repo}/search?mode=property&harmony=Eb"
    resp = await client.get(property_url, headers=auth_headers)
    assert resp.status_code == 200

    payload = resp.json()
    assert payload["mode"] == "property"
    hits = payload["matches"]
    assert len(hits) >= 1
    assert all("Eb" in hit["message"] for hit in hits)
1111
1112
1113 # ---------------------------------------------------------------------------
1114 # In-repo search — natural language (ask) mode
1115 # ---------------------------------------------------------------------------
1116
1117
@pytest.mark.anyio
async def test_search_natural_language(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Ask mode given a tempo question returns at least one 'tempo' commit."""
    seeded_repo = await _make_search_repo(db_session)
    await db_session.commit()

    for text in ("switched tempo to 140bpm for drop", "piano melody in minor key"):
        await _make_search_commit(db_session, repo_id=seeded_repo, message=text)
    await db_session.commit()

    ask_url = (
        f"/api/v1/musehub/repos/{seeded_repo}/search?mode=ask&q=what+tempo+changes+did+I+make"
    )
    resp = await client.get(ask_url, headers=auth_headers)
    assert resp.status_code == 200

    payload = resp.json()
    assert payload["mode"] == "ask"
    lowered_messages = (hit["message"].lower() for hit in payload["matches"])
    assert any("tempo" in lowered for lowered in lowered_messages)
1140
1141
1142 # ---------------------------------------------------------------------------
1143 # In-repo search — pattern mode
1144 # ---------------------------------------------------------------------------
1145
1146
@pytest.mark.anyio
async def test_search_pattern_message(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Pattern mode finds exactly the one commit whose message contains 'Cm7'."""
    seeded_repo = await _make_search_repo(db_session)
    await db_session.commit()

    for text in ("add Cm7 chord voicing in bridge", "fix timing on verse drums"):
        await _make_search_commit(db_session, repo_id=seeded_repo, message=text)
    await db_session.commit()

    pattern_url = f"/api/v1/musehub/repos/{seeded_repo}/search?mode=pattern&q=Cm7"
    resp = await client.get(pattern_url, headers=auth_headers)
    assert resp.status_code == 200

    payload = resp.json()
    assert payload["mode"] == "pattern"
    hits = payload["matches"]
    assert len(hits) == 1
    sole_hit = hits[0]
    assert "Cm7" in sole_hit["message"]
    assert sole_hit["matchSource"] == "message"
1171
1172
@pytest.mark.anyio
async def test_search_pattern_branch(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """Pattern mode reports ``matchSource == "branch"`` when only the branch matches."""
    seeded_repo = await _make_search_repo(db_session)
    await db_session.commit()

    # The message lacks the pattern; only the branch name contains "hip-hop".
    commit_fields = {
        "repo_id": seeded_repo,
        "message": "rough cut",
        "branch": "feature/hip-hop-session",
    }
    await _make_search_commit(db_session, **commit_fields)
    await db_session.commit()

    pattern_url = f"/api/v1/musehub/repos/{seeded_repo}/search?mode=pattern&q=hip-hop"
    resp = await client.get(pattern_url, headers=auth_headers)
    assert resp.status_code == 200

    payload = resp.json()
    assert payload["mode"] == "pattern"
    hits = payload["matches"]
    assert len(hits) == 1
    assert hits[0]["matchSource"] == "branch"
1200
1201
1202 # ---------------------------------------------------------------------------
1203 # In-repo search — date range filters
1204 # ---------------------------------------------------------------------------
1205
1206
@pytest.mark.anyio
async def test_search_date_range_since(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """``since`` drops the 2024 commit and keeps the 2026 commit."""
    seeded_repo = await _make_search_repo(db_session)
    await db_session.commit()

    # One commit on each side of the 2025-06-01 cutoff used in the query.
    dated_commits = (
        ("old jazz commit", datetime(2024, 1, 1, tzinfo=timezone.utc)),
        ("new jazz commit", datetime(2026, 1, 1, tzinfo=timezone.utc)),
    )
    for text, stamp in dated_commits:
        await _make_search_commit(
            db_session,
            repo_id=seeded_repo,
            message=text,
            committed_at=stamp,
        )
    await db_session.commit()

    since_url = (
        f"/api/v1/musehub/repos/{seeded_repo}/search?mode=keyword&q=jazz&since=2025-06-01T00:00:00Z"
    )
    resp = await client.get(since_url, headers=auth_headers)
    assert resp.status_code == 200

    returned_messages = [hit["message"] for hit in resp.json()["matches"]]
    assert "old jazz commit" not in returned_messages
    assert "new jazz commit" in returned_messages
1232
1233
@pytest.mark.anyio
async def test_search_date_range_until(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """``until`` keeps the 2024 commit and drops the 2026 commit."""
    seeded_repo = await _make_search_repo(db_session)
    await db_session.commit()

    # One commit on each side of the 2025-06-01 cutoff used in the query.
    dated_commits = (
        ("old piano commit", datetime(2024, 1, 1, tzinfo=timezone.utc)),
        ("new piano commit", datetime(2026, 1, 1, tzinfo=timezone.utc)),
    )
    for text, stamp in dated_commits:
        await _make_search_commit(
            db_session,
            repo_id=seeded_repo,
            message=text,
            committed_at=stamp,
        )
    await db_session.commit()

    until_url = (
        f"/api/v1/musehub/repos/{seeded_repo}/search?mode=keyword&q=piano&until=2025-06-01T00:00:00Z"
    )
    resp = await client.get(until_url, headers=auth_headers)
    assert resp.status_code == 200

    returned_messages = [hit["message"] for hit in resp.json()["matches"]]
    assert "old piano commit" in returned_messages
    assert "new piano commit" not in returned_messages
1259
1260
1261 # ---------------------------------------------------------------------------
1262 # In-repo search — limit
1263 # ---------------------------------------------------------------------------
1264
1265
@pytest.mark.anyio
async def test_search_limit_respected(
    client: AsyncClient,
    db_session: AsyncSession,
    auth_headers: dict[str, str],
) -> None:
    """``limit=3`` is echoed back and no more than three matches are returned."""
    seeded_repo = await _make_search_repo(db_session)
    await db_session.commit()

    # Seed ten commits that all contain the query term "bass".
    iteration = 0
    while iteration < 10:
        await _make_search_commit(
            db_session,
            repo_id=seeded_repo,
            message=f"bass groove iteration {iteration}",
        )
        iteration += 1
    await db_session.commit()

    limited_url = f"/api/v1/musehub/repos/{seeded_repo}/search?mode=keyword&q=bass&limit=3"
    resp = await client.get(limited_url, headers=auth_headers)
    assert resp.status_code == 200

    payload = resp.json()
    assert len(payload["matches"]) <= 3
    assert payload["limit"] == 3