gabriel / musehub public
musehub_discover.py python
276 lines 11.0 KB
04faf0e3 feat: supercharge all repo pages, enforce separation of concerns Gabriel Cardona <cgcardona@gmail.com> 5d ago
1 """MuseHub discover/explore service — public repo discovery with filtering and sorting.
2
3 This module is the ONLY place that executes the discover query. Route handlers
4 delegate here; no filtering or sorting logic lives in routes.
5
6 Boundary rules:
7 - Must NOT import state stores, SSE queues, or LLM clients.
8 - Must NOT import musehub.core.* modules.
9 - May import ORM models from musehub.db.musehub_models.
10 - May import Pydantic response models from musehub.models.musehub.
11
12 Sort semantics:
13 "stars" — repos with the most stars first (trending signal)
14 "activity" — repos with the most recent commit first
15 "commits" — repos with the highest total commit count first
16 "created" — newest repos first (default for explore page)
17
18 Tag filtering uses a contains check on the JSON ``tags`` column. For portability
19 across Postgres and SQLite (tests), the check is done server-side via a
20 ``cast(tags, Text).ilike`` pattern rather than JSON containment operators, which
21 differ between engines and are not needed at this scale.
22 """
23 from __future__ import annotations
24
25 import logging
26 from typing import Literal
27
28 from sqlalchemy import Text, desc, func, or_, outerjoin, select
29 from sqlalchemy.ext.asyncio import AsyncSession
30
31 from musehub.db import musehub_models as db
32 from musehub.db import muse_cli_models as cli_db
33 from musehub.models.musehub import (
34 ExploreRepoResult,
35 ExploreResponse,
36 StarResponse,
37 )
38
39 logger = logging.getLogger(__name__)
40
41 SortField = Literal["stars", "activity", "commits", "created", "trending"]
42
43 _PAGE_SIZE_MAX = 100
44
45
46 async def list_public_repos(
47 session: AsyncSession,
48 *,
49 genre: str | None = None,
50 key: str | None = None,
51 tempo_min: int | None = None,
52 tempo_max: int | None = None,
53 instrumentation: str | None = None,
54 langs: list[str] | None = None,
55 topics: list[str] | None = None,
56 license: str | None = None,
57 sort: SortField = "created",
58 page: int = 1,
59 page_size: int = 24,
60 ) -> ExploreResponse:
61 """Return a paginated list of public repos that match the given filters.
62
63 Only repos with ``visibility = 'public'`` are returned. All filter parameters
64 are optional; omitting them returns all public repos in the requested sort order.
65
66 Args:
67 session: Async DB session.
68 genre: Case-insensitive substring match against the repo's ``tags`` JSON.
69 Matches repos where any tag contains this string (e.g. "jazz").
70 key: Exact case-insensitive match against ``key_signature`` (e.g. "F# minor").
71 tempo_min: Include only repos with ``tempo_bpm >= tempo_min``.
72 tempo_max: Include only repos with ``tempo_bpm <= tempo_max``.
73 instrumentation: Case-insensitive substring match against tags — used to
74 filter by instrument presence (e.g. "bass", "drums").
75 langs: Multi-select language/instrument chips — repo must have at least one
76 matching tag in the muse_tags table (OR across selections).
77 topics: Multi-select topic chips — repo.tags JSON must contain at least one
78 of the selected values (OR across selections).
79 license: Exact match against ``settings['license']`` (e.g. "CC BY").
80 sort: One of "stars", "activity", "commits", "created".
81 page: 1-based page number.
82 page_size: Number of results per page (clamped to _PAGE_SIZE_MAX).
83
84 Returns:
85 ExploreResponse with repo cards and pagination metadata.
86 """
87 page_size = min(page_size, _PAGE_SIZE_MAX)
88 offset = (max(page, 1) - 1) * page_size
89
90 # Aggregated sub-expressions ─────────────────────────────────────────────
91 star_count_col = func.count(db.MusehubStar.star_id).label("star_count")
92 commit_count_col = func.count(db.MusehubCommit.commit_id).label("commit_count")
93 latest_commit_col = func.max(db.MusehubCommit.timestamp).label("latest_commit")
94
95 # Build the base aggregated query over public repos.
96 # Left-join stars and commits so repos with zero stars/commits are included.
97 base_q = (
98 select(
99 db.MusehubRepo,
100 star_count_col,
101 commit_count_col,
102 latest_commit_col,
103 )
104 .select_from(
105 outerjoin(
106 outerjoin(
107 db.MusehubRepo,
108 db.MusehubStar,
109 db.MusehubRepo.repo_id == db.MusehubStar.repo_id,
110 ),
111 db.MusehubCommit,
112 db.MusehubRepo.repo_id == db.MusehubCommit.repo_id,
113 )
114 )
115 .where(db.MusehubRepo.visibility == "public")
116 .group_by(db.MusehubRepo.repo_id)
117 )
118
119 # Apply filters ──────────────────────────────────────────────────────────
120 if genre:
121 # Match repos where any tag contains the genre string (case-insensitive).
122 # We cast the JSON column to text and use ILIKE for cross-engine compat.
123 base_q = base_q.where(
124 func.cast(db.MusehubRepo.tags, Text).ilike(f"%{genre.lower()}%")
125 )
126 if instrumentation:
127 base_q = base_q.where(
128 func.cast(db.MusehubRepo.tags, Text).ilike(f"%{instrumentation.lower()}%")
129 )
130 if key:
131 base_q = base_q.where(
132 func.lower(db.MusehubRepo.key_signature) == key.lower()
133 )
134 if tempo_min is not None:
135 base_q = base_q.where(db.MusehubRepo.tempo_bpm >= tempo_min)
136 if tempo_max is not None:
137 base_q = base_q.where(db.MusehubRepo.tempo_bpm <= tempo_max)
138
139 # Multi-select language/instrument chips — filter by musehub_repos.tags JSON (OR across values).
140 # Tags may be prefixed (emotion:melancholic) or bare (jazz); ilike with the raw value
141 # matches both since "melancholic" is a substring of "emotion:melancholic".
142 if langs:
143 lang_conditions = [
144 func.cast(db.MusehubRepo.tags, Text).ilike(f"%{v.lower()}%")
145 for v in langs
146 ]
147 base_q = base_q.where(or_(*lang_conditions))
148
149 # Multi-select topic chips — filter on repo.tags JSON (OR across values).
150 if topics:
151 topic_conditions = [
152 func.cast(db.MusehubRepo.tags, Text).ilike(f"%{t.lower()}%")
153 for t in topics
154 ]
155 base_q = base_q.where(or_(*topic_conditions))
156
157 # License filter — matches settings['license'] key in the repo settings JSON.
158 if license:
159 base_q = base_q.where(
160 func.cast(db.MusehubRepo.settings, Text).ilike(f"%{license}%")
161 )
162
163 # Count total results before pagination ──────────────────────────────────
164 count_q = select(func.count()).select_from(base_q.subquery())
165 total: int = (await session.execute(count_q)).scalar_one()
166
167 # Apply sort ─────────────────────────────────────────────────────────────
168 if sort == "stars":
169 base_q = base_q.order_by(desc("star_count"), desc(db.MusehubRepo.created_at))
170 elif sort == "activity":
171 base_q = base_q.order_by(desc("latest_commit"), desc(db.MusehubRepo.created_at))
172 elif sort == "commits":
173 base_q = base_q.order_by(desc("commit_count"), desc(db.MusehubRepo.created_at))
174 elif sort == "trending":
175 # Composite score: stars carry more weight than raw commit volume,
176 # so a repo with 10 stars + 5 commits outranks one with 0 stars + 100 commits.
177 # star_count_col and commit_count_col are already in the grouped SELECT.
178 base_q = base_q.order_by(
179 desc(star_count_col * 3 + commit_count_col),
180 desc("latest_commit"),
181 desc(db.MusehubRepo.created_at),
182 )
183 else: # "created"
184 base_q = base_q.order_by(desc(db.MusehubRepo.created_at))
185
186 rows = (await session.execute(base_q.offset(offset).limit(page_size))).all()
187
188 results = [
189 ExploreRepoResult(
190 repo_id=row.MusehubRepo.repo_id,
191 name=row.MusehubRepo.name,
192 owner=row.MusehubRepo.owner,
193 slug=row.MusehubRepo.slug,
194 owner_user_id=row.MusehubRepo.owner_user_id,
195 description=row.MusehubRepo.description,
196 tags=list(row.MusehubRepo.tags or []),
197 key_signature=row.MusehubRepo.key_signature,
198 tempo_bpm=row.MusehubRepo.tempo_bpm,
199 star_count=row.star_count or 0,
200 commit_count=row.commit_count or 0,
201 created_at=row.MusehubRepo.created_at,
202 )
203 for row in rows
204 ]
205
206 logger.debug("✅ Explore query: %d/%d repos (page %d, sort=%s)", len(results), total, page, sort)
207 return ExploreResponse(repos=results, total=total, page=page, page_size=page_size)
208
209
210 async def star_repo(session: AsyncSession, repo_id: str, user_id: str) -> StarResponse:
211 """Add a star for user_id on repo_id. Idempotent — duplicate stars are silently ignored.
212
213 Returns StarResponse with the new total star count and ``starred=True``.
214 Raises ValueError if the repo does not exist or is not public.
215 """
216 repo = await session.get(db.MusehubRepo, repo_id)
217 if repo is None:
218 raise ValueError(f"Repo {repo_id!r} not found")
219 if repo.visibility != "public":
220 raise ValueError(f"Repo {repo_id!r} is not public")
221
222 # Check for existing star to make the operation idempotent.
223 existing = (
224 await session.execute(
225 select(db.MusehubStar).where(
226 db.MusehubStar.repo_id == repo_id,
227 db.MusehubStar.user_id == user_id,
228 )
229 )
230 ).scalars().first()
231
232 if existing is None:
233 star = db.MusehubStar(repo_id=repo_id, user_id=user_id)
234 session.add(star)
235 await session.flush()
236 logger.info("✅ User %s starred repo %s", user_id, repo_id)
237
238 count: int = (
239 await session.execute(
240 select(func.count(db.MusehubStar.star_id)).where(
241 db.MusehubStar.repo_id == repo_id
242 )
243 )
244 ).scalar_one()
245
246 return StarResponse(starred=True, star_count=count)
247
248
249 async def unstar_repo(session: AsyncSession, repo_id: str, user_id: str) -> StarResponse:
250 """Remove a star for user_id on repo_id. Idempotent — no-op if not starred.
251
252 Returns StarResponse with the new total star count and ``starred=False``.
253 """
254 existing = (
255 await session.execute(
256 select(db.MusehubStar).where(
257 db.MusehubStar.repo_id == repo_id,
258 db.MusehubStar.user_id == user_id,
259 )
260 )
261 ).scalars().first()
262
263 if existing is not None:
264 await session.delete(existing)
265 await session.flush()
266 logger.info("✅ User %s unstarred repo %s", user_id, repo_id)
267
268 count: int = (
269 await session.execute(
270 select(func.count(db.MusehubStar.star_id)).where(
271 db.MusehubStar.repo_id == repo_id
272 )
273 )
274 ).scalar_one()
275
276 return StarResponse(starred=False, star_count=count)