musehub_discover.py
python
| 1 | """MuseHub discover/explore service — public repo discovery with filtering and sorting. |
| 2 | |
| 3 | This module is the ONLY place that executes the discover query. Route handlers |
| 4 | delegate here; no filtering or sorting logic lives in routes. |
| 5 | |
| 6 | Boundary rules: |
| 7 | - Must NOT import state stores, SSE queues, or LLM clients. |
| 8 | - Must NOT import musehub.core.* modules. |
| 9 | - May import ORM models from musehub.db.musehub_models. |
| 10 | - May import Pydantic response models from musehub.models.musehub. |
| 11 | |
| 12 | Sort semantics: |
| 13 | "stars" — repos with the most stars first (trending signal) |
| 14 | "activity" — repos with the most recent commit first |
| 15 | "commits" — repos with the highest total commit count first |
| 16 | "created" — newest repos first (default for explore page) |
| 17 | |
| 18 | Tag filtering uses a contains check on the JSON ``tags`` column. For portability |
| 19 | across Postgres and SQLite (tests), the check is done server-side via a |
| 20 | ``cast(tags, Text).ilike`` pattern rather than JSON containment operators, which |
| 21 | differ between engines and are not needed at this scale. |
| 22 | """ |
| 23 | from __future__ import annotations |
| 24 | |
| 25 | import logging |
| 26 | from typing import Literal |
| 27 | |
| 28 | from sqlalchemy import Text, desc, func, or_, outerjoin, select |
| 29 | from sqlalchemy.ext.asyncio import AsyncSession |
| 30 | |
| 31 | from musehub.db import musehub_models as db |
| 32 | from musehub.db import muse_cli_models as cli_db |
| 33 | from musehub.models.musehub import ( |
| 34 | ExploreRepoResult, |
| 35 | ExploreResponse, |
| 36 | StarResponse, |
| 37 | ) |
| 38 | |
| 39 | logger = logging.getLogger(__name__) |
| 40 | |
| 41 | SortField = Literal["stars", "activity", "commits", "created", "trending"] |
| 42 | |
| 43 | _PAGE_SIZE_MAX = 100 |
| 44 | |
| 45 | |
| 46 | async def list_public_repos( |
| 47 | session: AsyncSession, |
| 48 | *, |
| 49 | genre: str | None = None, |
| 50 | key: str | None = None, |
| 51 | tempo_min: int | None = None, |
| 52 | tempo_max: int | None = None, |
| 53 | instrumentation: str | None = None, |
| 54 | langs: list[str] | None = None, |
| 55 | topics: list[str] | None = None, |
| 56 | license: str | None = None, |
| 57 | sort: SortField = "created", |
| 58 | page: int = 1, |
| 59 | page_size: int = 24, |
| 60 | ) -> ExploreResponse: |
| 61 | """Return a paginated list of public repos that match the given filters. |
| 62 | |
| 63 | Only repos with ``visibility = 'public'`` are returned. All filter parameters |
| 64 | are optional; omitting them returns all public repos in the requested sort order. |
| 65 | |
| 66 | Args: |
| 67 | session: Async DB session. |
| 68 | genre: Case-insensitive substring match against the repo's ``tags`` JSON. |
| 69 | Matches repos where any tag contains this string (e.g. "jazz"). |
| 70 | key: Exact case-insensitive match against ``key_signature`` (e.g. "F# minor"). |
| 71 | tempo_min: Include only repos with ``tempo_bpm >= tempo_min``. |
| 72 | tempo_max: Include only repos with ``tempo_bpm <= tempo_max``. |
| 73 | instrumentation: Case-insensitive substring match against tags — used to |
| 74 | filter by instrument presence (e.g. "bass", "drums"). |
| 75 | langs: Multi-select language/instrument chips — repo must have at least one |
| 76 | matching tag in the muse_tags table (OR across selections). |
| 77 | topics: Multi-select topic chips — repo.tags JSON must contain at least one |
| 78 | of the selected values (OR across selections). |
| 79 | license: Exact match against ``settings['license']`` (e.g. "CC BY"). |
| 80 | sort: One of "stars", "activity", "commits", "created". |
| 81 | page: 1-based page number. |
| 82 | page_size: Number of results per page (clamped to _PAGE_SIZE_MAX). |
| 83 | |
| 84 | Returns: |
| 85 | ExploreResponse with repo cards and pagination metadata. |
| 86 | """ |
| 87 | page_size = min(page_size, _PAGE_SIZE_MAX) |
| 88 | offset = (max(page, 1) - 1) * page_size |
| 89 | |
| 90 | # Aggregated sub-expressions ───────────────────────────────────────────── |
| 91 | star_count_col = func.count(db.MusehubStar.star_id).label("star_count") |
| 92 | commit_count_col = func.count(db.MusehubCommit.commit_id).label("commit_count") |
| 93 | latest_commit_col = func.max(db.MusehubCommit.timestamp).label("latest_commit") |
| 94 | |
| 95 | # Build the base aggregated query over public repos. |
| 96 | # Left-join stars and commits so repos with zero stars/commits are included. |
| 97 | base_q = ( |
| 98 | select( |
| 99 | db.MusehubRepo, |
| 100 | star_count_col, |
| 101 | commit_count_col, |
| 102 | latest_commit_col, |
| 103 | ) |
| 104 | .select_from( |
| 105 | outerjoin( |
| 106 | outerjoin( |
| 107 | db.MusehubRepo, |
| 108 | db.MusehubStar, |
| 109 | db.MusehubRepo.repo_id == db.MusehubStar.repo_id, |
| 110 | ), |
| 111 | db.MusehubCommit, |
| 112 | db.MusehubRepo.repo_id == db.MusehubCommit.repo_id, |
| 113 | ) |
| 114 | ) |
| 115 | .where(db.MusehubRepo.visibility == "public") |
| 116 | .group_by(db.MusehubRepo.repo_id) |
| 117 | ) |
| 118 | |
| 119 | # Apply filters ────────────────────────────────────────────────────────── |
| 120 | if genre: |
| 121 | # Match repos where any tag contains the genre string (case-insensitive). |
| 122 | # We cast the JSON column to text and use ILIKE for cross-engine compat. |
| 123 | base_q = base_q.where( |
| 124 | func.cast(db.MusehubRepo.tags, Text).ilike(f"%{genre.lower()}%") |
| 125 | ) |
| 126 | if instrumentation: |
| 127 | base_q = base_q.where( |
| 128 | func.cast(db.MusehubRepo.tags, Text).ilike(f"%{instrumentation.lower()}%") |
| 129 | ) |
| 130 | if key: |
| 131 | base_q = base_q.where( |
| 132 | func.lower(db.MusehubRepo.key_signature) == key.lower() |
| 133 | ) |
| 134 | if tempo_min is not None: |
| 135 | base_q = base_q.where(db.MusehubRepo.tempo_bpm >= tempo_min) |
| 136 | if tempo_max is not None: |
| 137 | base_q = base_q.where(db.MusehubRepo.tempo_bpm <= tempo_max) |
| 138 | |
| 139 | # Multi-select language/instrument chips — filter by musehub_repos.tags JSON (OR across values). |
| 140 | # Tags may be prefixed (emotion:melancholic) or bare (jazz); ilike with the raw value |
| 141 | # matches both since "melancholic" is a substring of "emotion:melancholic". |
| 142 | if langs: |
| 143 | lang_conditions = [ |
| 144 | func.cast(db.MusehubRepo.tags, Text).ilike(f"%{v.lower()}%") |
| 145 | for v in langs |
| 146 | ] |
| 147 | base_q = base_q.where(or_(*lang_conditions)) |
| 148 | |
| 149 | # Multi-select topic chips — filter on repo.tags JSON (OR across values). |
| 150 | if topics: |
| 151 | topic_conditions = [ |
| 152 | func.cast(db.MusehubRepo.tags, Text).ilike(f"%{t.lower()}%") |
| 153 | for t in topics |
| 154 | ] |
| 155 | base_q = base_q.where(or_(*topic_conditions)) |
| 156 | |
| 157 | # License filter — matches settings['license'] key in the repo settings JSON. |
| 158 | if license: |
| 159 | base_q = base_q.where( |
| 160 | func.cast(db.MusehubRepo.settings, Text).ilike(f"%{license}%") |
| 161 | ) |
| 162 | |
| 163 | # Count total results before pagination ────────────────────────────────── |
| 164 | count_q = select(func.count()).select_from(base_q.subquery()) |
| 165 | total: int = (await session.execute(count_q)).scalar_one() |
| 166 | |
| 167 | # Apply sort ───────────────────────────────────────────────────────────── |
| 168 | if sort == "stars": |
| 169 | base_q = base_q.order_by(desc("star_count"), desc(db.MusehubRepo.created_at)) |
| 170 | elif sort == "activity": |
| 171 | base_q = base_q.order_by(desc("latest_commit"), desc(db.MusehubRepo.created_at)) |
| 172 | elif sort == "commits": |
| 173 | base_q = base_q.order_by(desc("commit_count"), desc(db.MusehubRepo.created_at)) |
| 174 | elif sort == "trending": |
| 175 | # Composite score: stars carry more weight than raw commit volume, |
| 176 | # so a repo with 10 stars + 5 commits outranks one with 0 stars + 100 commits. |
| 177 | # star_count_col and commit_count_col are already in the grouped SELECT. |
| 178 | base_q = base_q.order_by( |
| 179 | desc(star_count_col * 3 + commit_count_col), |
| 180 | desc("latest_commit"), |
| 181 | desc(db.MusehubRepo.created_at), |
| 182 | ) |
| 183 | else: # "created" |
| 184 | base_q = base_q.order_by(desc(db.MusehubRepo.created_at)) |
| 185 | |
| 186 | rows = (await session.execute(base_q.offset(offset).limit(page_size))).all() |
| 187 | |
| 188 | results = [ |
| 189 | ExploreRepoResult( |
| 190 | repo_id=row.MusehubRepo.repo_id, |
| 191 | name=row.MusehubRepo.name, |
| 192 | owner=row.MusehubRepo.owner, |
| 193 | slug=row.MusehubRepo.slug, |
| 194 | owner_user_id=row.MusehubRepo.owner_user_id, |
| 195 | description=row.MusehubRepo.description, |
| 196 | tags=list(row.MusehubRepo.tags or []), |
| 197 | key_signature=row.MusehubRepo.key_signature, |
| 198 | tempo_bpm=row.MusehubRepo.tempo_bpm, |
| 199 | star_count=row.star_count or 0, |
| 200 | commit_count=row.commit_count or 0, |
| 201 | created_at=row.MusehubRepo.created_at, |
| 202 | ) |
| 203 | for row in rows |
| 204 | ] |
| 205 | |
| 206 | logger.debug("✅ Explore query: %d/%d repos (page %d, sort=%s)", len(results), total, page, sort) |
| 207 | return ExploreResponse(repos=results, total=total, page=page, page_size=page_size) |
| 208 | |
| 209 | |
| 210 | async def star_repo(session: AsyncSession, repo_id: str, user_id: str) -> StarResponse: |
| 211 | """Add a star for user_id on repo_id. Idempotent — duplicate stars are silently ignored. |
| 212 | |
| 213 | Returns StarResponse with the new total star count and ``starred=True``. |
| 214 | Raises ValueError if the repo does not exist or is not public. |
| 215 | """ |
| 216 | repo = await session.get(db.MusehubRepo, repo_id) |
| 217 | if repo is None: |
| 218 | raise ValueError(f"Repo {repo_id!r} not found") |
| 219 | if repo.visibility != "public": |
| 220 | raise ValueError(f"Repo {repo_id!r} is not public") |
| 221 | |
| 222 | # Check for existing star to make the operation idempotent. |
| 223 | existing = ( |
| 224 | await session.execute( |
| 225 | select(db.MusehubStar).where( |
| 226 | db.MusehubStar.repo_id == repo_id, |
| 227 | db.MusehubStar.user_id == user_id, |
| 228 | ) |
| 229 | ) |
| 230 | ).scalars().first() |
| 231 | |
| 232 | if existing is None: |
| 233 | star = db.MusehubStar(repo_id=repo_id, user_id=user_id) |
| 234 | session.add(star) |
| 235 | await session.flush() |
| 236 | logger.info("✅ User %s starred repo %s", user_id, repo_id) |
| 237 | |
| 238 | count: int = ( |
| 239 | await session.execute( |
| 240 | select(func.count(db.MusehubStar.star_id)).where( |
| 241 | db.MusehubStar.repo_id == repo_id |
| 242 | ) |
| 243 | ) |
| 244 | ).scalar_one() |
| 245 | |
| 246 | return StarResponse(starred=True, star_count=count) |
| 247 | |
| 248 | |
| 249 | async def unstar_repo(session: AsyncSession, repo_id: str, user_id: str) -> StarResponse: |
| 250 | """Remove a star for user_id on repo_id. Idempotent — no-op if not starred. |
| 251 | |
| 252 | Returns StarResponse with the new total star count and ``starred=False``. |
| 253 | """ |
| 254 | existing = ( |
| 255 | await session.execute( |
| 256 | select(db.MusehubStar).where( |
| 257 | db.MusehubStar.repo_id == repo_id, |
| 258 | db.MusehubStar.user_id == user_id, |
| 259 | ) |
| 260 | ) |
| 261 | ).scalars().first() |
| 262 | |
| 263 | if existing is not None: |
| 264 | await session.delete(existing) |
| 265 | await session.flush() |
| 266 | logger.info("✅ User %s unstarred repo %s", user_id, repo_id) |
| 267 | |
| 268 | count: int = ( |
| 269 | await session.execute( |
| 270 | select(func.count(db.MusehubStar.star_id)).where( |
| 271 | db.MusehubStar.repo_id == repo_id |
| 272 | ) |
| 273 | ) |
| 274 | ).scalar_one() |
| 275 | |
| 276 | return StarResponse(starred=False, star_count=count) |