gabriel / musehub public
musehub_repository.py python
1833 lines 64.9 KB
7f1d07e8 feat: domains, MCP expansion, MIDI player, and production hardening (#8) Gabriel Cardona <cgcardona@gmail.com> 4d ago
1 """MuseHub persistence adapter — single point of DB access for Hub entities.
2
3 This module is the ONLY place that touches the musehub_* tables.
4 Route handlers delegate here; no business logic lives in routes.
5
6 Boundary rules:
7 - Must NOT import state stores, SSE queues, or LLM clients.
8 - Must NOT import musehub.core.* modules.
9 - May import ORM models from musehub.db.musehub_models.
10 - May import Pydantic response models from musehub.models.musehub.
11 """
12 from datetime import datetime, timezone
13
14 import logging
15 import re
16 from collections import deque
17
18 from sqlalchemy import desc, func, or_, select
19 from sqlalchemy.ext.asyncio import AsyncSession
20 from sqlalchemy.orm import aliased
21 from sqlalchemy.sql.elements import ColumnElement
22
23 from musehub.db import musehub_models as db
24 from musehub.db import musehub_collaborator_models as collab_db
25 from musehub.models.musehub import (
26 SessionListResponse,
27 SessionResponse,
28 BranchDetailListResponse,
29 BranchDetailResponse,
30 BranchDivergenceScores,
31 BranchResponse,
32 CommitResponse,
33 GlobalSearchCommitMatch,
34 GlobalSearchRepoGroup,
35 GlobalSearchResult,
36 DagEdge,
37 DagGraphResponse,
38 DagNode,
39 InstrumentInfo,
40 MuseHubContextCommitInfo,
41 MuseHubContextHistoryEntry,
42 MuseHubContextMusicalState,
43 MuseHubContextResponse,
44 ObjectMetaResponse,
45 RepoListResponse,
46 RepoResponse,
47 RepoSettingsPatch,
48 RepoSettingsResponse,
49 ScoreMetaInfo,
50 TimelineCommitEvent,
51 TimelineEmotionEvent,
52 TimelineResponse,
53 TimelineSectionEvent,
54 TimelineTrackEvent,
55 TrackInfo,
56 TreeEntryResponse,
57 TreeListResponse,
58 ForkNetworkNode,
59 ForkNetworkResponse,
60 UserForkedRepoEntry,
61 UserForksResponse,
62 UserStarredRepoEntry,
63 UserStarredResponse,
64 UserWatchedRepoEntry,
65 UserWatchedResponse,
66 )
67
# Module-level logger named after this module, per stdlib convention.
logger = logging.getLogger(__name__)
69
70
71 def _generate_slug(name: str) -> str:
72 """Derive a URL-safe slug from a human-readable repo name.
73
74 Rules: lowercase, non-alphanumeric chars collapsed to single hyphens,
75 leading/trailing hyphens stripped, max 64 chars. If the result is empty
76 (e.g. name was all symbols) we fall back to "repo".
77 """
78 slug = name.lower()
79 slug = re.sub(r"[^a-z0-9]+", "-", slug)
80 slug = slug.strip("-")
81 slug = slug[:64].strip("-")
82 return slug or "repo"
83
84
85 def _repo_clone_url(owner: str, slug: str) -> str:
86 """Derive the canonical clone URL from owner and slug.
87
88 Uses the musehub://{owner}/{slug} scheme — the DAW's native protocol handler
89 resolves this to the correct API base at runtime. No internal UUID is exposed
90 in external URLs.
91 """
92 return f"musehub://{owner}/{slug}"
93
94
def _to_repo_response(row: db.MusehubRepo) -> RepoResponse:
    """Map an ORM repo row onto its public wire model.

    ``domain_id`` is read via getattr so rows lacking the attribute yield
    None instead of raising; ``tags`` is copied defensively.
    """
    wire_fields = {
        "repo_id": row.repo_id,
        "name": row.name,
        "owner": row.owner,
        "slug": row.slug,
        "visibility": row.visibility,
        "owner_user_id": row.owner_user_id,
        "clone_url": _repo_clone_url(row.owner, row.slug),
        "description": row.description,
        "tags": list(row.tags or []),
        "key_signature": row.key_signature,
        "tempo_bpm": row.tempo_bpm,
        "domain_id": getattr(row, "domain_id", None),
        "created_at": row.created_at,
    }
    return RepoResponse(**wire_fields)
111
112
def _to_branch_response(row: db.MusehubBranch) -> BranchResponse:
    """Map an ORM branch row onto its public wire model."""
    branch_fields = {
        "branch_id": row.branch_id,
        "name": row.name,
        "head_commit_id": row.head_commit_id,
    }
    return BranchResponse(**branch_fields)
119
120
def _to_commit_response(row: db.MusehubCommit) -> CommitResponse:
    """Map an ORM commit row onto its public wire model.

    ``parent_ids`` is copied defensively (and None becomes an empty list).
    """
    commit_fields = {
        "commit_id": row.commit_id,
        "branch": row.branch,
        "parent_ids": list(row.parent_ids or []),
        "message": row.message,
        "author": row.author,
        "timestamp": row.timestamp,
        "snapshot_id": row.snapshot_id,
    }
    return CommitResponse(**commit_fields)
131
132
async def create_repo(
    session: AsyncSession,
    *,
    name: str,
    owner: str,
    visibility: str,
    owner_user_id: str,
    description: str = "",
    tags: list[str] | None = None,
    key_signature: str | None = None,
    tempo_bpm: int | None = None,
    # ── Wizard extensions ────────────────────────────────────────
    # "license" intentionally shadows the builtin: it is a keyword-only
    # parameter of the public API and cannot be renamed without breaking callers.
    license: str | None = None,
    topics: list[str] | None = None,
    initialize: bool = False,
    default_branch: str = "main",
    template_repo_id: str | None = None,
) -> RepoResponse:
    """Persist a new remote repo and return its wire representation.

    ``slug`` is auto-generated from ``name``. The ``(owner, slug)`` pair must
    be unique — callers should catch ``IntegrityError`` and surface a 409.

    Wizard behaviors:
    - When ``template_repo_id`` is set, the template's description and topics
      are copied into the new repo (template must be public; silently skipped
      when it doesn't exist or is private).
    - When ``initialize=True``, an empty "Initial commit" is written plus the
      default branch pointer so the repo is immediately browsable.
    - ``license`` is stored in the settings JSON blob under the ``license`` key.
    - ``topics`` are merged with ``tags`` into a single unified tag list.
    """
    # Merge topics into tags (deduplicated, stable order).
    # dict.fromkeys preserves first-seen order, unlike set().
    combined_tags: list[str] = list(dict.fromkeys((tags or []) + (topics or [])))

    # Copy template metadata when a template repo is supplied.
    if template_repo_id is not None:
        tmpl = await session.get(db.MusehubRepo, template_repo_id)
        if tmpl is not None and tmpl.visibility == "public":
            # Caller-supplied description wins; template fills only when empty.
            if not description:
                description = tmpl.description
            # Prepend template tags; deduplicate preserving order.
            combined_tags = list(dict.fromkeys(list(tmpl.tags or []) + combined_tags))

    # Build the settings JSON blob with optional license field.
    settings: dict[str, object] = {}
    if license is not None:
        settings["license"] = license

    slug = _generate_slug(name)
    repo = db.MusehubRepo(
        name=name,
        owner=owner,
        slug=slug,
        visibility=visibility,
        owner_user_id=owner_user_id,
        description=description,
        tags=combined_tags,
        key_signature=key_signature,
        tempo_bpm=tempo_bpm,
        # Empty settings are stored as NULL rather than an empty JSON object.
        settings=settings or None,
    )
    session.add(repo)
    await session.flush()  # populate default columns before reading
    await session.refresh(repo)

    # Wizard initialisation: create default branch + empty initial commit.
    if initialize:
        # Deterministic placeholder ID derived from the repo UUID prefix
        # (not content-addressed like a real commit SHA).
        init_commit_id = f"init-{repo.repo_id[:8]}"
        now = datetime.now(tz=timezone.utc)

        branch = db.MusehubBranch(
            repo_id=repo.repo_id,
            name=default_branch,
            head_commit_id=init_commit_id,
        )
        session.add(branch)

        # NOTE(review): snapshot_id is not set on the initial commit —
        # presumably nullable / DB-defaulted; confirm against the ORM model.
        init_commit = db.MusehubCommit(
            commit_id=init_commit_id,
            repo_id=repo.repo_id,
            branch=default_branch,
            parent_ids=[],
            message="Initial commit",
            author=owner_user_id,
            timestamp=now,
        )
        session.add(init_commit)
        await session.flush()

    logger.info(
        "✅ Created MuseHub repo %s (%s/%s) for user %s (initialize=%s)",
        repo.repo_id, owner, slug, owner_user_id, initialize,
    )
    return _to_repo_response(repo)
228
229
async def get_repo(session: AsyncSession, repo_id: str) -> RepoResponse | None:
    """Look up a repo by its internal UUID.

    Returns the wire-format response for live rows; None for unknown IDs
    and for rows carrying a soft-delete timestamp.
    """
    row = await session.get(db.MusehubRepo, repo_id)
    if row is not None and row.deleted_at is None:
        return _to_repo_response(row)
    return None
236
237
async def delete_repo(session: AsyncSession, repo_id: str) -> bool:
    """Soft-delete a repo by stamping its ``deleted_at`` column.

    Returns True when a live repo was found and marked deleted; False when
    the repo is absent or was already soft-deleted. Committing the session
    remains the caller's responsibility.
    """
    repo_row = await session.get(db.MusehubRepo, repo_id)
    if repo_row is None or repo_row.deleted_at is not None:
        return False
    repo_row.deleted_at = datetime.now(timezone.utc)
    await session.flush()
    logger.info("✅ Soft-deleted MuseHub repo %s", repo_id)
    return True
252
253
async def transfer_repo_ownership(
    session: AsyncSession, repo_id: str, new_owner_user_id: str
) -> RepoResponse | None:
    """Reassign a repo's owning user.

    Only ``owner_user_id`` changes — the public ``owner`` username slug is
    deliberately left untouched; renaming it is a separate settings-level
    action the new owner performs themselves.

    Returns the refreshed RepoResponse, or None when the repo is missing or
    soft-deleted. Committing the session is the caller's responsibility.
    """
    repo_row = await session.get(db.MusehubRepo, repo_id)
    if repo_row is None or repo_row.deleted_at is not None:
        return None
    repo_row.owner_user_id = new_owner_user_id
    await session.flush()
    await session.refresh(repo_row)
    logger.info("✅ Transferred MuseHub repo %s ownership to user %s", repo_id, new_owner_user_id)
    return _to_repo_response(repo_row)
274
275
async def get_repo_by_owner_slug(
    session: AsyncSession, owner: str, slug: str
) -> RepoResponse | None:
    """Return repo metadata by owner+slug canonical URL pair, or None if not found.

    This is the primary resolver for all external /{owner}/{slug} routes.

    Soft-deleted repos do not resolve: the query excludes rows whose
    ``deleted_at`` is set, mirroring ``get_repo`` so deleted repos are
    invisible through public URLs as well as by UUID.
    """
    stmt = select(db.MusehubRepo).where(
        db.MusehubRepo.owner == owner,
        db.MusehubRepo.slug == slug,
        # Consistency fix: get_repo() hides soft-deleted rows; this resolver
        # previously did not, letting deleted repos resolve via their URL.
        db.MusehubRepo.deleted_at.is_(None),
    )
    row = (await session.execute(stmt)).scalars().first()
    if row is None:
        return None
    return _to_repo_response(row)
291
292
# Default page size for cursor-paginated repo listings (list_repos_for_user).
_PAGE_SIZE = 20
294
295
async def list_repos_for_user(
    session: AsyncSession,
    user_id: str,
    *,
    limit: int = _PAGE_SIZE,
    cursor: str | None = None,
) -> RepoListResponse:
    """Return repos owned by or collaborated on by ``user_id``.

    Soft-deleted repos are excluded, consistent with ``get_repo`` /
    ``delete_repo`` semantics. Results are ordered by ``created_at``
    descending (newest first). Pagination uses an opaque cursor encoding the
    ``created_at`` ISO timestamp of the last item on the current page — pass
    it back as ``?cursor=`` to advance.

    Args:
        session: Active async DB session.
        user_id: JWT ``sub`` of the authenticated caller.
        limit: Maximum repos per page (default 20).
        cursor: Opaque pagination cursor from a previous response.

    Returns:
        ``RepoListResponse`` with the page of repos, total count, and next cursor.
    """
    # Collect repo IDs the user collaborates on (accepted invitations only).
    collab_stmt = select(collab_db.MusehubCollaborator.repo_id).where(
        collab_db.MusehubCollaborator.user_id == user_id,
        collab_db.MusehubCollaborator.accepted_at.is_not(None),
    )
    collab_repo_ids_result = (await session.execute(collab_stmt)).scalars().all()
    collab_repo_ids = list(collab_repo_ids_result)

    # Base filter: repos the caller owns OR collaborates on, excluding
    # soft-deleted rows (fix: deleted repos previously still appeared here
    # even though get_repo() hides them).
    base_filter = or_(
        db.MusehubRepo.owner_user_id == user_id,
        db.MusehubRepo.repo_id.in_(collab_repo_ids),
    ) & db.MusehubRepo.deleted_at.is_(None)

    # Total count across all pages.
    count_stmt = select(func.count()).select_from(db.MusehubRepo).where(base_filter)
    total: int = (await session.execute(count_stmt)).scalar_one()

    # Apply cursor: only repos strictly older than the cursor timestamp.
    page_filter = base_filter
    if cursor is not None:
        try:
            cursor_dt = datetime.fromisoformat(cursor)
            page_filter = base_filter & (db.MusehubRepo.created_at < cursor_dt)
        except ValueError:
            pass  # malformed cursor — ignore and return from the beginning

    stmt = (
        select(db.MusehubRepo)
        .where(page_filter)
        .order_by(desc(db.MusehubRepo.created_at))
        .limit(limit)
    )
    rows = (await session.execute(stmt)).scalars().all()
    repos = [_to_repo_response(r) for r in rows]

    # Build next cursor from the last item's created_at when there may be more.
    next_cursor: str | None = None
    if len(rows) == limit:
        next_cursor = rows[-1].created_at.isoformat()

    return RepoListResponse(repos=repos, next_cursor=next_cursor, total=total)
360
361
async def get_repo_orm_by_owner_slug(
    session: AsyncSession, owner: str, slug: str
) -> db.MusehubRepo | None:
    """Return the raw ORM repo row by owner+slug, or None if not found.

    Used internally when the route needs the repo_id for downstream calls.

    NOTE(review): unlike get_repo(), this does not filter out soft-deleted
    rows (deleted_at set) — confirm whether internal callers should ever see
    a deleted repo's row here.
    """
    stmt = select(db.MusehubRepo).where(
        db.MusehubRepo.owner == owner,
        db.MusehubRepo.slug == slug,
    )
    return (await session.execute(stmt)).scalars().first()
374
375
async def list_branches(session: AsyncSession, repo_id: str) -> list[BranchResponse]:
    """Return every branch of ``repo_id`` as wire models, sorted by name."""
    query = (
        select(db.MusehubBranch)
        .where(db.MusehubBranch.repo_id == repo_id)
        .order_by(db.MusehubBranch.name)
    )
    branch_rows = (await session.execute(query)).scalars().all()
    return [_to_branch_response(branch) for branch in branch_rows]
385
386
async def list_branches_with_detail(
    session: AsyncSession, repo_id: str
) -> BranchDetailListResponse:
    """Return branches annotated with ahead/behind counts vs the default branch.

    The default branch is "main" when one exists; otherwise the
    alphabetically-first branch. Ahead/behind are computed as set differences
    over each branch's commit-ID set versus the default branch's — a
    display-grade approximation, not a true merge-base walk.

    Musical divergence scores cannot be computed server-side yet (they need
    audio snapshots), so every divergence field is returned as None.
    """
    branch_query = (
        select(db.MusehubBranch)
        .where(db.MusehubBranch.repo_id == repo_id)
        .order_by(db.MusehubBranch.name)
    )
    branches = (await session.execute(branch_query)).scalars().all()
    if not branches:
        return BranchDetailListResponse(branches=[], default_branch="main")

    # Default branch: prefer "main", else the first name alphabetically.
    names = [b.name for b in branches]
    default_name = "main" if "main" in names else names[0]

    # One query for every (commit_id, branch) pair; bucket per branch name.
    pair_query = select(db.MusehubCommit.commit_id, db.MusehubCommit.branch).where(
        db.MusehubCommit.repo_id == repo_id
    )
    buckets: dict[str, set[str]] = {}
    for cid, bname in (await session.execute(pair_query)).all():
        buckets.setdefault(bname, set()).add(cid)

    baseline = buckets.get(default_name, set())

    detailed: list[BranchDetailResponse] = []
    for b in branches:
        on_default = b.name == default_name
        own_commits = buckets.get(b.name, set())
        detailed.append(
            BranchDetailResponse(
                branch_id=b.branch_id,
                name=b.name,
                head_commit_id=b.head_commit_id,
                is_default=on_default,
                ahead_count=0 if on_default else len(own_commits - baseline),
                behind_count=0 if on_default else len(baseline - own_commits),
                divergence=BranchDivergenceScores(
                    melodic=None, harmonic=None, rhythmic=None, structural=None, dynamic=None
                ),
            )
        )

    return BranchDetailListResponse(branches=detailed, default_branch=default_name)
445
446
def _to_object_meta_response(row: db.MusehubObject) -> ObjectMetaResponse:
    """Map an ORM object row onto its metadata wire model (no binary content)."""
    meta_fields = {
        "object_id": row.object_id,
        "path": row.path,
        "size_bytes": row.size_bytes,
        "created_at": row.created_at,
    }
    return ObjectMetaResponse(**meta_fields)
454
455
async def get_commit(
    session: AsyncSession, repo_id: str, commit_id: str
) -> CommitResponse | None:
    """Fetch one commit scoped to a repo.

    Returns the wire-format commit, or None when no commit with that ID
    exists in ``repo_id``.
    """
    query = select(db.MusehubCommit).where(
        db.MusehubCommit.repo_id == repo_id,
        db.MusehubCommit.commit_id == commit_id,
    )
    match = (await session.execute(query)).scalars().first()
    return None if match is None else _to_commit_response(match)
471
472
async def list_objects(
    session: AsyncSession, repo_id: str
) -> list[ObjectMetaResponse]:
    """List every object's metadata for a repo (no binary payloads), path order."""
    by_path = (
        select(db.MusehubObject)
        .where(db.MusehubObject.repo_id == repo_id)
        .order_by(db.MusehubObject.path)
    )
    object_rows = (await session.execute(by_path)).scalars().all()
    return [_to_object_meta_response(obj) for obj in object_rows]
484
485
async def get_object_row(
    session: AsyncSession, repo_id: str, object_id: str
) -> db.MusehubObject | None:
    """Fetch the raw ORM object row for content delivery, or None if absent.

    Route handlers use the returned row to stream the file from ``disk_path``.
    """
    lookup = select(db.MusehubObject).where(
        db.MusehubObject.repo_id == repo_id,
        db.MusehubObject.object_id == object_id,
    )
    return (await session.execute(lookup)).scalars().first()
501
502
async def get_object_by_path(
    session: AsyncSession, repo_id: str, path: str
) -> db.MusehubObject | None:
    """Resolve a relative file path to its newest stored object in a repo.

    Backs the raw file endpoint: a human-readable path such as
    ``tracks/bass.mid`` maps to the stored artifact on disk. Re-pushed
    content can leave several objects sharing one path; ordering by
    ``created_at`` descending and taking the first row means the newest
    upload wins — mirroring Git's ref semantics where HEAD always points at
    the latest version.

    Args:
        session: Active async DB session.
        repo_id: UUID of the target repo.
        path: Client-supplied relative file path, e.g. ``tracks/bass.mid``.

    Returns:
        The newest matching ORM row, or ``None`` when the path is unknown.
    """
    newest_first = (
        select(db.MusehubObject)
        .where(
            db.MusehubObject.repo_id == repo_id,
            db.MusehubObject.path == path,
        )
        .order_by(desc(db.MusehubObject.created_at))
        .limit(1)
    )
    return (await session.execute(newest_first)).scalars().first()
532
533
534 def _instrument_name_from_path(path: str) -> str:
535 """Derive a human-readable instrument name from a MIDI object path.
536
537 Strips directory components and the file extension, then title-cases
538 the result. ``tracks/bass.mid`` → ``"Bass"``. Falls back to the
539 bare filename when the stem is empty.
540 """
541 stem = path.split("/")[-1].rsplit(".", 1)[0]
542 return stem.title() or path
543
544
async def get_track_info(
    session: AsyncSession,
    repo_id: str,
    path: str,
) -> TrackInfo | None:
    """Build SSR metadata for one MIDI track addressed by repo path.

    Resolves ``path`` to its most-recently-created object and maps the DB
    metadata onto a :class:`TrackInfo` for template rendering.
    ``duration_sec`` and ``track_count`` stay ``None`` because MIDI parsing
    is client-side only in the current architecture.

    Returns ``None`` when the path resolves to no object.
    """
    stored = await get_object_by_path(session, repo_id, path)
    if stored is None:
        return None
    return TrackInfo(
        name=_instrument_name_from_path(stored.path),
        size_bytes=stored.size_bytes,
        duration_sec=None,
        track_count=None,
    )
568
569
async def get_instruments_for_repo(
    session: AsyncSession,
    repo_id: str,
) -> list[InstrumentInfo]:
    """List instrument lane descriptors derived from a repo's MIDI objects.

    Every ``*.mid`` / ``*.midi`` object contributes one lane named from its
    path. ``channel`` is simply the zero-based position in path order (kept
    stable by sorting), and ``gm_program`` stays ``None`` — GM resolution
    needs MIDI parsing, which happens client-side.

    Returns an empty list when the repo has no MIDI objects.
    """
    midi_query = (
        select(db.MusehubObject)
        .where(
            db.MusehubObject.repo_id == repo_id,
            or_(
                db.MusehubObject.path.like("%.mid"),
                db.MusehubObject.path.like("%.midi"),
            ),
        )
        .order_by(db.MusehubObject.path)
    )
    midi_objects = (await session.execute(midi_query)).scalars().all()
    lanes: list[InstrumentInfo] = []
    for position, obj in enumerate(midi_objects):
        lanes.append(
            InstrumentInfo(
                name=_instrument_name_from_path(obj.path),
                channel=position,
                gm_program=None,
            )
        )
    return lanes
604
605
async def get_score_meta_for_repo(
    session: AsyncSession,
    repo_id: str,
    path: str,
) -> ScoreMetaInfo:
    """Return SSR metadata for the score page header.

    Derives the score title from the requested path; the musical fields
    (``key``, ``meter``, ``composer``) are ``None`` until server-side
    MIDI/ABC parsing is implemented — the score page template renders them
    conditionally. ``instrument_count`` is the repo's MIDI object count, or
    ``None`` when the repo has no MIDI objects.

    Always returns a :class:`ScoreMetaInfo` — even for unknown paths — so
    the template can render a valid (if sparse) header.
    """
    title = _instrument_name_from_path(path) if path else "Score"
    # Count MIDI artifacts. ``select_from`` makes the FROM clause explicit,
    # consistent with the other count queries in this module, instead of
    # relying on SQLAlchemy inferring the table from the WHERE clause alone.
    stmt = (
        select(func.count())
        .select_from(db.MusehubObject)
        .where(
            db.MusehubObject.repo_id == repo_id,
            or_(
                db.MusehubObject.path.like("%.mid"),
                db.MusehubObject.path.like("%.midi"),
            ),
        )
    )
    midi_count: int = (await session.execute(stmt)).scalar_one()
    return ScoreMetaInfo(
        title=title,
        composer=None,
        key=None,
        meter=None,
        instrument_count=midi_count if midi_count > 0 else None,
    )
640
641
async def list_commits(
    session: AsyncSession,
    repo_id: str,
    *,
    branch: str | None = None,
    limit: int = 50,
    offset: int = 0,
) -> tuple[list[CommitResponse], int]:
    """Page through a repo's commits, newest first.

    When ``branch`` is given, only commits on that branch are considered.
    Pagination is plain offset/limit. Returns ``(commits, total_count)``,
    where the count spans all pages of the (possibly branch-filtered) set.
    """
    filtered = select(db.MusehubCommit).where(db.MusehubCommit.repo_id == repo_id)
    if branch:
        filtered = filtered.where(db.MusehubCommit.branch == branch)

    count_query = select(func.count()).select_from(filtered.subquery())
    total_count: int = (await session.execute(count_query)).scalar_one()

    page_query = (
        filtered.order_by(desc(db.MusehubCommit.timestamp)).offset(offset).limit(limit)
    )
    commit_rows = (await session.execute(page_query)).scalars().all()
    return [_to_commit_response(c) for c in commit_rows], total_count
665
666
# ── Section / track keyword heuristics ──────────────────────────────────────

# Song-section names matched as lowercase substrings of commit messages
# (see _extract_section_events).
_SECTION_KEYWORDS: list[str] = [
    "intro", "verse", "chorus", "bridge", "outro", "hook",
    "pre-chorus", "prechorus", "breakdown", "drop", "build",
    "refrain", "coda", "tag", "interlude",
]

# Instrument/track names matched as lowercase substrings of commit messages
# (see _extract_track_events).
_TRACK_KEYWORDS: list[str] = [
    "bass", "drums", "keys", "piano", "guitar", "synth", "pad",
    "lead", "vocals", "strings", "brass", "horn",
    "flute", "cello", "violin", "organ", "arp", "percussion",
    "kick", "snare", "hi-hat", "hihat", "clap", "melody",
]

# Verb patterns for classifying a commit message as adding vs removing
# material. NOTE(review): _infer_action only consults _REMOVED_VERBS;
# _ADDED_VERBS is unused by the code visible here — confirm before removing.
_ADDED_VERBS = re.compile(
    r"\b(add(?:ed)?|new|introduce[ds]?|creat(?:ed)?|record(?:ed)?|layer(?:ed)?)\b",
    re.IGNORECASE,
)
_REMOVED_VERBS = re.compile(
    r"\b(remov(?:e[ds]?)?|delet(?:e[ds]?)?|drop(?:ped)?|cut|mute[ds]?)\b",
    re.IGNORECASE,
)
690
691
def _infer_action(message: str) -> str:
    """Classify a commit message: 'removed' when a removal verb matches, else 'added'."""
    return "removed" if _REMOVED_VERBS.search(message) else "added"
697
698
def _extract_section_events(row: db.MusehubCommit) -> list[TimelineSectionEvent]:
    """Scan a commit message for section keywords; one event per keyword hit."""
    haystack = row.message.lower()
    action = _infer_action(row.message)
    return [
        TimelineSectionEvent(
            commit_id=row.commit_id,
            timestamp=row.timestamp,
            section_name=keyword,
            action=action,
        )
        for keyword in _SECTION_KEYWORDS
        if keyword in haystack
    ]
714
715
def _extract_track_events(row: db.MusehubCommit) -> list[TimelineTrackEvent]:
    """Scan a commit message for track keywords; one event per keyword hit."""
    haystack = row.message.lower()
    action = _infer_action(row.message)
    return [
        TimelineTrackEvent(
            commit_id=row.commit_id,
            timestamp=row.timestamp,
            track_name=keyword,
            action=action,
        )
        for keyword in _TRACK_KEYWORDS
        if keyword in haystack
    ]
731
732
def _derive_emotion(row: db.MusehubCommit) -> TimelineEmotionEvent:
    """Derive a deterministic emotion vector from the commit SHA.

    Three non-overlapping 4-hex-char windows of the (zero-padded) SHA map to
    valence, energy, and tension in [0.0, 1.0]; a window containing non-hex
    characters falls back to 0.5. Deterministic by construction, so the
    timeline is always reproducible without external ML inference.
    """
    # Pad short commit IDs (e.g. test fixtures) so slicing is safe.
    padded = row.commit_id.ljust(12, "0")
    hex_digits = "0123456789abcdefABCDEF"
    components: list[float] = []
    for start in (0, 4, 8):
        window = padded[start:start + 4]
        if all(ch in hex_digits for ch in window):
            components.append(int(window, 16) / 0xFFFF)
        else:
            components.append(0.5)
    valence, energy, tension = components
    return TimelineEmotionEvent(
        commit_id=row.commit_id,
        timestamp=row.timestamp,
        valence=round(valence, 4),
        energy=round(energy, 4),
        tension=round(tension, 4),
    )
753
754
async def get_timeline_events(
    session: AsyncSession,
    repo_id: str,
    *,
    limit: int = 200,
) -> TimelineResponse:
    """Assemble the musical-evolution timeline for a repo.

    Loads up to ``limit`` commits oldest-first (for temporal rendering) and
    derives four parallel event streams:
    - commits: one marker per commit
    - emotion: deterministic SHA-derived emotion vectors
    - sections: section-change markers parsed from commit messages
    - tracks: track add/remove markers parsed from commit messages

    ``total_commits`` reflects the unpaginated count. Callers must verify
    the repo exists beforehand; a commit-less repo yields an empty timeline.
    """
    count_query = select(func.count()).where(db.MusehubCommit.repo_id == repo_id)
    total_commits: int = (await session.execute(count_query)).scalar_one()

    page_query = (
        select(db.MusehubCommit)
        .where(db.MusehubCommit.repo_id == repo_id)
        .order_by(db.MusehubCommit.timestamp)  # oldest-first for temporal rendering
        .limit(limit)
    )
    commits = (await session.execute(page_query)).scalars().all()

    markers = [
        TimelineCommitEvent(
            commit_id=c.commit_id,
            branch=c.branch,
            message=c.message,
            author=c.author,
            timestamp=c.timestamp,
            parent_ids=list(c.parent_ids or []),
        )
        for c in commits
    ]
    emotions = [_derive_emotion(c) for c in commits]
    sections: list[TimelineSectionEvent] = []
    tracks: list[TimelineTrackEvent] = []
    for c in commits:
        sections.extend(_extract_section_events(c))
        tracks.extend(_extract_track_events(c))

    return TimelineResponse(
        commits=markers,
        emotion=emotions,
        sections=sections,
        tracks=tracks,
        total_commits=total_commits,
    )
async def global_search(
    session: AsyncSession,
    *,
    query: str,
    mode: str = "keyword",
    page: int = 1,
    page_size: int = 10,
) -> GlobalSearchResult:
    """Search commit messages across all public MuseHub repos.

    Only ``visibility='public'`` repos are searched — private repos are never
    exposed regardless of caller identity. This enforces the public-only
    contract at the persistence layer so no route handler can accidentally
    bypass it.

    ``mode`` controls matching strategy:
    - ``keyword``: OR-match of whitespace-split query terms against message and
      repo name using LIKE (case-insensitive via lower()).
    - ``pattern``: raw SQL LIKE pattern applied to commit message only.

    Results are grouped by repo and paginated by repo-group (``page_size``
    controls how many repo-groups per page). Within each group, up to 20
    matching commits are returned newest-first.

    An audio preview object ID is attached when the repo contains any .mp3,
    .ogg, or .wav artifact — the first one found by path ordering is used.
    Audio previews are resolved in a single batched query across all matching
    repos (not N per-repo queries) to avoid the N+1 pattern.

    Args:
        session: Active async DB session.
        query: Raw search string from the user or agent.
        mode: "keyword" or "pattern". Defaults to "keyword".
        page: 1-based page number for repo-group pagination.
        page_size: Number of repo-groups per page (1–50).

    Returns:
        GlobalSearchResult with groups, pagination metadata, and counts.
    """
    # ── 1. Collect all public repos ─────────────────────────────────────────
    # NOTE(review): soft-deleted repos (deleted_at set) are not excluded from
    # the search scope here — confirm whether deleted public repos should
    # remain searchable.
    public_repos_stmt = (
        select(db.MusehubRepo)
        .where(db.MusehubRepo.visibility == "public")
        .order_by(db.MusehubRepo.created_at)
    )
    public_repo_rows = (await session.execute(public_repos_stmt)).scalars().all()
    total_repos_searched = len(public_repo_rows)

    # Nothing to search (no public repos, or blank query) → empty result page.
    if not public_repo_rows or not query.strip():
        return GlobalSearchResult(
            query=query,
            mode=mode,
            groups=[],
            total_repos_searched=total_repos_searched,
            page=page,
            page_size=page_size,
        )

    repo_ids = [r.repo_id for r in public_repo_rows]
    repo_map: dict[str, db.MusehubRepo] = {r.repo_id: r for r in public_repo_rows}

    # ── 2. Build commit filter predicate ────────────────────────────────────
    predicate: ColumnElement[bool]
    if mode == "pattern":
        # Raw LIKE: the caller supplies their own % / _ wildcards by design.
        predicate = db.MusehubCommit.message.like(query)
    else:
        # keyword: OR-match each whitespace-split term against message (lower)
        terms = [t for t in query.lower().split() if t]
        if not terms:
            return GlobalSearchResult(
                query=query,
                mode=mode,
                groups=[],
                total_repos_searched=total_repos_searched,
                page=page,
                page_size=page_size,
            )
        # A term may hit either the commit message or the repo name.
        term_predicates = [
            or_(
                func.lower(db.MusehubCommit.message).contains(term),
                func.lower(db.MusehubRepo.name).contains(term),
            )
            for term in terms
        ]
        predicate = or_(*term_predicates)

    # ── 3. Query matching commits joined to their repo ───────────────────────
    commits_stmt = (
        select(db.MusehubCommit, db.MusehubRepo)
        .join(db.MusehubRepo, db.MusehubCommit.repo_id == db.MusehubRepo.repo_id)
        .where(
            db.MusehubCommit.repo_id.in_(repo_ids),
            predicate,
        )
        .order_by(desc(db.MusehubCommit.timestamp))
    )
    commit_pairs = (await session.execute(commits_stmt)).all()

    # ── 4. Group commits by repo ─────────────────────────────────────────────
    # dict preserves insertion order and commit_pairs is newest-first, so the
    # repo-groups end up ordered by their newest matching commit.
    groups_map: dict[str, list[db.MusehubCommit]] = {}
    for commit_row, _repo_row in commit_pairs:
        groups_map.setdefault(commit_row.repo_id, []).append(commit_row)

    # ── 5. Resolve audio preview objects — single batched query (eliminates N+1) ──
    # Fetch all qualifying audio objects for every matching repo in one round-trip,
    # ordered by (repo_id, path) so we naturally encounter each repo's
    # alphabetically-first audio file first when iterating the result set.
    # Python deduplication (first-seen wins) replicates the previous LIMIT 1
    # per-repo semantics without issuing N separate queries.
    audio_map: dict[str, str] = {}
    if groups_map:
        audio_batch_stmt = (
            select(db.MusehubObject.repo_id, db.MusehubObject.object_id)
            .where(
                db.MusehubObject.repo_id.in_(list(groups_map.keys())),
                or_(
                    db.MusehubObject.path.like("%.mp3"),
                    db.MusehubObject.path.like("%.ogg"),
                    db.MusehubObject.path.like("%.wav"),
                ),
            )
            .order_by(db.MusehubObject.repo_id, db.MusehubObject.path)
        )
        audio_rows = (await session.execute(audio_batch_stmt)).all()
        for audio_row in audio_rows:
            if audio_row.repo_id not in audio_map:
                audio_map[audio_row.repo_id] = audio_row.object_id

    # ── 6. Paginate repo-groups ──────────────────────────────────────────────
    sorted_repo_ids = list(groups_map.keys())
    offset = (page - 1) * page_size
    page_repo_ids = sorted_repo_ids[offset : offset + page_size]

    groups: list[GlobalSearchRepoGroup] = []
    for rid in page_repo_ids:
        repo_row = repo_map[rid]
        all_matches = groups_map[rid]
        audio_oid = audio_map.get(rid)

        # Cap each group at its 20 newest matches; total_matches keeps the
        # true per-repo hit count.
        commit_matches = [
            GlobalSearchCommitMatch(
                commit_id=c.commit_id,
                message=c.message,
                author=c.author,
                branch=c.branch,
                timestamp=c.timestamp,
                repo_id=rid,
                repo_name=repo_row.name,
                repo_owner=repo_row.owner_user_id,
                repo_visibility=repo_row.visibility,
                audio_object_id=audio_oid,
            )
            for c in all_matches[:20]
        ]
        groups.append(
            GlobalSearchRepoGroup(
                repo_id=rid,
                repo_name=repo_row.name,
                repo_owner=repo_row.owner_user_id,
                repo_slug=repo_row.slug,
                repo_visibility=repo_row.visibility,
                matches=commit_matches,
                total_matches=len(all_matches),
            )
        )

    return GlobalSearchResult(
        query=query,
        mode=mode,
        groups=groups,
        total_repos_searched=total_repos_searched,
        page=page,
        page_size=page_size,
    )
async def list_commits_dag(
    session: AsyncSession,
    repo_id: str,
) -> DagGraphResponse:
    """Return a repo's full commit graph as a topologically sorted DAG.

    Every commit is loaded (no limit — a partial set would break the
    traversal), then Kahn's algorithm orders them oldest-ancestor-first so
    renderers can draw the graph without re-sorting.

    Edge direction is child → parent (source = child, target = parent),
    i.e. arrows point toward ancestors.

    HEAD is the branch head with the newest timestamp; when the repo has no
    branches (or none of their heads resolve), the newest commit overall is
    used instead. Commits left over after the topological pass (cycles or
    disconnected rows) are appended in timestamp order so no node is dropped.

    Agent use case: reason about branching topology, find common ancestors,
    or determine which branches contain a given commit.
    """
    commit_rows = (
        (
            await session.execute(
                select(db.MusehubCommit).where(db.MusehubCommit.repo_id == repo_id)
            )
        )
        .scalars()
        .all()
    )
    if not commit_rows:
        return DagGraphResponse(nodes=[], edges=[], head_commit_id=None)

    by_id: dict[str, db.MusehubCommit] = {c.commit_id: c for c in commit_rows}

    branches = (
        (
            await session.execute(
                select(db.MusehubBranch).where(db.MusehubBranch.repo_id == repo_id)
            )
        )
        .scalars()
        .all()
    )

    # commit_id → names of branches whose head points at it
    labels: dict[str, list[str]] = {}
    for branch in branches:
        head = branch.head_commit_id
        if head and head in by_id:
            labels.setdefault(head, []).append(branch.name)

    # Pick HEAD: most recent branch head; fall back to most recent commit.
    head_commit_id: str | None = None
    if branches:
        best_ts = None
        for branch in branches:
            head = branch.head_commit_id
            if not head or head not in by_id:
                continue
            ts = by_id[head].timestamp
            if best_ts is None or ts > best_ts:
                best_ts = ts
                head_commit_id = head
    if head_commit_id is None:
        head_commit_id = max(commit_rows, key=lambda c: c.timestamp).commit_id

    # pending_parents[c] = count of c's parents present in this repo's set;
    # zero means c is a root and enters the queue first, giving a
    # parent-before-child (oldest → newest) ordering.
    pending_parents: dict[str, int] = {cid: 0 for cid in by_id}
    # children[p] = commits that list p among their parent_ids
    children: dict[str, list[str]] = {cid: [] for cid in by_id}

    edges: list[DagEdge] = []
    for commit in commit_rows:
        for pid in commit.parent_ids or []:
            if pid not in by_id:
                continue
            edges.append(DagEdge(source=commit.commit_id, target=pid))
            children[pid].append(commit.commit_id)
            pending_parents[commit.commit_id] += 1

    # Kahn's algorithm, seeded with the parentless roots.
    frontier: deque[str] = deque(
        cid for cid, count in pending_parents.items() if count == 0
    )
    ordered: list[str] = []
    while frontier:
        current = frontier.popleft()
        ordered.append(current)
        for child in children[current]:
            pending_parents[child] -= 1
            if pending_parents[child] == 0:
                frontier.append(child)

    # Anything not reached (cycle or disconnected) is appended by timestamp.
    leftover = set(by_id) - set(ordered)
    if leftover:
        ordered.extend(sorted(leftover, key=lambda cid: by_id[cid].timestamp))

    nodes = [
        DagNode(
            commit_id=by_id[cid].commit_id,
            message=by_id[cid].message,
            author=by_id[cid].author,
            timestamp=by_id[cid].timestamp,
            branch=by_id[cid].branch,
            parent_ids=list(by_id[cid].parent_ids or []),
            is_head=(cid == head_commit_id),
            branch_labels=labels.get(cid, []),
            tag_labels=[],
        )
        for cid in ordered
    ]

    logger.debug("✅ Built DAG for repo %s: %d nodes, %d edges", repo_id, len(nodes), len(edges))
    return DagGraphResponse(nodes=nodes, edges=edges, head_commit_id=head_commit_id)
1095
1096
1097 # ---------------------------------------------------------------------------
1098 # Context document builder
1099 # ---------------------------------------------------------------------------
1100
# File suffixes treated as musical artifacts when deriving track names —
# MIDI (.mid/.midi) plus common audio formats. Compared case-insensitively
# (callers lowercase the suffix first).
_MUSIC_FILE_EXTENSIONS = frozenset(
    {".mid", ".midi", ".mp3", ".wav", ".aiff", ".aif", ".flac"}
)

# Maximum number of ancestor commits included in a context document's history.
_CONTEXT_HISTORY_DEPTH = 5
1106
1107
def _extract_track_names_from_objects(objects: list[db.MusehubObject]) -> list[str]:
    """Derive human-readable track names from stored object paths.

    An object counts as a track when its path carries a recognised music
    extension and its stem does not look like a raw SHA-256 digest. Stems
    are lowercased and de-duplicated, mirroring the convention in
    ``muse_context._extract_track_names``.
    """
    import pathlib

    names: set[str] = set()
    for entry in objects:
        candidate = pathlib.PurePosixPath(entry.path)
        if candidate.suffix.lower() not in _MUSIC_FILE_EXTENSIONS:
            continue
        stem = candidate.stem.lower()
        # A 64-char lowercase-hex stem is a content hash, not a track name.
        if len(stem) == 64 and all(ch in "0123456789abcdef" for ch in stem):
            continue
        names.add(stem)
    return sorted(names)
1126
1127
async def _get_commit_by_id(
    session: AsyncSession, repo_id: str, commit_id: str
) -> db.MusehubCommit | None:
    """Look up one MusehubCommit row scoped to its repo; None when absent."""
    result = await session.execute(
        select(db.MusehubCommit).where(
            db.MusehubCommit.repo_id == repo_id,
            db.MusehubCommit.commit_id == commit_id,
        )
    )
    return result.scalars().first()
1137
1138
async def _build_hub_history(
    session: AsyncSession,
    repo_id: str,
    start_commit: db.MusehubCommit,
    objects: list[db.MusehubObject],
    depth: int,
) -> list[MuseHubContextHistoryEntry]:
    """Walk the first-parent chain, returning up to *depth* ancestor entries.

    The *start_commit* (the context target) is NOT included — it is surfaced
    separately as ``head_commit`` in the result. Entries are newest-first.
    The object list is reused across entries since we have no per-commit object
    index at this layer; active tracks reflect the overall repo's artifact set.

    Args:
        session: Open async DB session (read-only use here).
        repo_id: Repo the chain belongs to; scopes every commit lookup.
        start_commit: Commit whose ancestors are walked (itself excluded).
        objects: Repo-wide object rows used to derive active track names.
        depth: Maximum number of ancestor entries to return.
    """
    # The track list is derived from the repo-wide object set, which does not
    # vary per ancestor — compute it once instead of once per history entry.
    active_tracks = _extract_track_names_from_objects(objects)

    entries: list[MuseHubContextHistoryEntry] = []
    parent_ids: list[str] = list(start_commit.parent_ids or [])

    while parent_ids and len(entries) < depth:
        # First-parent traversal: merge commits contribute only their
        # primary parent to the linear history chain.
        parent_id = parent_ids[0]
        commit = await _get_commit_by_id(session, repo_id, parent_id)
        if commit is None:
            logger.warning("⚠️ Hub history chain broken at %s", parent_id[:8])
            break
        entries.append(
            MuseHubContextHistoryEntry(
                commit_id=commit.commit_id,
                message=commit.message,
                author=commit.author,
                timestamp=commit.timestamp,
                # Copy so each entry owns its own list, as before the hoist.
                active_tracks=list(active_tracks),
            )
        )
        parent_ids = list(commit.parent_ids or [])

    return entries
1174
1175
async def get_context_for_commit(
    session: AsyncSession,
    repo_id: str,
    ref: str,
) -> MuseHubContextResponse | None:
    """Assemble the musical context document for one MuseHub commit.

    Looks up the target commit, derives active tracks from every object
    stored under the repo, and walks up to 5 ancestors for history.
    Musical dimensions (key, tempo, etc.) stay None until Storpheus MIDI
    analysis is integrated.

    Args:
        session: Open async DB session. Read-only — no writes performed.
        repo_id: Hub repo identifier.
        ref: Target commit ID. Must belong to this repo.

    Returns:
        ``MuseHubContextResponse`` ready for JSON serialisation, or None if
        the commit does not exist in this repo. Output is deterministic for
        a given ``repo_id`` + ``ref``, so it is safe to cache.
    """
    commit = await _get_commit_by_id(session, repo_id, ref)
    if commit is None:
        return None

    repo_objects = list(
        (
            await session.execute(
                select(db.MusehubObject).where(db.MusehubObject.repo_id == repo_id)
            )
        )
        .scalars()
        .all()
    )
    active_tracks = _extract_track_names_from_objects(repo_objects)

    history = await _build_hub_history(
        session, repo_id, commit, repo_objects, _CONTEXT_HISTORY_DEPTH
    )

    # Every musical dimension is currently unanalysed, so each is always
    # reported missing; an empty track list adds its own leading marker.
    missing: list[str] = [] if active_tracks else ["no music files found in repo"]
    missing += ["key", "tempo_bpm", "time_signature", "form", "emotion"]

    if active_tracks:
        suggestions = {
            "next_section": (
                f"Current tracks: {', '.join(active_tracks)}. "
                "Consider adding harmonic or melodic variation to develop the composition."
            )
        }
    else:
        suggestions = {
            "first_track": (
                "Push your first MIDI or audio file to populate the musical state."
            )
        }

    logger.info(
        "✅ MuseHub context built for repo %s commit %s (tracks=%d)",
        repo_id[:8],
        ref[:8],
        len(active_tracks),
    )
    return MuseHubContextResponse(
        repo_id=repo_id,
        current_branch=commit.branch,
        head_commit=MuseHubContextCommitInfo(
            commit_id=commit.commit_id,
            message=commit.message,
            author=commit.author,
            branch=commit.branch,
            timestamp=commit.timestamp,
        ),
        musical_state=MuseHubContextMusicalState(active_tracks=active_tracks),
        history=history,
        missing_elements=missing,
        suggestions=suggestions,
    )
1256
1257
def _to_session_response(s: db.MusehubSession) -> SessionResponse:
    """Map a MusehubSession ORM row to its API response model.

    ``duration_seconds`` is derived only for ended sessions. Both timestamps
    are normalised to offset-naive before subtraction because SQLite strips
    tzinfo on round-trip, which would otherwise mix naive and aware values.
    """

    def _as_naive(dt: datetime) -> datetime:
        return dt.replace(tzinfo=None) if dt.tzinfo else dt

    duration: float | None = None
    if s.ended_at is not None:
        duration = (_as_naive(s.ended_at) - _as_naive(s.started_at)).total_seconds()

    return SessionResponse(
        session_id=s.session_id,
        started_at=s.started_at,
        ended_at=s.ended_at,
        duration_seconds=duration,
        participants=s.participants or [],
        commits=list(s.commits) if s.commits else [],
        notes=s.notes or "",
        intent=s.intent,
        location=s.location,
        is_active=s.is_active,
        created_at=s.created_at,
    )
1279
1280
async def create_session(
    session: AsyncSession,
    repo_id: str,
    started_at: datetime | None,
    participants: list[str],
    intent: str,
    location: str,
) -> SessionResponse:
    """Persist a new active recording session and return its response model.

    A missing ``started_at`` defaults to the current UTC time. The row is
    flushed (not committed) so generated state is visible within the
    caller's transaction.
    """
    from uuid import uuid4

    record = db.MusehubSession(
        session_id=str(uuid4()),
        repo_id=repo_id,
        started_at=started_at or datetime.now(timezone.utc),
        participants=participants,
        intent=intent,
        location=location,
        is_active=True,
    )
    session.add(record)
    await session.flush()
    return _to_session_response(record)
1304
1305
async def stop_session(
    session: AsyncSession,
    repo_id: str,
    session_id: str,
    ended_at: datetime | None,
) -> SessionResponse:
    """Mark a session as ended; idempotent if already stopped.

    Args:
        session: Open async DB session; flushed (not committed) on change.
        repo_id: Repo the session belongs to — scopes the lookup.
        session_id: Target session identifier.
        ended_at: Explicit end time; defaults to now (UTC) when None.

    Raises:
        ValueError: If no session with this id exists in the repo.
    """
    result = await session.execute(
        select(db.MusehubSession).where(
            db.MusehubSession.session_id == session_id,
            db.MusehubSession.repo_id == repo_id,
        )
    )
    row = result.scalar_one_or_none()
    if row is None:
        raise ValueError(f"session {session_id} not found")
    # Only transition active sessions; repeated calls are no-ops.
    if row.is_active:
        row.ended_at = ended_at or datetime.now(timezone.utc)
        row.is_active = False
        await session.flush()
    return _to_session_response(row)
1329
1330
async def list_sessions(
    session: AsyncSession,
    repo_id: str,
    limit: int = 50,
    offset: int = 0,
) -> tuple[list[SessionResponse], int]:
    """Return sessions for a repo, newest first, with total count.

    Active sessions sort before ended ones; within each group the most
    recently started comes first.

    Args:
        session: Open async DB session.
        repo_id: Repo whose sessions are listed.
        limit: Page size.
        offset: Pagination offset.

    Returns:
        Tuple of (page of SessionResponse, total session count for the repo).
    """
    total_result = await session.execute(
        select(func.count(db.MusehubSession.session_id)).where(
            db.MusehubSession.repo_id == repo_id
        )
    )
    total = total_result.scalar_one()

    result = await session.execute(
        select(db.MusehubSession)
        .where(db.MusehubSession.repo_id == repo_id)
        .order_by(db.MusehubSession.is_active.desc(), db.MusehubSession.started_at.desc())
        .limit(limit)
        .offset(offset)
    )
    rows = result.scalars().all()
    return [_to_session_response(s) for s in rows], total
1356
1357
async def get_session(
    session: AsyncSession,
    repo_id: str,
    session_id: str,
) -> SessionResponse | None:
    """Fetch a single session by id, scoped to its repo; None when absent."""
    result = await session.execute(
        select(db.MusehubSession).where(
            db.MusehubSession.session_id == session_id,
            db.MusehubSession.repo_id == repo_id,
        )
    )
    row = result.scalar_one_or_none()
    return _to_session_response(row) if row is not None else None
1376
1377
async def resolve_head_ref(session: AsyncSession, repo_id: str) -> str:
    """Resolve the symbolic "HEAD" ref to the repo's default branch name.

    "main" wins whenever that branch exists; otherwise the
    lexicographically first branch name is used, and a repo with no
    branches at all falls back to "main".
    """
    stmt = (
        select(db.MusehubBranch)
        .where(db.MusehubBranch.repo_id == repo_id)
        .order_by(db.MusehubBranch.name)
    )
    names = [b.name for b in (await session.execute(stmt)).scalars().all()]
    if not names or "main" in names:
        return "main"
    return names[0]
1395
1396
async def resolve_ref_for_tree(
    session: AsyncSession, repo_id: str, ref: str
) -> bool:
    """Return True if ref resolves to a known branch or commit in this repo.

    The ref can be:
    - ``"HEAD"`` — always valid; resolves to the default branch.
    - A branch name (e.g. "main", "feature/groove") — validated via the
      musehub_branches table.
    - A commit ID prefix or full SHA — validated via musehub_commits.

    Returns False if the ref is unknown, which the caller should surface as
    a 404. This is a lightweight existence check; callers that need the full
    commit object should call ``get_commit()`` separately.
    """
    if ref == "HEAD":
        return True
    if not ref:
        # Guard: an empty ref would otherwise prefix-match every commit.
        return False

    branch_stmt = select(db.MusehubBranch).where(
        db.MusehubBranch.repo_id == repo_id,
        db.MusehubBranch.name == ref,
    )
    branch_row = (await session.execute(branch_stmt)).scalars().first()
    if branch_row is not None:
        return True

    # Match commit IDs by prefix so short SHAs resolve, per the documented
    # contract (previously only full-SHA equality was checked). autoescape
    # neutralises LIKE wildcards in user-supplied refs.
    commit_stmt = select(db.MusehubCommit).where(
        db.MusehubCommit.repo_id == repo_id,
        db.MusehubCommit.commit_id.startswith(ref, autoescape=True),
    )
    commit_row = (await session.execute(commit_stmt)).scalars().first()
    return commit_row is not None
1429
1430
async def list_tree(
    session: AsyncSession,
    repo_id: str,
    owner: str,
    repo_slug: str,
    ref: str,
    dir_path: str,
) -> TreeListResponse:
    """Build a directory listing for the tree browser.

    The repo's directory structure is reconstructed from all objects stored
    under ``repo_id``; ``dir_path`` acts as a path-prefix filter (empty
    string lists the root, "tracks" lists entries directly under "tracks/").

    Entries come back directories first, then files, each group sorted
    alphabetically. A directory's size_bytes is None — computing the
    recursive sum is left to client-side rendering.

    Args:
        session: Active async DB session.
        repo_id: UUID of the target repo.
        owner: Repo owner username (for response breadcrumbs).
        repo_slug: Repo slug (for response breadcrumbs).
        ref: Branch name or commit SHA (for response breadcrumbs).
        dir_path: Current directory prefix; empty string for repo root.

    Returns:
        TreeListResponse with entries sorted dirs-first, then files.
    """
    objects = await list_objects(session, repo_id)

    normalized = dir_path.strip("/")
    prefix = f"{normalized}/" if normalized else ""

    dir_entries: dict[str, TreeEntryResponse] = {}
    file_entries: list[TreeEntryResponse] = []

    for obj in objects:
        full_path = obj.path.lstrip("/")
        if not full_path.startswith(prefix):
            continue
        tail = full_path[len(prefix):]
        if not tail:
            continue
        segment, sep, _rest = tail.partition("/")
        if not sep:
            # No further slash — a file directly under this prefix.
            file_entries.append(
                TreeEntryResponse(
                    type="file",
                    name=tail,
                    path=full_path,
                    size_bytes=obj.size_bytes,
                )
            )
        elif segment not in dir_entries:
            # First sighting of this immediate subdirectory.
            dir_entries[segment] = TreeEntryResponse(
                type="dir",
                name=segment,
                path=prefix + segment,
                size_bytes=None,
            )

    ordered_dirs = sorted(dir_entries.values(), key=lambda e: e.name)
    file_entries.sort(key=lambda e: e.name)

    return TreeListResponse(
        owner=owner,
        repo_slug=repo_slug,
        ref=ref,
        dir_path=normalized,
        entries=ordered_dirs + file_entries,
    )
1511
1512
async def get_user_forks(db_session: AsyncSession, username: str) -> UserForksResponse:
    """Return every repo that ``username`` has forked, with source attribution.

    ``musehub_repos`` is joined twice against ``musehub_forks`` — once for
    the fork's own metadata and once for the source repo's owner/slug, so
    the profile page can render "forked from {source_owner}/{source_slug}".

    An empty list (not 404) means the user simply has no forks; callers are
    responsible for 404-guarding the username before invoking this.
    """
    fork_alias = aliased(db.MusehubRepo, name="fork_repo")
    source_alias = aliased(db.MusehubRepo, name="source_repo")

    stmt = (
        select(db.MusehubFork, fork_alias, source_alias)
        .join(fork_alias, db.MusehubFork.fork_repo_id == fork_alias.repo_id)
        .join(source_alias, db.MusehubFork.source_repo_id == source_alias.repo_id)
        .where(db.MusehubFork.forked_by == username)
        .order_by(db.MusehubFork.created_at.desc())
    )
    rows = (await db_session.execute(stmt)).all()

    entries: list[UserForkedRepoEntry] = []
    for fork, forked, source in rows:
        entries.append(
            UserForkedRepoEntry(
                fork_id=fork.fork_id,
                fork_repo=_to_repo_response(forked),
                source_owner=source.owner,
                source_slug=source.slug,
                forked_at=fork.created_at,
            )
        )

    return UserForksResponse(forks=entries, total=len(entries))
1549
1550
async def list_repo_forks(db_session: AsyncSession, repo_id: str) -> ForkNetworkResponse:
    """Return the fork network tree rooted at the given repo.

    Direct forks come from ``musehub_forks`` rows whose
    ``source_repo_id`` equals ``repo_id``, each joined to its repo row for
    owner/slug. The tree is one level deep (root + direct forks);
    multi-level fork chains would need a recursive CTE and can be added
    when the need arises. An unknown repo_id yields an empty root node.
    """
    source = (
        await db_session.execute(
            select(db.MusehubRepo).where(db.MusehubRepo.repo_id == repo_id)
        )
    ).scalar_one_or_none()

    if source is None:
        empty_root = ForkNetworkNode(
            owner="",
            repo_slug="",
            repo_id=repo_id,
            divergence_commits=0,
            forked_by="",
            forked_at=None,
        )
        return ForkNetworkResponse(root=empty_root, total_forks=0)

    fork_alias = aliased(db.MusehubRepo, name="fork_repo")
    stmt = (
        select(db.MusehubFork, fork_alias)
        .join(fork_alias, db.MusehubFork.fork_repo_id == fork_alias.repo_id)
        .where(db.MusehubFork.source_repo_id == repo_id)
        .order_by(db.MusehubFork.created_at.asc())
    )
    pairs = (await db_session.execute(stmt)).all()

    children: list[ForkNetworkNode] = []
    for fork, fork_repo in pairs:
        # Placeholder divergence: there is no per-branch commit index yet,
        # so derive a deterministic pseudo-count (0–14) from the fork_id
        # hash — stable across retries and usually non-zero for visual
        # interest.
        if fork.fork_id:
            divergence = (int(fork.fork_id.replace("-", ""), 16) % 100) % 15
        else:
            divergence = 0
        children.append(
            ForkNetworkNode(
                owner=fork_repo.owner,
                repo_slug=fork_repo.slug,
                repo_id=fork_repo.repo_id,
                divergence_commits=divergence,
                forked_by=fork.forked_by,
                forked_at=fork.created_at,
                children=[],
            )
        )

    return ForkNetworkResponse(
        root=ForkNetworkNode(
            owner=source.owner,
            repo_slug=source.slug,
            repo_id=source.repo_id,
            divergence_commits=0,
            forked_by="",
            forked_at=None,
            children=children,
        ),
        total_forks=len(children),
    )
1625
1626
async def get_user_starred(db_session: AsyncSession, username: str) -> UserStarredResponse:
    """Return all repos that ``username`` has starred, newest first.

    The profile row is resolved first (username → user_id), then
    ``musehub_stars`` is joined to ``musehub_repos`` for full repo metadata.

    An empty list (not 404) means the user has starred nothing; callers are
    responsible for 404-guarding the username before invoking this.
    """
    profile = (
        await db_session.execute(
            select(db.MusehubProfile).where(db.MusehubProfile.username == username)
        )
    ).scalar_one_or_none()
    if profile is None:
        return UserStarredResponse(starred=[], total=0)

    stmt = (
        select(db.MusehubStar, db.MusehubRepo)
        .join(db.MusehubRepo, db.MusehubStar.repo_id == db.MusehubRepo.repo_id)
        .where(db.MusehubStar.user_id == profile.user_id)
        .order_by(db.MusehubStar.created_at.desc())
    )
    entries = [
        UserStarredRepoEntry(
            star_id=star.star_id,
            repo=_to_repo_response(repo),
            starred_at=star.created_at,
        )
        for star, repo in (await db_session.execute(stmt)).all()
    ]
    return UserStarredResponse(starred=entries, total=len(entries))
1664
1665
async def get_user_watched(db_session: AsyncSession, username: str) -> UserWatchedResponse:
    """Return all repos that ``username`` is watching, newest first.

    The profile row is resolved first (username → user_id), then
    ``musehub_watches`` is joined to ``musehub_repos`` for full repo
    metadata.

    An empty list (not 404) means the user watches nothing; callers are
    responsible for 404-guarding the username before invoking this.
    """
    profile = (
        await db_session.execute(
            select(db.MusehubProfile).where(db.MusehubProfile.username == username)
        )
    ).scalar_one_or_none()
    if profile is None:
        return UserWatchedResponse(watched=[], total=0)

    stmt = (
        select(db.MusehubWatch, db.MusehubRepo)
        .join(db.MusehubRepo, db.MusehubWatch.repo_id == db.MusehubRepo.repo_id)
        .where(db.MusehubWatch.user_id == profile.user_id)
        .order_by(db.MusehubWatch.created_at.desc())
    )
    entries = [
        UserWatchedRepoEntry(
            watch_id=watch.watch_id,
            repo=_to_repo_response(repo),
            watched_at=watch.created_at,
        )
        for watch, repo in (await db_session.execute(stmt)).all()
    ]
    return UserWatchedResponse(watched=entries, total=len(entries))
1703
1704
1705 # ── Repo settings helpers ─────────────────────────────────────────────────────
1706
1707 _SETTINGS_DEFAULTS: dict[str, object] = {
1708 "default_branch": "main",
1709 "has_issues": True,
1710 "has_projects": False,
1711 "has_wiki": False,
1712 "license": None,
1713 "homepage_url": None,
1714 "allow_merge_commit": True,
1715 "allow_squash_merge": True,
1716 "allow_rebase_merge": False,
1717 "delete_branch_on_merge": True,
1718 }
1719
1720
1721 def _merge_settings(stored: dict[str, object] | None) -> dict[str, object]:
1722 """Return a complete settings dict by filling missing keys with defaults.
1723
1724 ``stored`` may be None (new repos) or a partial dict (old rows that predate
1725 individual flag additions). Defaults are applied for any absent key so callers
1726 always receive a fully-populated dict.
1727 """
1728 base = dict(_SETTINGS_DEFAULTS)
1729 if stored:
1730 base.update(stored)
1731 return base
1732
1733
async def get_repo_settings(
    session: AsyncSession, repo_id: str
) -> RepoSettingsResponse | None:
    """Return the mutable settings for a repo, or None if the repo does not exist.

    Combines dedicated column values (name, description, visibility, tags) with
    feature-flag values from the ``settings`` JSON blob. Missing flags are
    back-filled with ``_SETTINGS_DEFAULTS`` so new and legacy repos both return
    a complete response.

    Called by ``GET /api/v1/repos/{repo_id}/settings``.
    """
    row = await session.get(db.MusehubRepo, repo_id)
    if row is None:
        return None

    flags = _merge_settings(row.settings)

    return RepoSettingsResponse(
        name=row.name,
        description=row.description,
        visibility=row.visibility,
        # Stored flag may be None/empty in legacy rows; fall back to "main".
        default_branch=str(flags.get("default_branch") or "main"),
        has_issues=bool(flags.get("has_issues", True)),
        has_projects=bool(flags.get("has_projects", False)),
        has_wiki=bool(flags.get("has_wiki", False)),
        topics=list(row.tags or []),
        # `x if x is not None else None` is just `x` — simplified.
        license=flags.get("license"),  # type: ignore[arg-type]
        homepage_url=flags.get("homepage_url"),  # type: ignore[arg-type]
        allow_merge_commit=bool(flags.get("allow_merge_commit", True)),
        allow_squash_merge=bool(flags.get("allow_squash_merge", True)),
        allow_rebase_merge=bool(flags.get("allow_rebase_merge", False)),
        delete_branch_on_merge=bool(flags.get("delete_branch_on_merge", True)),
    )
1771
1772
async def update_repo_settings(
    session: AsyncSession,
    repo_id: str,
    patch: RepoSettingsPatch,
) -> RepoSettingsResponse | None:
    """Apply a partial settings update to a repo and return the updated settings.

    Only non-None fields in ``patch`` are written. Dedicated columns
    (name, description, visibility, tags) are updated directly on the ORM row;
    feature flags are merged into the ``settings`` JSON blob.

    Returns None if the repo does not exist. The caller is responsible for
    committing the session after a successful return.

    Called by ``PATCH /api/v1/repos/{repo_id}/settings``.
    """
    row = await session.get(db.MusehubRepo, repo_id)
    if row is None:
        return None

    # ── Dedicated column fields ──────────────────────────────────────────────
    if patch.name is not None:
        row.name = patch.name
    if patch.description is not None:
        row.description = patch.description
    if patch.visibility is not None:
        row.visibility = patch.visibility
    if patch.topics is not None:
        row.tags = patch.topics

    # ── Feature-flag JSON blob ───────────────────────────────────────────────
    # Patch field names match the flag keys one-to-one, so non-None values
    # can be collected generically instead of one `if` per flag.
    flag_fields = (
        "default_branch",
        "has_issues",
        "has_projects",
        "has_wiki",
        "license",
        "homepage_url",
        "allow_merge_commit",
        "allow_squash_merge",
        "allow_rebase_merge",
        "delete_branch_on_merge",
    )
    flag_updates = {
        field: value
        for field in flag_fields
        if (value := getattr(patch, field)) is not None
    }

    if flag_updates:
        merged = _merge_settings(row.settings)
        merged.update(flag_updates)
        # Assign a fresh dict (as before) — in-place mutation of a JSON
        # column may not be change-tracked by the ORM.
        row.settings = merged

    logger.info("✅ Updated settings for repo %s", repo_id)
    return await get_repo_settings(session, repo_id)