gabriel / musehub public
musehub_repository.py python
2355 lines 81.8 KB
b2e329a1 dev → main: uvicorn workers, nginx push timeout, MCP tool descriptions (#32) Gabriel Cardona <cgcardona@gmail.com> 2d ago
1 """MuseHub persistence adapter — single point of DB access for Hub entities.
2
3 This module is the ONLY place that touches the musehub_* tables.
4 Route handlers delegate here; no business logic lives in routes.
5
6 Boundary rules:
7 - Must NOT import state stores, SSE queues, or LLM clients.
8 - Must NOT import musehub.core.* modules.
9 - May import ORM models from musehub.db.musehub_models.
10 - May import Pydantic response models from musehub.models.musehub.
11 """
12 from datetime import datetime, timezone
13
14 import logging
15 import re
16 from collections import deque
17 from typing import Any
18
19 from sqlalchemy import desc, func, or_, select
20 from sqlalchemy.ext.asyncio import AsyncSession
21 from sqlalchemy.orm import aliased
22 from sqlalchemy.sql.elements import ColumnElement
23
24 from musehub.db import musehub_models as db
25 from musehub.db import musehub_collaborator_models as collab_db
26 from musehub.models.musehub import (
27 SessionListResponse,
28 SessionResponse,
29 BranchDetailListResponse,
30 BranchDetailResponse,
31 BranchDivergenceScores,
32 BranchResponse,
33 CommitResponse,
34 GlobalSearchCommitMatch,
35 GlobalSearchRepoGroup,
36 GlobalSearchResult,
37 DagEdge,
38 DagGraphResponse,
39 DagNode,
40 InstrumentInfo,
41 MuseHubContextCommitInfo,
42 MuseHubContextHistoryEntry,
43 MuseHubContextMusicalState,
44 MuseHubContextResponse,
45 ObjectMetaResponse,
46 RepoListResponse,
47 RepoResponse,
48 RepoSettingsPatch,
49 RepoSettingsResponse,
50 ScoreMetaInfo,
51 TimelineCommitEvent,
52 TimelineEmotionEvent,
53 TimelineResponse,
54 TimelineSectionEvent,
55 TimelineTrackEvent,
56 TrackInfo,
57 TreeEntryResponse,
58 TreeListResponse,
59 ForkNetworkNode,
60 ForkNetworkResponse,
61 UserForkedRepoEntry,
62 UserForksResponse,
63 UserStarredRepoEntry,
64 UserStarredResponse,
65 UserWatchedRepoEntry,
66 UserWatchedResponse,
67 )
68
69 logger = logging.getLogger(__name__)
70
71
72 def _generate_slug(name: str) -> str:
73 """Derive a URL-safe slug from a human-readable repo name.
74
75 Rules: lowercase, non-alphanumeric chars collapsed to single hyphens,
76 leading/trailing hyphens stripped, max 64 chars. If the result is empty
77 (e.g. name was all symbols) we fall back to "repo".
78 """
79 slug = name.lower()
80 slug = re.sub(r"[^a-z0-9]+", "-", slug)
81 slug = slug.strip("-")
82 slug = slug[:64].strip("-")
83 return slug or "repo"
84
85
86 def _repo_clone_url(owner: str, slug: str) -> str:
87 """Derive the canonical clone URL from owner and slug.
88
89 Uses the musehub://{owner}/{slug} scheme — the DAW's native protocol handler
90 resolves this to the correct API base at runtime. No internal UUID is exposed
91 in external URLs.
92 """
93 return f"musehub://{owner}/{slug}"
94
95
def _to_repo_response(row: db.MusehubRepo) -> RepoResponse:
    """Map a MusehubRepo ORM row onto its wire-format RepoResponse."""
    # Copy tags defensively; the column may be NULL on older rows.
    tag_list: list[str] = list(row.tags or [])
    return RepoResponse(
        repo_id=row.repo_id,
        name=row.name,
        owner=row.owner,
        slug=row.slug,
        visibility=row.visibility,
        owner_user_id=row.owner_user_id,
        clone_url=_repo_clone_url(row.owner, row.slug),
        description=row.description,
        tags=tag_list,
        key_signature=row.key_signature,
        tempo_bpm=row.tempo_bpm,
        # getattr guards rows/models that predate the domain_id column.
        domain_id=getattr(row, "domain_id", None),
        created_at=row.created_at,
    )
112
113
def _to_branch_response(row: db.MusehubBranch) -> BranchResponse:
    """Map a MusehubBranch ORM row onto its wire-format BranchResponse."""
    return BranchResponse(
        branch_id=row.branch_id,
        name=row.name,
        head_commit_id=row.head_commit_id,
    )
120
121
def _to_commit_response(row: db.MusehubCommit) -> CommitResponse:
    """Map a MusehubCommit ORM row onto its wire-format CommitResponse."""
    # parent_ids may be NULL for root commits — normalise to a fresh list.
    return CommitResponse(
        commit_id=row.commit_id,
        branch=row.branch,
        parent_ids=list(row.parent_ids or []),
        message=row.message,
        author=row.author,
        timestamp=row.timestamp,
        snapshot_id=row.snapshot_id,
    )
132
133
async def create_repo(
    session: AsyncSession,
    *,
    name: str,
    owner: str,
    visibility: str,
    owner_user_id: str,
    description: str = "",
    tags: list[str] | None = None,
    key_signature: str | None = None,
    tempo_bpm: int | None = None,
    # ── Wizard extensions ────────────────────────────────────────
    # NOTE: ``license`` shadows the Python builtin; the name is kept because
    # it is part of the caller-facing keyword interface.
    license: str | None = None,
    topics: list[str] | None = None,
    initialize: bool = False,
    default_branch: str = "main",
    template_repo_id: str | None = None,
) -> RepoResponse:
    """Persist a new remote repo and return its wire representation.

    ``slug`` is auto-generated from ``name``. The ``(owner, slug)`` pair must
    be unique — callers should catch ``IntegrityError`` and surface a 409.

    Wizard behaviors:
    - When ``template_repo_id`` is set, the template's description and topics
      are copied into the new repo (template must be public; silently skipped
      when it doesn't exist or is private).
    - When ``initialize=True``, an empty "Initial commit" is written plus the
      default branch pointer so the repo is immediately browsable.
    - ``license`` is stored in the settings JSON blob under the ``license`` key.
    - ``topics`` are merged with ``tags`` into a single unified tag list.
    """
    # Merge topics into tags (deduplicated, stable order).
    # dict.fromkeys preserves first-seen order, unlike set().
    combined_tags: list[str] = list(dict.fromkeys((tags or []) + (topics or [])))

    # Copy template metadata when a template repo is supplied.
    if template_repo_id is not None:
        tmpl = await session.get(db.MusehubRepo, template_repo_id)
        # Private or missing templates are silently ignored (documented above).
        if tmpl is not None and tmpl.visibility == "public":
            if not description:
                description = tmpl.description
            # Prepend template tags; deduplicate preserving order.
            combined_tags = list(dict.fromkeys(list(tmpl.tags or []) + combined_tags))

    # Build the settings JSON blob with optional license field.
    settings: dict[str, object] = {}
    if license is not None:
        settings["license"] = license

    slug = _generate_slug(name)
    repo = db.MusehubRepo(
        name=name,
        owner=owner,
        slug=slug,
        visibility=visibility,
        owner_user_id=owner_user_id,
        description=description,
        tags=combined_tags,
        key_signature=key_signature,
        tempo_bpm=tempo_bpm,
        # An empty settings dict is stored as NULL rather than {}.
        settings=settings or None,
    )
    session.add(repo)
    await session.flush()  # populate default columns before reading
    await session.refresh(repo)

    # Wizard initialisation: create default branch + empty initial commit.
    if initialize:
        # Synthetic, deterministic-per-repo commit ID for the empty root commit.
        init_commit_id = f"init-{repo.repo_id[:8]}"
        now = datetime.now(tz=timezone.utc)

        branch = db.MusehubBranch(
            repo_id=repo.repo_id,
            name=default_branch,
            head_commit_id=init_commit_id,
        )
        session.add(branch)

        init_commit = db.MusehubCommit(
            commit_id=init_commit_id,
            repo_id=repo.repo_id,
            branch=default_branch,
            parent_ids=[],
            message="Initial commit",
            author=owner_user_id,
            timestamp=now,
        )
        session.add(init_commit)
        await session.flush()

    logger.info(
        "✅ Created MuseHub repo %s (%s/%s) for user %s (initialize=%s)",
        repo.repo_id, owner, slug, owner_user_id, initialize,
    )
    return _to_repo_response(repo)
229
230
async def get_repo(session: AsyncSession, repo_id: str) -> RepoResponse | None:
    """Look up a repo by internal UUID; None when missing or soft-deleted."""
    row = await session.get(db.MusehubRepo, repo_id)
    if row is not None and row.deleted_at is None:
        return _to_repo_response(row)
    return None
237
238
async def delete_repo(session: AsyncSession, repo_id: str) -> bool:
    """Soft-delete a repo by stamping its ``deleted_at`` column.

    Returns True only when the repo existed and had not already been
    tombstoned; False otherwise. Committing the session remains the
    caller's responsibility.
    """
    repo = await session.get(db.MusehubRepo, repo_id)
    if repo is None:
        return False
    if repo.deleted_at is not None:
        return False  # already soft-deleted — treat as not found
    repo.deleted_at = datetime.now(timezone.utc)
    await session.flush()
    logger.info("✅ Soft-deleted MuseHub repo %s", repo_id)
    return True
253
254
async def transfer_repo_ownership(
    session: AsyncSession, repo_id: str, new_owner_user_id: str
) -> RepoResponse | None:
    """Reassign a repo's ``owner_user_id`` to a different user.

    The public ``owner`` username slug is deliberately left alone — renaming
    the owner segment of the URL is a separate, settings-level action taken
    by the new owner.

    Returns the refreshed RepoResponse, or None when the repo is missing or
    soft-deleted. The caller commits the session.
    """
    repo = await session.get(db.MusehubRepo, repo_id)
    if repo is None or repo.deleted_at is not None:
        return None
    repo.owner_user_id = new_owner_user_id
    await session.flush()
    await session.refresh(repo)
    logger.info("✅ Transferred MuseHub repo %s ownership to user %s", repo_id, new_owner_user_id)
    return _to_repo_response(repo)
275
276
async def get_repo_row_by_owner_slug(
    session: AsyncSession, owner: str, slug: str
) -> db.MusehubRepo | None:
    """Fetch the raw ORM row for an ``(owner, slug)`` pair, or None.

    Use this over :func:`get_repo_by_owner_slug` when internal columns
    (e.g. ``owner_user_id``, ``visibility``) are needed.

    NOTE(review): unlike :func:`get_repo`, this does not filter on
    ``deleted_at`` — soft-deleted rows still resolve here; confirm whether
    any caller relies on that.
    """
    query = select(db.MusehubRepo).where(
        db.MusehubRepo.owner == owner,
        db.MusehubRepo.slug == slug,
    )
    result = await session.execute(query)
    return result.scalars().first()
290
291
async def get_repo_by_owner_slug(
    session: AsyncSession, owner: str, slug: str
) -> RepoResponse | None:
    """Return repo metadata by owner+slug canonical URL pair, or None if not found.

    This is the primary resolver for all external /{owner}/{slug} routes.
    Soft-deleted repos are treated as not found, matching :func:`get_repo`.
    """
    row = await get_repo_row_by_owner_slug(session, owner, slug)
    # Consistency fix: get_repo() hides soft-deleted repos, so the canonical
    # URL resolver must too — otherwise a deleted repo stayed reachable via
    # its owner/slug URL.
    if row is None or row.deleted_at is not None:
        return None
    return _to_repo_response(row)
303
304
# Default number of repos per page for list_repos_for_user.
_PAGE_SIZE = 20
306
307
async def list_repos_for_user(
    session: AsyncSession,
    user_id: str,
    *,
    limit: int = _PAGE_SIZE,
    cursor: str | None = None,
) -> RepoListResponse:
    """Return repos owned by or collaborated on by ``user_id``.

    Soft-deleted repos are excluded, matching :func:`get_repo` semantics.
    Results are ordered by ``created_at`` descending (newest first). Pagination
    uses an opaque cursor encoding the ``created_at`` ISO timestamp of the last
    item on the current page — pass it back as ``?cursor=`` to advance.

    Args:
        session: Active async DB session.
        user_id: JWT ``sub`` of the authenticated caller.
        limit: Maximum repos per page (default 20).
        cursor: Opaque pagination cursor from a previous response.

    Returns:
        ``RepoListResponse`` with the page of repos, total count, and next cursor.
    """
    # Collect repo IDs the user collaborates on (accepted invitations only).
    collab_stmt = select(collab_db.MusehubCollaborator.repo_id).where(
        collab_db.MusehubCollaborator.user_id == user_id,
        collab_db.MusehubCollaborator.accepted_at.is_not(None),
    )
    collab_repo_ids = list((await session.execute(collab_stmt)).scalars().all())

    # Base filter: repos the caller owns OR collaborates on. Fix: exclude
    # soft-deleted rows — previously tombstoned repos still appeared in
    # listings and inflated ``total``, inconsistent with get_repo().
    base_filter = or_(
        db.MusehubRepo.owner_user_id == user_id,
        db.MusehubRepo.repo_id.in_(collab_repo_ids),
    ) & db.MusehubRepo.deleted_at.is_(None)

    # Total count across all pages.
    count_stmt = select(func.count()).select_from(db.MusehubRepo).where(base_filter)
    total: int = (await session.execute(count_stmt)).scalar_one()

    # Apply cursor: keep only repos strictly older than the cursor timestamp.
    page_filter = base_filter
    if cursor is not None:
        try:
            cursor_dt = datetime.fromisoformat(cursor)
            page_filter = base_filter & (db.MusehubRepo.created_at < cursor_dt)
        except ValueError:
            pass  # malformed cursor — ignore and return from the beginning

    stmt = (
        select(db.MusehubRepo)
        .where(page_filter)
        .order_by(desc(db.MusehubRepo.created_at))
        .limit(limit)
    )
    rows = (await session.execute(stmt)).scalars().all()
    repos = [_to_repo_response(r) for r in rows]

    # Build next cursor from the last item's created_at when there may be more.
    next_cursor: str | None = None
    if len(rows) == limit:
        next_cursor = rows[-1].created_at.isoformat()

    return RepoListResponse(repos=repos, next_cursor=next_cursor, total=total)
372
373
async def get_repo_orm_by_owner_slug(
    session: AsyncSession, owner: str, slug: str
) -> db.MusehubRepo | None:
    """Return the raw ORM repo row by owner+slug, or None if not found.

    Used internally when the route needs the repo_id for downstream calls.
    This was a byte-for-byte duplicate of :func:`get_repo_row_by_owner_slug`;
    delegating to it keeps the two resolvers from silently drifting apart.
    """
    return await get_repo_row_by_owner_slug(session, owner, slug)
386
387
async def list_branches(session: AsyncSession, repo_id: str) -> list[BranchResponse]:
    """Return every branch of ``repo_id`` as a wire model, sorted by name."""
    query = (
        select(db.MusehubBranch)
        .where(db.MusehubBranch.repo_id == repo_id)
        .order_by(db.MusehubBranch.name)
    )
    result = await session.execute(query)
    return [_to_branch_response(row) for row in result.scalars().all()]
397
398
async def list_branches_with_detail(
    session: AsyncSession, repo_id: str
) -> BranchDetailListResponse:
    """Return branches enriched with ahead/behind counts vs the default branch.

    The default branch is "main" when present, otherwise the alphabetically
    first branch. Ahead/behind are set-difference counts over each branch's
    commit IDs relative to the default branch — an approximation suitable
    for display. Musical divergence scores need audio snapshots that are not
    available server-side, so every divergence field is ``None`` for now.
    """
    branch_query = (
        select(db.MusehubBranch)
        .where(db.MusehubBranch.repo_id == repo_id)
        .order_by(db.MusehubBranch.name)
    )
    branch_rows = (await session.execute(branch_query)).scalars().all()
    if not branch_rows:
        return BranchDetailListResponse(branches=[], default_branch="main")

    # Prefer "main"; otherwise the first branch (rows arrive name-sorted).
    all_names = {b.name for b in branch_rows}
    default_name = "main" if "main" in all_names else branch_rows[0].name

    # Single query for every commit ID, bucketed per branch in Python.
    id_query = select(db.MusehubCommit.commit_id, db.MusehubCommit.branch).where(
        db.MusehubCommit.repo_id == repo_id
    )
    per_branch: dict[str, set[str]] = {}
    for cid, bname in (await session.execute(id_query)).all():
        per_branch.setdefault(bname, set()).add(cid)

    base_set: set[str] = per_branch.get(default_name, set())

    details: list[BranchDetailResponse] = []
    for branch in branch_rows:
        on_default = branch.name == default_name
        own_set: set[str] = per_branch.get(branch.name, set())
        details.append(
            BranchDetailResponse(
                branch_id=branch.branch_id,
                name=branch.name,
                head_commit_id=branch.head_commit_id,
                is_default=on_default,
                ahead_count=0 if on_default else len(own_set - base_set),
                behind_count=0 if on_default else len(base_set - own_set),
                divergence=BranchDivergenceScores(
                    melodic=None, harmonic=None, rhythmic=None, structural=None, dynamic=None
                ),
            )
        )

    return BranchDetailListResponse(branches=details, default_branch=default_name)
457
458
def _to_object_meta_response(row: db.MusehubObject) -> ObjectMetaResponse:
    """Map a MusehubObject ORM row onto its wire-format ObjectMetaResponse."""
    return ObjectMetaResponse(
        object_id=row.object_id,
        path=row.path,
        size_bytes=row.size_bytes,
        created_at=row.created_at,
    )
466
467
async def get_commit(
    session: AsyncSession, repo_id: str, commit_id: str
) -> CommitResponse | None:
    """Fetch one commit of a repo by ID; None when it is not in this repo."""
    query = select(db.MusehubCommit).where(
        db.MusehubCommit.repo_id == repo_id,
        db.MusehubCommit.commit_id == commit_id,
    )
    match = (await session.execute(query)).scalars().first()
    return None if match is None else _to_commit_response(match)
483
484
async def list_objects(
    session: AsyncSession, repo_id: str
) -> list[ObjectMetaResponse]:
    """Return path-ordered object metadata for a repo (no binary content)."""
    query = (
        select(db.MusehubObject)
        .where(db.MusehubObject.repo_id == repo_id)
        .order_by(db.MusehubObject.path)
    )
    result = await session.execute(query)
    return [_to_object_meta_response(obj) for obj in result.scalars().all()]
496
497
async def get_object_row(
    session: AsyncSession, repo_id: str, object_id: str
) -> db.MusehubObject | None:
    """Fetch the raw ORM object row for content delivery, or None.

    Route handlers use the returned row's ``disk_path`` to stream the file.
    """
    query = select(db.MusehubObject).where(
        db.MusehubObject.repo_id == repo_id,
        db.MusehubObject.object_id == object_id,
    )
    result = await session.execute(query)
    return result.scalars().first()
513
514
async def get_object_by_path(
    session: AsyncSession, repo_id: str, path: str
) -> db.MusehubObject | None:
    """Resolve a repo-relative path to its newest stored object row.

    The raw file endpoint uses this to map a human-readable path such as
    ``tracks/bass.mid`` onto the artifact on disk. Re-pushed content can
    leave several objects sharing one path; the row with the newest
    ``created_at`` wins — mirroring Git's ref semantics, where HEAD always
    tracks the latest version.

    Args:
        session: Active async DB session.
        repo_id: UUID of the target repo.
        path: Client-supplied relative file path, e.g. ``tracks/bass.mid``.

    Returns:
        The matching ORM row, or ``None`` if no object with that path exists.
    """
    newest_first = (
        select(db.MusehubObject)
        .where(
            db.MusehubObject.repo_id == repo_id,
            db.MusehubObject.path == path,
        )
        .order_by(desc(db.MusehubObject.created_at))
        .limit(1)
    )
    result = await session.execute(newest_first)
    return result.scalars().first()
544
545
546 def _instrument_name_from_path(path: str) -> str:
547 """Derive a human-readable instrument name from a MIDI object path.
548
549 Strips directory components and the file extension, then title-cases
550 the result. ``tracks/bass.mid`` → ``"Bass"``. Falls back to the
551 bare filename when the stem is empty.
552 """
553 stem = path.split("/")[-1].rsplit(".", 1)[0]
554 return stem.title() or path
555
556
async def get_track_info(
    session: AsyncSession,
    repo_id: str,
    path: str,
) -> TrackInfo | None:
    """Return SSR metadata for one MIDI track addressed by path.

    Resolves ``path`` to its newest object row and wraps the stored DB
    metadata in a :class:`TrackInfo` for template rendering. Both
    ``duration_sec`` and ``track_count`` stay ``None`` because MIDI parsing
    is client-side only in the current architecture.

    Returns ``None`` when no object with that path exists.
    """
    row = await get_object_by_path(session, repo_id, path)
    if row is None:
        return None
    return TrackInfo(
        name=_instrument_name_from_path(row.path),
        size_bytes=row.size_bytes,
        duration_sec=None,
        track_count=None,
    )
580
581
async def get_instruments_for_repo(
    session: AsyncSession,
    repo_id: str,
) -> list[InstrumentInfo]:
    """Return instrument lane descriptors for every MIDI object in a repo.

    Scans objects with a ``.mid`` / ``.midi`` extension and derives each
    :class:`InstrumentInfo` from the stored path. ``channel`` is simply the
    zero-based position in the path-sorted result (stable render order);
    ``gm_program`` stays ``None`` since GM resolution needs client-side
    MIDI parsing.

    Returns an empty list when the repo holds no MIDI objects.
    """
    midi_query = (
        select(db.MusehubObject)
        .where(
            db.MusehubObject.repo_id == repo_id,
            or_(
                db.MusehubObject.path.like("%.mid"),
                db.MusehubObject.path.like("%.midi"),
            ),
        )
        .order_by(db.MusehubObject.path)
    )
    midi_rows = (await session.execute(midi_query)).scalars().all()

    lanes: list[InstrumentInfo] = []
    for channel, obj in enumerate(midi_rows):
        lanes.append(
            InstrumentInfo(
                name=_instrument_name_from_path(obj.path),
                channel=channel,
                gm_program=None,
            )
        )
    return lanes
616
617
async def get_score_meta_for_repo(
    session: AsyncSession,
    repo_id: str,
    path: str,
) -> ScoreMetaInfo:
    """Return SSR metadata for the score page header.

    Derives the score title from the requested path; the musical fields
    (``key``, ``meter``, ``composer``) are ``None`` until server-side
    MIDI/ABC parsing is implemented. ``instrument_count`` is the number of
    MIDI objects in the repo, or ``None`` when there are none so the
    template can skip the field.

    Always returns a :class:`ScoreMetaInfo` — even for unknown paths — so
    the template can render a valid (if sparse) header.
    """
    title = _instrument_name_from_path(path) if path else "Score"
    stmt = (
        select(func.count())
        # Explicit FROM, consistent with the other count queries in this
        # module — avoids relying on SQLAlchemy inferring the table from
        # the WHERE clause alone.
        .select_from(db.MusehubObject)
        .where(
            db.MusehubObject.repo_id == repo_id,
            or_(
                db.MusehubObject.path.like("%.mid"),
                db.MusehubObject.path.like("%.midi"),
            ),
        )
    )
    midi_count: int = (await session.execute(stmt)).scalar_one()
    return ScoreMetaInfo(
        title=title,
        composer=None,
        key=None,
        meter=None,
        instrument_count=midi_count if midi_count > 0 else None,
    )
652
653
async def list_commits(
    session: AsyncSession,
    repo_id: str,
    *,
    branch: str | None = None,
    limit: int = 50,
    offset: int = 0,
) -> tuple[list[CommitResponse], int]:
    """Return commits for a repo, newest first, optionally filtered by branch.

    Offset-based pagination via ``offset``; the result is a
    ``(commits, total_count)`` tuple where the total spans all pages.
    """
    filtered = select(db.MusehubCommit).where(db.MusehubCommit.repo_id == repo_id)
    if branch:
        filtered = filtered.where(db.MusehubCommit.branch == branch)

    count_query = select(func.count()).select_from(filtered.subquery())
    total: int = (await session.execute(count_query)).scalar_one()

    page_query = (
        filtered.order_by(desc(db.MusehubCommit.timestamp)).offset(offset).limit(limit)
    )
    page_rows = (await session.execute(page_query)).scalars().all()
    return [_to_commit_response(row) for row in page_rows], total
677
678
# ── Section / track keyword heuristics ──────────────────────────────────────

# Song-structure section names matched as lowercase substrings of commit
# messages to emit timeline section events.
_SECTION_KEYWORDS: list[str] = [
    "intro", "verse", "chorus", "bridge", "outro", "hook",
    "pre-chorus", "prechorus", "breakdown", "drop", "build",
    "refrain", "coda", "tag", "interlude",
]

# Instrument/track names matched the same way to emit timeline track events.
_TRACK_KEYWORDS: list[str] = [
    "bass", "drums", "keys", "piano", "guitar", "synth", "pad",
    "lead", "vocals", "strings", "brass", "horn",
    "flute", "cello", "violin", "organ", "arp", "percussion",
    "kick", "snare", "hi-hat", "hihat", "clap", "melody",
]

# Verb patterns used to classify a commit message as an add vs. remove.
# NOTE(review): _ADDED_VERBS is not consulted by _infer_action below
# ("added" is its default) — confirm whether it is used elsewhere or is
# kept only for symmetry.
_ADDED_VERBS = re.compile(
    r"\b(add(?:ed)?|new|introduce[ds]?|creat(?:ed)?|record(?:ed)?|layer(?:ed)?)\b",
    re.IGNORECASE,
)
_REMOVED_VERBS = re.compile(
    r"\b(remov(?:e[ds]?)?|delet(?:e[ds]?)?|drop(?:ped)?|cut|mute[ds]?)\b",
    re.IGNORECASE,
)
702
703
def _infer_action(message: str) -> str:
    """Classify a commit message as a 'removed' or 'added' change.

    A removal verb anywhere in the message wins; everything else defaults
    to 'added'.
    """
    return "removed" if _REMOVED_VERBS.search(message) else "added"
709
710
def _extract_section_events(row: db.MusehubCommit) -> list[TimelineSectionEvent]:
    """Build one section-change event per section keyword found in the message."""
    lowered = row.message.lower()
    # _infer_action is pure, so compute the shared action once.
    action = _infer_action(row.message)
    return [
        TimelineSectionEvent(
            commit_id=row.commit_id,
            timestamp=row.timestamp,
            section_name=keyword,
            action=action,
        )
        for keyword in _SECTION_KEYWORDS
        if keyword in lowered
    ]
726
727
def _extract_track_events(row: db.MusehubCommit) -> list[TimelineTrackEvent]:
    """Build one track-change event per track keyword found in the message."""
    lowered = row.message.lower()
    # _infer_action is pure, so compute the shared action once.
    action = _infer_action(row.message)
    return [
        TimelineTrackEvent(
            commit_id=row.commit_id,
            timestamp=row.timestamp,
            track_name=keyword,
            action=action,
        )
        for keyword in _TRACK_KEYWORDS
        if keyword in lowered
    ]
743
744
def _derive_emotion(row: db.MusehubCommit) -> TimelineEmotionEvent:
    """Derive a deterministic emotion vector from the commit SHA.

    Three non-overlapping 4-hex-digit windows of the SHA map to valence,
    energy, and tension in [0.0, 1.0]. A window containing non-hex
    characters (e.g. short test-fixture IDs) falls back to a neutral 0.5.
    Fully deterministic, so the timeline reproduces without ML inference.
    """
    # Pad short commit IDs so every 4-char window exists.
    padded = row.commit_id.ljust(12, "0")

    def axis(start: int) -> float:
        window = padded[start : start + 4]
        if all(ch in "0123456789abcdefABCDEF" for ch in window):
            return int(window, 16) / 0xFFFF
        return 0.5

    return TimelineEmotionEvent(
        commit_id=row.commit_id,
        timestamp=row.timestamp,
        valence=round(axis(0), 4),
        energy=round(axis(4), 4),
        tension=round(axis(8), 4),
    )
765
766
async def get_timeline_events(
    session: AsyncSession,
    repo_id: str,
    *,
    limit: int = 200,
) -> TimelineResponse:
    """Return a chronological timeline of musical evolution for a repo.

    Loads up to ``limit`` commits (oldest-first, for left-to-right temporal
    rendering) and fans them out into four parallel event streams:
    - commits: every commit as a timeline marker
    - emotion: deterministic emotion vectors derived from commit SHAs
    - sections: section-change markers parsed from commit messages
    - tracks: track add/remove markers parsed from commit messages

    Callers must verify the repo exists before calling this function.
    A repo with no commits yields an empty timeline.
    """
    total: int = (
        await session.execute(
            select(func.count()).where(db.MusehubCommit.repo_id == repo_id)
        )
    ).scalar_one()

    commit_query = (
        select(db.MusehubCommit)
        .where(db.MusehubCommit.repo_id == repo_id)
        .order_by(db.MusehubCommit.timestamp)  # oldest-first for temporal rendering
        .limit(limit)
    )
    commit_rows = (await session.execute(commit_query)).scalars().all()

    commits: list[TimelineCommitEvent] = []
    emotions: list[TimelineEmotionEvent] = []
    sections: list[TimelineSectionEvent] = []
    tracks: list[TimelineTrackEvent] = []

    for commit in commit_rows:
        commits.append(
            TimelineCommitEvent(
                commit_id=commit.commit_id,
                branch=commit.branch,
                message=commit.message,
                author=commit.author,
                timestamp=commit.timestamp,
                parent_ids=list(commit.parent_ids or []),
            )
        )
        emotions.append(_derive_emotion(commit))
        sections.extend(_extract_section_events(commit))
        tracks.extend(_extract_track_events(commit))

    return TimelineResponse(
        commits=commits,
        emotion=emotions,
        sections=sections,
        tracks=tracks,
        total_commits=total,
    )
async def global_search(
    session: AsyncSession,
    *,
    query: str,
    mode: str = "keyword",
    page: int = 1,
    page_size: int = 10,
) -> GlobalSearchResult:
    """Search commit messages across all public MuseHub repos.

    Only ``visibility='public'`` repos are searched — private repos are never
    exposed regardless of caller identity. This enforces the public-only
    contract at the persistence layer so no route handler can accidentally
    bypass it.

    ``mode`` controls matching strategy:
    - ``keyword``: OR-match of whitespace-split query terms against message and
      repo name using LIKE (case-insensitive via lower()).
    - ``pattern``: raw SQL LIKE pattern applied to commit message only.

    Results are grouped by repo and paginated by repo-group (``page_size``
    controls how many repo-groups per page). Within each group, up to 20
    matching commits are returned newest-first.

    An audio preview object ID is attached when the repo contains any .mp3,
    .ogg, or .wav artifact — the first one found by path ordering is used.
    Audio previews are resolved in a single batched query across all matching
    repos (not N per-repo queries) to avoid the N+1 pattern.

    Args:
        session: Active async DB session.
        query: Raw search string from the user or agent.
        mode: "keyword" or "pattern". Defaults to "keyword".
        page: 1-based page number for repo-group pagination.
        page_size: Number of repo-groups per page (1–50).

    Returns:
        GlobalSearchResult with groups, pagination metadata, and counts.
    """
    # ── 1. Collect all public repos ─────────────────────────────────────────
    # NOTE(review): soft-deleted repos (deleted_at) are not filtered here —
    # confirm whether deleted public repos should remain searchable.
    public_repos_stmt = (
        select(db.MusehubRepo)
        .where(db.MusehubRepo.visibility == "public")
        .order_by(db.MusehubRepo.created_at)
    )
    public_repo_rows = (await session.execute(public_repos_stmt)).scalars().all()
    total_repos_searched = len(public_repo_rows)

    # Short-circuit: nothing to search, or blank query.
    if not public_repo_rows or not query.strip():
        return GlobalSearchResult(
            query=query,
            mode=mode,
            groups=[],
            total_repos_searched=total_repos_searched,
            page=page,
            page_size=page_size,
        )

    repo_ids = [r.repo_id for r in public_repo_rows]
    repo_map: dict[str, db.MusehubRepo] = {r.repo_id: r for r in public_repo_rows}

    # ── 2. Build commit filter predicate ────────────────────────────────────
    predicate: ColumnElement[bool]
    if mode == "pattern":
        # The raw user string is used as a LIKE pattern by design (wildcards
        # are the feature); LIKE is parameterized, so no SQL injection risk.
        predicate = db.MusehubCommit.message.like(query)
    else:
        # keyword: OR-match each whitespace-split term against message (lower)
        terms = [t for t in query.lower().split() if t]
        if not terms:
            return GlobalSearchResult(
                query=query,
                mode=mode,
                groups=[],
                total_repos_searched=total_repos_searched,
                page=page,
                page_size=page_size,
            )
        # A term matches when it appears in either the commit message OR the
        # repo name; terms are ORed together (any-term-matches semantics).
        term_predicates = [
            or_(
                func.lower(db.MusehubCommit.message).contains(term),
                func.lower(db.MusehubRepo.name).contains(term),
            )
            for term in terms
        ]
        predicate = or_(*term_predicates)

    # ── 3. Query matching commits joined to their repo ───────────────────────
    commits_stmt = (
        select(db.MusehubCommit, db.MusehubRepo)
        .join(db.MusehubRepo, db.MusehubCommit.repo_id == db.MusehubRepo.repo_id)
        .where(
            db.MusehubCommit.repo_id.in_(repo_ids),
            predicate,
        )
        .order_by(desc(db.MusehubCommit.timestamp))
    )
    commit_pairs = (await session.execute(commits_stmt)).all()

    # ── 4. Group commits by repo ─────────────────────────────────────────────
    # Commits arrive newest-first, so each group's list is newest-first too,
    # and the dict's insertion order ranks repos by their newest match.
    groups_map: dict[str, list[db.MusehubCommit]] = {}
    for commit_row, _repo_row in commit_pairs:
        groups_map.setdefault(commit_row.repo_id, []).append(commit_row)

    # ── 5. Resolve audio preview objects — single batched query (eliminates N+1) ──
    # Fetch all qualifying audio objects for every matching repo in one round-trip,
    # ordered by (repo_id, path) so we naturally encounter each repo's
    # alphabetically-first audio file first when iterating the result set.
    # Python deduplication (first-seen wins) replicates the previous LIMIT 1
    # per-repo semantics without issuing N separate queries.
    audio_map: dict[str, str] = {}
    if groups_map:
        audio_batch_stmt = (
            select(db.MusehubObject.repo_id, db.MusehubObject.object_id)
            .where(
                db.MusehubObject.repo_id.in_(list(groups_map.keys())),
                or_(
                    db.MusehubObject.path.like("%.mp3"),
                    db.MusehubObject.path.like("%.ogg"),
                    db.MusehubObject.path.like("%.wav"),
                ),
            )
            .order_by(db.MusehubObject.repo_id, db.MusehubObject.path)
        )
        audio_rows = (await session.execute(audio_batch_stmt)).all()
        for audio_row in audio_rows:
            if audio_row.repo_id not in audio_map:
                audio_map[audio_row.repo_id] = audio_row.object_id

    # ── 6. Paginate repo-groups ──────────────────────────────────────────────
    # NOTE(review): assumes page >= 1; a smaller value yields a negative slice
    # offset — presumably the route layer validates this. Confirm.
    sorted_repo_ids = list(groups_map.keys())
    offset = (page - 1) * page_size
    page_repo_ids = sorted_repo_ids[offset : offset + page_size]

    groups: list[GlobalSearchRepoGroup] = []
    for rid in page_repo_ids:
        repo_row = repo_map[rid]
        all_matches = groups_map[rid]
        audio_oid = audio_map.get(rid)

        # NOTE(review): repo_owner is populated from owner_user_id (internal
        # user ID) rather than the public ``owner`` username used in clone
        # URLs — confirm which one the API contract expects.
        commit_matches = [
            GlobalSearchCommitMatch(
                commit_id=c.commit_id,
                message=c.message,
                author=c.author,
                branch=c.branch,
                timestamp=c.timestamp,
                repo_id=rid,
                repo_name=repo_row.name,
                repo_owner=repo_row.owner_user_id,
                repo_visibility=repo_row.visibility,
                audio_object_id=audio_oid,
            )
            for c in all_matches[:20]
        ]
        groups.append(
            GlobalSearchRepoGroup(
                repo_id=rid,
                repo_name=repo_row.name,
                repo_owner=repo_row.owner_user_id,
                repo_slug=repo_row.slug,
                repo_visibility=repo_row.visibility,
                matches=commit_matches,
                total_matches=len(all_matches),
            )
        )

    return GlobalSearchResult(
        query=query,
        mode=mode,
        groups=groups,
        total_repos_searched=total_repos_searched,
        page=page,
        page_size=page_size,
    )
async def list_commits_dag(
    session: AsyncSession,
    repo_id: str,
) -> DagGraphResponse:
    """Return the full commit graph for a repo as a topologically sorted DAG.

    Fetches every commit for the repo (no limit — required for correct DAG
    traversal). Applies Kahn's algorithm to produce a topological ordering
    from oldest ancestor to newest commit, which graph renderers can consume
    directly without additional sorting.

    Edges flow child → parent (source = child, target = parent) following the
    standard directed graph convention where arrows point toward ancestors.

    Branch head commits are identified by querying the branches table. The
    highest-timestamp commit across all branches is designated as HEAD for
    display purposes when no explicit HEAD ref exists.

    Agent use case: call this to reason about the project's branching topology,
    find common ancestors, or identify which branches contain a given commit.
    """
    # Fetch all commits for this repo
    stmt = select(db.MusehubCommit).where(db.MusehubCommit.repo_id == repo_id)
    all_rows = (await session.execute(stmt)).scalars().all()

    if not all_rows:
        return DagGraphResponse(nodes=[], edges=[], head_commit_id=None)

    # Build lookup map: commit_id → ORM row, used for O(1) parent resolution
    row_map: dict[str, db.MusehubCommit] = {r.commit_id: r for r in all_rows}

    # Fetch all branches to identify HEAD candidates and branch labels
    branch_stmt = select(db.MusehubBranch).where(db.MusehubBranch.repo_id == repo_id)
    branch_rows = (await session.execute(branch_stmt)).scalars().all()

    # Map commit_id → branch names pointing at it (only heads present in row_map)
    branch_label_map: dict[str, list[str]] = {}
    for br in branch_rows:
        if br.head_commit_id and br.head_commit_id in row_map:
            branch_label_map.setdefault(br.head_commit_id, []).append(br.name)

    # Identify HEAD: the branch head with the most recent timestamp, or the
    # most recent commit overall when no branches exist
    head_commit_id: str | None = None
    if branch_rows:
        latest_ts = None
        for br in branch_rows:
            if br.head_commit_id and br.head_commit_id in row_map:
                ts = row_map[br.head_commit_id].timestamp
                if latest_ts is None or ts > latest_ts:
                    latest_ts = ts
                    head_commit_id = br.head_commit_id
    if head_commit_id is None:
        # Fallback covers both "no branches" and "branch heads missing from row_map"
        head_commit_id = max(all_rows, key=lambda r: r.timestamp).commit_id

    # Kahn's topological sort (oldest → newest).
    # in_degree[c] = number of c's parents that are present in this repo's commit set.
    # Commits with in_degree == 0 are roots (no parents) — they enter the queue first,
    # producing a parent-before-child ordering (oldest ancestor → newest commit).
    in_degree: dict[str, int] = {r.commit_id: 0 for r in all_rows}
    # children_map[parent_id] = list of commit IDs whose parent_ids contains parent_id
    children_map: dict[str, list[str]] = {r.commit_id: [] for r in all_rows}

    edges: list[DagEdge] = []
    for row in all_rows:
        for parent_id in (row.parent_ids or []):
            # Parents absent from row_map are skipped, so edges never dangle.
            if parent_id in row_map:
                edges.append(DagEdge(source=row.commit_id, target=parent_id))
                children_map.setdefault(parent_id, []).append(row.commit_id)
                in_degree[row.commit_id] += 1

    # Kahn's algorithm: start from commits with no parents (roots)
    queue: deque[str] = deque(
        cid for cid, deg in in_degree.items() if deg == 0
    )
    topo_order: list[str] = []

    while queue:
        cid = queue.popleft()
        topo_order.append(cid)
        for child_id in children_map.get(cid, []):
            in_degree[child_id] -= 1
            if in_degree[child_id] == 0:
                queue.append(child_id)

    # Handle cycles or disconnected commits (append remaining in timestamp order)
    remaining = set(row_map.keys()) - set(topo_order)
    if remaining:
        sorted_remaining = sorted(remaining, key=lambda c: row_map[c].timestamp)
        topo_order.extend(sorted_remaining)

    # Conventional-commit prefix: group(1)=type, group(2)=optional "(scope)",
    # group(3)=optional "!" breaking-change marker before the colon.
    _conv_re = re.compile(r'^(\w+)(\([^)]*\))?(!)?\s*:')

    nodes: list[DagNode] = []
    for cid in topo_order:
        row = row_map[cid]
        raw_meta: dict[str, Any] = dict(row.commit_meta or {}) if row.commit_meta else {}

        # Extract conventional-commit prefix from the message
        m = _conv_re.match((row.message or "").strip())
        commit_type = m.group(1).lower() if m else ""

        # Semantic version bump from commit_meta
        sem_ver_bump = str(raw_meta.get("sem_ver_bump") or "none").lower()

        # Breaking change: bang suffix OR commit_meta flag
        is_breaking = bool((m and m.group(3)) or raw_meta.get("breaking_changes"))

        # Agent authorship: presence of agent_id in commit_meta
        is_agent = bool(raw_meta.get("agent_id"))

        # Symbol operation counts from structured_delta
        structured_delta: dict[str, Any] = raw_meta.get("structured_delta") or {}
        sym_added = 0
        sym_removed = 0
        for file_op in structured_delta.get("ops", []):
            for child_op in (file_op.get("child_ops") or []):
                op = child_op.get("op", "")
                if op == "insert":
                    sym_added += 1
                elif op == "delete":
                    sym_removed += 1

        nodes.append(
            DagNode(
                commit_id=row.commit_id,
                message=row.message,
                author=row.author,
                timestamp=row.timestamp,
                branch=row.branch,
                parent_ids=list(row.parent_ids or []),
                is_head=(row.commit_id == head_commit_id),
                branch_labels=branch_label_map.get(row.commit_id, []),
                tag_labels=[],
                commit_type=commit_type,
                sem_ver_bump=sem_ver_bump,
                is_breaking=is_breaking,
                is_agent=is_agent,
                sym_added=sym_added,
                sym_removed=sym_removed,
            )
        )

    logger.debug("✅ Built DAG for repo %s: %d nodes, %d edges", repo_id, len(nodes), len(edges))
    return DagGraphResponse(nodes=nodes, edges=edges, head_commit_id=head_commit_id)
1142
1143
1144 # ---------------------------------------------------------------------------
1145 # Context document builder
1146 # ---------------------------------------------------------------------------
1147
# File extensions treated as music artifacts when deriving track names
# from stored object paths (see _extract_track_names_from_objects).
_MUSIC_FILE_EXTENSIONS = frozenset(
    {".mid", ".midi", ".mp3", ".wav", ".aiff", ".aif", ".flac"}
)

# Maximum number of ancestor commits included in a context document's history.
_CONTEXT_HISTORY_DEPTH = 5
1153
1154
def _extract_track_names_from_objects(objects: list[db.MusehubObject]) -> list[str]:
    """Derive human-readable track names from stored object paths.

    A path counts as a track when its extension is a recognised music format
    and its stem does not look like a raw SHA-256 hex digest. Stems are
    lowercased, de-duplicated, and returned in sorted order — matching the
    convention in ``muse_context._extract_track_names``.
    """
    import pathlib

    hex_digits = set("0123456789abcdef")
    names: set[str] = set()
    for stored in objects:
        candidate = pathlib.PurePosixPath(stored.path)
        if candidate.suffix.lower() not in _MUSIC_FILE_EXTENSIONS:
            continue
        stem = candidate.stem.lower()
        # Skip stems that are bare 64-char hex strings (content hashes).
        if len(stem) == 64 and set(stem) <= hex_digits:
            continue
        names.add(stem)
    return sorted(names)
1173
1174
async def _get_commit_by_id(
    session: AsyncSession, repo_id: str, commit_id: str
) -> db.MusehubCommit | None:
    """Look up a single MusehubCommit ORM row scoped to (repo_id, commit_id)."""
    result = await session.execute(
        select(db.MusehubCommit).where(
            db.MusehubCommit.repo_id == repo_id,
            db.MusehubCommit.commit_id == commit_id,
        )
    )
    return result.scalars().first()
1184
1185
async def _build_hub_history(
    session: AsyncSession,
    repo_id: str,
    start_commit: db.MusehubCommit,
    objects: list[db.MusehubObject],
    depth: int,
) -> list[MuseHubContextHistoryEntry]:
    """Walk the parent chain, returning up to *depth* ancestor entries.

    The *start_commit* (the context target) is NOT included — it is surfaced
    separately as ``head_commit`` in the result. Entries are newest-first.
    The object list is reused across entries since we have no per-commit object
    index at this layer; active tracks reflect the overall repo's artifact set.

    Args:
        session: Open async DB session (read-only here).
        repo_id: Hub repo identifier scoping the commit lookups.
        start_commit: Commit whose ancestors are walked (itself excluded).
        objects: All stored objects for the repo, used to derive track names.
        depth: Maximum number of ancestor entries to return.
    """
    # The object list never varies per ancestor, so derive the track names
    # once up front instead of re-scanning every object for every entry
    # (previous implementation recomputed this inside the loop).
    shared_tracks = _extract_track_names_from_objects(objects)

    entries: list[MuseHubContextHistoryEntry] = []
    parent_ids: list[str] = list(start_commit.parent_ids or [])

    while parent_ids and len(entries) < depth:
        # Follow the first parent only; merge side-chains are not expanded.
        parent_id = parent_ids[0]
        commit = await _get_commit_by_id(session, repo_id, parent_id)
        if commit is None:
            logger.warning("⚠️ Hub history chain broken at %s", parent_id[:8])
            break
        entries.append(
            MuseHubContextHistoryEntry(
                commit_id=commit.commit_id,
                message=commit.message,
                author=commit.author,
                timestamp=commit.timestamp,
                # Fresh copy per entry so mutating one entry's list cannot
                # affect its siblings (matches prior per-entry list semantics).
                active_tracks=list(shared_tracks),
            )
        )
        parent_ids = list(commit.parent_ids or [])

    return entries
1221
1222
async def get_context_for_commit(
    session: AsyncSession,
    repo_id: str,
    ref: str,
) -> MuseHubContextResponse | None:
    """Build a musical context document for a MuseHub commit.

    Traverses the commit's parent chain (up to 5 ancestors) and derives active
    tracks from the repo's stored objects. Musical dimensions (key, tempo,
    etc.) are always None until Storpheus MIDI analysis is integrated.

    Args:
        session: Open async DB session. Read-only — no writes performed.
        repo_id: Hub repo identifier.
        ref: Target commit ID. Must belong to this repo.

    Returns:
        ``MuseHubContextResponse`` ready for JSON serialisation, or None if the
        commit does not exist in this repo.

    The output is deterministic: for the same ``repo_id`` + ``ref``, the result
    is always identical, making it safe to cache.
    """
    commit = await _get_commit_by_id(session, repo_id, ref)
    if commit is None:
        return None

    raw_objects_stmt = select(db.MusehubObject).where(
        db.MusehubObject.repo_id == repo_id
    )
    # Materialise once and reuse for both track extraction and the history
    # walk (previous implementation built two separate list copies).
    repo_objects = list((await session.execute(raw_objects_stmt)).scalars().all())

    active_tracks = _extract_track_names_from_objects(repo_objects)

    head_commit_info = MuseHubContextCommitInfo(
        commit_id=commit.commit_id,
        message=commit.message,
        author=commit.author,
        branch=commit.branch,
        timestamp=commit.timestamp,
    )

    musical_state = MuseHubContextMusicalState(active_tracks=active_tracks)

    history = await _build_hub_history(
        session, repo_id, commit, repo_objects, _CONTEXT_HISTORY_DEPTH
    )

    missing: list[str] = []
    if not active_tracks:
        missing.append("no music files found in repo")
    # Musical dimensions are unconditionally unknown at this layer (see docstring).
    missing.extend(("key", "tempo_bpm", "time_signature", "form", "emotion"))

    suggestions: dict[str, str] = {}
    if not active_tracks:
        suggestions["first_track"] = (
            "Push your first MIDI or audio file to populate the musical state."
        )
    else:
        suggestions["next_section"] = (
            f"Current tracks: {', '.join(active_tracks)}. "
            "Consider adding harmonic or melodic variation to develop the composition."
        )

    logger.info(
        "✅ MuseHub context built for repo %s commit %s (tracks=%d)",
        repo_id[:8],
        ref[:8],
        len(active_tracks),
    )
    return MuseHubContextResponse(
        repo_id=repo_id,
        current_branch=commit.branch,
        head_commit=head_commit_info,
        musical_state=musical_state,
        history=history,
        missing_elements=missing,
        suggestions=suggestions,
    )
1303
1304
def _to_session_response(s: db.MusehubSession) -> SessionResponse:
    """Map a MusehubSession ORM row to its API response model.

    ``duration_seconds`` is derived only for sessions that have ended.
    """

    def _naive(dt: datetime) -> datetime:
        # SQLite strips tz info on round-trip, so strip it here too before
        # subtraction to avoid mixing aware and naive datetimes.
        return dt.replace(tzinfo=None) if dt.tzinfo else dt

    duration_seconds: float | None = None
    if s.ended_at is not None:
        duration_seconds = (_naive(s.ended_at) - _naive(s.started_at)).total_seconds()

    return SessionResponse(
        session_id=s.session_id,
        started_at=s.started_at,
        ended_at=s.ended_at,
        duration_seconds=duration_seconds,
        participants=s.participants or [],
        commits=list(s.commits) if s.commits else [],
        notes=s.notes or "",
        intent=s.intent,
        location=s.location,
        is_active=s.is_active,
        created_at=s.created_at,
    )
1326
1327
async def create_session(
    session: AsyncSession,
    repo_id: str,
    started_at: datetime | None,
    participants: list[str],
    intent: str,
    location: str,
) -> SessionResponse:
    """Persist a freshly-started recording session and return its response model."""
    import uuid

    # Default the start time to "now" (UTC) when the caller did not supply one.
    row = db.MusehubSession(
        session_id=str(uuid.uuid4()),
        repo_id=repo_id,
        started_at=started_at if started_at is not None else datetime.now(timezone.utc),
        participants=participants,
        intent=intent,
        location=location,
        is_active=True,
    )
    session.add(row)
    await session.flush()
    return _to_session_response(row)
1351
1352
async def stop_session(
    session: AsyncSession,
    repo_id: str,
    session_id: str,
    ended_at: datetime | None,
) -> SessionResponse:
    """Mark a session as ended; idempotent if already stopped.

    Args:
        session: Open async DB session.
        repo_id: Hub repo identifier the session must belong to.
        session_id: Session to stop.
        ended_at: Explicit end time; defaults to now (UTC) when None.

    Raises:
        ValueError: when no session with *session_id* exists in *repo_id*.
    """
    # ``select`` comes from the module-level sqlalchemy import; the previous
    # function-local re-import was redundant.
    result = await session.execute(
        select(db.MusehubSession).where(
            db.MusehubSession.session_id == session_id,
            db.MusehubSession.repo_id == repo_id,
        )
    )
    row = result.scalar_one_or_none()
    if row is None:
        raise ValueError(f"session {session_id} not found")
    if row.is_active:
        # Only stamp the end time on the first stop call (idempotency).
        row.ended_at = ended_at or datetime.now(timezone.utc)
        row.is_active = False
        await session.flush()
    return _to_session_response(row)
1376
1377
async def list_sessions(
    session: AsyncSession,
    repo_id: str,
    limit: int = 50,
    offset: int = 0,
) -> tuple[list[SessionResponse], int]:
    """Return sessions for a repo, newest first, with total count.

    Active sessions sort ahead of finished ones; within each group the most
    recently started session comes first.

    Args:
        session: Open async DB session.
        repo_id: Hub repo identifier.
        limit: Page size (default 50).
        offset: Number of rows to skip for pagination.
    """
    # ``select`` and ``func`` come from the module-level sqlalchemy import;
    # the previous function-local re-import was redundant.
    total_result = await session.execute(
        select(func.count(db.MusehubSession.session_id)).where(
            db.MusehubSession.repo_id == repo_id
        )
    )
    total = total_result.scalar_one()

    result = await session.execute(
        select(db.MusehubSession)
        .where(db.MusehubSession.repo_id == repo_id)
        .order_by(db.MusehubSession.is_active.desc(), db.MusehubSession.started_at.desc())
        .limit(limit)
        .offset(offset)
    )
    rows = result.scalars().all()
    return [_to_session_response(s) for s in rows], total
1403
1404
async def get_session(
    session: AsyncSession,
    repo_id: str,
    session_id: str,
) -> SessionResponse | None:
    """Fetch a single session by id, scoped to *repo_id*.

    Returns None when the session does not exist in this repo.
    """
    # ``select`` comes from the module-level sqlalchemy import; the previous
    # function-local re-import was redundant.
    result = await session.execute(
        select(db.MusehubSession).where(
            db.MusehubSession.session_id == session_id,
            db.MusehubSession.repo_id == repo_id,
        )
    )
    row = result.scalar_one_or_none()
    if row is None:
        return None
    return _to_session_response(row)
1423
1424
async def resolve_head_ref(session: AsyncSession, repo_id: str) -> str:
    """Resolve the symbolic "HEAD" ref to the repo's default branch name.

    Resolution policy: "main" when that branch exists, otherwise the
    lexicographically first branch name; "main" again when the repo has
    no branches at all.
    """
    stmt = (
        select(db.MusehubBranch)
        .where(db.MusehubBranch.repo_id == repo_id)
        .order_by(db.MusehubBranch.name)
    )
    rows = (await session.execute(stmt)).scalars().all()
    names = [row.name for row in rows]
    if not names:
        return "main"
    if "main" in names:
        return "main"
    # Query is ordered by name, so names[0] is the lexicographic minimum.
    return names[0]
1442
1443
async def resolve_ref_for_tree(
    session: AsyncSession, repo_id: str, ref: str
) -> bool:
    """Return True if ref resolves to a known branch or commit in this repo.

    The ref can be:
    - ``"HEAD"`` — always valid; resolves to the default branch.
    - A branch name (e.g. "main", "feature/groove") — validated via the
      musehub_branches table.
    - A commit ID prefix (≥7 chars) or full SHA — validated via musehub_commits.

    Returns False if the ref is unknown, which the caller should surface as
    a 404. This is a lightweight existence check; callers that need the full
    commit object should call ``get_commit()`` separately.
    """
    if ref == "HEAD":
        return True

    branch_stmt = select(db.MusehubBranch).where(
        db.MusehubBranch.repo_id == repo_id,
        db.MusehubBranch.name == ref,
    )
    branch_row = (await session.execute(branch_stmt)).scalars().first()
    if branch_row is not None:
        return True

    commit_stmt = select(db.MusehubCommit).where(
        db.MusehubCommit.repo_id == repo_id,
        db.MusehubCommit.commit_id == ref,
    )
    commit_row = (await session.execute(commit_stmt)).scalars().first()
    if commit_row is not None:
        return True

    # Short-SHA prefix match (≥7 chars), mirroring _resolve_ref_to_commit so
    # every ref resolvable there also validates here. The docstring always
    # promised prefix support; the previous implementation only checked
    # exact commit IDs.
    if len(ref) >= 7:
        prefix_stmt = (
            select(db.MusehubCommit)
            .where(
                db.MusehubCommit.repo_id == repo_id,
                db.MusehubCommit.commit_id.like(f"{ref}%"),
            )
            .limit(1)
        )
        prefix_row = (await session.execute(prefix_stmt)).scalars().first()
        return prefix_row is not None

    return False
1476
1477
async def _get_head_snapshot_manifest(
    session: AsyncSession,
    repo_id: str,
    ref: str,
) -> dict[str, str]:
    """Return the ``{path: object_id}`` manifest for the HEAD commit on *ref*.

    Any break in the chain branch → head commit → snapshot yields an empty
    dict (e.g. a brand-new repo with no pushes yet).
    """
    # Step 1: branch name → head commit id
    branch = (
        await session.execute(
            select(db.MusehubBranch).where(
                db.MusehubBranch.repo_id == repo_id,
                db.MusehubBranch.name == ref,
            )
        )
    ).scalar_one_or_none()
    if branch is None or not branch.head_commit_id:
        return {}

    # Step 2: head commit id → snapshot id
    commit = (
        await session.execute(
            select(db.MusehubCommit).where(
                db.MusehubCommit.commit_id == branch.head_commit_id
            )
        )
    ).scalar_one_or_none()
    if commit is None or commit.snapshot_id is None:
        return {}

    # Step 3: snapshot id → manifest (defensive copy)
    snapshot = await session.get(db.MusehubSnapshot, commit.snapshot_id)
    return dict(snapshot.manifest or {}) if snapshot is not None else {}
1515
1516
def _manifest_to_tree(
    manifest: dict[str, str],
    dir_path: str,
) -> tuple[list[TreeEntryResponse], list[TreeEntryResponse]]:
    """Build sorted (dirs, files) tree entries from a snapshot manifest.

    ``dir_path`` selects the directory to list (empty string = repo root).
    Only immediate children are returned: deeper descendants collapse into
    their first-level directory, which is emitted once per directory name.
    Both result lists are sorted alphabetically by entry name.
    """
    clean = dir_path.strip("/")
    prefix = clean + "/" if clean else ""

    dir_entries: dict[str, TreeEntryResponse] = {}
    file_entries: list[TreeEntryResponse] = []

    for raw_path, object_id in manifest.items():
        norm = raw_path.lstrip("/")
        if not norm.startswith(prefix):
            continue
        rest = norm[len(prefix):]
        if not rest:
            continue
        child, sep, _ = rest.partition("/")
        if not sep:
            # No further slash → a file directly inside the listed directory.
            file_entries.append(
                TreeEntryResponse(
                    type="file",
                    name=child,
                    path=norm,
                    size_bytes=None,
                    object_id=object_id,
                )
            )
        elif child not in dir_entries:
            # First sighting of this subdirectory; later paths under it are
            # absorbed into the same entry.
            dir_entries[child] = TreeEntryResponse(
                type="dir",
                name=child,
                path=prefix + child,
                size_bytes=None,
                object_id=None,
            )

    dirs = sorted(dir_entries.values(), key=lambda e: e.name)
    files = sorted(file_entries, key=lambda e: e.name)
    return dirs, files
1566
1567
async def list_tree(
    session: AsyncSession,
    repo_id: str,
    owner: str,
    repo_slug: str,
    ref: str,
    dir_path: str,
) -> TreeListResponse:
    """Build a directory listing for the tree browser.

    Primary strategy: read the snapshot manifest for the HEAD commit on *ref*
    (``musehub_snapshots.manifest``). This reflects the exact file tree at
    that commit without relying on per-object path metadata (which is empty
    for wire-protocol pushes).

    Fallback: scan ``musehub_objects`` by path (legacy MIDI-upload repos).
    Directories sort before files in the returned entry list in both paths.
    """
    manifest = await _get_head_snapshot_manifest(session, repo_id, ref)
    if manifest:
        dirs, files = _manifest_to_tree(manifest, dir_path)
        return TreeListResponse(
            owner=owner,
            repo_slug=repo_slug,
            ref=ref,
            dir_path=dir_path.strip("/"),
            entries=dirs + files,
        )

    # ── Legacy fallback: objects with explicit paths ──────────────────────
    all_objects = await list_objects(session, repo_id)
    # Normalised directory prefix to match against; empty string = repo root.
    prefix = (dir_path.strip("/") + "/") if dir_path.strip("/") else ""
    seen_dirs: set[str] = set()
    dirs_legacy: list[TreeEntryResponse] = []
    files_legacy: list[TreeEntryResponse] = []

    for obj in all_objects:
        path = obj.path.lstrip("/")
        if not path.startswith(prefix):
            continue
        remainder = path[len(prefix):]
        if not remainder:
            continue
        slash_pos = remainder.find("/")
        if slash_pos == -1:
            # Direct child file of the listed directory.
            # NOTE(review): legacy entries carry size_bytes but object_id=None,
            # unlike the manifest path above which sets object_id — presumably
            # legacy files are addressed by path rather than content hash;
            # confirm before unifying the two paths.
            files_legacy.append(
                TreeEntryResponse(
                    type="file",
                    name=remainder,
                    path=path,
                    size_bytes=obj.size_bytes,
                    object_id=None,
                )
            )
        else:
            # Deeper descendant: surface its first-level directory only once.
            dir_name = remainder[:slash_pos]
            dir_full_path = prefix + dir_name
            if dir_name not in seen_dirs:
                seen_dirs.add(dir_name)
                dirs_legacy.append(
                    TreeEntryResponse(
                        type="dir",
                        name=dir_name,
                        path=dir_full_path,
                        size_bytes=None,
                        object_id=None,
                    )
                )

    dirs_legacy.sort(key=lambda e: e.name)
    files_legacy.sort(key=lambda e: e.name)

    return TreeListResponse(
        owner=owner,
        repo_slug=repo_slug,
        ref=ref,
        dir_path=dir_path.strip("/"),
        entries=dirs_legacy + files_legacy,
    )
1646
1647
async def _resolve_ref_to_commit(
    session: AsyncSession, repo_id: str, ref: str
) -> db.MusehubCommit | None:
    """Resolve a branch name or commit SHA to a commit row.

    Resolution order: branch head first, then exact commit_id, then a
    short-SHA prefix match (≥7 chars) — so ``main``, full SHAs, and
    abbreviated SHAs all work. Returns None when nothing matches.
    """
    # 1. Branch head
    branch = (
        await session.execute(
            select(db.MusehubBranch).where(
                db.MusehubBranch.repo_id == repo_id,
                db.MusehubBranch.name == ref,
            )
        )
    ).scalar_one_or_none()
    if branch is not None and branch.head_commit_id:
        return await session.get(db.MusehubCommit, branch.head_commit_id)

    # 2. Exact commit_id
    exact = (
        await session.execute(
            select(db.MusehubCommit).where(
                db.MusehubCommit.repo_id == repo_id,
                db.MusehubCommit.commit_id == ref,
            )
        )
    ).scalar_one_or_none()
    if exact is not None:
        return exact

    # 3. Short-SHA prefix (require ≥7 chars to avoid accidental matches)
    if len(ref) < 7:
        return None
    return (
        await session.execute(
            select(db.MusehubCommit)
            .where(
                db.MusehubCommit.repo_id == repo_id,
                db.MusehubCommit.commit_id.like(f"{ref}%"),
            )
            .limit(1)
        )
    ).scalar_one_or_none()
1694
1695
async def get_file_at_ref(
    session: AsyncSession,
    repo_id: str,
    ref: str,
    file_path: str,
) -> dict[str, object] | None:
    """Resolve a file path at a given ref via the snapshot manifest.

    Lookup chain: ref → commit → snapshot → manifest[file_path] → object_id.
    Content bytes are intentionally NOT returned here; callers fetch them from
    the storage backend directly so nothing is loaded into memory unless needed.

    Returns a dict with:
    - ``object_id``: content-addressed SHA
    - ``snapshot_id``: the snapshot this file belongs to
    - ``commit_id``: resolved commit SHA
    - ``path``: normalised file path
    - ``manifest_size``: number of files in the resolved manifest

    Returns None when the ref or file is not found.
    """
    commit = await _resolve_ref_to_commit(session, repo_id, ref)
    if commit is None or commit.snapshot_id is None:
        return None

    snapshot = await session.get(db.MusehubSnapshot, commit.snapshot_id)
    if snapshot is None:
        return None

    manifest: dict[str, str] = dict(snapshot.manifest or {})
    normalised = file_path.lstrip("/")
    resolved_oid = manifest.get(normalised)
    if resolved_oid is None:
        return None

    return {
        "object_id": resolved_oid,
        "snapshot_id": commit.snapshot_id,
        "commit_id": commit.commit_id,
        "path": normalised,
        "manifest_size": len(manifest),
    }
1738
1739
async def get_last_commit_for_file(
    session: AsyncSession,
    repo_id: str,
    file_path: str,
    current_commit_id: str,
) -> db.MusehubCommit | None:
    """Return the most recent commit that changed ``file_path``.

    Scans commits in reverse chronological order, comparing snapshot manifests
    to find where the object_id for this path last changed. Stops at
    ``current_commit_id`` (inclusive). Returns None if the file has no history.

    Args:
        session: Open async DB session.
        repo_id: Hub repo identifier.
        file_path: Path within the repo (leading slashes stripped).
        current_commit_id: Commit whose snapshot anchors the search.
    """
    norm = file_path.lstrip("/")

    # Get the current object_id for this file
    current_commit = await session.get(db.MusehubCommit, current_commit_id)
    if current_commit is None or current_commit.snapshot_id is None:
        return current_commit

    snap = await session.get(db.MusehubSnapshot, current_commit.snapshot_id)
    if snap is None:
        return current_commit

    current_oid = (snap.manifest or {}).get(norm)
    if current_oid is None:
        return None

    # Walk back at most 200 commits (same branch, not newer than current)
    stmt = (
        select(db.MusehubCommit)
        .where(
            db.MusehubCommit.repo_id == repo_id,
            db.MusehubCommit.branch == (current_commit.branch or "main"),
            db.MusehubCommit.timestamp <= current_commit.timestamp,
        )
        .order_by(desc(db.MusehubCommit.timestamp))
        .limit(200)
    )
    rows = (await session.execute(stmt)).scalars().all()

    # Batch-load every snapshot referenced by the window in a single IN query,
    # eliminating the per-commit N+1 of the previous implementation (same
    # pattern used elsewhere in this module for audio previews and manifests).
    snap_ids = [r.snapshot_id for r in rows if r.snapshot_id]
    manifests: dict[str, dict[str, str]] = {}
    if snap_ids:
        snap_rows = (
            await session.execute(
                select(db.MusehubSnapshot).where(
                    db.MusehubSnapshot.snapshot_id.in_(snap_ids)
                )
            )
        ).scalars().all()
        manifests = {s.snapshot_id: dict(s.manifest or {}) for s in snap_rows}

    # Walk newest→oldest; keep advancing while the object_id is unchanged.
    # The last commit seen before it differs is the one that introduced the
    # current content.
    prev_commit: db.MusehubCommit | None = current_commit
    for row in rows:
        if row.snapshot_id is None:
            continue
        manifest = manifests.get(row.snapshot_id)
        if manifest is None:
            continue
        if manifest.get(norm) != current_oid:
            break
        prev_commit = row

    return prev_commit
1793
1794
async def get_snapshot_diff(
    session: AsyncSession,
    repo_id: str,
    commit_snapshot_id: str | None,
    parent_snapshot_id: str | None,
) -> dict[str, Any]:
    """Diff two snapshot manifests, returning file-level change lists.

    Returns a dict with:
    - ``added``: files present in the new snapshot but not the parent
    - ``removed``: files present in the parent but not the new snapshot
    - ``modified``: files present in both but with different object IDs
    - ``total_files``: file count of the new snapshot
    """

    async def _load_manifest(snapshot_id: str | None) -> dict[str, str]:
        # A missing id or a missing row both collapse to an empty manifest.
        if not snapshot_id:
            return {}
        snap = await session.get(db.MusehubSnapshot, snapshot_id)
        return dict(snap.manifest or {}) if snap else {}

    new_manifest = await _load_manifest(commit_snapshot_id)
    old_manifest = await _load_manifest(parent_snapshot_id)

    new_paths = set(new_manifest)
    old_paths = set(old_manifest)

    return {
        "added": sorted(new_paths - old_paths),
        "removed": sorted(old_paths - new_paths),
        "modified": sorted(
            p for p in new_paths & old_paths
            if new_manifest[p] != old_manifest[p]
        ),
        "total_files": len(new_manifest),
    }
1835
1836
async def get_repo_home_stats(
    session: AsyncSession,
    repo_id: str,
    ref: str,
) -> dict[str, object]:
    """Return aggregate stats for the repo home page.

    Returns a dict with:
    - ``total_commits``: int — total commit count across all branches
    - ``total_objects``: int — number of stored objects
    - ``total_size_bytes``: int — sum of all object sizes
    - ``file_type_counts``: dict[str, int] — extension → file count from HEAD snapshot
    - ``commit_activity``: list[int] — daily commit counts for the last 14 days (oldest first)
    - ``total_files``: int — number of files in the HEAD snapshot manifest
    """
    from datetime import timedelta
    from collections import Counter

    # ``or 0`` is defensive: coerce a None scalar to an int count.
    total_commits: int = (
        await session.execute(
            select(func.count()).where(db.MusehubCommit.repo_id == repo_id)
        )
    ).scalar_one() or 0

    # Single aggregate query for object count + total size.
    obj_agg = (
        await session.execute(
            select(
                func.count().label("cnt"),
                func.coalesce(func.sum(db.MusehubObject.size_bytes), 0).label("sz"),
            ).where(db.MusehubObject.repo_id == repo_id)
        )
    ).one()
    total_objects = int(obj_agg.cnt or 0)
    total_size_bytes = int(obj_agg.sz or 0)

    # File type breakdown from HEAD snapshot manifest
    manifest = await _get_head_snapshot_manifest(session, repo_id, ref)
    ext_counter: Counter[str] = Counter()
    for path in manifest:
        # Dot check runs on the basename so dotted directory names don't
        # produce fake extensions; extension-less files count under "".
        ext = path.rsplit(".", 1)[-1].lower() if "." in path.split("/")[-1] else ""
        ext_counter[ext] += 1
    # Only the 10 most common extensions are surfaced.
    file_type_counts: dict[str, int] = dict(ext_counter.most_common(10))

    # Daily commit activity for last 14 days
    now = datetime.now(tz=timezone.utc)
    fourteen_days_ago = now - timedelta(days=14)
    recent_rows = (
        await session.execute(
            select(db.MusehubCommit.timestamp)
            .where(
                db.MusehubCommit.repo_id == repo_id,
                db.MusehubCommit.timestamp >= fourteen_days_ago,
                # Commit IDs prefixed "init-" are excluded — presumably
                # synthetic repo-initialisation commits; confirm.
                db.MusehubCommit.commit_id.not_like("init-%"),
            )
            .order_by(db.MusehubCommit.timestamp)
        )
    ).scalars().all()

    # Bucket into 14 daily bins — daily[13] is today, daily[0] is 13 days ago.
    daily: list[int] = [0] * 14
    for ts in recent_rows:
        # Naive timestamps are assumed UTC (SQLite round-trip strips tzinfo).
        t = ts if ts.tzinfo else ts.replace(tzinfo=timezone.utc)
        day_idx = (now - t).days
        if 0 <= day_idx < 14:
            daily[13 - day_idx] += 1

    return {
        "total_commits": total_commits,
        "total_objects": total_objects,
        "total_size_bytes": total_size_bytes,
        "file_type_counts": file_type_counts,
        "commit_activity": daily,
        "total_files": len(manifest),
    }
1910
1911
async def get_file_last_commits(
    session: AsyncSession,
    repo_id: str,
    paths: list[str],
    max_commits: int = 60,
) -> dict[str, dict[str, str]]:
    """Return the last-touching commit for each file path.

    Walks commits newest→oldest, comparing each commit's snapshot manifest
    against the *next-older* commit's manifest. A path is "claimed" by the
    newest commit in which its object_id differs from the older snapshot
    (i.e. the file appeared or its content changed in that commit).
    Caps at ``max_commits`` to bound query time on large repos; a file
    untouched within the window is attributed to the oldest visible commit.

    Returns ``{path: {sha: str, message: str, author: str, timestamp: str}}``
    where ``sha`` is the first 8 chars and ``timestamp`` is ISO-8601 UTC.
    """
    from datetime import timezone as _tz

    if not paths:
        return {}

    commits_result = await session.execute(
        select(db.MusehubCommit)
        .where(db.MusehubCommit.repo_id == repo_id)
        .order_by(db.MusehubCommit.timestamp.desc())
        .limit(max_commits)
    )
    commits = list(commits_result.scalars().all())

    # Load all snapshot manifests in one pass.
    snap_ids = [c.snapshot_id for c in commits if c.snapshot_id]
    if not snap_ids:
        return {}

    snaps_result = await session.execute(
        select(db.MusehubSnapshot).where(db.MusehubSnapshot.snapshot_id.in_(snap_ids))
    )
    snap_by_id: dict[str, dict[str, str]] = {
        s.snapshot_id: dict(s.manifest or {})
        for s in snaps_result.scalars().all()
    }

    result: dict[str, dict[str, str]] = {}
    remaining = set(paths)

    for idx, commit in enumerate(commits):
        if not remaining:
            break
        cur_manifest = snap_by_id.get(commit.snapshot_id or "", {})
        # BUGFIX: compare against the next-OLDER commit's manifest, not the
        # previously-iterated (newer) one. The old code started with an empty
        # prev_manifest, so every path present at HEAD was claimed by the
        # HEAD commit on the very first iteration. Past the window edge the
        # older manifest is empty, so any still-unclaimed path is attributed
        # to the oldest visible commit. (Assumes timestamp-consecutive
        # commits are parent/child — same assumption as before.)
        if idx + 1 < len(commits):
            older_manifest = snap_by_id.get(commits[idx + 1].snapshot_id or "", {})
        else:
            older_manifest = {}

        claimed: set[str] = set()
        for p in remaining:
            cur_oid = cur_manifest.get(p)
            # Claim if file appeared or its content changed in this commit.
            if cur_oid and cur_oid != older_manifest.get(p):
                ts = commit.timestamp
                if ts.tzinfo is None:
                    ts = ts.replace(tzinfo=_tz.utc)
                result[p] = {
                    "sha": commit.commit_id[:8],
                    "message": commit.message.split("\n")[0][:72],
                    "author": commit.author,
                    "timestamp": ts.isoformat(),
                }
                claimed.add(p)
        remaining -= claimed

    return result
1981
1982
async def get_recently_pushed_branches(
    session: AsyncSession,
    repo_id: str,
    current_ref: str,
    within_hours: int = 72,
) -> list[dict[str, str]]:
    """Return branches (other than current_ref) whose head commit is recent.

    Used to render GitHub-style "branch had recent pushes N minutes ago" banners.
    Returns list of ``{name, sha, message, timestamp}`` sorted newest-first,
    where ``timestamp`` is ISO-8601 in UTC.
    """
    from datetime import timezone as _tz, timedelta

    branches_result = await session.execute(
        select(db.MusehubBranch).where(db.MusehubBranch.repo_id == repo_id)
    )
    branches = [
        b for b in branches_result.scalars().all()
        if b.name != current_ref and b.head_commit_id
    ]
    if not branches:
        return []

    head_ids = [b.head_commit_id for b in branches if b.head_commit_id]
    commits_result = await session.execute(
        select(db.MusehubCommit).where(db.MusehubCommit.commit_id.in_(head_ids))
    )
    commit_by_id: dict[str, db.MusehubCommit] = {
        c.commit_id: c for c in commits_result.scalars().all()
    }

    cutoff = datetime.now(_tz.utc) - timedelta(hours=within_hours)
    # Carry the datetime alongside each entry so we can sort chronologically.
    dated: list[tuple[datetime, dict[str, str]]] = []
    for branch in branches:
        commit = commit_by_id.get(branch.head_commit_id or "")
        if not commit:
            continue
        ts = commit.timestamp
        # BUGFIX: normalize every timestamp to UTC. Naive values are assumed
        # UTC (as before); aware values are converted with astimezone so the
        # serialized strings all share a +00:00 offset. The old code sorted
        # raw isoformat() strings, which is not chronological when offsets
        # differ.
        if ts.tzinfo is None:
            ts = ts.replace(tzinfo=_tz.utc)
        else:
            ts = ts.astimezone(_tz.utc)
        if ts >= cutoff:
            dated.append((ts, {
                "name": branch.name,
                "sha": commit.commit_id[:8],
                "message": commit.message.split("\n")[0][:72],
                "timestamp": ts.isoformat(),
            }))

    # Sort on the datetime itself, not its string form.
    dated.sort(key=lambda pair: pair[0], reverse=True)
    return [entry for _, entry in dated]
2033
2034
async def get_user_forks(db_session: AsyncSession, username: str) -> UserForksResponse:
    """Return every repo forked by ``username``, newest fork first.

    Joins ``musehub_forks`` (filtered on ``forked_by``) against
    ``musehub_repos`` twice: one alias for the fork's own metadata and one
    for the source repo, so the profile page can render
    "forked from {source_owner}/{source_slug}" under each card.

    An unknown or fork-less user yields an empty response (never 404);
    callers are responsible for 404-guarding the username beforehand.
    """
    fork_side = aliased(db.MusehubRepo, name="fork_repo")
    source_side = aliased(db.MusehubRepo, name="source_repo")

    stmt = (
        select(db.MusehubFork, fork_side, source_side)
        .join(fork_side, db.MusehubFork.fork_repo_id == fork_side.repo_id)
        .join(source_side, db.MusehubFork.source_repo_id == source_side.repo_id)
        .where(db.MusehubFork.forked_by == username)
        .order_by(db.MusehubFork.created_at.desc())
    )
    rows = (await db_session.execute(stmt)).all()

    entries: list[UserForkedRepoEntry] = []
    for fork, fork_repo, src_repo in rows:
        entries.append(
            UserForkedRepoEntry(
                fork_id=fork.fork_id,
                fork_repo=_to_repo_response(fork_repo),
                source_owner=src_repo.owner,
                source_slug=src_repo.slug,
                forked_at=fork.created_at,
            )
        )

    return UserForksResponse(forks=entries, total=len(entries))
2071
2072
async def list_repo_forks(db_session: AsyncSession, repo_id: str) -> ForkNetworkResponse:
    """Build the fork network tree rooted at the given repo.

    Loads every ``musehub_forks`` row whose ``source_repo_id`` matches,
    joining each fork's repo row for owner/slug. Each child carries a
    deterministic pseudo-divergence count derived from its fork_id — a
    stable placeholder until per-branch commit counts are indexed.

    The tree is one level deep (root + direct forks); recursive multi-level
    chains would need a recursive CTE and can be added when required.
    An unknown source repo returns an empty placeholder root with zero forks.
    """
    source = (
        await db_session.execute(
            select(db.MusehubRepo).where(db.MusehubRepo.repo_id == repo_id)
        )
    ).scalar_one_or_none()

    if source is None:
        # Unknown repo: hand back an empty skeleton instead of raising.
        empty_root = ForkNetworkNode(
            owner="",
            repo_slug="",
            repo_id=repo_id,
            divergence_commits=0,
            forked_by="",
            forked_at=None,
        )
        return ForkNetworkResponse(root=empty_root, total_forks=0)

    fork_side = aliased(db.MusehubRepo, name="fork_repo")
    stmt = (
        select(db.MusehubFork, fork_side)
        .join(fork_side, db.MusehubFork.fork_repo_id == fork_side.repo_id)
        .where(db.MusehubFork.source_repo_id == repo_id)
        .order_by(db.MusehubFork.created_at.asc())
    )
    pairs = (await db_session.execute(stmt)).all()

    child_nodes: list[ForkNetworkNode] = []
    for fork, fork_repo in pairs:
        # Deterministic placeholder divergence: hash the fork_id hex into
        # 0–14 so the value is stable across retries and visually non-flat.
        if fork.fork_id:
            bucket = int(fork.fork_id.replace("-", ""), 16) % 100
        else:
            bucket = 0
        child_nodes.append(
            ForkNetworkNode(
                owner=fork_repo.owner,
                repo_slug=fork_repo.slug,
                repo_id=fork_repo.repo_id,
                divergence_commits=bucket % 15,
                forked_by=fork.forked_by,
                forked_at=fork.created_at,
                children=[],
            )
        )

    return ForkNetworkResponse(
        root=ForkNetworkNode(
            owner=source.owner,
            repo_slug=source.slug,
            repo_id=source.repo_id,
            divergence_commits=0,
            forked_by="",
            forked_at=None,
            children=child_nodes,
        ),
        total_forks=len(child_nodes),
    )
2147
2148
async def get_user_starred(db_session: AsyncSession, username: str) -> UserStarredResponse:
    """Return every repo starred by ``username``, most recent star first.

    Resolves the username to a profile row first (``musehub_stars`` keys on
    ``user_id``), then joins stars against ``musehub_repos`` for full repo
    metadata on each entry.

    An unknown user — or one with no stars — yields an empty response, never
    a 404. Callers 404-guard the username before invoking this.
    """
    profile = (
        await db_session.execute(
            select(db.MusehubProfile).where(db.MusehubProfile.username == username)
        )
    ).scalar_one_or_none()

    if profile is None:
        return UserStarredResponse(starred=[], total=0)

    stmt = (
        select(db.MusehubStar, db.MusehubRepo)
        .join(db.MusehubRepo, db.MusehubStar.repo_id == db.MusehubRepo.repo_id)
        .where(db.MusehubStar.user_id == profile.user_id)
        .order_by(db.MusehubStar.created_at.desc())
    )
    star_rows = (await db_session.execute(stmt)).all()

    items: list[UserStarredRepoEntry] = []
    for star, repo in star_rows:
        items.append(
            UserStarredRepoEntry(
                star_id=star.star_id,
                repo=_to_repo_response(repo),
                starred_at=star.created_at,
            )
        )

    return UserStarredResponse(starred=items, total=len(items))
2186
2187
async def get_user_watched(db_session: AsyncSession, username: str) -> UserWatchedResponse:
    """Return every repo that ``username`` is watching, most recent first.

    Resolves the username to a profile row first (``musehub_watches`` keys on
    ``user_id``), then joins watches against ``musehub_repos`` for full repo
    metadata on each entry.

    An unknown user — or one watching nothing — yields an empty response,
    never a 404. Callers 404-guard the username before invoking this.
    """
    profile = (
        await db_session.execute(
            select(db.MusehubProfile).where(db.MusehubProfile.username == username)
        )
    ).scalar_one_or_none()

    if profile is None:
        return UserWatchedResponse(watched=[], total=0)

    stmt = (
        select(db.MusehubWatch, db.MusehubRepo)
        .join(db.MusehubRepo, db.MusehubWatch.repo_id == db.MusehubRepo.repo_id)
        .where(db.MusehubWatch.user_id == profile.user_id)
        .order_by(db.MusehubWatch.created_at.desc())
    )
    watch_rows = (await db_session.execute(stmt)).all()

    items: list[UserWatchedRepoEntry] = []
    for watch, repo in watch_rows:
        items.append(
            UserWatchedRepoEntry(
                watch_id=watch.watch_id,
                repo=_to_repo_response(repo),
                watched_at=watch.created_at,
            )
        )

    return UserWatchedResponse(watched=items, total=len(items))
2225
2226
2227 # ── Repo settings helpers ─────────────────────────────────────────────────────
2228
# Canonical defaults for every repo-settings flag. ``_merge_settings`` overlays
# a repo's stored JSON blob on top of this dict, so any key added here is
# automatically back-filled for legacy rows whose blob predates it.
_SETTINGS_DEFAULTS: dict[str, object] = {
    "default_branch": "main",
    # Feature toggles — issues on by default; projects and wiki off.
    "has_issues": True,
    "has_projects": False,
    "has_wiki": False,
    # Optional metadata; None means "unset".
    "license": None,
    "homepage_url": None,
    # Merge-strategy flags surfaced by the settings endpoints.
    "allow_merge_commit": True,
    "allow_squash_merge": True,
    "allow_rebase_merge": False,
    "delete_branch_on_merge": True,
}
2241
2242
def _merge_settings(stored: dict[str, object] | None) -> dict[str, object]:
    """Overlay ``stored`` on the defaults and return a complete settings dict.

    ``stored`` may be None (new repos) or a partial dict (legacy rows that
    predate individual flag additions); any missing key is filled from
    ``_SETTINGS_DEFAULTS`` so callers always see every flag. The inputs are
    never mutated — a fresh dict is returned.
    """
    return {**_SETTINGS_DEFAULTS, **(stored or {})}
2254
2255
async def get_repo_settings(
    session: AsyncSession, repo_id: str
) -> RepoSettingsResponse | None:
    """Return the mutable settings for a repo, or None when it does not exist.

    Dedicated column values (name, description, visibility, tags) are read
    straight off the ORM row; feature flags come from the ``settings`` JSON
    blob with ``_SETTINGS_DEFAULTS`` filling any gaps, so both new and legacy
    repos produce a fully-populated response.

    Called by ``GET /api/v1/repos/{repo_id}/settings``.
    """
    repo = await session.get(db.MusehubRepo, repo_id)
    if repo is None:
        return None

    flags = _merge_settings(repo.settings)

    return RepoSettingsResponse(
        name=repo.name,
        description=repo.description,
        visibility=repo.visibility,
        # "or" also covers an explicitly-stored empty/None value.
        default_branch=str(flags.get("default_branch") or "main"),
        has_issues=bool(flags.get("has_issues", True)),
        has_projects=bool(flags.get("has_projects", False)),
        has_wiki=bool(flags.get("has_wiki", False)),
        topics=list(repo.tags or []),
        license=flags.get("license"),  # type: ignore[arg-type]
        homepage_url=flags.get("homepage_url"),  # type: ignore[arg-type]
        allow_merge_commit=bool(flags.get("allow_merge_commit", True)),
        allow_squash_merge=bool(flags.get("allow_squash_merge", True)),
        allow_rebase_merge=bool(flags.get("allow_rebase_merge", False)),
        delete_branch_on_merge=bool(flags.get("delete_branch_on_merge", True)),
    )
2293
2294
async def update_repo_settings(
    session: AsyncSession,
    repo_id: str,
    patch: RepoSettingsPatch,
) -> RepoSettingsResponse | None:
    """Apply a partial settings update and return the refreshed settings.

    Only non-None fields in ``patch`` are written. Dedicated columns
    (name, description, visibility, tags) go directly on the ORM row;
    feature flags are merged into the ``settings`` JSON blob (defaults
    first, so legacy rows gain any later-added keys).

    Returns None when the repo does not exist. The caller commits the
    session after a successful return.

    Called by ``PATCH /api/v1/repos/{repo_id}/settings``.
    """
    repo = await session.get(db.MusehubRepo, repo_id)
    if repo is None:
        return None

    # ── Dedicated column fields ──────────────────────────────────────────────
    if patch.name is not None:
        repo.name = patch.name
    if patch.description is not None:
        repo.description = patch.description
    if patch.visibility is not None:
        repo.visibility = patch.visibility
    if patch.topics is not None:
        repo.tags = patch.topics

    # ── Feature-flag JSON blob ───────────────────────────────────────────────
    merged = _merge_settings(repo.settings)

    # Every flag lives on the patch under the same name it uses in the blob,
    # so a single loop replaces the per-field if-chain.
    flag_fields = (
        "default_branch",
        "has_issues",
        "has_projects",
        "has_wiki",
        "license",
        "homepage_url",
        "allow_merge_commit",
        "allow_squash_merge",
        "allow_rebase_merge",
        "delete_branch_on_merge",
    )
    flag_updates: dict[str, object] = {
        field: getattr(patch, field)
        for field in flag_fields
        if getattr(patch, field) is not None
    }

    if flag_updates:
        merged.update(flag_updates)
        # Reassign (rather than mutate in place) so the ORM sees the change.
        repo.settings = merged

    logger.info("✅ Updated settings for repo %s", repo_id)
    return await get_repo_settings(session, repo_id)