gabriel / musehub public
musehub_repository.py python
2355 lines 81.8 KB
b2e329a1 dev → main: uvicorn workers, nginx push timeout, MCP tool descriptions (#32) Gabriel Cardona <cgcardona@gmail.com> 2d ago
1 """MuseHub persistence adapter — single point of DB access for Hub entities.
2
3 This module is the ONLY place that touches the musehub_* tables.
4 Route handlers delegate here; no business logic lives in routes.
5
6 Boundary rules:
7 - Must NOT import state stores, SSE queues, or LLM clients.
8 - Must NOT import musehub.core.* modules.
9 - May import ORM models from musehub.db.musehub_models.
10 - May import Pydantic response models from musehub.models.musehub.
11 """
12 from datetime import datetime, timezone
13
14 import logging
15 import re
16 from collections import deque
17 from typing import Any
18
19 from sqlalchemy import desc, func, or_, select
20 from sqlalchemy.ext.asyncio import AsyncSession
21 from sqlalchemy.orm import aliased
22 from sqlalchemy.sql.elements import ColumnElement
23
24 from musehub.db import musehub_models as db
25 from musehub.db import musehub_collaborator_models as collab_db
26 from musehub.models.musehub import (
27 SessionListResponse,
28 SessionResponse,
29 BranchDetailListResponse,
30 BranchDetailResponse,
31 BranchDivergenceScores,
32 BranchResponse,
33 CommitResponse,
34 GlobalSearchCommitMatch,
35 GlobalSearchRepoGroup,
36 GlobalSearchResult,
37 DagEdge,
38 DagGraphResponse,
39 DagNode,
40 InstrumentInfo,
41 MuseHubContextCommitInfo,
42 MuseHubContextHistoryEntry,
43 MuseHubContextMusicalState,
44 MuseHubContextResponse,
45 ObjectMetaResponse,
46 RepoListResponse,
47 RepoResponse,
48 RepoSettingsPatch,
49 RepoSettingsResponse,
50 ScoreMetaInfo,
51 TimelineCommitEvent,
52 TimelineEmotionEvent,
53 TimelineResponse,
54 TimelineSectionEvent,
55 TimelineTrackEvent,
56 TrackInfo,
57 TreeEntryResponse,
58 TreeListResponse,
59 ForkNetworkNode,
60 ForkNetworkResponse,
61 UserForkedRepoEntry,
62 UserForksResponse,
63 UserStarredRepoEntry,
64 UserStarredResponse,
65 UserWatchedRepoEntry,
66 UserWatchedResponse,
67 )
68
69 logger = logging.getLogger(__name__)
70
71
72 def _generate_slug(name: str) -> str:
73 """Derive a URL-safe slug from a human-readable repo name.
74
75 Rules: lowercase, non-alphanumeric chars collapsed to single hyphens,
76 leading/trailing hyphens stripped, max 64 chars. If the result is empty
77 (e.g. name was all symbols) we fall back to "repo".
78 """
79 slug = name.lower()
80 slug = re.sub(r"[^a-z0-9]+", "-", slug)
81 slug = slug.strip("-")
82 slug = slug[:64].strip("-")
83 return slug or "repo"
84
85
86 def _repo_clone_url(owner: str, slug: str) -> str:
87 """Derive the canonical clone URL from owner and slug.
88
89 Uses the musehub://{owner}/{slug} scheme — the DAW's native protocol handler
90 resolves this to the correct API base at runtime. No internal UUID is exposed
91 in external URLs.
92 """
93 return f"musehub://{owner}/{slug}"
94
95
def _to_repo_response(row: db.MusehubRepo) -> RepoResponse:
    """Map a MusehubRepo ORM row onto its wire-format RepoResponse."""
    # Copy tags defensively; the column may be NULL on older rows.
    tag_list: list[str] = list(row.tags or [])
    return RepoResponse(
        repo_id=row.repo_id,
        name=row.name,
        owner=row.owner,
        slug=row.slug,
        visibility=row.visibility,
        owner_user_id=row.owner_user_id,
        clone_url=_repo_clone_url(row.owner, row.slug),
        description=row.description,
        tags=tag_list,
        key_signature=row.key_signature,
        tempo_bpm=row.tempo_bpm,
        # getattr guards rows/models that predate the domain_id column.
        domain_id=getattr(row, "domain_id", None),
        created_at=row.created_at,
    )
112
113
def _to_branch_response(row: db.MusehubBranch) -> BranchResponse:
    """Map a MusehubBranch ORM row onto its wire-format BranchResponse."""
    return BranchResponse(
        branch_id=row.branch_id,
        name=row.name,
        head_commit_id=row.head_commit_id,
    )
120
121
def _to_commit_response(row: db.MusehubCommit) -> CommitResponse:
    """Map a MusehubCommit ORM row onto its wire-format CommitResponse."""
    # parent_ids may be NULL for root commits — normalise to a fresh list.
    return CommitResponse(
        commit_id=row.commit_id,
        branch=row.branch,
        parent_ids=list(row.parent_ids or []),
        message=row.message,
        author=row.author,
        timestamp=row.timestamp,
        snapshot_id=row.snapshot_id,
    )
132
133
async def create_repo(
    session: AsyncSession,
    *,
    name: str,
    owner: str,
    visibility: str,
    owner_user_id: str,
    description: str = "",
    tags: list[str] | None = None,
    key_signature: str | None = None,
    tempo_bpm: int | None = None,
    # ── Wizard extensions ────────────────────────────────────────
    # NOTE: ``license`` shadows the Python builtin; the name is kept because
    # it is part of the caller-facing keyword interface.
    license: str | None = None,
    topics: list[str] | None = None,
    initialize: bool = False,
    default_branch: str = "main",
    template_repo_id: str | None = None,
) -> RepoResponse:
    """Persist a new remote repo and return its wire representation.

    ``slug`` is auto-generated from ``name``. The ``(owner, slug)`` pair must
    be unique — callers should catch ``IntegrityError`` and surface a 409.

    Wizard behaviors:
    - When ``template_repo_id`` is set, the template's description and topics
      are copied into the new repo (template must be public; silently skipped
      when it doesn't exist or is private).
    - When ``initialize=True``, an empty "Initial commit" is written plus the
      default branch pointer so the repo is immediately browsable.
    - ``license`` is stored in the settings JSON blob under the ``license`` key.
    - ``topics`` are merged with ``tags`` into a single unified tag list.
    """
    # Merge topics into tags (deduplicated, stable order).
    # dict.fromkeys preserves first-seen order, unlike set().
    combined_tags: list[str] = list(dict.fromkeys((tags or []) + (topics or [])))

    # Copy template metadata when a template repo is supplied.
    if template_repo_id is not None:
        tmpl = await session.get(db.MusehubRepo, template_repo_id)
        # Private or missing templates are silently ignored (documented above).
        if tmpl is not None and tmpl.visibility == "public":
            if not description:
                description = tmpl.description
            # Prepend template tags; deduplicate preserving order.
            combined_tags = list(dict.fromkeys(list(tmpl.tags or []) + combined_tags))

    # Build the settings JSON blob with optional license field.
    settings: dict[str, object] = {}
    if license is not None:
        settings["license"] = license

    slug = _generate_slug(name)
    repo = db.MusehubRepo(
        name=name,
        owner=owner,
        slug=slug,
        visibility=visibility,
        owner_user_id=owner_user_id,
        description=description,
        tags=combined_tags,
        key_signature=key_signature,
        tempo_bpm=tempo_bpm,
        # An empty settings dict is stored as NULL rather than {}.
        settings=settings or None,
    )
    session.add(repo)
    await session.flush()  # populate default columns before reading
    await session.refresh(repo)

    # Wizard initialisation: create default branch + empty initial commit.
    if initialize:
        # Synthetic, deterministic-per-repo commit ID for the empty root commit.
        init_commit_id = f"init-{repo.repo_id[:8]}"
        now = datetime.now(tz=timezone.utc)

        branch = db.MusehubBranch(
            repo_id=repo.repo_id,
            name=default_branch,
            head_commit_id=init_commit_id,
        )
        session.add(branch)

        init_commit = db.MusehubCommit(
            commit_id=init_commit_id,
            repo_id=repo.repo_id,
            branch=default_branch,
            parent_ids=[],
            message="Initial commit",
            author=owner_user_id,
            timestamp=now,
        )
        session.add(init_commit)
        await session.flush()

    logger.info(
        "✅ Created MuseHub repo %s (%s/%s) for user %s (initialize=%s)",
        repo.repo_id, owner, slug, owner_user_id, initialize,
    )
    return _to_repo_response(repo)
229
230
async def get_repo(session: AsyncSession, repo_id: str) -> RepoResponse | None:
    """Look up a repo by internal UUID; None when missing or soft-deleted."""
    row = await session.get(db.MusehubRepo, repo_id)
    if row is not None and row.deleted_at is None:
        return _to_repo_response(row)
    return None
237
238
async def delete_repo(session: AsyncSession, repo_id: str) -> bool:
    """Soft-delete a repo by stamping its ``deleted_at`` column.

    Returns True only when the repo existed and had not already been
    tombstoned; False otherwise. Committing the session remains the
    caller's responsibility.
    """
    repo = await session.get(db.MusehubRepo, repo_id)
    if repo is None:
        return False
    if repo.deleted_at is not None:
        return False  # already soft-deleted — treat as not found
    repo.deleted_at = datetime.now(timezone.utc)
    await session.flush()
    logger.info("✅ Soft-deleted MuseHub repo %s", repo_id)
    return True
253
254
async def transfer_repo_ownership(
    session: AsyncSession, repo_id: str, new_owner_user_id: str
) -> RepoResponse | None:
    """Reassign a repo's ``owner_user_id`` to a different user.

    The public ``owner`` username slug is deliberately left alone — renaming
    the owner segment of the URL is a separate, settings-level action taken
    by the new owner.

    Returns the refreshed RepoResponse, or None when the repo is missing or
    soft-deleted. The caller commits the session.
    """
    repo = await session.get(db.MusehubRepo, repo_id)
    if repo is None or repo.deleted_at is not None:
        return None
    repo.owner_user_id = new_owner_user_id
    await session.flush()
    await session.refresh(repo)
    logger.info("✅ Transferred MuseHub repo %s ownership to user %s", repo_id, new_owner_user_id)
    return _to_repo_response(repo)
275
276
async def get_repo_row_by_owner_slug(
    session: AsyncSession, owner: str, slug: str
) -> db.MusehubRepo | None:
    """Fetch the raw ORM row for an ``(owner, slug)`` pair, or None.

    Use this over :func:`get_repo_by_owner_slug` when internal columns
    (e.g. ``owner_user_id``, ``visibility``) are needed.

    NOTE(review): unlike :func:`get_repo`, this does not filter on
    ``deleted_at`` — soft-deleted rows still resolve here; confirm whether
    any caller relies on that.
    """
    query = select(db.MusehubRepo).where(
        db.MusehubRepo.owner == owner,
        db.MusehubRepo.slug == slug,
    )
    result = await session.execute(query)
    return result.scalars().first()
290
291
async def get_repo_by_owner_slug(
    session: AsyncSession, owner: str, slug: str
) -> RepoResponse | None:
    """Return repo metadata by owner+slug canonical URL pair, or None if not found.

    This is the primary resolver for all external /{owner}/{slug} routes.
    Soft-deleted repos are treated as not found, matching :func:`get_repo`.
    """
    row = await get_repo_row_by_owner_slug(session, owner, slug)
    # Consistency fix: get_repo() hides soft-deleted repos, so the canonical
    # URL resolver must too — otherwise a deleted repo stayed reachable via
    # its owner/slug URL.
    if row is None or row.deleted_at is not None:
        return None
    return _to_repo_response(row)
303
304
# Default number of repos per page for list_repos_for_user.
_PAGE_SIZE = 20
306
307
async def list_repos_for_user(
    session: AsyncSession,
    user_id: str,
    *,
    limit: int = _PAGE_SIZE,
    cursor: str | None = None,
) -> RepoListResponse:
    """Return repos owned by or collaborated on by ``user_id``.

    Soft-deleted repos are excluded, matching :func:`get_repo` semantics.
    Results are ordered by ``created_at`` descending (newest first). Pagination
    uses an opaque cursor encoding the ``created_at`` ISO timestamp of the last
    item on the current page — pass it back as ``?cursor=`` to advance.

    Args:
        session: Active async DB session.
        user_id: JWT ``sub`` of the authenticated caller.
        limit: Maximum repos per page (default 20).
        cursor: Opaque pagination cursor from a previous response.

    Returns:
        ``RepoListResponse`` with the page of repos, total count, and next cursor.
    """
    # Collect repo IDs the user collaborates on (accepted invitations only).
    collab_stmt = select(collab_db.MusehubCollaborator.repo_id).where(
        collab_db.MusehubCollaborator.user_id == user_id,
        collab_db.MusehubCollaborator.accepted_at.is_not(None),
    )
    collab_repo_ids = list((await session.execute(collab_stmt)).scalars().all())

    # Base filter: repos the caller owns OR collaborates on. Fix: exclude
    # soft-deleted rows — previously tombstoned repos still appeared in
    # listings and inflated ``total``, inconsistent with get_repo().
    base_filter = or_(
        db.MusehubRepo.owner_user_id == user_id,
        db.MusehubRepo.repo_id.in_(collab_repo_ids),
    ) & db.MusehubRepo.deleted_at.is_(None)

    # Total count across all pages.
    count_stmt = select(func.count()).select_from(db.MusehubRepo).where(base_filter)
    total: int = (await session.execute(count_stmt)).scalar_one()

    # Apply cursor: keep only repos strictly older than the cursor timestamp.
    page_filter = base_filter
    if cursor is not None:
        try:
            cursor_dt = datetime.fromisoformat(cursor)
            page_filter = base_filter & (db.MusehubRepo.created_at < cursor_dt)
        except ValueError:
            pass  # malformed cursor — ignore and return from the beginning

    stmt = (
        select(db.MusehubRepo)
        .where(page_filter)
        .order_by(desc(db.MusehubRepo.created_at))
        .limit(limit)
    )
    rows = (await session.execute(stmt)).scalars().all()
    repos = [_to_repo_response(r) for r in rows]

    # Build next cursor from the last item's created_at when there may be more.
    next_cursor: str | None = None
    if len(rows) == limit:
        next_cursor = rows[-1].created_at.isoformat()

    return RepoListResponse(repos=repos, next_cursor=next_cursor, total=total)
372
373
async def get_repo_orm_by_owner_slug(
    session: AsyncSession, owner: str, slug: str
) -> db.MusehubRepo | None:
    """Return the raw ORM repo row by owner+slug, or None if not found.

    Used internally when the route needs the repo_id for downstream calls.
    This was a byte-for-byte duplicate of :func:`get_repo_row_by_owner_slug`;
    delegating to it keeps the two resolvers from silently drifting apart.
    """
    return await get_repo_row_by_owner_slug(session, owner, slug)
386
387
async def list_branches(session: AsyncSession, repo_id: str) -> list[BranchResponse]:
    """Return every branch of ``repo_id`` as a wire model, sorted by name."""
    query = (
        select(db.MusehubBranch)
        .where(db.MusehubBranch.repo_id == repo_id)
        .order_by(db.MusehubBranch.name)
    )
    result = await session.execute(query)
    return [_to_branch_response(row) for row in result.scalars().all()]
397
398
async def list_branches_with_detail(
    session: AsyncSession, repo_id: str
) -> BranchDetailListResponse:
    """Return branches enriched with ahead/behind counts vs the default branch.

    The default branch is "main" when present, otherwise the alphabetically
    first branch. Ahead/behind are set-difference counts over each branch's
    commit IDs relative to the default branch — an approximation suitable
    for display. Musical divergence scores need audio snapshots that are not
    available server-side, so every divergence field is ``None`` for now.
    """
    branch_query = (
        select(db.MusehubBranch)
        .where(db.MusehubBranch.repo_id == repo_id)
        .order_by(db.MusehubBranch.name)
    )
    branch_rows = (await session.execute(branch_query)).scalars().all()
    if not branch_rows:
        return BranchDetailListResponse(branches=[], default_branch="main")

    # Prefer "main"; otherwise the first branch (rows arrive name-sorted).
    all_names = {b.name for b in branch_rows}
    default_name = "main" if "main" in all_names else branch_rows[0].name

    # Single query for every commit ID, bucketed per branch in Python.
    id_query = select(db.MusehubCommit.commit_id, db.MusehubCommit.branch).where(
        db.MusehubCommit.repo_id == repo_id
    )
    per_branch: dict[str, set[str]] = {}
    for cid, bname in (await session.execute(id_query)).all():
        per_branch.setdefault(bname, set()).add(cid)

    base_set: set[str] = per_branch.get(default_name, set())

    details: list[BranchDetailResponse] = []
    for branch in branch_rows:
        on_default = branch.name == default_name
        own_set: set[str] = per_branch.get(branch.name, set())
        details.append(
            BranchDetailResponse(
                branch_id=branch.branch_id,
                name=branch.name,
                head_commit_id=branch.head_commit_id,
                is_default=on_default,
                ahead_count=0 if on_default else len(own_set - base_set),
                behind_count=0 if on_default else len(base_set - own_set),
                divergence=BranchDivergenceScores(
                    melodic=None, harmonic=None, rhythmic=None, structural=None, dynamic=None
                ),
            )
        )

    return BranchDetailListResponse(branches=details, default_branch=default_name)
457
458
def _to_object_meta_response(row: db.MusehubObject) -> ObjectMetaResponse:
    """Map a MusehubObject ORM row onto its wire-format ObjectMetaResponse."""
    return ObjectMetaResponse(
        object_id=row.object_id,
        path=row.path,
        size_bytes=row.size_bytes,
        created_at=row.created_at,
    )
466
467
async def get_commit(
    session: AsyncSession, repo_id: str, commit_id: str
) -> CommitResponse | None:
    """Fetch one commit of a repo by ID; None when it is not in this repo."""
    query = select(db.MusehubCommit).where(
        db.MusehubCommit.repo_id == repo_id,
        db.MusehubCommit.commit_id == commit_id,
    )
    match = (await session.execute(query)).scalars().first()
    return None if match is None else _to_commit_response(match)
483
484
async def list_objects(
    session: AsyncSession, repo_id: str
) -> list[ObjectMetaResponse]:
    """Return path-ordered object metadata for a repo (no binary content)."""
    query = (
        select(db.MusehubObject)
        .where(db.MusehubObject.repo_id == repo_id)
        .order_by(db.MusehubObject.path)
    )
    result = await session.execute(query)
    return [_to_object_meta_response(obj) for obj in result.scalars().all()]
496
497
async def get_object_row(
    session: AsyncSession, repo_id: str, object_id: str
) -> db.MusehubObject | None:
    """Fetch the raw ORM object row for content delivery, or None.

    Route handlers use the returned row's ``disk_path`` to stream the file.
    """
    query = select(db.MusehubObject).where(
        db.MusehubObject.repo_id == repo_id,
        db.MusehubObject.object_id == object_id,
    )
    result = await session.execute(query)
    return result.scalars().first()
513
514
async def get_object_by_path(
    session: AsyncSession, repo_id: str, path: str
) -> db.MusehubObject | None:
    """Resolve a repo-relative path to its newest stored object row.

    The raw file endpoint uses this to map a human-readable path such as
    ``tracks/bass.mid`` onto the artifact on disk. Re-pushed content can
    leave several objects sharing one path; the row with the newest
    ``created_at`` wins — mirroring Git's ref semantics, where HEAD always
    tracks the latest version.

    Args:
        session: Active async DB session.
        repo_id: UUID of the target repo.
        path: Client-supplied relative file path, e.g. ``tracks/bass.mid``.

    Returns:
        The matching ORM row, or ``None`` if no object with that path exists.
    """
    newest_first = (
        select(db.MusehubObject)
        .where(
            db.MusehubObject.repo_id == repo_id,
            db.MusehubObject.path == path,
        )
        .order_by(desc(db.MusehubObject.created_at))
        .limit(1)
    )
    result = await session.execute(newest_first)
    return result.scalars().first()
544
545
546 def _instrument_name_from_path(path: str) -> str:
547 """Derive a human-readable instrument name from a MIDI object path.
548
549 Strips directory components and the file extension, then title-cases
550 the result. ``tracks/bass.mid`` → ``"Bass"``. Falls back to the
551 bare filename when the stem is empty.
552 """
553 stem = path.split("/")[-1].rsplit(".", 1)[0]
554 return stem.title() or path
555
556
async def get_track_info(
    session: AsyncSession,
    repo_id: str,
    path: str,
) -> TrackInfo | None:
    """Return SSR metadata for one MIDI track addressed by path.

    Resolves ``path`` to its newest object row and wraps the stored DB
    metadata in a :class:`TrackInfo` for template rendering. Both
    ``duration_sec`` and ``track_count`` stay ``None`` because MIDI parsing
    is client-side only in the current architecture.

    Returns ``None`` when no object with that path exists.
    """
    row = await get_object_by_path(session, repo_id, path)
    if row is None:
        return None
    return TrackInfo(
        name=_instrument_name_from_path(row.path),
        size_bytes=row.size_bytes,
        duration_sec=None,
        track_count=None,
    )
580
581
async def get_instruments_for_repo(
    session: AsyncSession,
    repo_id: str,
) -> list[InstrumentInfo]:
    """Return instrument lane descriptors for every MIDI object in a repo.

    Scans objects with a ``.mid`` / ``.midi`` extension and derives each
    :class:`InstrumentInfo` from the stored path. ``channel`` is simply the
    zero-based position in the path-sorted result (stable render order);
    ``gm_program`` stays ``None`` since GM resolution needs client-side
    MIDI parsing.

    Returns an empty list when the repo holds no MIDI objects.
    """
    midi_query = (
        select(db.MusehubObject)
        .where(
            db.MusehubObject.repo_id == repo_id,
            or_(
                db.MusehubObject.path.like("%.mid"),
                db.MusehubObject.path.like("%.midi"),
            ),
        )
        .order_by(db.MusehubObject.path)
    )
    midi_rows = (await session.execute(midi_query)).scalars().all()

    lanes: list[InstrumentInfo] = []
    for channel, obj in enumerate(midi_rows):
        lanes.append(
            InstrumentInfo(
                name=_instrument_name_from_path(obj.path),
                channel=channel,
                gm_program=None,
            )
        )
    return lanes
616
617
async def get_score_meta_for_repo(
    session: AsyncSession,
    repo_id: str,
    path: str,
) -> ScoreMetaInfo:
    """Return SSR metadata for the score page header.

    Derives the score title from the requested path; the musical fields
    (``key``, ``meter``, ``composer``) are ``None`` until server-side
    MIDI/ABC parsing is implemented. ``instrument_count`` is the number of
    MIDI objects in the repo, or ``None`` when there are none so the
    template can skip the field.

    Always returns a :class:`ScoreMetaInfo` — even for unknown paths — so
    the template can render a valid (if sparse) header.
    """
    title = _instrument_name_from_path(path) if path else "Score"
    stmt = (
        select(func.count())
        # Explicit FROM, consistent with the other count queries in this
        # module — avoids relying on SQLAlchemy inferring the table from
        # the WHERE clause alone.
        .select_from(db.MusehubObject)
        .where(
            db.MusehubObject.repo_id == repo_id,
            or_(
                db.MusehubObject.path.like("%.mid"),
                db.MusehubObject.path.like("%.midi"),
            ),
        )
    )
    midi_count: int = (await session.execute(stmt)).scalar_one()
    return ScoreMetaInfo(
        title=title,
        composer=None,
        key=None,
        meter=None,
        instrument_count=midi_count if midi_count > 0 else None,
    )
652
653
async def list_commits(
    session: AsyncSession,
    repo_id: str,
    *,
    branch: str | None = None,
    limit: int = 50,
    offset: int = 0,
) -> tuple[list[CommitResponse], int]:
    """Return commits for a repo, newest first, optionally filtered by branch.

    Offset-based pagination via ``offset``; the result is a
    ``(commits, total_count)`` tuple where the total spans all pages.
    """
    filtered = select(db.MusehubCommit).where(db.MusehubCommit.repo_id == repo_id)
    if branch:
        filtered = filtered.where(db.MusehubCommit.branch == branch)

    count_query = select(func.count()).select_from(filtered.subquery())
    total: int = (await session.execute(count_query)).scalar_one()

    page_query = (
        filtered.order_by(desc(db.MusehubCommit.timestamp)).offset(offset).limit(limit)
    )
    page_rows = (await session.execute(page_query)).scalars().all()
    return [_to_commit_response(row) for row in page_rows], total
677
678
# ── Section / track keyword heuristics ──────────────────────────────────────

# Song-structure section names matched as lowercase substrings of commit
# messages to emit timeline section events.
_SECTION_KEYWORDS: list[str] = [
    "intro", "verse", "chorus", "bridge", "outro", "hook",
    "pre-chorus", "prechorus", "breakdown", "drop", "build",
    "refrain", "coda", "tag", "interlude",
]

# Instrument/track names matched the same way to emit timeline track events.
_TRACK_KEYWORDS: list[str] = [
    "bass", "drums", "keys", "piano", "guitar", "synth", "pad",
    "lead", "vocals", "strings", "brass", "horn",
    "flute", "cello", "violin", "organ", "arp", "percussion",
    "kick", "snare", "hi-hat", "hihat", "clap", "melody",
]

# Verb patterns used to classify a commit message as an add vs. remove.
# NOTE(review): _ADDED_VERBS is not consulted by _infer_action below
# ("added" is its default) — confirm whether it is used elsewhere or is
# kept only for symmetry.
_ADDED_VERBS = re.compile(
    r"\b(add(?:ed)?|new|introduce[ds]?|creat(?:ed)?|record(?:ed)?|layer(?:ed)?)\b",
    re.IGNORECASE,
)
_REMOVED_VERBS = re.compile(
    r"\b(remov(?:e[ds]?)?|delet(?:e[ds]?)?|drop(?:ped)?|cut|mute[ds]?)\b",
    re.IGNORECASE,
)
702
703
def _infer_action(message: str) -> str:
    """Classify a commit message as a 'removed' or 'added' change.

    A removal verb anywhere in the message wins; everything else defaults
    to 'added'.
    """
    return "removed" if _REMOVED_VERBS.search(message) else "added"
709
710
def _extract_section_events(row: db.MusehubCommit) -> list[TimelineSectionEvent]:
    """Build one section-change event per section keyword found in the message."""
    lowered = row.message.lower()
    # _infer_action is pure, so compute the shared action once.
    action = _infer_action(row.message)
    return [
        TimelineSectionEvent(
            commit_id=row.commit_id,
            timestamp=row.timestamp,
            section_name=keyword,
            action=action,
        )
        for keyword in _SECTION_KEYWORDS
        if keyword in lowered
    ]
726
727
def _extract_track_events(row: db.MusehubCommit) -> list[TimelineTrackEvent]:
    """Build one track-change event per track keyword found in the message."""
    lowered = row.message.lower()
    # _infer_action is pure, so compute the shared action once.
    action = _infer_action(row.message)
    return [
        TimelineTrackEvent(
            commit_id=row.commit_id,
            timestamp=row.timestamp,
            track_name=keyword,
            action=action,
        )
        for keyword in _TRACK_KEYWORDS
        if keyword in lowered
    ]
743
744
def _derive_emotion(row: db.MusehubCommit) -> TimelineEmotionEvent:
    """Derive a deterministic emotion vector from the commit SHA.

    Three non-overlapping 4-hex-digit windows of the SHA map to valence,
    energy, and tension in [0.0, 1.0]. A window containing non-hex
    characters (e.g. short test-fixture IDs) falls back to a neutral 0.5.
    Fully deterministic, so the timeline reproduces without ML inference.
    """
    # Pad short commit IDs so every 4-char window exists.
    padded = row.commit_id.ljust(12, "0")

    def axis(start: int) -> float:
        window = padded[start : start + 4]
        if all(ch in "0123456789abcdefABCDEF" for ch in window):
            return int(window, 16) / 0xFFFF
        return 0.5

    return TimelineEmotionEvent(
        commit_id=row.commit_id,
        timestamp=row.timestamp,
        valence=round(axis(0), 4),
        energy=round(axis(4), 4),
        tension=round(axis(8), 4),
    )
765
766
async def get_timeline_events(
    session: AsyncSession,
    repo_id: str,
    *,
    limit: int = 200,
) -> TimelineResponse:
    """Return a chronological timeline of musical evolution for a repo.

    Loads up to ``limit`` commits (oldest-first, for left-to-right temporal
    rendering) and fans them out into four parallel event streams:
    - commits: every commit as a timeline marker
    - emotion: deterministic emotion vectors derived from commit SHAs
    - sections: section-change markers parsed from commit messages
    - tracks: track add/remove markers parsed from commit messages

    Callers must verify the repo exists before calling this function.
    A repo with no commits yields an empty timeline.
    """
    total: int = (
        await session.execute(
            select(func.count()).where(db.MusehubCommit.repo_id == repo_id)
        )
    ).scalar_one()

    commit_query = (
        select(db.MusehubCommit)
        .where(db.MusehubCommit.repo_id == repo_id)
        .order_by(db.MusehubCommit.timestamp)  # oldest-first for temporal rendering
        .limit(limit)
    )
    commit_rows = (await session.execute(commit_query)).scalars().all()

    commits: list[TimelineCommitEvent] = []
    emotions: list[TimelineEmotionEvent] = []
    sections: list[TimelineSectionEvent] = []
    tracks: list[TimelineTrackEvent] = []

    for commit in commit_rows:
        commits.append(
            TimelineCommitEvent(
                commit_id=commit.commit_id,
                branch=commit.branch,
                message=commit.message,
                author=commit.author,
                timestamp=commit.timestamp,
                parent_ids=list(commit.parent_ids or []),
            )
        )
        emotions.append(_derive_emotion(commit))
        sections.extend(_extract_section_events(commit))
        tracks.extend(_extract_track_events(commit))

    return TimelineResponse(
        commits=commits,
        emotion=emotions,
        sections=sections,
        tracks=tracks,
        total_commits=total,
    )
async def global_search(
    session: AsyncSession,
    *,
    query: str,
    mode: str = "keyword",
    page: int = 1,
    page_size: int = 10,
) -> GlobalSearchResult:
    """Search commit messages across all public MuseHub repos.

    Only ``visibility='public'`` repos are searched — private repos are never
    exposed regardless of caller identity. This enforces the public-only
    contract at the persistence layer so no route handler can accidentally
    bypass it.

    ``mode`` controls matching strategy:
    - ``keyword``: OR-match of whitespace-split query terms against message and
      repo name using LIKE (case-insensitive via lower()).
    - ``pattern``: raw SQL LIKE pattern applied to commit message only.

    Results are grouped by repo and paginated by repo-group (``page_size``
    controls how many repo-groups per page). Within each group, up to 20
    matching commits are returned newest-first.

    An audio preview object ID is attached when the repo contains any .mp3,
    .ogg, or .wav artifact — the first one found by path ordering is used.
    Audio previews are resolved in a single batched query across all matching
    repos (not N per-repo queries) to avoid the N+1 pattern.

    Args:
        session: Active async DB session.
        query: Raw search string from the user or agent.
        mode: "keyword" or "pattern". Defaults to "keyword".
        page: 1-based page number for repo-group pagination.
        page_size: Number of repo-groups per page (1–50).

    Returns:
        GlobalSearchResult with groups, pagination metadata, and counts.
    """
    # ── 1. Collect all public repos ─────────────────────────────────────────
    # NOTE(review): soft-deleted repos (deleted_at) are not filtered here —
    # confirm whether deleted public repos should remain searchable.
    public_repos_stmt = (
        select(db.MusehubRepo)
        .where(db.MusehubRepo.visibility == "public")
        .order_by(db.MusehubRepo.created_at)
    )
    public_repo_rows = (await session.execute(public_repos_stmt)).scalars().all()
    total_repos_searched = len(public_repo_rows)

    # Short-circuit: nothing to search, or blank query.
    if not public_repo_rows or not query.strip():
        return GlobalSearchResult(
            query=query,
            mode=mode,
            groups=[],
            total_repos_searched=total_repos_searched,
            page=page,
            page_size=page_size,
        )

    repo_ids = [r.repo_id for r in public_repo_rows]
    repo_map: dict[str, db.MusehubRepo] = {r.repo_id: r for r in public_repo_rows}

    # ── 2. Build commit filter predicate ────────────────────────────────────
    predicate: ColumnElement[bool]
    if mode == "pattern":
        # The raw user string is used as a LIKE pattern by design (wildcards
        # are the feature); LIKE is parameterized, so no SQL injection risk.
        predicate = db.MusehubCommit.message.like(query)
    else:
        # keyword: OR-match each whitespace-split term against message (lower)
        terms = [t for t in query.lower().split() if t]
        if not terms:
            return GlobalSearchResult(
                query=query,
                mode=mode,
                groups=[],
                total_repos_searched=total_repos_searched,
                page=page,
                page_size=page_size,
            )
        # A term matches when it appears in either the commit message OR the
        # repo name; terms are ORed together (any-term-matches semantics).
        term_predicates = [
            or_(
                func.lower(db.MusehubCommit.message).contains(term),
                func.lower(db.MusehubRepo.name).contains(term),
            )
            for term in terms
        ]
        predicate = or_(*term_predicates)

    # ── 3. Query matching commits joined to their repo ───────────────────────
    commits_stmt = (
        select(db.MusehubCommit, db.MusehubRepo)
        .join(db.MusehubRepo, db.MusehubCommit.repo_id == db.MusehubRepo.repo_id)
        .where(
            db.MusehubCommit.repo_id.in_(repo_ids),
            predicate,
        )
        .order_by(desc(db.MusehubCommit.timestamp))
    )
    commit_pairs = (await session.execute(commits_stmt)).all()

    # ── 4. Group commits by repo ─────────────────────────────────────────────
    # Commits arrive newest-first, so each group's list is newest-first too,
    # and the dict's insertion order ranks repos by their newest match.
    groups_map: dict[str, list[db.MusehubCommit]] = {}
    for commit_row, _repo_row in commit_pairs:
        groups_map.setdefault(commit_row.repo_id, []).append(commit_row)

    # ── 5. Resolve audio preview objects — single batched query (eliminates N+1) ──
    # Fetch all qualifying audio objects for every matching repo in one round-trip,
    # ordered by (repo_id, path) so we naturally encounter each repo's
    # alphabetically-first audio file first when iterating the result set.
    # Python deduplication (first-seen wins) replicates the previous LIMIT 1
    # per-repo semantics without issuing N separate queries.
    audio_map: dict[str, str] = {}
    if groups_map:
        audio_batch_stmt = (
            select(db.MusehubObject.repo_id, db.MusehubObject.object_id)
            .where(
                db.MusehubObject.repo_id.in_(list(groups_map.keys())),
                or_(
                    db.MusehubObject.path.like("%.mp3"),
                    db.MusehubObject.path.like("%.ogg"),
                    db.MusehubObject.path.like("%.wav"),
                ),
            )
            .order_by(db.MusehubObject.repo_id, db.MusehubObject.path)
        )
        audio_rows = (await session.execute(audio_batch_stmt)).all()
        for audio_row in audio_rows:
            if audio_row.repo_id not in audio_map:
                audio_map[audio_row.repo_id] = audio_row.object_id

    # ── 6. Paginate repo-groups ──────────────────────────────────────────────
    # NOTE(review): assumes page >= 1; a smaller value yields a negative slice
    # offset — presumably the route layer validates this. Confirm.
    sorted_repo_ids = list(groups_map.keys())
    offset = (page - 1) * page_size
    page_repo_ids = sorted_repo_ids[offset : offset + page_size]

    groups: list[GlobalSearchRepoGroup] = []
    for rid in page_repo_ids:
        repo_row = repo_map[rid]
        all_matches = groups_map[rid]
        audio_oid = audio_map.get(rid)

        # NOTE(review): repo_owner is populated from owner_user_id (internal
        # user ID) rather than the public ``owner`` username used in clone
        # URLs — confirm which one the API contract expects.
        commit_matches = [
            GlobalSearchCommitMatch(
                commit_id=c.commit_id,
                message=c.message,
                author=c.author,
                branch=c.branch,
                timestamp=c.timestamp,
                repo_id=rid,
                repo_name=repo_row.name,
                repo_owner=repo_row.owner_user_id,
                repo_visibility=repo_row.visibility,
                audio_object_id=audio_oid,
            )
            for c in all_matches[:20]
        ]
        groups.append(
            GlobalSearchRepoGroup(
                repo_id=rid,
                repo_name=repo_row.name,
                repo_owner=repo_row.owner_user_id,
                repo_slug=repo_row.slug,
                repo_visibility=repo_row.visibility,
                matches=commit_matches,
                total_matches=len(all_matches),
            )
        )

    return GlobalSearchResult(
        query=query,
        mode=mode,
        groups=groups,
        total_repos_searched=total_repos_searched,
        page=page,
        page_size=page_size,
    )
async def list_commits_dag(
    session: AsyncSession,
    repo_id: str,
) -> DagGraphResponse:
    """Return the full commit graph for a repo as a topologically sorted DAG.

    Fetches every commit for the repo (no limit — required for correct DAG
    traversal). Applies Kahn's algorithm to produce a topological ordering
    from oldest ancestor to newest commit, which graph renderers can consume
    directly without additional sorting.

    Edges flow child → parent (source = child, target = parent) following the
    standard directed graph convention where arrows point toward ancestors.

    Branch head commits are identified by querying the branches table. The
    highest-timestamp commit across all branches is designated as HEAD for
    display purposes when no explicit HEAD ref exists.

    Agent use case: call this to reason about the project's branching topology,
    find common ancestors, or identify which branches contain a given commit.
    """
    # Fetch all commits for this repo
    stmt = select(db.MusehubCommit).where(db.MusehubCommit.repo_id == repo_id)
    all_rows = (await session.execute(stmt)).scalars().all()

    if not all_rows:
        return DagGraphResponse(nodes=[], edges=[], head_commit_id=None)

    # Build lookup map: commit_id → ORM row, used for O(1) parent resolution
    row_map: dict[str, db.MusehubCommit] = {r.commit_id: r for r in all_rows}

    # Fetch all branches to identify HEAD candidates and branch labels
    branch_stmt = select(db.MusehubBranch).where(db.MusehubBranch.repo_id == repo_id)
    branch_rows = (await session.execute(branch_stmt)).scalars().all()

    # Map commit_id → branch names pointing at it (only heads present in row_map)
    branch_label_map: dict[str, list[str]] = {}
    for br in branch_rows:
        if br.head_commit_id and br.head_commit_id in row_map:
            branch_label_map.setdefault(br.head_commit_id, []).append(br.name)

    # Identify HEAD: the branch head with the most recent timestamp, or the
    # most recent commit overall when no branches exist
    head_commit_id: str | None = None
    if branch_rows:
        latest_ts = None
        for br in branch_rows:
            if br.head_commit_id and br.head_commit_id in row_map:
                ts = row_map[br.head_commit_id].timestamp
                if latest_ts is None or ts > latest_ts:
                    latest_ts = ts
                    head_commit_id = br.head_commit_id
    if head_commit_id is None:
        # Fallback covers both "no branches" and "branch heads missing from row_map"
        head_commit_id = max(all_rows, key=lambda r: r.timestamp).commit_id

    # Kahn's topological sort (oldest → newest).
    # in_degree[c] = number of c's parents that are present in this repo's commit set.
    # Commits with in_degree == 0 are roots (no parents) — they enter the queue first,
    # producing a parent-before-child ordering (oldest ancestor → newest commit).
    in_degree: dict[str, int] = {r.commit_id: 0 for r in all_rows}
    # children_map[parent_id] = list of commit IDs whose parent_ids contains parent_id
    children_map: dict[str, list[str]] = {r.commit_id: [] for r in all_rows}

    edges: list[DagEdge] = []
    for row in all_rows:
        for parent_id in (row.parent_ids or []):
            # Parents absent from row_map are skipped, so edges never dangle.
            if parent_id in row_map:
                edges.append(DagEdge(source=row.commit_id, target=parent_id))
                children_map.setdefault(parent_id, []).append(row.commit_id)
                in_degree[row.commit_id] += 1

    # Kahn's algorithm: start from commits with no parents (roots)
    queue: deque[str] = deque(
        cid for cid, deg in in_degree.items() if deg == 0
    )
    topo_order: list[str] = []

    while queue:
        cid = queue.popleft()
        topo_order.append(cid)
        for child_id in children_map.get(cid, []):
            in_degree[child_id] -= 1
            if in_degree[child_id] == 0:
                queue.append(child_id)

    # Handle cycles or disconnected commits (append remaining in timestamp order)
    remaining = set(row_map.keys()) - set(topo_order)
    if remaining:
        sorted_remaining = sorted(remaining, key=lambda c: row_map[c].timestamp)
        topo_order.extend(sorted_remaining)

    # Conventional-commit prefix: group(1)=type, group(2)=optional "(scope)",
    # group(3)=optional "!" breaking-change marker before the colon.
    _conv_re = re.compile(r'^(\w+)(\([^)]*\))?(!)?\s*:')

    nodes: list[DagNode] = []
    for cid in topo_order:
        row = row_map[cid]
        raw_meta: dict[str, Any] = dict(row.commit_meta or {}) if row.commit_meta else {}

        # Extract conventional-commit prefix from the message
        m = _conv_re.match((row.message or "").strip())
        commit_type = m.group(1).lower() if m else ""

        # Semantic version bump from commit_meta
        sem_ver_bump = str(raw_meta.get("sem_ver_bump") or "none").lower()

        # Breaking change: bang suffix OR commit_meta flag
        is_breaking = bool((m and m.group(3)) or raw_meta.get("breaking_changes"))

        # Agent authorship: presence of agent_id in commit_meta
        is_agent = bool(raw_meta.get("agent_id"))

        # Symbol operation counts from structured_delta
        structured_delta: dict[str, Any] = raw_meta.get("structured_delta") or {}
        sym_added = 0
        sym_removed = 0
        for file_op in structured_delta.get("ops", []):
            for child_op in (file_op.get("child_ops") or []):
                op = child_op.get("op", "")
                if op == "insert":
                    sym_added += 1
                elif op == "delete":
                    sym_removed += 1

        nodes.append(
            DagNode(
                commit_id=row.commit_id,
                message=row.message,
                author=row.author,
                timestamp=row.timestamp,
                branch=row.branch,
                parent_ids=list(row.parent_ids or []),
                is_head=(row.commit_id == head_commit_id),
                branch_labels=branch_label_map.get(row.commit_id, []),
                tag_labels=[],
                commit_type=commit_type,
                sem_ver_bump=sem_ver_bump,
                is_breaking=is_breaking,
                is_agent=is_agent,
                sym_added=sym_added,
                sym_removed=sym_removed,
            )
        )

    logger.debug("✅ Built DAG for repo %s: %d nodes, %d edges", repo_id, len(nodes), len(edges))
    return DagGraphResponse(nodes=nodes, edges=edges, head_commit_id=head_commit_id)
1142
1143
1144 # ---------------------------------------------------------------------------
1145 # Context document builder
1146 # ---------------------------------------------------------------------------
1147
# File extensions treated as music artifacts when deriving track names
# from stored object paths (see _extract_track_names_from_objects).
_MUSIC_FILE_EXTENSIONS = frozenset(
    {".mid", ".midi", ".mp3", ".wav", ".aiff", ".aif", ".flac"}
)

# Maximum number of ancestor commits included in a context document's history.
_CONTEXT_HISTORY_DEPTH = 5
1153
1154
def _extract_track_names_from_objects(objects: list[db.MusehubObject]) -> list[str]:
    """Derive human-readable track names from stored object paths.

    A path counts as a track when its extension is a recognised music format
    and its stem does not look like a raw SHA-256 hex digest. Stems are
    lowercased, de-duplicated, and returned in sorted order — matching the
    convention in ``muse_context._extract_track_names``.
    """
    import pathlib

    hex_digits = set("0123456789abcdef")
    names: set[str] = set()
    for stored in objects:
        candidate = pathlib.PurePosixPath(stored.path)
        if candidate.suffix.lower() not in _MUSIC_FILE_EXTENSIONS:
            continue
        stem = candidate.stem.lower()
        # Skip stems that are bare 64-char hex strings (content hashes).
        if len(stem) == 64 and set(stem) <= hex_digits:
            continue
        names.add(stem)
    return sorted(names)
1173
1174
async def _get_commit_by_id(
    session: AsyncSession, repo_id: str, commit_id: str
) -> db.MusehubCommit | None:
    """Look up a single MusehubCommit ORM row scoped to (repo_id, commit_id)."""
    result = await session.execute(
        select(db.MusehubCommit).where(
            db.MusehubCommit.repo_id == repo_id,
            db.MusehubCommit.commit_id == commit_id,
        )
    )
    return result.scalars().first()
1184
1185
async def _build_hub_history(
    session: AsyncSession,
    repo_id: str,
    start_commit: db.MusehubCommit,
    objects: list[db.MusehubObject],
    depth: int,
) -> list[MuseHubContextHistoryEntry]:
    """Walk the parent chain, returning up to *depth* ancestor entries.

    The *start_commit* (the context target) is NOT included — it is surfaced
    separately as ``head_commit`` in the result. Entries are newest-first.
    The object list is reused across entries since we have no per-commit object
    index at this layer; active tracks reflect the overall repo's artifact set.

    Args:
        session: Open async DB session (read-only here).
        repo_id: Hub repo identifier scoping the commit lookups.
        start_commit: Commit whose ancestors are walked (itself excluded).
        objects: All stored objects for the repo, used to derive track names.
        depth: Maximum number of ancestor entries to return.
    """
    # The object list never varies per ancestor, so derive the track names
    # once up front instead of re-scanning every object for every entry
    # (previous implementation recomputed this inside the loop).
    shared_tracks = _extract_track_names_from_objects(objects)

    entries: list[MuseHubContextHistoryEntry] = []
    parent_ids: list[str] = list(start_commit.parent_ids or [])

    while parent_ids and len(entries) < depth:
        # Follow the first parent only; merge side-chains are not expanded.
        parent_id = parent_ids[0]
        commit = await _get_commit_by_id(session, repo_id, parent_id)
        if commit is None:
            logger.warning("⚠️ Hub history chain broken at %s", parent_id[:8])
            break
        entries.append(
            MuseHubContextHistoryEntry(
                commit_id=commit.commit_id,
                message=commit.message,
                author=commit.author,
                timestamp=commit.timestamp,
                # Fresh copy per entry so mutating one entry's list cannot
                # affect its siblings (matches prior per-entry list semantics).
                active_tracks=list(shared_tracks),
            )
        )
        parent_ids = list(commit.parent_ids or [])

    return entries
1221
1222
async def get_context_for_commit(
    session: AsyncSession,
    repo_id: str,
    ref: str,
) -> MuseHubContextResponse | None:
    """Build a musical context document for a MuseHub commit.

    Traverses the commit's parent chain (up to 5 ancestors) and derives active
    tracks from the repo's stored objects. Musical dimensions (key, tempo,
    etc.) are always None until Storpheus MIDI analysis is integrated.

    Args:
        session: Open async DB session. Read-only — no writes performed.
        repo_id: Hub repo identifier.
        ref: Target commit ID. Must belong to this repo.

    Returns:
        ``MuseHubContextResponse`` ready for JSON serialisation, or None if the
        commit does not exist in this repo.

    The output is deterministic: for the same ``repo_id`` + ``ref``, the result
    is always identical, making it safe to cache.
    """
    commit = await _get_commit_by_id(session, repo_id, ref)
    if commit is None:
        return None

    raw_objects_stmt = select(db.MusehubObject).where(
        db.MusehubObject.repo_id == repo_id
    )
    # Materialise once and reuse for both track extraction and the history
    # walk (previous implementation built two separate list copies).
    repo_objects = list((await session.execute(raw_objects_stmt)).scalars().all())

    active_tracks = _extract_track_names_from_objects(repo_objects)

    head_commit_info = MuseHubContextCommitInfo(
        commit_id=commit.commit_id,
        message=commit.message,
        author=commit.author,
        branch=commit.branch,
        timestamp=commit.timestamp,
    )

    musical_state = MuseHubContextMusicalState(active_tracks=active_tracks)

    history = await _build_hub_history(
        session, repo_id, commit, repo_objects, _CONTEXT_HISTORY_DEPTH
    )

    missing: list[str] = []
    if not active_tracks:
        missing.append("no music files found in repo")
    # Musical dimensions are unconditionally unknown at this layer (see docstring).
    missing.extend(("key", "tempo_bpm", "time_signature", "form", "emotion"))

    suggestions: dict[str, str] = {}
    if not active_tracks:
        suggestions["first_track"] = (
            "Push your first MIDI or audio file to populate the musical state."
        )
    else:
        suggestions["next_section"] = (
            f"Current tracks: {', '.join(active_tracks)}. "
            "Consider adding harmonic or melodic variation to develop the composition."
        )

    logger.info(
        "✅ MuseHub context built for repo %s commit %s (tracks=%d)",
        repo_id[:8],
        ref[:8],
        len(active_tracks),
    )
    return MuseHubContextResponse(
        repo_id=repo_id,
        current_branch=commit.branch,
        head_commit=head_commit_info,
        musical_state=musical_state,
        history=history,
        missing_elements=missing,
        suggestions=suggestions,
    )
1303
1304
def _to_session_response(s: db.MusehubSession) -> SessionResponse:
    """Map a MusehubSession ORM row to its API response model.

    ``duration_seconds`` is derived only for sessions that have ended.
    """

    def _naive(dt: datetime) -> datetime:
        # SQLite strips tz info on round-trip, so strip it here too before
        # subtraction to avoid mixing aware and naive datetimes.
        return dt.replace(tzinfo=None) if dt.tzinfo else dt

    duration_seconds: float | None = None
    if s.ended_at is not None:
        duration_seconds = (_naive(s.ended_at) - _naive(s.started_at)).total_seconds()

    return SessionResponse(
        session_id=s.session_id,
        started_at=s.started_at,
        ended_at=s.ended_at,
        duration_seconds=duration_seconds,
        participants=s.participants or [],
        commits=list(s.commits) if s.commits else [],
        notes=s.notes or "",
        intent=s.intent,
        location=s.location,
        is_active=s.is_active,
        created_at=s.created_at,
    )
1326
1327
async def create_session(
    session: AsyncSession,
    repo_id: str,
    started_at: datetime | None,
    participants: list[str],
    intent: str,
    location: str,
) -> SessionResponse:
    """Persist a freshly-started recording session and return its response model."""
    import uuid

    # Default the start time to "now" (UTC) when the caller did not supply one.
    row = db.MusehubSession(
        session_id=str(uuid.uuid4()),
        repo_id=repo_id,
        started_at=started_at if started_at is not None else datetime.now(timezone.utc),
        participants=participants,
        intent=intent,
        location=location,
        is_active=True,
    )
    session.add(row)
    await session.flush()
    return _to_session_response(row)
1351
1352
async def stop_session(
    session: AsyncSession,
    repo_id: str,
    session_id: str,
    ended_at: datetime | None,
) -> SessionResponse:
    """Mark a session as ended; idempotent if already stopped.

    Args:
        session: Open async DB session.
        repo_id: Hub repo identifier the session must belong to.
        session_id: Session to stop.
        ended_at: Explicit end time; defaults to now (UTC) when None.

    Raises:
        ValueError: when no session with *session_id* exists in *repo_id*.
    """
    # ``select`` comes from the module-level sqlalchemy import; the previous
    # function-local re-import was redundant.
    result = await session.execute(
        select(db.MusehubSession).where(
            db.MusehubSession.session_id == session_id,
            db.MusehubSession.repo_id == repo_id,
        )
    )
    row = result.scalar_one_or_none()
    if row is None:
        raise ValueError(f"session {session_id} not found")
    if row.is_active:
        # Only stamp the end time on the first stop call (idempotency).
        row.ended_at = ended_at or datetime.now(timezone.utc)
        row.is_active = False
        await session.flush()
    return _to_session_response(row)
1376
1377
async def list_sessions(
    session: AsyncSession,
    repo_id: str,
    limit: int = 50,
    offset: int = 0,
) -> tuple[list[SessionResponse], int]:
    """Return sessions for a repo, newest first, with total count.

    Active sessions sort ahead of finished ones; within each group the most
    recently started session comes first.

    Args:
        session: Open async DB session.
        repo_id: Hub repo identifier.
        limit: Page size (default 50).
        offset: Number of rows to skip for pagination.
    """
    # ``select`` and ``func`` come from the module-level sqlalchemy import;
    # the previous function-local re-import was redundant.
    total_result = await session.execute(
        select(func.count(db.MusehubSession.session_id)).where(
            db.MusehubSession.repo_id == repo_id
        )
    )
    total = total_result.scalar_one()

    result = await session.execute(
        select(db.MusehubSession)
        .where(db.MusehubSession.repo_id == repo_id)
        .order_by(db.MusehubSession.is_active.desc(), db.MusehubSession.started_at.desc())
        .limit(limit)
        .offset(offset)
    )
    rows = result.scalars().all()
    return [_to_session_response(s) for s in rows], total
1403
1404
async def get_session(
    session: AsyncSession,
    repo_id: str,
    session_id: str,
) -> SessionResponse | None:
    """Fetch a single session by id, scoped to *repo_id*.

    Returns None when the session does not exist in this repo.
    """
    # ``select`` comes from the module-level sqlalchemy import; the previous
    # function-local re-import was redundant.
    result = await session.execute(
        select(db.MusehubSession).where(
            db.MusehubSession.session_id == session_id,
            db.MusehubSession.repo_id == repo_id,
        )
    )
    row = result.scalar_one_or_none()
    if row is None:
        return None
    return _to_session_response(row)
1423
1424
async def resolve_head_ref(session: AsyncSession, repo_id: str) -> str:
    """Resolve the symbolic "HEAD" ref to the repo's default branch name.

    Resolution policy: "main" when that branch exists, otherwise the
    lexicographically first branch name; "main" again when the repo has
    no branches at all.
    """
    stmt = (
        select(db.MusehubBranch)
        .where(db.MusehubBranch.repo_id == repo_id)
        .order_by(db.MusehubBranch.name)
    )
    rows = (await session.execute(stmt)).scalars().all()
    names = [row.name for row in rows]
    if not names:
        return "main"
    if "main" in names:
        return "main"
    # Query is ordered by name, so names[0] is the lexicographic minimum.
    return names[0]
1442
1443
async def resolve_ref_for_tree(
    session: AsyncSession, repo_id: str, ref: str
) -> bool:
    """Return True if ref resolves to a known branch or commit in this repo.

    The ref can be:
    - ``"HEAD"`` — always valid; resolves to the default branch.
    - A branch name (e.g. "main", "feature/groove") — validated via the
      musehub_branches table.
    - A commit ID prefix (≥7 chars) or full SHA — validated via musehub_commits.

    Returns False if the ref is unknown, which the caller should surface as
    a 404. This is a lightweight existence check; callers that need the full
    commit object should call ``get_commit()`` separately.
    """
    if ref == "HEAD":
        return True

    branch_stmt = select(db.MusehubBranch).where(
        db.MusehubBranch.repo_id == repo_id,
        db.MusehubBranch.name == ref,
    )
    branch_row = (await session.execute(branch_stmt)).scalars().first()
    if branch_row is not None:
        return True

    commit_stmt = select(db.MusehubCommit).where(
        db.MusehubCommit.repo_id == repo_id,
        db.MusehubCommit.commit_id == ref,
    )
    commit_row = (await session.execute(commit_stmt)).scalars().first()
    if commit_row is not None:
        return True

    # Short-SHA prefix match (≥7 chars), mirroring _resolve_ref_to_commit so
    # every ref resolvable there also validates here. The docstring always
    # promised prefix support; the previous implementation only checked
    # exact commit IDs.
    if len(ref) >= 7:
        prefix_stmt = (
            select(db.MusehubCommit)
            .where(
                db.MusehubCommit.repo_id == repo_id,
                db.MusehubCommit.commit_id.like(f"{ref}%"),
            )
            .limit(1)
        )
        prefix_row = (await session.execute(prefix_stmt)).scalars().first()
        return prefix_row is not None

    return False
1476
1477
async def _get_head_snapshot_manifest(
    session: AsyncSession,
    repo_id: str,
    ref: str,
) -> dict[str, str]:
    """Return the ``{path: object_id}`` manifest for the HEAD commit on *ref*.

    Any break in the chain branch → head commit → snapshot yields an empty
    dict (e.g. a brand-new repo with no pushes yet).
    """
    # Step 1: branch name → head commit id
    branch = (
        await session.execute(
            select(db.MusehubBranch).where(
                db.MusehubBranch.repo_id == repo_id,
                db.MusehubBranch.name == ref,
            )
        )
    ).scalar_one_or_none()
    if branch is None or not branch.head_commit_id:
        return {}

    # Step 2: head commit id → snapshot id
    commit = (
        await session.execute(
            select(db.MusehubCommit).where(
                db.MusehubCommit.commit_id == branch.head_commit_id
            )
        )
    ).scalar_one_or_none()
    if commit is None or commit.snapshot_id is None:
        return {}

    # Step 3: snapshot id → manifest (defensive copy)
    snapshot = await session.get(db.MusehubSnapshot, commit.snapshot_id)
    return dict(snapshot.manifest or {}) if snapshot is not None else {}
1515
1516
def _manifest_to_tree(
    manifest: dict[str, str],
    dir_path: str,
) -> tuple[list[TreeEntryResponse], list[TreeEntryResponse]]:
    """Build sorted (dirs, files) tree entries from a snapshot manifest.

    ``dir_path`` selects the directory to list (empty string = repo root).
    Only immediate children are returned: deeper descendants collapse into
    their first-level directory, which is emitted once per directory name.
    Both result lists are sorted alphabetically by entry name.
    """
    clean = dir_path.strip("/")
    prefix = clean + "/" if clean else ""

    dir_entries: dict[str, TreeEntryResponse] = {}
    file_entries: list[TreeEntryResponse] = []

    for raw_path, object_id in manifest.items():
        norm = raw_path.lstrip("/")
        if not norm.startswith(prefix):
            continue
        rest = norm[len(prefix):]
        if not rest:
            continue
        child, sep, _ = rest.partition("/")
        if not sep:
            # No further slash → a file directly inside the listed directory.
            file_entries.append(
                TreeEntryResponse(
                    type="file",
                    name=child,
                    path=norm,
                    size_bytes=None,
                    object_id=object_id,
                )
            )
        elif child not in dir_entries:
            # First sighting of this subdirectory; later paths under it are
            # absorbed into the same entry.
            dir_entries[child] = TreeEntryResponse(
                type="dir",
                name=child,
                path=prefix + child,
                size_bytes=None,
                object_id=None,
            )

    dirs = sorted(dir_entries.values(), key=lambda e: e.name)
    files = sorted(file_entries, key=lambda e: e.name)
    return dirs, files
1566
1567
async def list_tree(
    session: AsyncSession,
    repo_id: str,
    owner: str,
    repo_slug: str,
    ref: str,
    dir_path: str,
) -> TreeListResponse:
    """Build a directory listing for the tree browser.

    Primary strategy: read the snapshot manifest for the HEAD commit on *ref*
    (``musehub_snapshots.manifest``). This reflects the exact file tree at
    that commit without relying on per-object path metadata (which is empty
    for wire-protocol pushes).

    Fallback: scan ``musehub_objects`` by path (legacy MIDI-upload repos).
    Directories sort before files in the returned entry list in both paths.
    """
    manifest = await _get_head_snapshot_manifest(session, repo_id, ref)
    if manifest:
        dirs, files = _manifest_to_tree(manifest, dir_path)
        return TreeListResponse(
            owner=owner,
            repo_slug=repo_slug,
            ref=ref,
            dir_path=dir_path.strip("/"),
            entries=dirs + files,
        )

    # ── Legacy fallback: objects with explicit paths ──────────────────────
    all_objects = await list_objects(session, repo_id)
    # Normalised directory prefix to match against; empty string = repo root.
    prefix = (dir_path.strip("/") + "/") if dir_path.strip("/") else ""
    seen_dirs: set[str] = set()
    dirs_legacy: list[TreeEntryResponse] = []
    files_legacy: list[TreeEntryResponse] = []

    for obj in all_objects:
        path = obj.path.lstrip("/")
        if not path.startswith(prefix):
            continue
        remainder = path[len(prefix):]
        if not remainder:
            continue
        slash_pos = remainder.find("/")
        if slash_pos == -1:
            # Direct child file of the listed directory.
            # NOTE(review): legacy entries carry size_bytes but object_id=None,
            # unlike the manifest path above which sets object_id — presumably
            # legacy files are addressed by path rather than content hash;
            # confirm before unifying the two paths.
            files_legacy.append(
                TreeEntryResponse(
                    type="file",
                    name=remainder,
                    path=path,
                    size_bytes=obj.size_bytes,
                    object_id=None,
                )
            )
        else:
            # Deeper descendant: surface its first-level directory only once.
            dir_name = remainder[:slash_pos]
            dir_full_path = prefix + dir_name
            if dir_name not in seen_dirs:
                seen_dirs.add(dir_name)
                dirs_legacy.append(
                    TreeEntryResponse(
                        type="dir",
                        name=dir_name,
                        path=dir_full_path,
                        size_bytes=None,
                        object_id=None,
                    )
                )

    dirs_legacy.sort(key=lambda e: e.name)
    files_legacy.sort(key=lambda e: e.name)

    return TreeListResponse(
        owner=owner,
        repo_slug=repo_slug,
        ref=ref,
        dir_path=dir_path.strip("/"),
        entries=dirs_legacy + files_legacy,
    )
1646
1647
async def _resolve_ref_to_commit(
    session: AsyncSession, repo_id: str, ref: str
) -> db.MusehubCommit | None:
    """Resolve a branch name or commit SHA to a commit row.

    Resolution order: branch head first, then exact commit_id, then a
    short-SHA prefix match (≥7 chars) — so ``main``, full SHAs, and
    abbreviated SHAs all work. Returns None when nothing matches.
    """
    # 1. Branch head
    branch = (
        await session.execute(
            select(db.MusehubBranch).where(
                db.MusehubBranch.repo_id == repo_id,
                db.MusehubBranch.name == ref,
            )
        )
    ).scalar_one_or_none()
    if branch is not None and branch.head_commit_id:
        return await session.get(db.MusehubCommit, branch.head_commit_id)

    # 2. Exact commit_id
    exact = (
        await session.execute(
            select(db.MusehubCommit).where(
                db.MusehubCommit.repo_id == repo_id,
                db.MusehubCommit.commit_id == ref,
            )
        )
    ).scalar_one_or_none()
    if exact is not None:
        return exact

    # 3. Short-SHA prefix (require ≥7 chars to avoid accidental matches)
    if len(ref) < 7:
        return None
    return (
        await session.execute(
            select(db.MusehubCommit)
            .where(
                db.MusehubCommit.repo_id == repo_id,
                db.MusehubCommit.commit_id.like(f"{ref}%"),
            )
            .limit(1)
        )
    ).scalar_one_or_none()
1694
1695
async def get_file_at_ref(
    session: AsyncSession,
    repo_id: str,
    ref: str,
    file_path: str,
) -> dict[str, object] | None:
    """Resolve a file path at a given ref via the snapshot manifest.

    Lookup chain: ref → commit → snapshot → manifest[file_path] → object_id.
    Content bytes are intentionally NOT returned here; callers fetch them from
    the storage backend directly so nothing is loaded into memory unless needed.

    Returns a dict with:
    - ``object_id``: content-addressed SHA
    - ``snapshot_id``: the snapshot this file belongs to
    - ``commit_id``: resolved commit SHA
    - ``path``: normalised file path
    - ``manifest_size``: number of files in the resolved manifest

    Returns None when the ref or file is not found.
    """
    commit = await _resolve_ref_to_commit(session, repo_id, ref)
    if commit is None or commit.snapshot_id is None:
        return None

    snapshot = await session.get(db.MusehubSnapshot, commit.snapshot_id)
    if snapshot is None:
        return None

    manifest: dict[str, str] = dict(snapshot.manifest or {})
    normalised = file_path.lstrip("/")
    resolved_oid = manifest.get(normalised)
    if resolved_oid is None:
        return None

    return {
        "object_id": resolved_oid,
        "snapshot_id": commit.snapshot_id,
        "commit_id": commit.commit_id,
        "path": normalised,
        "manifest_size": len(manifest),
    }
1738
1739
async def get_last_commit_for_file(
    session: AsyncSession,
    repo_id: str,
    file_path: str,
    current_commit_id: str,
) -> db.MusehubCommit | None:
    """Return the most recent commit that changed ``file_path``.

    Scans commits in reverse chronological order, comparing snapshot manifests
    to find where the object_id for this path last changed. Stops at
    ``current_commit_id`` (inclusive). Returns None if the file has no history.

    Args:
        session: Open async DB session.
        repo_id: Hub repo identifier.
        file_path: Path within the repo (leading slashes stripped).
        current_commit_id: Commit whose snapshot anchors the search.
    """
    norm = file_path.lstrip("/")

    # Get the current object_id for this file
    current_commit = await session.get(db.MusehubCommit, current_commit_id)
    if current_commit is None or current_commit.snapshot_id is None:
        return current_commit

    snap = await session.get(db.MusehubSnapshot, current_commit.snapshot_id)
    if snap is None:
        return current_commit

    current_oid = (snap.manifest or {}).get(norm)
    if current_oid is None:
        return None

    # Walk back at most 200 commits (same branch, not newer than current)
    stmt = (
        select(db.MusehubCommit)
        .where(
            db.MusehubCommit.repo_id == repo_id,
            db.MusehubCommit.branch == (current_commit.branch or "main"),
            db.MusehubCommit.timestamp <= current_commit.timestamp,
        )
        .order_by(desc(db.MusehubCommit.timestamp))
        .limit(200)
    )
    rows = (await session.execute(stmt)).scalars().all()

    # Batch-load every snapshot referenced by the window in a single IN query,
    # eliminating the per-commit N+1 of the previous implementation (same
    # pattern used elsewhere in this module for audio previews and manifests).
    snap_ids = [r.snapshot_id for r in rows if r.snapshot_id]
    manifests: dict[str, dict[str, str]] = {}
    if snap_ids:
        snap_rows = (
            await session.execute(
                select(db.MusehubSnapshot).where(
                    db.MusehubSnapshot.snapshot_id.in_(snap_ids)
                )
            )
        ).scalars().all()
        manifests = {s.snapshot_id: dict(s.manifest or {}) for s in snap_rows}

    # Walk newest→oldest; keep advancing while the object_id is unchanged.
    # The last commit seen before it differs is the one that introduced the
    # current content.
    prev_commit: db.MusehubCommit | None = current_commit
    for row in rows:
        if row.snapshot_id is None:
            continue
        manifest = manifests.get(row.snapshot_id)
        if manifest is None:
            continue
        if manifest.get(norm) != current_oid:
            break
        prev_commit = row

    return prev_commit
1793
1794
async def get_snapshot_diff(
    session: AsyncSession,
    repo_id: str,
    commit_snapshot_id: str | None,
    parent_snapshot_id: str | None,
) -> dict[str, Any]:
    """Diff two snapshot manifests, returning file-level change lists.

    Returns a dict with:
    - ``added``: files present in the new snapshot but not the parent
    - ``removed``: files present in the parent but not the new snapshot
    - ``modified``: files present in both but with different object IDs
    - ``total_files``: file count of the new snapshot
    """

    async def _load_manifest(snapshot_id: str | None) -> dict[str, str]:
        # A missing id or a missing row both collapse to an empty manifest.
        if not snapshot_id:
            return {}
        snap = await session.get(db.MusehubSnapshot, snapshot_id)
        return dict(snap.manifest or {}) if snap else {}

    new_manifest = await _load_manifest(commit_snapshot_id)
    old_manifest = await _load_manifest(parent_snapshot_id)

    new_paths = set(new_manifest)
    old_paths = set(old_manifest)

    return {
        "added": sorted(new_paths - old_paths),
        "removed": sorted(old_paths - new_paths),
        "modified": sorted(
            p for p in new_paths & old_paths
            if new_manifest[p] != old_manifest[p]
        ),
        "total_files": len(new_manifest),
    }
1835
1836
async def get_repo_home_stats(
    session: AsyncSession,
    repo_id: str,
    ref: str,
) -> dict[str, object]:
    """Return aggregate stats for the repo home page.

    Returns a dict with:
    - ``total_commits``: int — total commit count across all branches
    - ``total_objects``: int — number of stored objects
    - ``total_size_bytes``: int — sum of all object sizes
    - ``file_type_counts``: dict[str, int] — extension → file count from HEAD snapshot
    - ``commit_activity``: list[int] — daily commit counts for the last 14 days (oldest first)
    - ``total_files``: int — number of files in the HEAD snapshot manifest
    """
    from datetime import timedelta
    from collections import Counter

    # ``or 0`` is defensive: coerce a None scalar to an int count.
    total_commits: int = (
        await session.execute(
            select(func.count()).where(db.MusehubCommit.repo_id == repo_id)
        )
    ).scalar_one() or 0

    # Single aggregate query for object count + total size.
    obj_agg = (
        await session.execute(
            select(
                func.count().label("cnt"),
                func.coalesce(func.sum(db.MusehubObject.size_bytes), 0).label("sz"),
            ).where(db.MusehubObject.repo_id == repo_id)
        )
    ).one()
    total_objects = int(obj_agg.cnt or 0)
    total_size_bytes = int(obj_agg.sz or 0)

    # File type breakdown from HEAD snapshot manifest
    manifest = await _get_head_snapshot_manifest(session, repo_id, ref)
    ext_counter: Counter[str] = Counter()
    for path in manifest:
        # Dot check runs on the basename so dotted directory names don't
        # produce fake extensions; extension-less files count under "".
        ext = path.rsplit(".", 1)[-1].lower() if "." in path.split("/")[-1] else ""
        ext_counter[ext] += 1
    # Only the 10 most common extensions are surfaced.
    file_type_counts: dict[str, int] = dict(ext_counter.most_common(10))

    # Daily commit activity for last 14 days
    now = datetime.now(tz=timezone.utc)
    fourteen_days_ago = now - timedelta(days=14)
    recent_rows = (
        await session.execute(
            select(db.MusehubCommit.timestamp)
            .where(
                db.MusehubCommit.repo_id == repo_id,
                db.MusehubCommit.timestamp >= fourteen_days_ago,
                # Commit IDs prefixed "init-" are excluded — presumably
                # synthetic repo-initialisation commits; confirm.
                db.MusehubCommit.commit_id.not_like("init-%"),
            )
            .order_by(db.MusehubCommit.timestamp)
        )
    ).scalars().all()

    # Bucket into 14 daily bins — daily[13] is today, daily[0] is 13 days ago.
    daily: list[int] = [0] * 14
    for ts in recent_rows:
        # Naive timestamps are assumed UTC (SQLite round-trip strips tzinfo).
        t = ts if ts.tzinfo else ts.replace(tzinfo=timezone.utc)
        day_idx = (now - t).days
        if 0 <= day_idx < 14:
            daily[13 - day_idx] += 1

    return {
        "total_commits": total_commits,
        "total_objects": total_objects,
        "total_size_bytes": total_size_bytes,
        "file_type_counts": file_type_counts,
        "commit_activity": daily,
        "total_files": len(manifest),
    }
1910
1911
async def get_file_last_commits(
    session: AsyncSession,
    repo_id: str,
    paths: list[str],
    max_commits: int = 60,
) -> dict[str, dict[str, str]]:
    """Return the last-touching commit for each file path.

    Walks commits newest→oldest, comparing each commit's snapshot manifest
    against the *next-older* commit's manifest. A path is "claimed" by the
    newest commit in which its object_id differs from the older snapshot
    (i.e. the file appeared or its content changed in that commit).
    Caps at ``max_commits`` to bound query time on large repos; a file
    untouched within the window is attributed to the oldest visible commit.

    Returns ``{path: {sha: str, message: str, author: str, timestamp: str}}``
    where ``sha`` is the first 8 chars and ``timestamp`` is ISO-8601 UTC.
    """
    from datetime import timezone as _tz

    if not paths:
        return {}

    commits_result = await session.execute(
        select(db.MusehubCommit)
        .where(db.MusehubCommit.repo_id == repo_id)
        .order_by(db.MusehubCommit.timestamp.desc())
        .limit(max_commits)
    )
    commits = list(commits_result.scalars().all())

    # Load all snapshot manifests in one pass.
    snap_ids = [c.snapshot_id for c in commits if c.snapshot_id]
    if not snap_ids:
        return {}

    snaps_result = await session.execute(
        select(db.MusehubSnapshot).where(db.MusehubSnapshot.snapshot_id.in_(snap_ids))
    )
    snap_by_id: dict[str, dict[str, str]] = {
        s.snapshot_id: dict(s.manifest or {})
        for s in snaps_result.scalars().all()
    }

    result: dict[str, dict[str, str]] = {}
    remaining = set(paths)

    for idx, commit in enumerate(commits):
        if not remaining:
            break
        cur_manifest = snap_by_id.get(commit.snapshot_id or "", {})
        # BUGFIX: compare against the next-OLDER commit's manifest, not the
        # previously-iterated (newer) one. The old code started with an empty
        # prev_manifest, so every path present at HEAD was claimed by the
        # HEAD commit on the very first iteration. Past the window edge the
        # older manifest is empty, so any still-unclaimed path is attributed
        # to the oldest visible commit. (Assumes timestamp-consecutive
        # commits are parent/child — same assumption as before.)
        if idx + 1 < len(commits):
            older_manifest = snap_by_id.get(commits[idx + 1].snapshot_id or "", {})
        else:
            older_manifest = {}

        claimed: set[str] = set()
        for p in remaining:
            cur_oid = cur_manifest.get(p)
            # Claim if file appeared or its content changed in this commit.
            if cur_oid and cur_oid != older_manifest.get(p):
                ts = commit.timestamp
                if ts.tzinfo is None:
                    ts = ts.replace(tzinfo=_tz.utc)
                result[p] = {
                    "sha": commit.commit_id[:8],
                    "message": commit.message.split("\n")[0][:72],
                    "author": commit.author,
                    "timestamp": ts.isoformat(),
                }
                claimed.add(p)
        remaining -= claimed

    return result
1981
1982
async def get_recently_pushed_branches(
    session: AsyncSession,
    repo_id: str,
    current_ref: str,
    within_hours: int = 72,
) -> list[dict[str, str]]:
    """Return branches (other than current_ref) whose head commit is recent.

    Used to render GitHub-style "branch had recent pushes N minutes ago" banners.
    Returns list of ``{name, sha, message, timestamp}`` sorted newest-first,
    where ``timestamp`` is ISO-8601 in UTC.
    """
    from datetime import timezone as _tz, timedelta

    branches_result = await session.execute(
        select(db.MusehubBranch).where(db.MusehubBranch.repo_id == repo_id)
    )
    branches = [
        b for b in branches_result.scalars().all()
        if b.name != current_ref and b.head_commit_id
    ]
    if not branches:
        return []

    head_ids = [b.head_commit_id for b in branches if b.head_commit_id]
    commits_result = await session.execute(
        select(db.MusehubCommit).where(db.MusehubCommit.commit_id.in_(head_ids))
    )
    commit_by_id: dict[str, db.MusehubCommit] = {
        c.commit_id: c for c in commits_result.scalars().all()
    }

    cutoff = datetime.now(_tz.utc) - timedelta(hours=within_hours)
    # Carry the datetime alongside each entry so we can sort chronologically.
    dated: list[tuple[datetime, dict[str, str]]] = []
    for branch in branches:
        commit = commit_by_id.get(branch.head_commit_id or "")
        if not commit:
            continue
        ts = commit.timestamp
        # BUGFIX: normalize every timestamp to UTC. Naive values are assumed
        # UTC (as before); aware values are converted with astimezone so the
        # serialized strings all share a +00:00 offset. The old code sorted
        # raw isoformat() strings, which is not chronological when offsets
        # differ.
        if ts.tzinfo is None:
            ts = ts.replace(tzinfo=_tz.utc)
        else:
            ts = ts.astimezone(_tz.utc)
        if ts >= cutoff:
            dated.append((ts, {
                "name": branch.name,
                "sha": commit.commit_id[:8],
                "message": commit.message.split("\n")[0][:72],
                "timestamp": ts.isoformat(),
            }))

    # Sort on the datetime itself, not its string form.
    dated.sort(key=lambda pair: pair[0], reverse=True)
    return [entry for _, entry in dated]
2033
2034
async def get_user_forks(db_session: AsyncSession, username: str) -> UserForksResponse:
    """Return every repo forked by ``username``, newest fork first.

    Joins ``musehub_forks`` (filtered on ``forked_by``) against
    ``musehub_repos`` twice: one alias for the fork's own metadata and one
    for the source repo, so the profile page can render
    "forked from {source_owner}/{source_slug}" under each card.

    An unknown or fork-less user yields an empty response (never 404);
    callers are responsible for 404-guarding the username beforehand.
    """
    fork_side = aliased(db.MusehubRepo, name="fork_repo")
    source_side = aliased(db.MusehubRepo, name="source_repo")

    stmt = (
        select(db.MusehubFork, fork_side, source_side)
        .join(fork_side, db.MusehubFork.fork_repo_id == fork_side.repo_id)
        .join(source_side, db.MusehubFork.source_repo_id == source_side.repo_id)
        .where(db.MusehubFork.forked_by == username)
        .order_by(db.MusehubFork.created_at.desc())
    )
    rows = (await db_session.execute(stmt)).all()

    entries: list[UserForkedRepoEntry] = []
    for fork, fork_repo, src_repo in rows:
        entries.append(
            UserForkedRepoEntry(
                fork_id=fork.fork_id,
                fork_repo=_to_repo_response(fork_repo),
                source_owner=src_repo.owner,
                source_slug=src_repo.slug,
                forked_at=fork.created_at,
            )
        )

    return UserForksResponse(forks=entries, total=len(entries))
2071
2072
async def list_repo_forks(db_session: AsyncSession, repo_id: str) -> ForkNetworkResponse:
    """Build the fork network tree rooted at the given repo.

    Loads every ``musehub_forks`` row whose ``source_repo_id`` matches,
    joining each fork's repo row for owner/slug. Each child carries a
    deterministic pseudo-divergence count derived from its fork_id — a
    stable placeholder until per-branch commit counts are indexed.

    The tree is one level deep (root + direct forks); recursive multi-level
    chains would need a recursive CTE and can be added when required.
    An unknown source repo returns an empty placeholder root with zero forks.
    """
    source = (
        await db_session.execute(
            select(db.MusehubRepo).where(db.MusehubRepo.repo_id == repo_id)
        )
    ).scalar_one_or_none()

    if source is None:
        # Unknown repo: hand back an empty skeleton instead of raising.
        empty_root = ForkNetworkNode(
            owner="",
            repo_slug="",
            repo_id=repo_id,
            divergence_commits=0,
            forked_by="",
            forked_at=None,
        )
        return ForkNetworkResponse(root=empty_root, total_forks=0)

    fork_side = aliased(db.MusehubRepo, name="fork_repo")
    stmt = (
        select(db.MusehubFork, fork_side)
        .join(fork_side, db.MusehubFork.fork_repo_id == fork_side.repo_id)
        .where(db.MusehubFork.source_repo_id == repo_id)
        .order_by(db.MusehubFork.created_at.asc())
    )
    pairs = (await db_session.execute(stmt)).all()

    child_nodes: list[ForkNetworkNode] = []
    for fork, fork_repo in pairs:
        # Deterministic placeholder divergence: hash the fork_id hex into
        # 0–14 so the value is stable across retries and visually non-flat.
        if fork.fork_id:
            bucket = int(fork.fork_id.replace("-", ""), 16) % 100
        else:
            bucket = 0
        child_nodes.append(
            ForkNetworkNode(
                owner=fork_repo.owner,
                repo_slug=fork_repo.slug,
                repo_id=fork_repo.repo_id,
                divergence_commits=bucket % 15,
                forked_by=fork.forked_by,
                forked_at=fork.created_at,
                children=[],
            )
        )

    return ForkNetworkResponse(
        root=ForkNetworkNode(
            owner=source.owner,
            repo_slug=source.slug,
            repo_id=source.repo_id,
            divergence_commits=0,
            forked_by="",
            forked_at=None,
            children=child_nodes,
        ),
        total_forks=len(child_nodes),
    )
2147
2148
async def get_user_starred(db_session: AsyncSession, username: str) -> UserStarredResponse:
    """Return every repo starred by ``username``, most recent star first.

    Resolves the username to a profile row first (``musehub_stars`` keys on
    ``user_id``), then joins stars against ``musehub_repos`` for full repo
    metadata on each entry.

    An unknown user — or one with no stars — yields an empty response, never
    a 404. Callers 404-guard the username before invoking this.
    """
    profile = (
        await db_session.execute(
            select(db.MusehubProfile).where(db.MusehubProfile.username == username)
        )
    ).scalar_one_or_none()

    if profile is None:
        return UserStarredResponse(starred=[], total=0)

    stmt = (
        select(db.MusehubStar, db.MusehubRepo)
        .join(db.MusehubRepo, db.MusehubStar.repo_id == db.MusehubRepo.repo_id)
        .where(db.MusehubStar.user_id == profile.user_id)
        .order_by(db.MusehubStar.created_at.desc())
    )
    star_rows = (await db_session.execute(stmt)).all()

    items: list[UserStarredRepoEntry] = []
    for star, repo in star_rows:
        items.append(
            UserStarredRepoEntry(
                star_id=star.star_id,
                repo=_to_repo_response(repo),
                starred_at=star.created_at,
            )
        )

    return UserStarredResponse(starred=items, total=len(items))
2186
2187
async def get_user_watched(db_session: AsyncSession, username: str) -> UserWatchedResponse:
    """Return every repo that ``username`` is watching, most recent first.

    Resolves the username to a profile row first (``musehub_watches`` keys on
    ``user_id``), then joins watches against ``musehub_repos`` for full repo
    metadata on each entry.

    An unknown user — or one watching nothing — yields an empty response,
    never a 404. Callers 404-guard the username before invoking this.
    """
    profile = (
        await db_session.execute(
            select(db.MusehubProfile).where(db.MusehubProfile.username == username)
        )
    ).scalar_one_or_none()

    if profile is None:
        return UserWatchedResponse(watched=[], total=0)

    stmt = (
        select(db.MusehubWatch, db.MusehubRepo)
        .join(db.MusehubRepo, db.MusehubWatch.repo_id == db.MusehubRepo.repo_id)
        .where(db.MusehubWatch.user_id == profile.user_id)
        .order_by(db.MusehubWatch.created_at.desc())
    )
    watch_rows = (await db_session.execute(stmt)).all()

    items: list[UserWatchedRepoEntry] = []
    for watch, repo in watch_rows:
        items.append(
            UserWatchedRepoEntry(
                watch_id=watch.watch_id,
                repo=_to_repo_response(repo),
                watched_at=watch.created_at,
            )
        )

    return UserWatchedResponse(watched=items, total=len(items))
2225
2226
2227 # ── Repo settings helpers ─────────────────────────────────────────────────────
2228
# Canonical defaults for every repo-settings flag. ``_merge_settings`` overlays
# a repo's stored JSON blob on top of this dict, so any key added here is
# automatically back-filled for legacy rows whose blob predates it.
_SETTINGS_DEFAULTS: dict[str, object] = {
    "default_branch": "main",
    # Feature toggles — issues on by default; projects and wiki off.
    "has_issues": True,
    "has_projects": False,
    "has_wiki": False,
    # Optional metadata; None means "unset".
    "license": None,
    "homepage_url": None,
    # Merge-strategy flags surfaced by the settings endpoints.
    "allow_merge_commit": True,
    "allow_squash_merge": True,
    "allow_rebase_merge": False,
    "delete_branch_on_merge": True,
}
2241
2242
def _merge_settings(stored: dict[str, object] | None) -> dict[str, object]:
    """Overlay ``stored`` on the defaults and return a complete settings dict.

    ``stored`` may be None (new repos) or a partial dict (legacy rows that
    predate individual flag additions); any missing key is filled from
    ``_SETTINGS_DEFAULTS`` so callers always see every flag. The inputs are
    never mutated — a fresh dict is returned.
    """
    return {**_SETTINGS_DEFAULTS, **(stored or {})}
2254
2255
async def get_repo_settings(
    session: AsyncSession, repo_id: str
) -> RepoSettingsResponse | None:
    """Return the mutable settings for a repo, or None when it does not exist.

    Dedicated column values (name, description, visibility, tags) are read
    straight off the ORM row; feature flags come from the ``settings`` JSON
    blob with ``_SETTINGS_DEFAULTS`` filling any gaps, so both new and legacy
    repos produce a fully-populated response.

    Called by ``GET /api/v1/repos/{repo_id}/settings``.
    """
    repo = await session.get(db.MusehubRepo, repo_id)
    if repo is None:
        return None

    flags = _merge_settings(repo.settings)

    return RepoSettingsResponse(
        name=repo.name,
        description=repo.description,
        visibility=repo.visibility,
        # "or" also covers an explicitly-stored empty/None value.
        default_branch=str(flags.get("default_branch") or "main"),
        has_issues=bool(flags.get("has_issues", True)),
        has_projects=bool(flags.get("has_projects", False)),
        has_wiki=bool(flags.get("has_wiki", False)),
        topics=list(repo.tags or []),
        license=flags.get("license"),  # type: ignore[arg-type]
        homepage_url=flags.get("homepage_url"),  # type: ignore[arg-type]
        allow_merge_commit=bool(flags.get("allow_merge_commit", True)),
        allow_squash_merge=bool(flags.get("allow_squash_merge", True)),
        allow_rebase_merge=bool(flags.get("allow_rebase_merge", False)),
        delete_branch_on_merge=bool(flags.get("delete_branch_on_merge", True)),
    )
2293
2294
async def update_repo_settings(
    session: AsyncSession,
    repo_id: str,
    patch: RepoSettingsPatch,
) -> RepoSettingsResponse | None:
    """Apply a partial settings update and return the refreshed settings.

    Only non-None fields in ``patch`` are written. Dedicated columns
    (name, description, visibility, tags) go directly on the ORM row;
    feature flags are merged into the ``settings`` JSON blob (defaults
    first, so legacy rows gain any later-added keys).

    Returns None when the repo does not exist. The caller commits the
    session after a successful return.

    Called by ``PATCH /api/v1/repos/{repo_id}/settings``.
    """
    repo = await session.get(db.MusehubRepo, repo_id)
    if repo is None:
        return None

    # ── Dedicated column fields ──────────────────────────────────────────────
    if patch.name is not None:
        repo.name = patch.name
    if patch.description is not None:
        repo.description = patch.description
    if patch.visibility is not None:
        repo.visibility = patch.visibility
    if patch.topics is not None:
        repo.tags = patch.topics

    # ── Feature-flag JSON blob ───────────────────────────────────────────────
    merged = _merge_settings(repo.settings)

    # Every flag lives on the patch under the same name it uses in the blob,
    # so a single loop replaces the per-field if-chain.
    flag_fields = (
        "default_branch",
        "has_issues",
        "has_projects",
        "has_wiki",
        "license",
        "homepage_url",
        "allow_merge_commit",
        "allow_squash_merge",
        "allow_rebase_merge",
        "delete_branch_on_merge",
    )
    flag_updates: dict[str, object] = {
        field: getattr(patch, field)
        for field in flag_fields
        if getattr(patch, field) is not None
    }

    if flag_updates:
        merged.update(flag_updates)
        # Reassign (rather than mutate in place) so the ORM sees the change.
        repo.settings = merged

    logger.info("✅ Updated settings for repo %s", repo_id)
    return await get_repo_settings(session, repo_id)