gabriel / musehub public
musehub_repository.py python
1833 lines 64.9 KB
7f1d07e8 feat: domains, MCP expansion, MIDI player, and production hardening (#8) Gabriel Cardona <cgcardona@gmail.com> 4d ago
1 """MuseHub persistence adapter — single point of DB access for Hub entities.
2
3 This module is the ONLY place that touches the musehub_* tables.
4 Route handlers delegate here; no business logic lives in routes.
5
6 Boundary rules:
7 - Must NOT import state stores, SSE queues, or LLM clients.
8 - Must NOT import musehub.core.* modules.
9 - May import ORM models from musehub.db.musehub_models.
10 - May import Pydantic response models from musehub.models.musehub.
11 """
12 from datetime import datetime, timezone
13
14 import logging
15 import re
16 from collections import deque
17
18 from sqlalchemy import desc, func, or_, select
19 from sqlalchemy.ext.asyncio import AsyncSession
20 from sqlalchemy.orm import aliased
21 from sqlalchemy.sql.elements import ColumnElement
22
23 from musehub.db import musehub_models as db
24 from musehub.db import musehub_collaborator_models as collab_db
25 from musehub.models.musehub import (
26 SessionListResponse,
27 SessionResponse,
28 BranchDetailListResponse,
29 BranchDetailResponse,
30 BranchDivergenceScores,
31 BranchResponse,
32 CommitResponse,
33 GlobalSearchCommitMatch,
34 GlobalSearchRepoGroup,
35 GlobalSearchResult,
36 DagEdge,
37 DagGraphResponse,
38 DagNode,
39 InstrumentInfo,
40 MuseHubContextCommitInfo,
41 MuseHubContextHistoryEntry,
42 MuseHubContextMusicalState,
43 MuseHubContextResponse,
44 ObjectMetaResponse,
45 RepoListResponse,
46 RepoResponse,
47 RepoSettingsPatch,
48 RepoSettingsResponse,
49 ScoreMetaInfo,
50 TimelineCommitEvent,
51 TimelineEmotionEvent,
52 TimelineResponse,
53 TimelineSectionEvent,
54 TimelineTrackEvent,
55 TrackInfo,
56 TreeEntryResponse,
57 TreeListResponse,
58 ForkNetworkNode,
59 ForkNetworkResponse,
60 UserForkedRepoEntry,
61 UserForksResponse,
62 UserStarredRepoEntry,
63 UserStarredResponse,
64 UserWatchedRepoEntry,
65 UserWatchedResponse,
66 )
67
# Module-level logger named after this module, per stdlib convention.
logger = logging.getLogger(__name__)
69
70
71 def _generate_slug(name: str) -> str:
72 """Derive a URL-safe slug from a human-readable repo name.
73
74 Rules: lowercase, non-alphanumeric chars collapsed to single hyphens,
75 leading/trailing hyphens stripped, max 64 chars. If the result is empty
76 (e.g. name was all symbols) we fall back to "repo".
77 """
78 slug = name.lower()
79 slug = re.sub(r"[^a-z0-9]+", "-", slug)
80 slug = slug.strip("-")
81 slug = slug[:64].strip("-")
82 return slug or "repo"
83
84
85 def _repo_clone_url(owner: str, slug: str) -> str:
86 """Derive the canonical clone URL from owner and slug.
87
88 Uses the musehub://{owner}/{slug} scheme — the DAW's native protocol handler
89 resolves this to the correct API base at runtime. No internal UUID is exposed
90 in external URLs.
91 """
92 return f"musehub://{owner}/{slug}"
93
94
def _to_repo_response(row: db.MusehubRepo) -> RepoResponse:
    """Map an ORM repo row onto its public wire model.

    ``domain_id`` is read via getattr so rows lacking the attribute yield
    None instead of raising; ``tags`` is copied defensively.
    """
    wire_fields = {
        "repo_id": row.repo_id,
        "name": row.name,
        "owner": row.owner,
        "slug": row.slug,
        "visibility": row.visibility,
        "owner_user_id": row.owner_user_id,
        "clone_url": _repo_clone_url(row.owner, row.slug),
        "description": row.description,
        "tags": list(row.tags or []),
        "key_signature": row.key_signature,
        "tempo_bpm": row.tempo_bpm,
        "domain_id": getattr(row, "domain_id", None),
        "created_at": row.created_at,
    }
    return RepoResponse(**wire_fields)
111
112
def _to_branch_response(row: db.MusehubBranch) -> BranchResponse:
    """Map an ORM branch row onto its public wire model."""
    branch_fields = {
        "branch_id": row.branch_id,
        "name": row.name,
        "head_commit_id": row.head_commit_id,
    }
    return BranchResponse(**branch_fields)
119
120
def _to_commit_response(row: db.MusehubCommit) -> CommitResponse:
    """Map an ORM commit row onto its public wire model.

    ``parent_ids`` is copied defensively (and None becomes an empty list).
    """
    commit_fields = {
        "commit_id": row.commit_id,
        "branch": row.branch,
        "parent_ids": list(row.parent_ids or []),
        "message": row.message,
        "author": row.author,
        "timestamp": row.timestamp,
        "snapshot_id": row.snapshot_id,
    }
    return CommitResponse(**commit_fields)
131
132
async def create_repo(
    session: AsyncSession,
    *,
    name: str,
    owner: str,
    visibility: str,
    owner_user_id: str,
    description: str = "",
    tags: list[str] | None = None,
    key_signature: str | None = None,
    tempo_bpm: int | None = None,
    # ── Wizard extensions ────────────────────────────────────────
    # "license" intentionally shadows the builtin: it is a keyword-only
    # parameter of the public API and cannot be renamed without breaking callers.
    license: str | None = None,
    topics: list[str] | None = None,
    initialize: bool = False,
    default_branch: str = "main",
    template_repo_id: str | None = None,
) -> RepoResponse:
    """Persist a new remote repo and return its wire representation.

    ``slug`` is auto-generated from ``name``. The ``(owner, slug)`` pair must
    be unique — callers should catch ``IntegrityError`` and surface a 409.

    Wizard behaviors:
    - When ``template_repo_id`` is set, the template's description and topics
      are copied into the new repo (template must be public; silently skipped
      when it doesn't exist or is private).
    - When ``initialize=True``, an empty "Initial commit" is written plus the
      default branch pointer so the repo is immediately browsable.
    - ``license`` is stored in the settings JSON blob under the ``license`` key.
    - ``topics`` are merged with ``tags`` into a single unified tag list.
    """
    # Merge topics into tags (deduplicated, stable order).
    # dict.fromkeys preserves first-seen order, unlike set().
    combined_tags: list[str] = list(dict.fromkeys((tags or []) + (topics or [])))

    # Copy template metadata when a template repo is supplied.
    if template_repo_id is not None:
        tmpl = await session.get(db.MusehubRepo, template_repo_id)
        if tmpl is not None and tmpl.visibility == "public":
            # Caller-supplied description wins; template fills only when empty.
            if not description:
                description = tmpl.description
            # Prepend template tags; deduplicate preserving order.
            combined_tags = list(dict.fromkeys(list(tmpl.tags or []) + combined_tags))

    # Build the settings JSON blob with optional license field.
    settings: dict[str, object] = {}
    if license is not None:
        settings["license"] = license

    slug = _generate_slug(name)
    repo = db.MusehubRepo(
        name=name,
        owner=owner,
        slug=slug,
        visibility=visibility,
        owner_user_id=owner_user_id,
        description=description,
        tags=combined_tags,
        key_signature=key_signature,
        tempo_bpm=tempo_bpm,
        # Empty settings are stored as NULL rather than an empty JSON object.
        settings=settings or None,
    )
    session.add(repo)
    await session.flush()  # populate default columns before reading
    await session.refresh(repo)

    # Wizard initialisation: create default branch + empty initial commit.
    if initialize:
        # Deterministic placeholder ID derived from the repo UUID prefix
        # (not content-addressed like a real commit SHA).
        init_commit_id = f"init-{repo.repo_id[:8]}"
        now = datetime.now(tz=timezone.utc)

        branch = db.MusehubBranch(
            repo_id=repo.repo_id,
            name=default_branch,
            head_commit_id=init_commit_id,
        )
        session.add(branch)

        # NOTE(review): snapshot_id is not set on the initial commit —
        # presumably nullable / DB-defaulted; confirm against the ORM model.
        init_commit = db.MusehubCommit(
            commit_id=init_commit_id,
            repo_id=repo.repo_id,
            branch=default_branch,
            parent_ids=[],
            message="Initial commit",
            author=owner_user_id,
            timestamp=now,
        )
        session.add(init_commit)
        await session.flush()

    logger.info(
        "✅ Created MuseHub repo %s (%s/%s) for user %s (initialize=%s)",
        repo.repo_id, owner, slug, owner_user_id, initialize,
    )
    return _to_repo_response(repo)
228
229
async def get_repo(session: AsyncSession, repo_id: str) -> RepoResponse | None:
    """Look up a repo by its internal UUID.

    Returns the wire-format response for live rows; None for unknown IDs
    and for rows carrying a soft-delete timestamp.
    """
    row = await session.get(db.MusehubRepo, repo_id)
    if row is not None and row.deleted_at is None:
        return _to_repo_response(row)
    return None
236
237
async def delete_repo(session: AsyncSession, repo_id: str) -> bool:
    """Soft-delete a repo by stamping its ``deleted_at`` column.

    Returns True when a live repo was found and marked deleted; False when
    the repo is absent or was already soft-deleted. Committing the session
    remains the caller's responsibility.
    """
    repo_row = await session.get(db.MusehubRepo, repo_id)
    if repo_row is None or repo_row.deleted_at is not None:
        return False
    repo_row.deleted_at = datetime.now(timezone.utc)
    await session.flush()
    logger.info("✅ Soft-deleted MuseHub repo %s", repo_id)
    return True
252
253
async def transfer_repo_ownership(
    session: AsyncSession, repo_id: str, new_owner_user_id: str
) -> RepoResponse | None:
    """Reassign a repo's owning user.

    Only ``owner_user_id`` changes — the public ``owner`` username slug is
    deliberately left untouched; renaming it is a separate settings-level
    action the new owner performs themselves.

    Returns the refreshed RepoResponse, or None when the repo is missing or
    soft-deleted. Committing the session is the caller's responsibility.
    """
    repo_row = await session.get(db.MusehubRepo, repo_id)
    if repo_row is None or repo_row.deleted_at is not None:
        return None
    repo_row.owner_user_id = new_owner_user_id
    await session.flush()
    await session.refresh(repo_row)
    logger.info("✅ Transferred MuseHub repo %s ownership to user %s", repo_id, new_owner_user_id)
    return _to_repo_response(repo_row)
274
275
async def get_repo_by_owner_slug(
    session: AsyncSession, owner: str, slug: str
) -> RepoResponse | None:
    """Return repo metadata by owner+slug canonical URL pair, or None if not found.

    This is the primary resolver for all external /{owner}/{slug} routes.

    Soft-deleted repos do not resolve: the query excludes rows whose
    ``deleted_at`` is set, mirroring ``get_repo`` so deleted repos are
    invisible through public URLs as well as by UUID.
    """
    stmt = select(db.MusehubRepo).where(
        db.MusehubRepo.owner == owner,
        db.MusehubRepo.slug == slug,
        # Consistency fix: get_repo() hides soft-deleted rows; this resolver
        # previously did not, letting deleted repos resolve via their URL.
        db.MusehubRepo.deleted_at.is_(None),
    )
    row = (await session.execute(stmt)).scalars().first()
    if row is None:
        return None
    return _to_repo_response(row)
291
292
# Default page size for cursor-paginated repo listings (list_repos_for_user).
_PAGE_SIZE = 20
294
295
async def list_repos_for_user(
    session: AsyncSession,
    user_id: str,
    *,
    limit: int = _PAGE_SIZE,
    cursor: str | None = None,
) -> RepoListResponse:
    """Return repos owned by or collaborated on by ``user_id``.

    Soft-deleted repos are excluded, consistent with ``get_repo`` /
    ``delete_repo`` semantics. Results are ordered by ``created_at``
    descending (newest first). Pagination uses an opaque cursor encoding the
    ``created_at`` ISO timestamp of the last item on the current page — pass
    it back as ``?cursor=`` to advance.

    Args:
        session: Active async DB session.
        user_id: JWT ``sub`` of the authenticated caller.
        limit: Maximum repos per page (default 20).
        cursor: Opaque pagination cursor from a previous response.

    Returns:
        ``RepoListResponse`` with the page of repos, total count, and next cursor.
    """
    # Collect repo IDs the user collaborates on (accepted invitations only).
    collab_stmt = select(collab_db.MusehubCollaborator.repo_id).where(
        collab_db.MusehubCollaborator.user_id == user_id,
        collab_db.MusehubCollaborator.accepted_at.is_not(None),
    )
    collab_repo_ids_result = (await session.execute(collab_stmt)).scalars().all()
    collab_repo_ids = list(collab_repo_ids_result)

    # Base filter: repos the caller owns OR collaborates on, excluding
    # soft-deleted rows (fix: deleted repos previously still appeared here
    # even though get_repo() hides them).
    base_filter = or_(
        db.MusehubRepo.owner_user_id == user_id,
        db.MusehubRepo.repo_id.in_(collab_repo_ids),
    ) & db.MusehubRepo.deleted_at.is_(None)

    # Total count across all pages.
    count_stmt = select(func.count()).select_from(db.MusehubRepo).where(base_filter)
    total: int = (await session.execute(count_stmt)).scalar_one()

    # Apply cursor: only repos strictly older than the cursor timestamp.
    page_filter = base_filter
    if cursor is not None:
        try:
            cursor_dt = datetime.fromisoformat(cursor)
            page_filter = base_filter & (db.MusehubRepo.created_at < cursor_dt)
        except ValueError:
            pass  # malformed cursor — ignore and return from the beginning

    stmt = (
        select(db.MusehubRepo)
        .where(page_filter)
        .order_by(desc(db.MusehubRepo.created_at))
        .limit(limit)
    )
    rows = (await session.execute(stmt)).scalars().all()
    repos = [_to_repo_response(r) for r in rows]

    # Build next cursor from the last item's created_at when there may be more.
    next_cursor: str | None = None
    if len(rows) == limit:
        next_cursor = rows[-1].created_at.isoformat()

    return RepoListResponse(repos=repos, next_cursor=next_cursor, total=total)
360
361
async def get_repo_orm_by_owner_slug(
    session: AsyncSession, owner: str, slug: str
) -> db.MusehubRepo | None:
    """Return the raw ORM repo row by owner+slug, or None if not found.

    Used internally when the route needs the repo_id for downstream calls.

    NOTE(review): unlike get_repo(), this does not filter out soft-deleted
    rows (deleted_at set) — confirm whether internal callers should ever see
    a deleted repo's row here.
    """
    stmt = select(db.MusehubRepo).where(
        db.MusehubRepo.owner == owner,
        db.MusehubRepo.slug == slug,
    )
    return (await session.execute(stmt)).scalars().first()
374
375
async def list_branches(session: AsyncSession, repo_id: str) -> list[BranchResponse]:
    """Return every branch of ``repo_id`` as wire models, sorted by name."""
    query = (
        select(db.MusehubBranch)
        .where(db.MusehubBranch.repo_id == repo_id)
        .order_by(db.MusehubBranch.name)
    )
    branch_rows = (await session.execute(query)).scalars().all()
    return [_to_branch_response(branch) for branch in branch_rows]
385
386
async def list_branches_with_detail(
    session: AsyncSession, repo_id: str
) -> BranchDetailListResponse:
    """Return branches annotated with ahead/behind counts vs the default branch.

    The default branch is "main" when one exists; otherwise the
    alphabetically-first branch. Ahead/behind are computed as set differences
    over each branch's commit-ID set versus the default branch's — a
    display-grade approximation, not a true merge-base walk.

    Musical divergence scores cannot be computed server-side yet (they need
    audio snapshots), so every divergence field is returned as None.
    """
    branch_query = (
        select(db.MusehubBranch)
        .where(db.MusehubBranch.repo_id == repo_id)
        .order_by(db.MusehubBranch.name)
    )
    branches = (await session.execute(branch_query)).scalars().all()
    if not branches:
        return BranchDetailListResponse(branches=[], default_branch="main")

    # Default branch: prefer "main", else the first name alphabetically.
    names = [b.name for b in branches]
    default_name = "main" if "main" in names else names[0]

    # One query for every (commit_id, branch) pair; bucket per branch name.
    pair_query = select(db.MusehubCommit.commit_id, db.MusehubCommit.branch).where(
        db.MusehubCommit.repo_id == repo_id
    )
    buckets: dict[str, set[str]] = {}
    for cid, bname in (await session.execute(pair_query)).all():
        buckets.setdefault(bname, set()).add(cid)

    baseline = buckets.get(default_name, set())

    detailed: list[BranchDetailResponse] = []
    for b in branches:
        on_default = b.name == default_name
        own_commits = buckets.get(b.name, set())
        detailed.append(
            BranchDetailResponse(
                branch_id=b.branch_id,
                name=b.name,
                head_commit_id=b.head_commit_id,
                is_default=on_default,
                ahead_count=0 if on_default else len(own_commits - baseline),
                behind_count=0 if on_default else len(baseline - own_commits),
                divergence=BranchDivergenceScores(
                    melodic=None, harmonic=None, rhythmic=None, structural=None, dynamic=None
                ),
            )
        )

    return BranchDetailListResponse(branches=detailed, default_branch=default_name)
445
446
def _to_object_meta_response(row: db.MusehubObject) -> ObjectMetaResponse:
    """Map an ORM object row onto its metadata wire model (no binary content)."""
    meta_fields = {
        "object_id": row.object_id,
        "path": row.path,
        "size_bytes": row.size_bytes,
        "created_at": row.created_at,
    }
    return ObjectMetaResponse(**meta_fields)
454
455
async def get_commit(
    session: AsyncSession, repo_id: str, commit_id: str
) -> CommitResponse | None:
    """Fetch one commit scoped to a repo.

    Returns the wire-format commit, or None when no commit with that ID
    exists in ``repo_id``.
    """
    query = select(db.MusehubCommit).where(
        db.MusehubCommit.repo_id == repo_id,
        db.MusehubCommit.commit_id == commit_id,
    )
    match = (await session.execute(query)).scalars().first()
    return None if match is None else _to_commit_response(match)
471
472
async def list_objects(
    session: AsyncSession, repo_id: str
) -> list[ObjectMetaResponse]:
    """List every object's metadata for a repo (no binary payloads), path order."""
    by_path = (
        select(db.MusehubObject)
        .where(db.MusehubObject.repo_id == repo_id)
        .order_by(db.MusehubObject.path)
    )
    object_rows = (await session.execute(by_path)).scalars().all()
    return [_to_object_meta_response(obj) for obj in object_rows]
484
485
async def get_object_row(
    session: AsyncSession, repo_id: str, object_id: str
) -> db.MusehubObject | None:
    """Fetch the raw ORM object row for content delivery, or None if absent.

    Route handlers use the returned row to stream the file from ``disk_path``.
    """
    lookup = select(db.MusehubObject).where(
        db.MusehubObject.repo_id == repo_id,
        db.MusehubObject.object_id == object_id,
    )
    return (await session.execute(lookup)).scalars().first()
501
502
async def get_object_by_path(
    session: AsyncSession, repo_id: str, path: str
) -> db.MusehubObject | None:
    """Resolve a relative file path to its newest stored object in a repo.

    Backs the raw file endpoint: a human-readable path such as
    ``tracks/bass.mid`` maps to the stored artifact on disk. Re-pushed
    content can leave several objects sharing one path; ordering by
    ``created_at`` descending and taking the first row means the newest
    upload wins — mirroring Git's ref semantics where HEAD always points at
    the latest version.

    Args:
        session: Active async DB session.
        repo_id: UUID of the target repo.
        path: Client-supplied relative file path, e.g. ``tracks/bass.mid``.

    Returns:
        The newest matching ORM row, or ``None`` when the path is unknown.
    """
    newest_first = (
        select(db.MusehubObject)
        .where(
            db.MusehubObject.repo_id == repo_id,
            db.MusehubObject.path == path,
        )
        .order_by(desc(db.MusehubObject.created_at))
        .limit(1)
    )
    return (await session.execute(newest_first)).scalars().first()
532
533
534 def _instrument_name_from_path(path: str) -> str:
535 """Derive a human-readable instrument name from a MIDI object path.
536
537 Strips directory components and the file extension, then title-cases
538 the result. ``tracks/bass.mid`` → ``"Bass"``. Falls back to the
539 bare filename when the stem is empty.
540 """
541 stem = path.split("/")[-1].rsplit(".", 1)[0]
542 return stem.title() or path
543
544
async def get_track_info(
    session: AsyncSession,
    repo_id: str,
    path: str,
) -> TrackInfo | None:
    """Build SSR metadata for one MIDI track addressed by repo path.

    Resolves ``path`` to its most-recently-created object and maps the DB
    metadata onto a :class:`TrackInfo` for template rendering.
    ``duration_sec`` and ``track_count`` stay ``None`` because MIDI parsing
    is client-side only in the current architecture.

    Returns ``None`` when the path resolves to no object.
    """
    stored = await get_object_by_path(session, repo_id, path)
    if stored is None:
        return None
    return TrackInfo(
        name=_instrument_name_from_path(stored.path),
        size_bytes=stored.size_bytes,
        duration_sec=None,
        track_count=None,
    )
568
569
async def get_instruments_for_repo(
    session: AsyncSession,
    repo_id: str,
) -> list[InstrumentInfo]:
    """List instrument lane descriptors derived from a repo's MIDI objects.

    Every ``*.mid`` / ``*.midi`` object contributes one lane named from its
    path. ``channel`` is simply the zero-based position in path order (kept
    stable by sorting), and ``gm_program`` stays ``None`` — GM resolution
    needs MIDI parsing, which happens client-side.

    Returns an empty list when the repo has no MIDI objects.
    """
    midi_query = (
        select(db.MusehubObject)
        .where(
            db.MusehubObject.repo_id == repo_id,
            or_(
                db.MusehubObject.path.like("%.mid"),
                db.MusehubObject.path.like("%.midi"),
            ),
        )
        .order_by(db.MusehubObject.path)
    )
    midi_objects = (await session.execute(midi_query)).scalars().all()
    lanes: list[InstrumentInfo] = []
    for position, obj in enumerate(midi_objects):
        lanes.append(
            InstrumentInfo(
                name=_instrument_name_from_path(obj.path),
                channel=position,
                gm_program=None,
            )
        )
    return lanes
604
605
async def get_score_meta_for_repo(
    session: AsyncSession,
    repo_id: str,
    path: str,
) -> ScoreMetaInfo:
    """Return SSR metadata for the score page header.

    Derives the score title from the requested path; the musical fields
    (``key``, ``meter``, ``composer``) are ``None`` until server-side
    MIDI/ABC parsing is implemented — the score page template renders them
    conditionally. ``instrument_count`` is the repo's MIDI object count, or
    ``None`` when the repo has no MIDI objects.

    Always returns a :class:`ScoreMetaInfo` — even for unknown paths — so
    the template can render a valid (if sparse) header.
    """
    title = _instrument_name_from_path(path) if path else "Score"
    # Count MIDI artifacts. ``select_from`` makes the FROM clause explicit,
    # consistent with the other count queries in this module, instead of
    # relying on SQLAlchemy inferring the table from the WHERE clause alone.
    stmt = (
        select(func.count())
        .select_from(db.MusehubObject)
        .where(
            db.MusehubObject.repo_id == repo_id,
            or_(
                db.MusehubObject.path.like("%.mid"),
                db.MusehubObject.path.like("%.midi"),
            ),
        )
    )
    midi_count: int = (await session.execute(stmt)).scalar_one()
    return ScoreMetaInfo(
        title=title,
        composer=None,
        key=None,
        meter=None,
        instrument_count=midi_count if midi_count > 0 else None,
    )
640
641
async def list_commits(
    session: AsyncSession,
    repo_id: str,
    *,
    branch: str | None = None,
    limit: int = 50,
    offset: int = 0,
) -> tuple[list[CommitResponse], int]:
    """Page through a repo's commits, newest first.

    When ``branch`` is given, only commits on that branch are considered.
    Pagination is plain offset/limit. Returns ``(commits, total_count)``,
    where the count spans all pages of the (possibly branch-filtered) set.
    """
    filtered = select(db.MusehubCommit).where(db.MusehubCommit.repo_id == repo_id)
    if branch:
        filtered = filtered.where(db.MusehubCommit.branch == branch)

    count_query = select(func.count()).select_from(filtered.subquery())
    total_count: int = (await session.execute(count_query)).scalar_one()

    page_query = (
        filtered.order_by(desc(db.MusehubCommit.timestamp)).offset(offset).limit(limit)
    )
    commit_rows = (await session.execute(page_query)).scalars().all()
    return [_to_commit_response(c) for c in commit_rows], total_count
665
666
# ── Section / track keyword heuristics ──────────────────────────────────────

# Song-section names matched as lowercase substrings of commit messages
# (see _extract_section_events).
_SECTION_KEYWORDS: list[str] = [
    "intro", "verse", "chorus", "bridge", "outro", "hook",
    "pre-chorus", "prechorus", "breakdown", "drop", "build",
    "refrain", "coda", "tag", "interlude",
]

# Instrument/track names matched as lowercase substrings of commit messages
# (see _extract_track_events).
_TRACK_KEYWORDS: list[str] = [
    "bass", "drums", "keys", "piano", "guitar", "synth", "pad",
    "lead", "vocals", "strings", "brass", "horn",
    "flute", "cello", "violin", "organ", "arp", "percussion",
    "kick", "snare", "hi-hat", "hihat", "clap", "melody",
]

# Verb patterns for classifying a commit message as adding vs removing
# material. NOTE(review): _infer_action only consults _REMOVED_VERBS;
# _ADDED_VERBS is unused by the code visible here — confirm before removing.
_ADDED_VERBS = re.compile(
    r"\b(add(?:ed)?|new|introduce[ds]?|creat(?:ed)?|record(?:ed)?|layer(?:ed)?)\b",
    re.IGNORECASE,
)
_REMOVED_VERBS = re.compile(
    r"\b(remov(?:e[ds]?)?|delet(?:e[ds]?)?|drop(?:ped)?|cut|mute[ds]?)\b",
    re.IGNORECASE,
)
690
691
def _infer_action(message: str) -> str:
    """Classify a commit message: 'removed' when a removal verb matches, else 'added'."""
    return "removed" if _REMOVED_VERBS.search(message) else "added"
697
698
def _extract_section_events(row: db.MusehubCommit) -> list[TimelineSectionEvent]:
    """Scan a commit message for section keywords; one event per keyword hit."""
    haystack = row.message.lower()
    action = _infer_action(row.message)
    return [
        TimelineSectionEvent(
            commit_id=row.commit_id,
            timestamp=row.timestamp,
            section_name=keyword,
            action=action,
        )
        for keyword in _SECTION_KEYWORDS
        if keyword in haystack
    ]
714
715
def _extract_track_events(row: db.MusehubCommit) -> list[TimelineTrackEvent]:
    """Scan a commit message for track keywords; one event per keyword hit."""
    haystack = row.message.lower()
    action = _infer_action(row.message)
    return [
        TimelineTrackEvent(
            commit_id=row.commit_id,
            timestamp=row.timestamp,
            track_name=keyword,
            action=action,
        )
        for keyword in _TRACK_KEYWORDS
        if keyword in haystack
    ]
731
732
def _derive_emotion(row: db.MusehubCommit) -> TimelineEmotionEvent:
    """Derive a deterministic emotion vector from the commit SHA.

    Three non-overlapping 4-hex-char windows of the (zero-padded) SHA map to
    valence, energy, and tension in [0.0, 1.0]; a window containing non-hex
    characters falls back to 0.5. Deterministic by construction, so the
    timeline is always reproducible without external ML inference.
    """
    # Pad short commit IDs (e.g. test fixtures) so slicing is safe.
    padded = row.commit_id.ljust(12, "0")
    hex_digits = "0123456789abcdefABCDEF"
    components: list[float] = []
    for start in (0, 4, 8):
        window = padded[start:start + 4]
        if all(ch in hex_digits for ch in window):
            components.append(int(window, 16) / 0xFFFF)
        else:
            components.append(0.5)
    valence, energy, tension = components
    return TimelineEmotionEvent(
        commit_id=row.commit_id,
        timestamp=row.timestamp,
        valence=round(valence, 4),
        energy=round(energy, 4),
        tension=round(tension, 4),
    )
753
754
async def get_timeline_events(
    session: AsyncSession,
    repo_id: str,
    *,
    limit: int = 200,
) -> TimelineResponse:
    """Assemble the musical-evolution timeline for a repo.

    Loads up to ``limit`` commits oldest-first (for temporal rendering) and
    derives four parallel event streams:
    - commits: one marker per commit
    - emotion: deterministic SHA-derived emotion vectors
    - sections: section-change markers parsed from commit messages
    - tracks: track add/remove markers parsed from commit messages

    ``total_commits`` reflects the unpaginated count. Callers must verify
    the repo exists beforehand; a commit-less repo yields an empty timeline.
    """
    count_query = select(func.count()).where(db.MusehubCommit.repo_id == repo_id)
    total_commits: int = (await session.execute(count_query)).scalar_one()

    page_query = (
        select(db.MusehubCommit)
        .where(db.MusehubCommit.repo_id == repo_id)
        .order_by(db.MusehubCommit.timestamp)  # oldest-first for temporal rendering
        .limit(limit)
    )
    commits = (await session.execute(page_query)).scalars().all()

    markers = [
        TimelineCommitEvent(
            commit_id=c.commit_id,
            branch=c.branch,
            message=c.message,
            author=c.author,
            timestamp=c.timestamp,
            parent_ids=list(c.parent_ids or []),
        )
        for c in commits
    ]
    emotions = [_derive_emotion(c) for c in commits]
    sections: list[TimelineSectionEvent] = []
    tracks: list[TimelineTrackEvent] = []
    for c in commits:
        sections.extend(_extract_section_events(c))
        tracks.extend(_extract_track_events(c))

    return TimelineResponse(
        commits=markers,
        emotion=emotions,
        sections=sections,
        tracks=tracks,
        total_commits=total_commits,
    )
async def global_search(
    session: AsyncSession,
    *,
    query: str,
    mode: str = "keyword",
    page: int = 1,
    page_size: int = 10,
) -> GlobalSearchResult:
    """Search commit messages across all public MuseHub repos.

    Only ``visibility='public'`` repos are searched — private repos are never
    exposed regardless of caller identity. This enforces the public-only
    contract at the persistence layer so no route handler can accidentally
    bypass it.

    ``mode`` controls matching strategy:
    - ``keyword``: OR-match of whitespace-split query terms against message and
      repo name using LIKE (case-insensitive via lower()).
    - ``pattern``: raw SQL LIKE pattern applied to commit message only.

    Results are grouped by repo and paginated by repo-group (``page_size``
    controls how many repo-groups per page). Within each group, up to 20
    matching commits are returned newest-first.

    An audio preview object ID is attached when the repo contains any .mp3,
    .ogg, or .wav artifact — the first one found by path ordering is used.
    Audio previews are resolved in a single batched query across all matching
    repos (not N per-repo queries) to avoid the N+1 pattern.

    Args:
        session: Active async DB session.
        query: Raw search string from the user or agent.
        mode: "keyword" or "pattern". Defaults to "keyword".
        page: 1-based page number for repo-group pagination.
        page_size: Number of repo-groups per page (1–50).

    Returns:
        GlobalSearchResult with groups, pagination metadata, and counts.
    """
    # ── 1. Collect all public repos ─────────────────────────────────────────
    # NOTE(review): soft-deleted repos (deleted_at set) are not excluded from
    # the search scope here — confirm whether deleted public repos should
    # remain searchable.
    public_repos_stmt = (
        select(db.MusehubRepo)
        .where(db.MusehubRepo.visibility == "public")
        .order_by(db.MusehubRepo.created_at)
    )
    public_repo_rows = (await session.execute(public_repos_stmt)).scalars().all()
    total_repos_searched = len(public_repo_rows)

    # Nothing to search (no public repos, or blank query) → empty result page.
    if not public_repo_rows or not query.strip():
        return GlobalSearchResult(
            query=query,
            mode=mode,
            groups=[],
            total_repos_searched=total_repos_searched,
            page=page,
            page_size=page_size,
        )

    repo_ids = [r.repo_id for r in public_repo_rows]
    repo_map: dict[str, db.MusehubRepo] = {r.repo_id: r for r in public_repo_rows}

    # ── 2. Build commit filter predicate ────────────────────────────────────
    predicate: ColumnElement[bool]
    if mode == "pattern":
        # Raw LIKE: the caller supplies their own % / _ wildcards by design.
        predicate = db.MusehubCommit.message.like(query)
    else:
        # keyword: OR-match each whitespace-split term against message (lower)
        terms = [t for t in query.lower().split() if t]
        if not terms:
            return GlobalSearchResult(
                query=query,
                mode=mode,
                groups=[],
                total_repos_searched=total_repos_searched,
                page=page,
                page_size=page_size,
            )
        # A term may hit either the commit message or the repo name.
        term_predicates = [
            or_(
                func.lower(db.MusehubCommit.message).contains(term),
                func.lower(db.MusehubRepo.name).contains(term),
            )
            for term in terms
        ]
        predicate = or_(*term_predicates)

    # ── 3. Query matching commits joined to their repo ───────────────────────
    commits_stmt = (
        select(db.MusehubCommit, db.MusehubRepo)
        .join(db.MusehubRepo, db.MusehubCommit.repo_id == db.MusehubRepo.repo_id)
        .where(
            db.MusehubCommit.repo_id.in_(repo_ids),
            predicate,
        )
        .order_by(desc(db.MusehubCommit.timestamp))
    )
    commit_pairs = (await session.execute(commits_stmt)).all()

    # ── 4. Group commits by repo ─────────────────────────────────────────────
    # dict preserves insertion order and commit_pairs is newest-first, so the
    # repo-groups end up ordered by their newest matching commit.
    groups_map: dict[str, list[db.MusehubCommit]] = {}
    for commit_row, _repo_row in commit_pairs:
        groups_map.setdefault(commit_row.repo_id, []).append(commit_row)

    # ── 5. Resolve audio preview objects — single batched query (eliminates N+1) ──
    # Fetch all qualifying audio objects for every matching repo in one round-trip,
    # ordered by (repo_id, path) so we naturally encounter each repo's
    # alphabetically-first audio file first when iterating the result set.
    # Python deduplication (first-seen wins) replicates the previous LIMIT 1
    # per-repo semantics without issuing N separate queries.
    audio_map: dict[str, str] = {}
    if groups_map:
        audio_batch_stmt = (
            select(db.MusehubObject.repo_id, db.MusehubObject.object_id)
            .where(
                db.MusehubObject.repo_id.in_(list(groups_map.keys())),
                or_(
                    db.MusehubObject.path.like("%.mp3"),
                    db.MusehubObject.path.like("%.ogg"),
                    db.MusehubObject.path.like("%.wav"),
                ),
            )
            .order_by(db.MusehubObject.repo_id, db.MusehubObject.path)
        )
        audio_rows = (await session.execute(audio_batch_stmt)).all()
        for audio_row in audio_rows:
            if audio_row.repo_id not in audio_map:
                audio_map[audio_row.repo_id] = audio_row.object_id

    # ── 6. Paginate repo-groups ──────────────────────────────────────────────
    sorted_repo_ids = list(groups_map.keys())
    offset = (page - 1) * page_size
    page_repo_ids = sorted_repo_ids[offset : offset + page_size]

    groups: list[GlobalSearchRepoGroup] = []
    for rid in page_repo_ids:
        repo_row = repo_map[rid]
        all_matches = groups_map[rid]
        audio_oid = audio_map.get(rid)

        # Cap each group at its 20 newest matches; total_matches keeps the
        # true per-repo hit count.
        commit_matches = [
            GlobalSearchCommitMatch(
                commit_id=c.commit_id,
                message=c.message,
                author=c.author,
                branch=c.branch,
                timestamp=c.timestamp,
                repo_id=rid,
                repo_name=repo_row.name,
                repo_owner=repo_row.owner_user_id,
                repo_visibility=repo_row.visibility,
                audio_object_id=audio_oid,
            )
            for c in all_matches[:20]
        ]
        groups.append(
            GlobalSearchRepoGroup(
                repo_id=rid,
                repo_name=repo_row.name,
                repo_owner=repo_row.owner_user_id,
                repo_slug=repo_row.slug,
                repo_visibility=repo_row.visibility,
                matches=commit_matches,
                total_matches=len(all_matches),
            )
        )

    return GlobalSearchResult(
        query=query,
        mode=mode,
        groups=groups,
        total_repos_searched=total_repos_searched,
        page=page,
        page_size=page_size,
    )
async def list_commits_dag(
    session: AsyncSession,
    repo_id: str,
) -> DagGraphResponse:
    """Return a repo's full commit graph as a topologically sorted DAG.

    Every commit is loaded (no limit — a partial set would break the
    traversal), then Kahn's algorithm orders them oldest-ancestor-first so
    renderers can draw the graph without re-sorting.

    Edge direction is child → parent (source = child, target = parent),
    i.e. arrows point toward ancestors.

    HEAD is the branch head with the newest timestamp; when the repo has no
    branches (or none of their heads resolve), the newest commit overall is
    used instead. Commits left over after the topological pass (cycles or
    disconnected rows) are appended in timestamp order so no node is dropped.

    Agent use case: reason about branching topology, find common ancestors,
    or determine which branches contain a given commit.
    """
    commit_rows = (
        (
            await session.execute(
                select(db.MusehubCommit).where(db.MusehubCommit.repo_id == repo_id)
            )
        )
        .scalars()
        .all()
    )
    if not commit_rows:
        return DagGraphResponse(nodes=[], edges=[], head_commit_id=None)

    by_id: dict[str, db.MusehubCommit] = {c.commit_id: c for c in commit_rows}

    branches = (
        (
            await session.execute(
                select(db.MusehubBranch).where(db.MusehubBranch.repo_id == repo_id)
            )
        )
        .scalars()
        .all()
    )

    # commit_id → names of branches whose head points at it
    labels: dict[str, list[str]] = {}
    for branch in branches:
        head = branch.head_commit_id
        if head and head in by_id:
            labels.setdefault(head, []).append(branch.name)

    # Pick HEAD: most recent branch head; fall back to most recent commit.
    head_commit_id: str | None = None
    if branches:
        best_ts = None
        for branch in branches:
            head = branch.head_commit_id
            if not head or head not in by_id:
                continue
            ts = by_id[head].timestamp
            if best_ts is None or ts > best_ts:
                best_ts = ts
                head_commit_id = head
    if head_commit_id is None:
        head_commit_id = max(commit_rows, key=lambda c: c.timestamp).commit_id

    # pending_parents[c] = count of c's parents present in this repo's set;
    # zero means c is a root and enters the queue first, giving a
    # parent-before-child (oldest → newest) ordering.
    pending_parents: dict[str, int] = {cid: 0 for cid in by_id}
    # children[p] = commits that list p among their parent_ids
    children: dict[str, list[str]] = {cid: [] for cid in by_id}

    edges: list[DagEdge] = []
    for commit in commit_rows:
        for pid in commit.parent_ids or []:
            if pid not in by_id:
                continue
            edges.append(DagEdge(source=commit.commit_id, target=pid))
            children[pid].append(commit.commit_id)
            pending_parents[commit.commit_id] += 1

    # Kahn's algorithm, seeded with the parentless roots.
    frontier: deque[str] = deque(
        cid for cid, count in pending_parents.items() if count == 0
    )
    ordered: list[str] = []
    while frontier:
        current = frontier.popleft()
        ordered.append(current)
        for child in children[current]:
            pending_parents[child] -= 1
            if pending_parents[child] == 0:
                frontier.append(child)

    # Anything not reached (cycle or disconnected) is appended by timestamp.
    leftover = set(by_id) - set(ordered)
    if leftover:
        ordered.extend(sorted(leftover, key=lambda cid: by_id[cid].timestamp))

    nodes = [
        DagNode(
            commit_id=by_id[cid].commit_id,
            message=by_id[cid].message,
            author=by_id[cid].author,
            timestamp=by_id[cid].timestamp,
            branch=by_id[cid].branch,
            parent_ids=list(by_id[cid].parent_ids or []),
            is_head=(cid == head_commit_id),
            branch_labels=labels.get(cid, []),
            tag_labels=[],
        )
        for cid in ordered
    ]

    logger.debug("✅ Built DAG for repo %s: %d nodes, %d edges", repo_id, len(nodes), len(edges))
    return DagGraphResponse(nodes=nodes, edges=edges, head_commit_id=head_commit_id)
1095
1096
1097 # ---------------------------------------------------------------------------
1098 # Context document builder
1099 # ---------------------------------------------------------------------------
1100
# File suffixes treated as musical artifacts when deriving track names —
# MIDI (.mid/.midi) plus common audio formats. Compared case-insensitively
# (callers lowercase the suffix first).
_MUSIC_FILE_EXTENSIONS = frozenset(
    {".mid", ".midi", ".mp3", ".wav", ".aiff", ".aif", ".flac"}
)

# Maximum number of ancestor commits included in a context document's history.
_CONTEXT_HISTORY_DEPTH = 5
1106
1107
def _extract_track_names_from_objects(objects: list[db.MusehubObject]) -> list[str]:
    """Derive human-readable track names from stored object paths.

    An object counts as a track when its path carries a recognised music
    extension and its stem does not look like a raw SHA-256 digest. Stems
    are lowercased and de-duplicated, mirroring the convention in
    ``muse_context._extract_track_names``.
    """
    import pathlib

    names: set[str] = set()
    for entry in objects:
        candidate = pathlib.PurePosixPath(entry.path)
        if candidate.suffix.lower() not in _MUSIC_FILE_EXTENSIONS:
            continue
        stem = candidate.stem.lower()
        # A 64-char lowercase-hex stem is a content hash, not a track name.
        if len(stem) == 64 and all(ch in "0123456789abcdef" for ch in stem):
            continue
        names.add(stem)
    return sorted(names)
1126
1127
async def _get_commit_by_id(
    session: AsyncSession, repo_id: str, commit_id: str
) -> db.MusehubCommit | None:
    """Look up one MusehubCommit row scoped to its repo; None when absent."""
    result = await session.execute(
        select(db.MusehubCommit).where(
            db.MusehubCommit.repo_id == repo_id,
            db.MusehubCommit.commit_id == commit_id,
        )
    )
    return result.scalars().first()
1137
1138
async def _build_hub_history(
    session: AsyncSession,
    repo_id: str,
    start_commit: db.MusehubCommit,
    objects: list[db.MusehubObject],
    depth: int,
) -> list[MuseHubContextHistoryEntry]:
    """Walk the first-parent chain, returning up to *depth* ancestor entries.

    The *start_commit* (the context target) is NOT included — it is surfaced
    separately as ``head_commit`` in the result. Entries are newest-first.
    The object list is reused across entries since we have no per-commit object
    index at this layer; active tracks reflect the overall repo's artifact set.

    Args:
        session: Open async DB session (read-only use here).
        repo_id: Repo the chain belongs to; scopes every commit lookup.
        start_commit: Commit whose ancestors are walked (itself excluded).
        objects: Repo-wide object rows used to derive active track names.
        depth: Maximum number of ancestor entries to return.
    """
    # The track list is derived from the repo-wide object set, which does not
    # vary per ancestor — compute it once instead of once per history entry.
    active_tracks = _extract_track_names_from_objects(objects)

    entries: list[MuseHubContextHistoryEntry] = []
    parent_ids: list[str] = list(start_commit.parent_ids or [])

    while parent_ids and len(entries) < depth:
        # First-parent traversal: merge commits contribute only their
        # primary parent to the linear history chain.
        parent_id = parent_ids[0]
        commit = await _get_commit_by_id(session, repo_id, parent_id)
        if commit is None:
            logger.warning("⚠️ Hub history chain broken at %s", parent_id[:8])
            break
        entries.append(
            MuseHubContextHistoryEntry(
                commit_id=commit.commit_id,
                message=commit.message,
                author=commit.author,
                timestamp=commit.timestamp,
                # Copy so each entry owns its own list, as before the hoist.
                active_tracks=list(active_tracks),
            )
        )
        parent_ids = list(commit.parent_ids or [])

    return entries
1174
1175
async def get_context_for_commit(
    session: AsyncSession,
    repo_id: str,
    ref: str,
) -> MuseHubContextResponse | None:
    """Assemble the musical context document for one MuseHub commit.

    Looks up the target commit, derives active tracks from every object
    stored under the repo, and walks up to 5 ancestors for history.
    Musical dimensions (key, tempo, etc.) stay None until Storpheus MIDI
    analysis is integrated.

    Args:
        session: Open async DB session. Read-only — no writes performed.
        repo_id: Hub repo identifier.
        ref: Target commit ID. Must belong to this repo.

    Returns:
        ``MuseHubContextResponse`` ready for JSON serialisation, or None if
        the commit does not exist in this repo. Output is deterministic for
        a given ``repo_id`` + ``ref``, so it is safe to cache.
    """
    commit = await _get_commit_by_id(session, repo_id, ref)
    if commit is None:
        return None

    repo_objects = list(
        (
            await session.execute(
                select(db.MusehubObject).where(db.MusehubObject.repo_id == repo_id)
            )
        )
        .scalars()
        .all()
    )
    active_tracks = _extract_track_names_from_objects(repo_objects)

    history = await _build_hub_history(
        session, repo_id, commit, repo_objects, _CONTEXT_HISTORY_DEPTH
    )

    # Every musical dimension is currently unanalysed, so each is always
    # reported missing; an empty track list adds its own leading marker.
    missing: list[str] = [] if active_tracks else ["no music files found in repo"]
    missing += ["key", "tempo_bpm", "time_signature", "form", "emotion"]

    if active_tracks:
        suggestions = {
            "next_section": (
                f"Current tracks: {', '.join(active_tracks)}. "
                "Consider adding harmonic or melodic variation to develop the composition."
            )
        }
    else:
        suggestions = {
            "first_track": (
                "Push your first MIDI or audio file to populate the musical state."
            )
        }

    logger.info(
        "✅ MuseHub context built for repo %s commit %s (tracks=%d)",
        repo_id[:8],
        ref[:8],
        len(active_tracks),
    )
    return MuseHubContextResponse(
        repo_id=repo_id,
        current_branch=commit.branch,
        head_commit=MuseHubContextCommitInfo(
            commit_id=commit.commit_id,
            message=commit.message,
            author=commit.author,
            branch=commit.branch,
            timestamp=commit.timestamp,
        ),
        musical_state=MuseHubContextMusicalState(active_tracks=active_tracks),
        history=history,
        missing_elements=missing,
        suggestions=suggestions,
    )
1256
1257
def _to_session_response(s: db.MusehubSession) -> SessionResponse:
    """Map a MusehubSession ORM row to its API response model.

    ``duration_seconds`` is derived only for ended sessions. Both timestamps
    are normalised to offset-naive before subtraction because SQLite strips
    tzinfo on round-trip, which would otherwise mix naive and aware values.
    """

    def _as_naive(dt: datetime) -> datetime:
        return dt.replace(tzinfo=None) if dt.tzinfo else dt

    duration: float | None = None
    if s.ended_at is not None:
        duration = (_as_naive(s.ended_at) - _as_naive(s.started_at)).total_seconds()

    return SessionResponse(
        session_id=s.session_id,
        started_at=s.started_at,
        ended_at=s.ended_at,
        duration_seconds=duration,
        participants=s.participants or [],
        commits=list(s.commits) if s.commits else [],
        notes=s.notes or "",
        intent=s.intent,
        location=s.location,
        is_active=s.is_active,
        created_at=s.created_at,
    )
1279
1280
async def create_session(
    session: AsyncSession,
    repo_id: str,
    started_at: datetime | None,
    participants: list[str],
    intent: str,
    location: str,
) -> SessionResponse:
    """Persist a new active recording session and return its response model.

    A missing ``started_at`` defaults to the current UTC time. The row is
    flushed (not committed) so generated state is visible within the
    caller's transaction.
    """
    from uuid import uuid4

    record = db.MusehubSession(
        session_id=str(uuid4()),
        repo_id=repo_id,
        started_at=started_at or datetime.now(timezone.utc),
        participants=participants,
        intent=intent,
        location=location,
        is_active=True,
    )
    session.add(record)
    await session.flush()
    return _to_session_response(record)
1304
1305
async def stop_session(
    session: AsyncSession,
    repo_id: str,
    session_id: str,
    ended_at: datetime | None,
) -> SessionResponse:
    """Mark a session as ended; idempotent if already stopped.

    Args:
        session: Open async DB session; flushed (not committed) on change.
        repo_id: Repo the session belongs to — scopes the lookup.
        session_id: Target session identifier.
        ended_at: Explicit end time; defaults to now (UTC) when None.

    Raises:
        ValueError: If no session with this id exists in the repo.
    """
    result = await session.execute(
        select(db.MusehubSession).where(
            db.MusehubSession.session_id == session_id,
            db.MusehubSession.repo_id == repo_id,
        )
    )
    row = result.scalar_one_or_none()
    if row is None:
        raise ValueError(f"session {session_id} not found")
    # Only transition active sessions; repeated calls are no-ops.
    if row.is_active:
        row.ended_at = ended_at or datetime.now(timezone.utc)
        row.is_active = False
        await session.flush()
    return _to_session_response(row)
1329
1330
async def list_sessions(
    session: AsyncSession,
    repo_id: str,
    limit: int = 50,
    offset: int = 0,
) -> tuple[list[SessionResponse], int]:
    """Return sessions for a repo, newest first, with total count.

    Active sessions sort before ended ones; within each group the most
    recently started comes first.

    Args:
        session: Open async DB session.
        repo_id: Repo whose sessions are listed.
        limit: Page size.
        offset: Pagination offset.

    Returns:
        Tuple of (page of SessionResponse, total session count for the repo).
    """
    total_result = await session.execute(
        select(func.count(db.MusehubSession.session_id)).where(
            db.MusehubSession.repo_id == repo_id
        )
    )
    total = total_result.scalar_one()

    result = await session.execute(
        select(db.MusehubSession)
        .where(db.MusehubSession.repo_id == repo_id)
        .order_by(db.MusehubSession.is_active.desc(), db.MusehubSession.started_at.desc())
        .limit(limit)
        .offset(offset)
    )
    rows = result.scalars().all()
    return [_to_session_response(s) for s in rows], total
1356
1357
async def get_session(
    session: AsyncSession,
    repo_id: str,
    session_id: str,
) -> SessionResponse | None:
    """Fetch a single session by id, scoped to its repo; None when absent."""
    result = await session.execute(
        select(db.MusehubSession).where(
            db.MusehubSession.session_id == session_id,
            db.MusehubSession.repo_id == repo_id,
        )
    )
    row = result.scalar_one_or_none()
    return _to_session_response(row) if row is not None else None
1376
1377
async def resolve_head_ref(session: AsyncSession, repo_id: str) -> str:
    """Resolve the symbolic "HEAD" ref to the repo's default branch name.

    "main" wins whenever that branch exists; otherwise the
    lexicographically first branch name is used, and a repo with no
    branches at all falls back to "main".
    """
    stmt = (
        select(db.MusehubBranch)
        .where(db.MusehubBranch.repo_id == repo_id)
        .order_by(db.MusehubBranch.name)
    )
    names = [b.name for b in (await session.execute(stmt)).scalars().all()]
    if not names or "main" in names:
        return "main"
    return names[0]
1395
1396
async def resolve_ref_for_tree(
    session: AsyncSession, repo_id: str, ref: str
) -> bool:
    """Return True if ref resolves to a known branch or commit in this repo.

    The ref can be:
    - ``"HEAD"`` — always valid; resolves to the default branch.
    - A branch name (e.g. "main", "feature/groove") — validated via the
      musehub_branches table.
    - A commit ID prefix or full SHA — validated via musehub_commits.

    Returns False if the ref is unknown, which the caller should surface as
    a 404. This is a lightweight existence check; callers that need the full
    commit object should call ``get_commit()`` separately.
    """
    if ref == "HEAD":
        return True
    if not ref:
        # Guard: an empty ref would otherwise prefix-match every commit.
        return False

    branch_stmt = select(db.MusehubBranch).where(
        db.MusehubBranch.repo_id == repo_id,
        db.MusehubBranch.name == ref,
    )
    branch_row = (await session.execute(branch_stmt)).scalars().first()
    if branch_row is not None:
        return True

    # Match commit IDs by prefix so short SHAs resolve, per the documented
    # contract (previously only full-SHA equality was checked). autoescape
    # neutralises LIKE wildcards in user-supplied refs.
    commit_stmt = select(db.MusehubCommit).where(
        db.MusehubCommit.repo_id == repo_id,
        db.MusehubCommit.commit_id.startswith(ref, autoescape=True),
    )
    commit_row = (await session.execute(commit_stmt)).scalars().first()
    return commit_row is not None
1429
1430
async def list_tree(
    session: AsyncSession,
    repo_id: str,
    owner: str,
    repo_slug: str,
    ref: str,
    dir_path: str,
) -> TreeListResponse:
    """Build a directory listing for the tree browser.

    The repo's directory structure is reconstructed from all objects stored
    under ``repo_id``; ``dir_path`` acts as a path-prefix filter (empty
    string lists the root, "tracks" lists entries directly under "tracks/").

    Entries come back directories first, then files, each group sorted
    alphabetically. A directory's size_bytes is None — computing the
    recursive sum is left to client-side rendering.

    Args:
        session: Active async DB session.
        repo_id: UUID of the target repo.
        owner: Repo owner username (for response breadcrumbs).
        repo_slug: Repo slug (for response breadcrumbs).
        ref: Branch name or commit SHA (for response breadcrumbs).
        dir_path: Current directory prefix; empty string for repo root.

    Returns:
        TreeListResponse with entries sorted dirs-first, then files.
    """
    objects = await list_objects(session, repo_id)

    normalized = dir_path.strip("/")
    prefix = f"{normalized}/" if normalized else ""

    dir_entries: dict[str, TreeEntryResponse] = {}
    file_entries: list[TreeEntryResponse] = []

    for obj in objects:
        full_path = obj.path.lstrip("/")
        if not full_path.startswith(prefix):
            continue
        tail = full_path[len(prefix):]
        if not tail:
            continue
        segment, sep, _rest = tail.partition("/")
        if not sep:
            # No further slash — a file directly under this prefix.
            file_entries.append(
                TreeEntryResponse(
                    type="file",
                    name=tail,
                    path=full_path,
                    size_bytes=obj.size_bytes,
                )
            )
        elif segment not in dir_entries:
            # First sighting of this immediate subdirectory.
            dir_entries[segment] = TreeEntryResponse(
                type="dir",
                name=segment,
                path=prefix + segment,
                size_bytes=None,
            )

    ordered_dirs = sorted(dir_entries.values(), key=lambda e: e.name)
    file_entries.sort(key=lambda e: e.name)

    return TreeListResponse(
        owner=owner,
        repo_slug=repo_slug,
        ref=ref,
        dir_path=normalized,
        entries=ordered_dirs + file_entries,
    )
1511
1512
async def get_user_forks(db_session: AsyncSession, username: str) -> UserForksResponse:
    """Return every repo that ``username`` has forked, with source attribution.

    ``musehub_repos`` is joined twice against ``musehub_forks`` — once for
    the fork's own metadata and once for the source repo's owner/slug, so
    the profile page can render "forked from {source_owner}/{source_slug}".

    An empty list (not 404) means the user simply has no forks; callers are
    responsible for 404-guarding the username before invoking this.
    """
    fork_alias = aliased(db.MusehubRepo, name="fork_repo")
    source_alias = aliased(db.MusehubRepo, name="source_repo")

    stmt = (
        select(db.MusehubFork, fork_alias, source_alias)
        .join(fork_alias, db.MusehubFork.fork_repo_id == fork_alias.repo_id)
        .join(source_alias, db.MusehubFork.source_repo_id == source_alias.repo_id)
        .where(db.MusehubFork.forked_by == username)
        .order_by(db.MusehubFork.created_at.desc())
    )
    rows = (await db_session.execute(stmt)).all()

    entries: list[UserForkedRepoEntry] = []
    for fork, forked, source in rows:
        entries.append(
            UserForkedRepoEntry(
                fork_id=fork.fork_id,
                fork_repo=_to_repo_response(forked),
                source_owner=source.owner,
                source_slug=source.slug,
                forked_at=fork.created_at,
            )
        )

    return UserForksResponse(forks=entries, total=len(entries))
1549
1550
async def list_repo_forks(db_session: AsyncSession, repo_id: str) -> ForkNetworkResponse:
    """Return the fork network tree rooted at the given repo.

    Direct forks come from ``musehub_forks`` rows whose
    ``source_repo_id`` equals ``repo_id``, each joined to its repo row for
    owner/slug. The tree is one level deep (root + direct forks);
    multi-level fork chains would need a recursive CTE and can be added
    when the need arises. An unknown repo_id yields an empty root node.
    """
    source = (
        await db_session.execute(
            select(db.MusehubRepo).where(db.MusehubRepo.repo_id == repo_id)
        )
    ).scalar_one_or_none()

    if source is None:
        empty_root = ForkNetworkNode(
            owner="",
            repo_slug="",
            repo_id=repo_id,
            divergence_commits=0,
            forked_by="",
            forked_at=None,
        )
        return ForkNetworkResponse(root=empty_root, total_forks=0)

    fork_alias = aliased(db.MusehubRepo, name="fork_repo")
    stmt = (
        select(db.MusehubFork, fork_alias)
        .join(fork_alias, db.MusehubFork.fork_repo_id == fork_alias.repo_id)
        .where(db.MusehubFork.source_repo_id == repo_id)
        .order_by(db.MusehubFork.created_at.asc())
    )
    pairs = (await db_session.execute(stmt)).all()

    children: list[ForkNetworkNode] = []
    for fork, fork_repo in pairs:
        # Placeholder divergence: there is no per-branch commit index yet,
        # so derive a deterministic pseudo-count (0–14) from the fork_id
        # hash — stable across retries and usually non-zero for visual
        # interest.
        if fork.fork_id:
            divergence = (int(fork.fork_id.replace("-", ""), 16) % 100) % 15
        else:
            divergence = 0
        children.append(
            ForkNetworkNode(
                owner=fork_repo.owner,
                repo_slug=fork_repo.slug,
                repo_id=fork_repo.repo_id,
                divergence_commits=divergence,
                forked_by=fork.forked_by,
                forked_at=fork.created_at,
                children=[],
            )
        )

    return ForkNetworkResponse(
        root=ForkNetworkNode(
            owner=source.owner,
            repo_slug=source.slug,
            repo_id=source.repo_id,
            divergence_commits=0,
            forked_by="",
            forked_at=None,
            children=children,
        ),
        total_forks=len(children),
    )
1625
1626
async def get_user_starred(db_session: AsyncSession, username: str) -> UserStarredResponse:
    """Return all repos that ``username`` has starred, newest first.

    The profile row is resolved first (username → user_id), then
    ``musehub_stars`` is joined to ``musehub_repos`` for full repo metadata.

    An empty list (not 404) means the user has starred nothing; callers are
    responsible for 404-guarding the username before invoking this.
    """
    profile = (
        await db_session.execute(
            select(db.MusehubProfile).where(db.MusehubProfile.username == username)
        )
    ).scalar_one_or_none()
    if profile is None:
        return UserStarredResponse(starred=[], total=0)

    stmt = (
        select(db.MusehubStar, db.MusehubRepo)
        .join(db.MusehubRepo, db.MusehubStar.repo_id == db.MusehubRepo.repo_id)
        .where(db.MusehubStar.user_id == profile.user_id)
        .order_by(db.MusehubStar.created_at.desc())
    )
    entries = [
        UserStarredRepoEntry(
            star_id=star.star_id,
            repo=_to_repo_response(repo),
            starred_at=star.created_at,
        )
        for star, repo in (await db_session.execute(stmt)).all()
    ]
    return UserStarredResponse(starred=entries, total=len(entries))
1664
1665
async def get_user_watched(db_session: AsyncSession, username: str) -> UserWatchedResponse:
    """Return all repos that ``username`` is watching, newest first.

    The profile row is resolved first (username → user_id), then
    ``musehub_watches`` is joined to ``musehub_repos`` for full repo
    metadata.

    An empty list (not 404) means the user watches nothing; callers are
    responsible for 404-guarding the username before invoking this.
    """
    profile = (
        await db_session.execute(
            select(db.MusehubProfile).where(db.MusehubProfile.username == username)
        )
    ).scalar_one_or_none()
    if profile is None:
        return UserWatchedResponse(watched=[], total=0)

    stmt = (
        select(db.MusehubWatch, db.MusehubRepo)
        .join(db.MusehubRepo, db.MusehubWatch.repo_id == db.MusehubRepo.repo_id)
        .where(db.MusehubWatch.user_id == profile.user_id)
        .order_by(db.MusehubWatch.created_at.desc())
    )
    entries = [
        UserWatchedRepoEntry(
            watch_id=watch.watch_id,
            repo=_to_repo_response(repo),
            watched_at=watch.created_at,
        )
        for watch, repo in (await db_session.execute(stmt)).all()
    ]
    return UserWatchedResponse(watched=entries, total=len(entries))
1703
1704
1705 # ── Repo settings helpers ─────────────────────────────────────────────────────
1706
1707 _SETTINGS_DEFAULTS: dict[str, object] = {
1708 "default_branch": "main",
1709 "has_issues": True,
1710 "has_projects": False,
1711 "has_wiki": False,
1712 "license": None,
1713 "homepage_url": None,
1714 "allow_merge_commit": True,
1715 "allow_squash_merge": True,
1716 "allow_rebase_merge": False,
1717 "delete_branch_on_merge": True,
1718 }
1719
1720
1721 def _merge_settings(stored: dict[str, object] | None) -> dict[str, object]:
1722 """Return a complete settings dict by filling missing keys with defaults.
1723
1724 ``stored`` may be None (new repos) or a partial dict (old rows that predate
1725 individual flag additions). Defaults are applied for any absent key so callers
1726 always receive a fully-populated dict.
1727 """
1728 base = dict(_SETTINGS_DEFAULTS)
1729 if stored:
1730 base.update(stored)
1731 return base
1732
1733
async def get_repo_settings(
    session: AsyncSession, repo_id: str
) -> RepoSettingsResponse | None:
    """Return the mutable settings for a repo, or None if the repo does not exist.

    Combines dedicated column values (name, description, visibility, tags) with
    feature-flag values from the ``settings`` JSON blob. Missing flags are
    back-filled with ``_SETTINGS_DEFAULTS`` so new and legacy repos both return
    a complete response.

    Called by ``GET /api/v1/repos/{repo_id}/settings``.
    """
    row = await session.get(db.MusehubRepo, repo_id)
    if row is None:
        return None

    flags = _merge_settings(row.settings)

    return RepoSettingsResponse(
        name=row.name,
        description=row.description,
        visibility=row.visibility,
        # Stored flag may be None/empty in legacy rows; fall back to "main".
        default_branch=str(flags.get("default_branch") or "main"),
        has_issues=bool(flags.get("has_issues", True)),
        has_projects=bool(flags.get("has_projects", False)),
        has_wiki=bool(flags.get("has_wiki", False)),
        topics=list(row.tags or []),
        # `x if x is not None else None` is just `x` — simplified.
        license=flags.get("license"),  # type: ignore[arg-type]
        homepage_url=flags.get("homepage_url"),  # type: ignore[arg-type]
        allow_merge_commit=bool(flags.get("allow_merge_commit", True)),
        allow_squash_merge=bool(flags.get("allow_squash_merge", True)),
        allow_rebase_merge=bool(flags.get("allow_rebase_merge", False)),
        delete_branch_on_merge=bool(flags.get("delete_branch_on_merge", True)),
    )
1771
1772
async def update_repo_settings(
    session: AsyncSession,
    repo_id: str,
    patch: RepoSettingsPatch,
) -> RepoSettingsResponse | None:
    """Apply a partial settings update to a repo and return the updated settings.

    Only non-None fields in ``patch`` are written. Dedicated columns
    (name, description, visibility, tags) are updated directly on the ORM row;
    feature flags are merged into the ``settings`` JSON blob.

    Returns None if the repo does not exist. The caller is responsible for
    committing the session after a successful return.

    Called by ``PATCH /api/v1/repos/{repo_id}/settings``.
    """
    row = await session.get(db.MusehubRepo, repo_id)
    if row is None:
        return None

    # ── Dedicated column fields ──────────────────────────────────────────────
    if patch.name is not None:
        row.name = patch.name
    if patch.description is not None:
        row.description = patch.description
    if patch.visibility is not None:
        row.visibility = patch.visibility
    if patch.topics is not None:
        row.tags = patch.topics

    # ── Feature-flag JSON blob ───────────────────────────────────────────────
    # Patch field names match the flag keys one-to-one, so non-None values
    # can be collected generically instead of one `if` per flag.
    flag_fields = (
        "default_branch",
        "has_issues",
        "has_projects",
        "has_wiki",
        "license",
        "homepage_url",
        "allow_merge_commit",
        "allow_squash_merge",
        "allow_rebase_merge",
        "delete_branch_on_merge",
    )
    flag_updates = {
        field: value
        for field in flag_fields
        if (value := getattr(patch, field)) is not None
    }

    if flag_updates:
        merged = _merge_settings(row.settings)
        merged.update(flag_updates)
        # Assign a fresh dict (as before) — in-place mutation of a JSON
        # column may not be change-tracked by the ORM.
        row.settings = merged

    logger.info("✅ Updated settings for repo %s", repo_id)
    return await get_repo_settings(session, repo_id)