musehub/api/routes/musehub/analysis.py · gabriel/musehub

1

"""Muse Hub Analysis API — agent-friendly structured JSON for all musical dimensions.

2

3

Endpoint summary:

4

GET /musehub/repos/{repo_id}/analysis/{ref} — all 13 dimensions

5

GET /musehub/repos/{repo_id}/analysis/{ref}/emotion-map — emotion map

6

GET /musehub/repos/{repo_id}/analysis/{ref}/recall?q=<query> — semantic recall

7

GET /musehub/repos/{repo_id}/analysis/{ref}/similarity — cross-ref similarity

8

GET /musehub/repos/{repo_id}/analysis/{ref}/emotion-diff?base=X — 8-axis emotion diff

9

GET /musehub/repos/{repo_id}/analysis/{ref}/dynamics/page — per-track dynamics page

10

GET /musehub/repos/{repo_id}/analysis/{ref}/{dimension} — one dimension

11

12

Supported dimensions (13):

13

harmony, dynamics, motifs, form, groove, emotion, chord-map, contour,

14

key, tempo, meter, similarity, divergence

15

16

Query params (aggregate, emotion-map, per-dimension, dynamics-page, and harmony endpoints):
  ?track=<instrument> — restrict analysis to a named instrument track
  ?section=<label> — restrict analysis to a named musical section (e.g. chorus)

19

20

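Route ordering note:
  Single-segment fixed routes (/emotion-map, /emotion-diff, /recall, /similarity)
  MUST be registered before the /{dimension} catch-all so FastAPI matches them
  first. /dynamics/page has two path segments after the ref, so the single-segment
  /{dimension} parameter cannot capture it. The dedicated /harmony route lives on
  ``harmony_router``, which must be included before ``router``. New fixed-segment
  routes added in future batches must follow this same ordering rule.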

24

25

Cache semantics:
  Responses include an ETag (MD5 fingerprint derived from the repo ID, the ref,
  and a per-endpoint key) plus Last-Modified and Cache-Control headers.
  Agents may use these to avoid re-fetching unchanged analysis.

28

29

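Auth: the emotion-map, emotion-diff, and recall endpoints always require a valid
JWT Bearer token. The remaining endpoints accept anonymous requests for public
repos and answer 401 for non-public repos when no token is supplied. No business
logic lives here — all analysis is delegated to
:mod:`musehub.services.musehub_analysis`.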

32

"""

33

from __future__ import annotations

import hashlib

import logging

from datetime import datetime, timezone

38

39

from fastapi import APIRouter, Depends, HTTPException, Query, Response, status

40

from sqlalchemy.ext.asyncio import AsyncSession

41

42

from musehub.auth.dependencies import TokenClaims, optional_token, require_valid_token

43

from musehub.db import get_db

44

from musehub.models.musehub_analysis import (

45

ALL_DIMENSIONS,

46

AggregateAnalysisResponse,

AnalysisResponse,

DynamicsPageData,

EmotionDiffResponse,

EmotionMapResponse,

RefSimilarityResponse,

52

HarmonyAnalysisResponse,

53

RecallResponse,

54

)

55

from musehub.services import musehub_analysis, musehub_repository

56

57

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Router on which every endpoint in this module except the dedicated harmony one is registered.
router = APIRouter()

# Fixed Last-Modified header value (2026-01-01 00:00:00 UTC in HTTP-date layout) sent by every endpoint.
# NOTE(review): this is a constant, so it does not reflect when an analysis actually changed.
# NOTE(review): %a/%b are locale-dependent in strftime — presumably the process stays in the C locale; confirm.
_LAST_MODIFIED = datetime(2026, 1, 1, tzinfo=timezone.utc).strftime("%a, %d %b %Y %H:%M:%S GMT")

62

63

64

def _etag(repo_id: str, ref: str, dimension: str) -> str:
    """Derive a stable, quoted ETag for a repo + ref + dimension combination.

    Args:
        repo_id: Identifier of the repo the analysis belongs to.
        ref: Commit ref (or a composite key built from several refs).
        dimension: Dimension name or any other per-endpoint cache key.

    Returns:
        The MD5 hex digest of ``"{repo_id}:{ref}:{dimension}"`` wrapped in
        double quotes, ready to be used as an ``ETag`` header value.
    """
    raw = f"{repo_id}:{ref}:{dimension}"
    # MD5 is only a content fingerprint here, never a security primitive.
    # Declaring that explicitly keeps the call usable on FIPS-restricted
    # Python builds and makes a lint suppression unnecessary.
    digest = hashlib.md5(raw.encode(), usedforsecurity=False).hexdigest()
    return f'"{digest}"'

@router.get(
    "/repos/{repo_id}/analysis/{ref}",
    response_model=AggregateAnalysisResponse,
    operation_id="getAnalysis",
    summary="Aggregate analysis — all 13 musical dimensions for a ref",
    description=(
        "Returns structured JSON for all 13 musical dimensions of a Muse commit ref "
        "in a single response. Agents that need a full musical picture should prefer "
        "this endpoint over 13 sequential per-dimension requests."
    ),
)
async def get_aggregate_analysis(
    repo_id: str,
    ref: str,
    response: Response,
    track: str | None = Query(None, description="Instrument track filter, e.g. 'bass', 'keys'"),
    section: str | None = Query(None, description="Section filter, e.g. 'chorus', 'verse_1'"),
    db: AsyncSession = Depends(get_db),
    claims: TokenClaims | None = Depends(optional_token),
) -> AggregateAnalysisResponse:
    """Return the aggregate analysis for a Muse repo ref.

    The handler looks the repo up, applies the public/private access rule,
    delegates the computation to
    ``musehub_analysis.compute_aggregate_analysis`` and attaches cache headers.

    Args:
        repo_id: Repo identifier taken from the path.
        ref: Commit ref taken from the path.
        response: Outgoing response; receives the cache headers.
        track: Optional instrument track filter forwarded to the service.
        section: Optional section filter forwarded to the service.
        db: Database session used for the repo lookup.
        claims: Token claims, or ``None`` for anonymous requests.

    Returns:
        The service result, unchanged.

    Raises:
        HTTPException: 404 when the repo lookup returns ``None``; 401 when the
            repo's visibility is not ``"public"`` and no token was supplied.
    """
    repo = await musehub_repository.get_repo(db, repo_id)
    if repo is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Repo not found")
    # NOTE(review): any resolved token passes this check; no per-repo membership
    # test is visible in this module — confirm it is enforced elsewhere.
    if repo.visibility != "public" and claims is None:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Authentication required to access private repos.",
            headers={"WWW-Authenticate": "Bearer"},
        )

    result = musehub_analysis.compute_aggregate_analysis(
        repo_id=repo_id,
        ref=ref,
        track=track,
        section=section,
    )

    # The filters change the response body, so they must be part of the
    # validator. repr() keeps "no filter" (None) distinct from a filter that is
    # literally named "None".
    etag = _etag(repo_id, ref, f"aggregate:track={track!r}:section={section!r}")
    response.headers["ETag"] = etag
    response.headers["Last-Modified"] = _LAST_MODIFIED
    response.headers["Cache-Control"] = "private, max-age=60"
    return result

@router.get(
    "/repos/{repo_id}/analysis/{ref}/emotion-map",
    response_model=EmotionMapResponse,
    summary="Emotion map — energy/valence/tension/darkness across time and commits",
    description=(
        "Returns a full emotion map for a Muse repo ref, combining:\n"
        "- **Per-beat evolution**: how energy, valence, tension, and darkness "
        "change beat-by-beat within this ref.\n"
        "- **Cross-commit trajectory**: aggregated emotion vectors for the 5 most "
        "recent ancestor commits plus HEAD, enabling cross-version comparison.\n"
        "- **Drift distances**: Euclidean distance in emotion space between "
        "consecutive commits, with the dominant-change axis identified.\n"
        "- **Narrative**: auto-generated text describing the emotional journey.\n"
        "- **Source**: whether emotion data is explicit (tags), inferred, or mixed.\n\n"
        "Use ``?track=`` and ``?section=`` to restrict analysis to a specific "
        "instrument or musical section."
    ),
)
async def get_emotion_map(
    repo_id: str,
    ref: str,
    response: Response,
    track: str | None = Query(None, description="Instrument track filter, e.g. 'bass', 'keys'"),
    section: str | None = Query(None, description="Section filter, e.g. 'chorus', 'verse_1'"),
    db: AsyncSession = Depends(get_db),
    _: TokenClaims = Depends(require_valid_token),
) -> EmotionMapResponse:
    """Return the emotion map for a Muse repo ref.

    A valid token is always required (``require_valid_token``). The handler
    verifies the repo exists, delegates to
    ``musehub_analysis.compute_emotion_map`` and attaches cache headers.

    Per the route description, the map covers four axes: ``energy``,
    ``valence``, ``tension`` and ``darkness``.

    Args:
        repo_id: Repo identifier taken from the path.
        ref: Commit ref taken from the path.
        response: Outgoing response; receives the cache headers.
        track: Optional instrument track filter forwarded to the service.
        section: Optional section filter forwarded to the service.
        db: Database session used for the repo lookup.
        _: Token claims; only their presence matters here.

    Returns:
        The service result, unchanged.

    Raises:
        HTTPException: 404 when the repo lookup returns ``None``.
    """
    repo = await musehub_repository.get_repo(db, repo_id)
    if repo is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Repo not found")

    result = musehub_analysis.compute_emotion_map(
        repo_id=repo_id,
        ref=ref,
        track=track,
        section=section,
    )

    # The filters change the response body, so they must be part of the
    # validator. repr() keeps "no filter" (None) distinct from a filter that is
    # literally named "None".
    etag = _etag(repo_id, ref, f"emotion-map:track={track!r}:section={section!r}")
    response.headers["ETag"] = etag
    response.headers["Last-Modified"] = _LAST_MODIFIED
    response.headers["Cache-Control"] = "private, max-age=60"
    return result

# NOTE: emotion-diff is registered HERE (before the generic {dimension} catch-all)
# because FastAPI matches routes in registration order — a literal segment like
# "emotion-diff" does NOT automatically take precedence over a path parameter.
@router.get(
    "/repos/{repo_id}/analysis/{ref}/emotion-diff",
    response_model=EmotionDiffResponse,
    operation_id="getEmotionDiff",
    summary="Emotion diff — 8-axis emotional radar comparing two Muse refs",
    description=(
        "Returns an 8-axis emotional diff between ``ref`` (head) and ``base`` (baseline). "
        "The eight axes are: valence, energy, tension, complexity, warmth, brightness, "
        "darkness, and playfulness — all normalised to [0, 1].\n\n"
        "``delta`` is the signed per-axis difference (head − base); positive means the "
        "head commit increased that emotional dimension.\n\n"
        "``interpretation`` provides a natural-language summary of the dominant shifts "
        "for human and agent readability.\n\n"
        "Maps to the ``muse emotion-diff`` CLI command and the PR detail emotion radar."
    ),
)
async def get_emotion_diff(
    repo_id: str,
    ref: str,
    response: Response,
    base: str = Query(
        ...,
        description="Base ref to compare against, e.g. 'main', 'main~1', or a commit SHA",
    ),
    db: AsyncSession = Depends(get_db),
    _: TokenClaims = Depends(require_valid_token),
) -> EmotionDiffResponse:
    """Compare the emotional profile of ``ref`` (head) against ``base``.

    A valid token is always required. Once the repo is confirmed to exist, the
    comparison is delegated to ``musehub_analysis.compute_emotion_diff`` with
    ``ref`` as the head and ``base`` as the baseline.

    The ETag key is built from ``"{base}..{ref}"`` so each ordered pair of refs
    gets its own validator.

    Raises:
        HTTPException: 404 when the repo lookup returns ``None``.
    """
    # Only the existence of the repo matters for this endpoint.
    if await musehub_repository.get_repo(db, repo_id) is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Repo not found")

    diff = musehub_analysis.compute_emotion_diff(
        repo_id=repo_id,
        head_ref=ref,
        base_ref=base,
    )

    ref_pair = f"{base}..{ref}"
    response.headers["ETag"] = _etag(repo_id, ref_pair, "emotion-diff")
    response.headers["Last-Modified"] = _LAST_MODIFIED
    response.headers["Cache-Control"] = "private, max-age=60"
    return diff

@router.get(
    "/repos/{repo_id}/analysis/{ref}/recall",
    response_model=RecallResponse,
    operation_id="getAnalysisRecall",
    summary="Semantic recall — find commits similar to a natural-language query",
    description=(
        "Queries the musical feature vector store for commits semantically similar "
        "to a natural-language description.\n\n"
        "**Example:** ``?q=jazzy+chord+progression+with+swing+groove``\n\n"
        "Results are ranked by cosine similarity in the 128-dim musical feature "
        "embedding space. Each match includes the commit ID, message, branch, "
        "similarity score (0–1), and the musical dimensions most responsible for "
        "the match.\n\n"
        "Use ``?limit=N`` to control how many results are returned (default 10, max 50). "
        "Authentication is required — private repos are never surfaced without a valid "
        "Bearer token."
    ),
)
async def get_analysis_recall(
    repo_id: str,
    ref: str,
    response: Response,
    q: str = Query(..., description="Natural-language query, e.g. 'jazzy chord progression with swing'"),
    limit: int = Query(10, ge=1, le=50, description="Maximum number of results (1–50)"),
    db: AsyncSession = Depends(get_db),
    _: TokenClaims = Depends(require_valid_token),
) -> RecallResponse:
    """Return commits matching a natural-language query.

    A valid token is always required. The handler verifies the repo exists,
    delegates the search to ``musehub_analysis.compute_recall`` and attaches
    cache headers (30-second max-age, shorter than the other endpoints).

    Args:
        repo_id: Repo identifier taken from the path.
        ref: Commit ref scoping the search.
        response: Outgoing response; receives the cache headers.
        q: Natural-language query string.
        limit: Result count cap (1–50, default 10), validated by FastAPI.
        db: Database session used for the repo lookup.
        _: Token claims; only their presence matters here.

    Returns:
        The service result, unchanged.

    Raises:
        HTTPException: 404 when the repo lookup returns ``None``.
    """
    repo = await musehub_repository.get_repo(db, repo_id)
    if repo is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Repo not found")

    result = musehub_analysis.compute_recall(
        repo_id=repo_id,
        ref=ref,
        query=q,
        limit=limit,
    )

    # ``limit`` changes how many matches are returned, so it belongs in the
    # validator next to the query. It goes last so the integer suffix cannot be
    # confused with text inside ``q``.
    etag = _etag(repo_id, ref, f"recall:{q}:limit={limit}")
    response.headers["ETag"] = etag
    response.headers["Last-Modified"] = _LAST_MODIFIED
    response.headers["Cache-Control"] = "private, max-age=30"
    return result

@router.get(
    "/repos/{repo_id}/analysis/{ref}/similarity",
    response_model=RefSimilarityResponse,
    operation_id="getAnalysisRefSimilarity",
    summary="Cross-ref similarity — compare two Muse refs across 10 musical dimensions",
    description=(
        "Compares two Muse refs (branches, tags, or commit hashes) and returns a "
        "per-dimension similarity score plus an overall weighted mean.\n\n"
        "The ``compare`` query parameter is **required** — it specifies the second "
        "ref to compare against the ``{ref}`` path parameter.\n\n"
        "**10 dimensions scored (0–1 each):** pitch_distribution, rhythm_pattern, "
        "tempo, dynamics, harmonic_content, form, instrument_blend, groove, "
        "contour, emotion.\n\n"
        "``overall_similarity`` is a weighted mean of all 10 dimensions. "
        "``interpretation`` is auto-generated text suitable for display and for "
        "agent reasoning without further computation.\n\n"
        "Maps to ``muse similarity --base {ref} --head {ref2} --dimensions all``."
    ),
)
async def get_ref_similarity(
    repo_id: str,
    ref: str,
    response: Response,
    compare: str = Query(
        ...,
        description="Second ref to compare against (branch name, tag, or commit hash)",
    ),
    db: AsyncSession = Depends(get_db),
    claims: TokenClaims | None = Depends(optional_token),
) -> RefSimilarityResponse:
    """Score how similar ``ref`` and ``compare`` are within one Muse repo.

    Access rule: public repos are served to anyone; any other visibility
    requires a token. The scoring itself is delegated to
    ``musehub_analysis.compute_ref_similarity`` with ``ref`` as the base.

    The ETag key combines ``repo_id``, ``ref`` and ``compare``, so a given
    ordered pair always maps to the same validator.

    This route has to stay registered ahead of ``/{dimension}``; otherwise the
    catch-all parameter would capture the literal ``similarity`` segment.

    Raises:
        HTTPException: 404 when the repo lookup returns ``None``; 401 when the
            repo's visibility is not ``"public"`` and no token was supplied.
    """
    repository = await musehub_repository.get_repo(db, repo_id)
    if repository is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Repo not found")

    is_public = repository.visibility == "public"
    if not is_public and claims is None:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Authentication required to access private repos.",
            headers={"WWW-Authenticate": "Bearer"},
        )

    similarity = musehub_analysis.compute_ref_similarity(
        repo_id=repo_id,
        base_ref=ref,
        compare_ref=compare,
    )

    ref_pair = f"{ref}:{compare}"
    response.headers["ETag"] = _etag(repo_id, ref_pair, "similarity")
    response.headers["Last-Modified"] = _LAST_MODIFIED
    response.headers["Cache-Control"] = "private, max-age=60"
    return similarity

@router.get(
    "/repos/{repo_id}/analysis/{ref}/{dimension}",
    response_model=AnalysisResponse,
    operation_id="getAnalysisDimension",
    summary="Single-dimension analysis for a Muse ref",
    description=(
        "Returns structured JSON for one of the 13 supported musical dimensions. "
        "Supported dimensions: harmony, dynamics, motifs, form, groove, emotion, "
        "chord-map, contour, key, tempo, meter, similarity, divergence. "
        "Returns 404 for unknown dimension names."
    ),
)
async def get_dimension_analysis(
    repo_id: str,
    ref: str,
    dimension: str,
    response: Response,
    track: str | None = Query(None, description="Instrument track filter, e.g. 'bass', 'keys'"),
    section: str | None = Query(None, description="Section filter, e.g. 'chorus', 'verse_1'"),
    db: AsyncSession = Depends(get_db),
    claims: TokenClaims | None = Depends(optional_token),
) -> AnalysisResponse:
    """Return the analysis for one musical dimension of a Muse repo ref.

    ``dimension`` is validated against ``ALL_DIMENSIONS`` before any database
    access. Unknown names get an explicit 404 listing the supported values
    rather than a generic validation error. The computation is delegated to
    ``musehub_analysis.compute_analysis_response``.

    Args:
        repo_id: Repo identifier taken from the path.
        ref: Commit ref taken from the path.
        dimension: Dimension name; must be a member of ``ALL_DIMENSIONS``.
        response: Outgoing response; receives the cache headers.
        track: Optional instrument track filter forwarded to the service.
        section: Optional section filter forwarded to the service.
        db: Database session used for the repo lookup.
        claims: Token claims, or ``None`` for anonymous requests.

    Returns:
        The service result, unchanged.

    Raises:
        HTTPException: 404 for an unknown dimension or a missing repo; 401
            when the repo's visibility is not ``"public"`` and no token was
            supplied.
    """
    if dimension not in ALL_DIMENSIONS:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Unknown dimension {dimension!r}. Supported: {', '.join(ALL_DIMENSIONS)}",
        )

    repo = await musehub_repository.get_repo(db, repo_id)
    if repo is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Repo not found")
    if repo.visibility != "public" and claims is None:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Authentication required to access private repos.",
            headers={"WWW-Authenticate": "Bearer"},
        )

    result = musehub_analysis.compute_analysis_response(
        repo_id=repo_id,
        dimension=dimension,
        ref=ref,
        track=track,
        section=section,
    )

    # The filters change the response body, so they must be part of the
    # validator. repr() keeps "no filter" (None) distinct from a filter that is
    # literally named "None".
    etag = _etag(repo_id, ref, f"{dimension}:track={track!r}:section={section!r}")
    response.headers["ETag"] = etag
    response.headers["Last-Modified"] = _LAST_MODIFIED
    response.headers["Cache-Control"] = "private, max-age=60"
    return result

@router.get(
    "/repos/{repo_id}/analysis/{ref}/dynamics/page",
    response_model=DynamicsPageData,
    operation_id="getAnalysisDynamicsPage",
    summary="Per-track dynamics page data for the Dynamics Analysis page",
    description=(
        "Returns enriched per-track dynamic analysis: velocity profiles, arc "
        "classifications, peak velocity, velocity range, and cross-track loudness "
        "data. Consumed by the Dynamics Analysis web page and by AI agents that "
        "need per-track dynamic context for orchestration decisions. "
        "Use ``?track=<name>`` to restrict to a single instrument track. "
        "Use ``?section=<label>`` to restrict to a musical section."
    ),
)
async def get_dynamics_page_data(
    repo_id: str,
    ref: str,
    response: Response,
    track: str | None = Query(None, description="Instrument track filter, e.g. 'bass', 'keys'"),
    section: str | None = Query(None, description="Section filter, e.g. 'chorus', 'verse_1'"),
    db: AsyncSession = Depends(get_db),
    claims: TokenClaims | None = Depends(optional_token),
) -> DynamicsPageData:
    """Return per-track dynamics data for the Dynamics Analysis web page.

    The handler looks the repo up, applies the public/private access rule,
    delegates to ``musehub_analysis.compute_dynamics_page_data`` and attaches
    cache headers.

    Cache semantics: the ETag is derived from ``repo_id``, ``ref``, the
    ``"dynamics-page"`` sentinel and the ``track``/``section`` filters.

    Args:
        repo_id: Repo identifier taken from the path.
        ref: Commit ref taken from the path.
        response: Outgoing response; receives the cache headers.
        track: Optional instrument track filter forwarded to the service.
        section: Optional section filter forwarded to the service.
        db: Database session used for the repo lookup.
        claims: Token claims, or ``None`` for anonymous requests.

    Returns:
        The service result, unchanged.

    Raises:
        HTTPException: 404 when the repo lookup returns ``None``; 401 when the
            repo's visibility is not ``"public"`` and no token was supplied.
    """
    repo = await musehub_repository.get_repo(db, repo_id)
    if repo is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Repo not found")
    if repo.visibility != "public" and claims is None:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Authentication required to access private repos.",
            headers={"WWW-Authenticate": "Bearer"},
        )

    result = musehub_analysis.compute_dynamics_page_data(
        repo_id=repo_id,
        ref=ref,
        track=track,
        section=section,
    )

    # The filters change the response body, so they must be part of the
    # validator. repr() keeps "no filter" (None) distinct from a filter that is
    # literally named "None".
    etag = _etag(repo_id, ref, f"dynamics-page:track={track!r}:section={section!r}")
    response.headers["ETag"] = etag
    response.headers["Last-Modified"] = _LAST_MODIFIED
    response.headers["Cache-Control"] = "private, max-age=60"
    return result

# Dedicated harmony router — must be included BEFORE the main analysis router in
# __init__.py so this specific path takes priority over the generic /{dimension}
# catch-all route. See: musehub/api/routes/musehub/__init__.py.
harmony_router = APIRouter()


@harmony_router.get(
    "/repos/{repo_id}/analysis/{ref}/harmony",
    response_model=HarmonyAnalysisResponse,
    operation_id="getAnalysisHarmony",
    summary="Harmonic analysis — Roman numerals, cadences, and modulations for a ref",
    description=(
        "Returns a Roman-numeral-centric harmonic analysis of a Muse commit ref. "
        "Maps to the ``muse harmony --ref {ref}`` CLI command.\n\n"
        "Unlike the generic ``/analysis/{ref}/harmony`` dimension (which returns "
        "raw chord symbols and a tension curve), this endpoint returns:\n\n"
        "- **key** and **mode**: detected tonal centre and scale type\n"
        "- **roman_numerals**: each chord event labelled with scale degree, root, "
        "quality, and tonal function (tonic / subdominant / dominant)\n"
        "- **cadences**: detected phrase-ending cadence types and their beat positions\n"
        "- **modulations**: key-area changes with from/to key and pivot chord\n"
        "- **harmonic_rhythm_bpm**: rate of chord changes in chords per minute\n\n"
        "Agents use this to compose harmonically coherent continuations that respect "
        "existing tonal narrative, cadence structure, and phrase boundaries. "
        "Use ``?track=<instrument>`` or ``?section=<label>`` to narrow the scope."
    ),
)
async def get_harmony_analysis(
    repo_id: str,
    ref: str,
    response: Response,
    track: str | None = Query(None, description="Instrument track filter, e.g. 'bass', 'keys'"),
    section: str | None = Query(None, description="Section filter, e.g. 'chorus', 'verse_1'"),
    db: AsyncSession = Depends(get_db),
    claims: TokenClaims | None = Depends(optional_token),
) -> HarmonyAnalysisResponse:
    """Return the dedicated harmonic analysis for a Muse repo ref.

    The handler looks the repo up, applies the public/private access rule,
    delegates to ``musehub_analysis.compute_harmony_analysis`` and attaches
    cache headers. Maps to ``muse harmony --ref {ref}``.

    Args:
        repo_id: Repo identifier taken from the path.
        ref: Commit ref taken from the path.
        response: Outgoing response; receives the cache headers.
        track: Optional instrument track filter forwarded to the service.
        section: Optional section filter forwarded to the service.
        db: Database session used for the repo lookup.
        claims: Token claims, or ``None`` for anonymous requests.

    Returns:
        The service result, unchanged.

    Raises:
        HTTPException: 404 when the repo lookup returns ``None``; 401 when the
            repo's visibility is not ``"public"`` and no token was supplied.
    """
    repo = await musehub_repository.get_repo(db, repo_id)
    if repo is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Repo not found")
    if repo.visibility != "public" and claims is None:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Authentication required to access private repos.",
            headers={"WWW-Authenticate": "Bearer"},
        )

    result = musehub_analysis.compute_harmony_analysis(
        repo_id=repo_id,
        ref=ref,
        track=track,
        section=section,
    )

    # The filters change the response body, so they must be part of the
    # validator. repr() keeps "no filter" (None) distinct from a filter that is
    # literally named "None".
    etag = _etag(repo_id, ref, f"harmony:track={track!r}:section={section!r}")
    response.headers["ETag"] = etag
    response.headers["Last-Modified"] = _LAST_MODIFIED
    response.headers["Cache-Control"] = "private, max-age=60"
    return result