gabriel / musehub public
analysis.py python
557 lines 23.2 KB
e6fad116 Remove all Stori, Maestro, and AgentCeption references; rebrand to Muse VCS Gabriel Cardona <gabriel@tellurstori.com> 6d ago
1 """Muse Hub Analysis API — agent-friendly structured JSON for all musical dimensions.
2
3 Endpoint summary:
4 GET /musehub/repos/{repo_id}/analysis/{ref} — all 13 dimensions
5 GET /musehub/repos/{repo_id}/analysis/{ref}/emotion-map — emotion map
6 GET /musehub/repos/{repo_id}/analysis/{ref}/recall?q=<query> — semantic recall
7 GET /musehub/repos/{repo_id}/analysis/{ref}/similarity — cross-ref similarity
8 GET /musehub/repos/{repo_id}/analysis/{ref}/emotion-diff?base=X — 8-axis emotion diff
9 GET /musehub/repos/{repo_id}/analysis/{ref}/dynamics/page — per-track dynamics page
10 GET /musehub/repos/{repo_id}/analysis/{ref}/{dimension} — one dimension
11
12 Supported dimensions (13):
13 harmony, dynamics, motifs, form, groove, emotion, chord-map, contour,
14 key, tempo, meter, similarity, divergence
15
Query params (aggregate, emotion-map, dynamics/page, harmony, and single-dimension endpoints):
17 ?track=<instrument> — restrict analysis to a named instrument track
18 ?section=<label> — restrict analysis to a named musical section (e.g. chorus)
19
20 Route ordering note:
21 Specific fixed-segment routes (/emotion-map, /similarity, /dynamics/page) MUST be
22 registered before the /{dimension} catch-all so FastAPI matches them first. New
23 fixed-segment routes added in future batches must follow this same ordering rule.
24
25 Cache semantics:
26 Responses include ETag (MD5 of dimension + ref) and Last-Modified headers.
27 Agents may use these to avoid re-fetching unchanged analysis.
28
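Auth: emotion-map, emotion-diff, and recall always require a valid JWT Bearer
token. The remaining endpoints accept an optional token: public repos are
readable without one, while non-public repos answer 401 when no token is
supplied (this is in addition to any dependency applied at the musehub
router level). No business logic lives here — all analysis is delegated to
:mod:`musehub.services.musehub_analysis`.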
32 """
33 from __future__ import annotations
34
35 import hashlib
36 import logging
37 from datetime import datetime, timezone
38
39 from fastapi import APIRouter, Depends, HTTPException, Query, Response, status
40 from sqlalchemy.ext.asyncio import AsyncSession
41
42 from musehub.auth.dependencies import TokenClaims, optional_token, require_valid_token
43 from musehub.db import get_db
44 from musehub.models.musehub_analysis import (
45 ALL_DIMENSIONS,
46 AggregateAnalysisResponse,
47 AnalysisResponse,
48 DynamicsPageData,
49 EmotionDiffResponse,
50 EmotionMapResponse,
51 RefSimilarityResponse,
52 HarmonyAnalysisResponse,
53 RecallResponse,
54 )
55 from musehub.services import musehub_analysis, musehub_repository
56
# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)

# Router that the analysis route handlers in this module are registered on.
router = APIRouter()

# Constant Last-Modified header value shared by every handler in this module:
# midnight UTC on 2026-01-01, rendered in the HTTP-date layout
# ("Thu, 01 Jan 2026 00:00:00 GMT"). It is computed once at import time and
# does not track when an analysis was actually produced.
_LAST_MODIFIED = datetime(2026, 1, 1, tzinfo=timezone.utc).strftime("%a, %d %b %Y %H:%M:%S GMT")
62
63
def _etag(repo_id: str, ref: str, dimension: str) -> str:
    """Derive a stable, quoted ETag for a (repo, ref, dimension) combination.

    The three parts are joined with ``:`` and hashed with MD5; the hex digest
    is wrapped in double quotes so the value can be used directly as an
    ``ETag`` header.

    Args:
        repo_id: Repo identifier taken from the request path.
        ref: Ref (or a caller-composed ref pair) the response describes.
        dimension: Dimension name or any other caller-chosen discriminator.

    Returns:
        The quoted 32-character hex digest, e.g. ``'"9e107d9d..."'``.
    """
    raw = f"{repo_id}:{ref}:{dimension}"
    # MD5 is used purely as a cheap fingerprint. Declaring that explicitly
    # keeps the call working on FIPS-restricted OpenSSL builds, where a plain
    # hashlib.md5() raises, and yields exactly the same digest everywhere else.
    digest = hashlib.md5(raw.encode(), usedforsecurity=False).hexdigest()
    return f'"{digest}"'
68
69
@router.get(
    "/repos/{repo_id}/analysis/{ref}",
    response_model=AggregateAnalysisResponse,
    operation_id="getAnalysis",
    summary="Aggregate analysis — all 13 musical dimensions for a ref",
    description=(
        "Returns structured JSON for all 13 musical dimensions of a Muse commit ref "
        "in a single response. Agents that need a full musical picture should prefer "
        "this endpoint over 13 sequential per-dimension requests."
    ),
)
async def get_aggregate_analysis(
    repo_id: str,
    ref: str,
    response: Response,
    track: str | None = Query(None, description="Instrument track filter, e.g. 'bass', 'keys'"),
    section: str | None = Query(None, description="Section filter, e.g. 'chorus', 'verse_1'"),
    db: AsyncSession = Depends(get_db),
    claims: TokenClaims | None = Depends(optional_token),
) -> AggregateAnalysisResponse:
    """Return all 13 dimension analyses for a Muse repo ref.

    The computation itself is delegated to
    ``musehub_analysis.compute_aggregate_analysis``; this handler only checks
    that the repo exists and is readable, then attaches cache headers.

    Args:
        repo_id: Repo identifier from the path.
        ref: Muse commit ref from the path.
        response: Outgoing response, used to set the cache headers.
        track: Optional instrument-track filter forwarded to the service.
        section: Optional section filter forwarded to the service.
        db: Database session used for the repo lookup.
        claims: Token claims, or ``None`` for anonymous requests.

    Returns:
        The aggregate analysis produced by the service layer.

    Raises:
        HTTPException: 404 when the repo lookup returns nothing; 401 when the
            repo is not public and the request carries no token.
    """
    repo = await musehub_repository.get_repo(db, repo_id)
    if repo is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Repo not found")
    if repo.visibility != "public" and claims is None:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Authentication required to access private repos.",
            headers={"WWW-Authenticate": "Bearer"},
        )

    result = musehub_analysis.compute_aggregate_analysis(
        repo_id=repo_id,
        ref=ref,
        track=track,
        section=section,
    )

    # The body depends on ?track= / ?section=, so the validator has to depend
    # on them too; otherwise differently filtered responses would share one
    # ETag. Unfiltered requests keep the original "aggregate" key.
    filters = "".join(
        f":{name}={value}"
        for name, value in (("track", track), ("section", section))
        if value is not None
    )
    response.headers["ETag"] = _etag(repo_id, ref, f"aggregate{filters}")
    response.headers["Last-Modified"] = _LAST_MODIFIED
    response.headers["Cache-Control"] = "private, max-age=60"
    return result
119
120
@router.get(
    "/repos/{repo_id}/analysis/{ref}/emotion-map",
    response_model=EmotionMapResponse,
    summary="Emotion map — energy/valence/tension/darkness across time and commits",
    description=(
        "Returns a full emotion map for a Muse repo ref, combining:\n"
        "- **Per-beat evolution**: how energy, valence, tension, and darkness "
        "change beat-by-beat within this ref.\n"
        "- **Cross-commit trajectory**: aggregated emotion vectors for the 5 most "
        "recent ancestor commits plus HEAD, enabling cross-version comparison.\n"
        "- **Drift distances**: Euclidean distance in emotion space between "
        "consecutive commits, with the dominant-change axis identified.\n"
        "- **Narrative**: auto-generated text describing the emotional journey.\n"
        "- **Source**: whether emotion data is explicit (tags), inferred, or mixed.\n\n"
        "Use ``?track=`` and ``?section=`` to restrict analysis to a specific "
        "instrument or musical section."
    ),
)
async def get_emotion_map(
    repo_id: str,
    ref: str,
    response: Response,
    track: str | None = Query(None, description="Instrument track filter, e.g. 'bass', 'keys'"),
    section: str | None = Query(None, description="Section filter, e.g. 'chorus', 'verse_1'"),
    db: AsyncSession = Depends(get_db),
    _: TokenClaims = Depends(require_valid_token),
) -> EmotionMapResponse:
    """Return the full emotion map for a Muse repo ref.

    A valid token is always required (``require_valid_token``). The map itself
    is produced by ``musehub_analysis.compute_emotion_map``; per the endpoint
    description it covers per-beat evolution, cross-commit trajectory, drift
    distances, narrative text, and source attribution.

    Args:
        repo_id: Repo identifier from the path.
        ref: Muse commit ref from the path.
        response: Outgoing response, used to set the cache headers.
        track: Optional instrument-track filter forwarded to the service.
        section: Optional section filter forwarded to the service.
        db: Database session used for the repo lookup.
        _: Validated token claims; unused beyond enforcing authentication.

    Returns:
        The emotion map produced by the service layer.

    Raises:
        HTTPException: 404 when the repo lookup returns nothing.
    """
    repo = await musehub_repository.get_repo(db, repo_id)
    if repo is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Repo not found")

    result = musehub_analysis.compute_emotion_map(
        repo_id=repo_id,
        ref=ref,
        track=track,
        section=section,
    )

    # Filtered and unfiltered maps are different representations, so fold any
    # supplied filter into the validator. Unfiltered requests keep the
    # original "emotion-map" key.
    filters = "".join(
        f":{name}={value}"
        for name, value in (("track", track), ("section", section))
        if value is not None
    )
    response.headers["ETag"] = _etag(repo_id, ref, f"emotion-map{filters}")
    response.headers["Last-Modified"] = _LAST_MODIFIED
    response.headers["Cache-Control"] = "private, max-age=60"
    return result
176
177
178 # NOTE: emotion-diff is registered HERE (before the generic {dimension} catch-all)
179 # because FastAPI matches routes in registration order — a literal segment like
180 # "emotion-diff" does NOT automatically take precedence over a path parameter.
@router.get(
    "/repos/{repo_id}/analysis/{ref}/emotion-diff",
    response_model=EmotionDiffResponse,
    operation_id="getEmotionDiff",
    summary="Emotion diff — 8-axis emotional radar comparing two Muse refs",
    description=(
        "Returns an 8-axis emotional diff between ``ref`` (head) and ``base`` (baseline). "
        "The eight axes are: valence, energy, tension, complexity, warmth, brightness, "
        "darkness, and playfulness — all normalised to [0, 1].\n\n"
        "``delta`` is the signed per-axis difference (head − base); positive means the "
        "head commit increased that emotional dimension.\n\n"
        "``interpretation`` provides a natural-language summary of the dominant shifts "
        "for human and agent readability.\n\n"
        "Maps to the ``muse emotion-diff`` CLI command and the PR detail emotion radar."
    ),
)
async def get_emotion_diff(
    repo_id: str,
    ref: str,
    response: Response,
    base: str = Query(
        ...,
        description="Base ref to compare against, e.g. 'main', 'main~1', or a commit SHA",
    ),
    db: AsyncSession = Depends(get_db),
    _: TokenClaims = Depends(require_valid_token),
) -> EmotionDiffResponse:
    """Compare the emotional character of ``ref`` (head) against ``base``.

    Authentication is mandatory for this endpoint. The diff is computed by
    ``musehub_analysis.compute_emotion_diff`` with ``ref`` as the head and
    ``base`` as the baseline; this handler verifies the repo exists and
    stamps the cache headers.

    Args:
        repo_id: Repo identifier from the path.
        ref: Head ref from the path.
        response: Outgoing response, used to set the cache headers.
        base: Required baseline ref to diff against.
        db: Database session used for the repo lookup.
        _: Validated token claims; unused beyond enforcing authentication.

    Returns:
        The emotion diff produced by the service layer.

    Raises:
        HTTPException: 404 when the repo lookup returns nothing.
    """
    if await musehub_repository.get_repo(db, repo_id) is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Repo not found")

    diff = musehub_analysis.compute_emotion_diff(repo_id=repo_id, head_ref=ref, base_ref=base)

    # The validator is keyed on the ordered ref pair, so swapping head and base
    # produces a different ETag.
    cache_headers = (
        ("ETag", _etag(repo_id, f"{base}..{ref}", "emotion-diff")),
        ("Last-Modified", _LAST_MODIFIED),
        ("Cache-Control", "private, max-age=60"),
    )
    for header_name, header_value in cache_headers:
        response.headers[header_name] = header_value
    return diff
236
237
@router.get(
    "/repos/{repo_id}/analysis/{ref}/recall",
    response_model=RecallResponse,
    operation_id="getAnalysisRecall",
    summary="Semantic recall — find commits similar to a natural-language query",
    description=(
        "Queries the musical feature vector store for commits semantically similar "
        "to a natural-language description.\n\n"
        "**Example:** ``?q=jazzy+chord+progression+with+swing+groove``\n\n"
        "Results are ranked by cosine similarity in the 128-dim musical feature "
        "embedding space. Each match includes the commit ID, message, branch, "
        "similarity score (0–1), and the musical dimensions most responsible for "
        "the match.\n\n"
        "Use ``?limit=N`` to control how many results are returned (default 10, max 50). "
        "Authentication is required — private repos are never surfaced without a valid "
        "Bearer token."
    ),
)
async def get_analysis_recall(
    repo_id: str,
    ref: str,
    response: Response,
    q: str = Query(..., description="Natural-language query, e.g. 'jazzy chord progression with swing'"),
    limit: int = Query(10, ge=1, le=50, description="Maximum number of results (1–50)"),
    db: AsyncSession = Depends(get_db),
    _: TokenClaims = Depends(require_valid_token),
) -> RecallResponse:
    """Return commits semantically matching a natural-language query.

    Authentication is required unconditionally. The search is delegated to
    ``musehub_analysis.compute_recall``; this handler verifies the repo exists
    and attaches cache headers.

    Args:
        repo_id: Muse Hub repo identifier from the path.
        ref: Muse commit ref scoping the search.
        response: Outgoing response, used to set the cache headers.
        q: Natural-language query string.
        limit: Result count cap (1–50, default 10).
        db: Database session used for the repo lookup.
        _: Validated token claims; unused beyond enforcing authentication.

    Returns:
        The recall matches produced by the service layer.

    Raises:
        HTTPException: 404 when the repo lookup returns nothing.
    """
    repo = await musehub_repository.get_repo(db, repo_id)
    if repo is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Repo not found")

    result = musehub_analysis.compute_recall(
        repo_id=repo_id,
        ref=ref,
        query=q,
        limit=limit,
    )

    # Both q and limit shape the result list, so both belong in the validator;
    # keying on q alone would give a 5-result and a 50-result response the
    # same ETag.
    response.headers["ETag"] = _etag(repo_id, ref, f"recall:{q}:limit={limit}")
    response.headers["Last-Modified"] = _LAST_MODIFIED
    response.headers["Cache-Control"] = "private, max-age=30"
    return result
297
298
@router.get(
    "/repos/{repo_id}/analysis/{ref}/similarity",
    response_model=RefSimilarityResponse,
    operation_id="getAnalysisRefSimilarity",
    summary="Cross-ref similarity — compare two Muse refs across 10 musical dimensions",
    description=(
        "Compares two Muse refs (branches, tags, or commit hashes) and returns a "
        "per-dimension similarity score plus an overall weighted mean.\n\n"
        "The ``compare`` query parameter is **required** — it specifies the second "
        "ref to compare against the ``{ref}`` path parameter.\n\n"
        "**10 dimensions scored (0–1 each):** pitch_distribution, rhythm_pattern, "
        "tempo, dynamics, harmonic_content, form, instrument_blend, groove, "
        "contour, emotion.\n\n"
        "``overall_similarity`` is a weighted mean of all 10 dimensions. "
        "``interpretation`` is auto-generated text suitable for display and for "
        "agent reasoning without further computation.\n\n"
        "Maps to ``muse similarity --base {ref} --head {ref2} --dimensions all``."
    ),
)
async def get_ref_similarity(
    repo_id: str,
    ref: str,
    response: Response,
    compare: str = Query(
        ...,
        description="Second ref to compare against (branch name, tag, or commit hash)",
    ),
    db: AsyncSession = Depends(get_db),
    claims: TokenClaims | None = Depends(optional_token),
) -> RefSimilarityResponse:
    """Score how similar ``ref`` and ``compare`` are within one Muse repo.

    The scoring is delegated to ``musehub_analysis.compute_ref_similarity``
    with ``ref`` as the base and ``compare`` as the second ref. Public repos
    are readable anonymously; any other visibility needs a token.

    This route has to stay registered ahead of the ``/{dimension}`` route,
    because ``similarity`` is also a valid dimension name and would otherwise
    be captured by it.

    Args:
        repo_id: Repo identifier from the path.
        ref: Base ref from the path.
        response: Outgoing response, used to set the cache headers.
        compare: Required second ref to compare against.
        db: Database session used for the repo lookup.
        claims: Token claims, or ``None`` for anonymous requests.

    Returns:
        The similarity report produced by the service layer.

    Raises:
        HTTPException: 404 when the repo lookup returns nothing; 401 when the
            repo is not public and the request carries no token.
    """
    repo = await musehub_repository.get_repo(db, repo_id)
    if repo is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Repo not found")

    needs_token = repo.visibility != "public"
    if needs_token and claims is None:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Authentication required to access private repos.",
            headers={"WWW-Authenticate": "Bearer"},
        )

    similarity = musehub_analysis.compute_ref_similarity(
        repo_id=repo_id, base_ref=ref, compare_ref=compare
    )

    # The validator is keyed on the (ref, compare) pair in request order.
    cache_headers = (
        ("ETag", _etag(repo_id, f"{ref}:{compare}", "similarity")),
        ("Last-Modified", _LAST_MODIFIED),
        ("Cache-Control", "private, max-age=60"),
    )
    for header_name, header_value in cache_headers:
        response.headers[header_name] = header_value
    return similarity
369
370
@router.get(
    "/repos/{repo_id}/analysis/{ref}/{dimension}",
    response_model=AnalysisResponse,
    operation_id="getAnalysisDimension",
    summary="Single-dimension analysis for a Muse ref",
    description=(
        "Returns structured JSON for one of the 13 supported musical dimensions. "
        "Supported dimensions: harmony, dynamics, motifs, form, groove, emotion, "
        "chord-map, contour, key, tempo, meter, similarity, divergence. "
        "Returns 404 for unknown dimension names."
    ),
)
async def get_dimension_analysis(
    repo_id: str,
    ref: str,
    dimension: str,
    response: Response,
    track: str | None = Query(None, description="Instrument track filter, e.g. 'bass', 'keys'"),
    section: str | None = Query(None, description="Section filter, e.g. 'chorus', 'verse_1'"),
    db: AsyncSession = Depends(get_db),
    claims: TokenClaims | None = Depends(optional_token),
) -> AnalysisResponse:
    """Return analysis for one musical dimension of a Muse repo ref.

    ``dimension`` is checked against ``ALL_DIMENSIONS`` before any database
    access, so an unknown name yields a 404 that lists the supported values
    rather than a validation error. The analysis itself is delegated to
    ``musehub_analysis.compute_analysis_response``.

    Args:
        repo_id: Repo identifier from the path.
        ref: Muse commit ref from the path.
        dimension: Dimension name from the path.
        response: Outgoing response, used to set the cache headers.
        track: Optional instrument-track filter forwarded to the service.
        section: Optional section filter forwarded to the service.
        db: Database session used for the repo lookup.
        claims: Token claims, or ``None`` for anonymous requests.

    Returns:
        The single-dimension analysis produced by the service layer.

    Raises:
        HTTPException: 404 for an unknown dimension or a missing repo; 401
            when the repo is not public and the request carries no token.
    """
    if dimension not in ALL_DIMENSIONS:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Unknown dimension {dimension!r}. Supported: {', '.join(ALL_DIMENSIONS)}",
        )

    repo = await musehub_repository.get_repo(db, repo_id)
    if repo is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Repo not found")
    if repo.visibility != "public" and claims is None:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Authentication required to access private repos.",
            headers={"WWW-Authenticate": "Bearer"},
        )

    result = musehub_analysis.compute_analysis_response(
        repo_id=repo_id,
        dimension=dimension,
        ref=ref,
        track=track,
        section=section,
    )

    # ?track= / ?section= change the payload, so they must change the
    # validator as well. Unfiltered requests keep the bare dimension name as
    # the key.
    filters = "".join(
        f":{name}={value}"
        for name, value in (("track", track), ("section", section))
        if value is not None
    )
    response.headers["ETag"] = _etag(repo_id, ref, f"{dimension}{filters}")
    response.headers["Last-Modified"] = _LAST_MODIFIED
    response.headers["Cache-Control"] = "private, max-age=60"
    return result
431
432
@router.get(
    "/repos/{repo_id}/analysis/{ref}/dynamics/page",
    response_model=DynamicsPageData,
    operation_id="getAnalysisDynamicsPage",
    summary="Per-track dynamics page data for the Dynamics Analysis page",
    description=(
        "Returns enriched per-track dynamic analysis: velocity profiles, arc "
        "classifications, peak velocity, velocity range, and cross-track loudness "
        "data. Consumed by the Dynamics Analysis web page and by AI agents that "
        "need per-track dynamic context for orchestration decisions. "
        "Use ``?track=<name>`` to restrict to a single instrument track. "
        "Use ``?section=<label>`` to restrict to a musical section."
    ),
)
async def get_dynamics_page_data(
    repo_id: str,
    ref: str,
    response: Response,
    track: str | None = Query(None, description="Instrument track filter, e.g. 'bass', 'keys'"),
    section: str | None = Query(None, description="Section filter, e.g. 'chorus', 'verse_1'"),
    db: AsyncSession = Depends(get_db),
    claims: TokenClaims | None = Depends(optional_token),
) -> DynamicsPageData:
    """Return per-track dynamics data for the Dynamics Analysis web page.

    The data is produced by ``musehub_analysis.compute_dynamics_page_data``.
    Public repos are readable anonymously; any other visibility needs a token.

    Cache semantics: the ETag is derived from ``repo_id``, ``ref``, the
    ``"dynamics-page"`` sentinel, and whichever of ``track`` / ``section``
    were supplied.

    Args:
        repo_id: Repo identifier from the path.
        ref: Muse commit ref from the path.
        response: Outgoing response, used to set the cache headers.
        track: Optional instrument-track filter forwarded to the service.
        section: Optional section filter forwarded to the service.
        db: Database session used for the repo lookup.
        claims: Token claims, or ``None`` for anonymous requests.

    Returns:
        The dynamics page data produced by the service layer.

    Raises:
        HTTPException: 404 when the repo lookup returns nothing; 401 when the
            repo is not public and the request carries no token.
    """
    repo = await musehub_repository.get_repo(db, repo_id)
    if repo is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Repo not found")
    if repo.visibility != "public" and claims is None:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Authentication required to access private repos.",
            headers={"WWW-Authenticate": "Bearer"},
        )

    result = musehub_analysis.compute_dynamics_page_data(
        repo_id=repo_id,
        ref=ref,
        track=track,
        section=section,
    )

    # A single-track or single-section view is a different representation from
    # the full page, so supplied filters are folded into the validator.
    # Unfiltered requests keep the original "dynamics-page" key.
    filters = "".join(
        f":{name}={value}"
        for name, value in (("track", track), ("section", section))
        if value is not None
    )
    response.headers["ETag"] = _etag(repo_id, ref, f"dynamics-page{filters}")
    response.headers["Last-Modified"] = _LAST_MODIFIED
    response.headers["Cache-Control"] = "private, max-age=60"
    return result
489
490
# Dedicated harmony router — must be included BEFORE the main analysis router in
# __init__.py so this specific path takes priority over the generic /{dimension}
# catch-all route. See: musehub/api/routes/musehub/__init__.py.
harmony_router = APIRouter()


@harmony_router.get(
    "/repos/{repo_id}/analysis/{ref}/harmony",
    response_model=HarmonyAnalysisResponse,
    operation_id="getAnalysisHarmony",
    summary="Harmonic analysis — Roman numerals, cadences, and modulations for a ref",
    description=(
        "Returns a Roman-numeral-centric harmonic analysis of a Muse commit ref. "
        "Maps to the ``muse harmony --ref {ref}`` CLI command.\n\n"
        "Unlike the generic ``/analysis/{ref}/harmony`` dimension (which returns "
        "raw chord symbols and a tension curve), this endpoint returns:\n\n"
        "- **key** and **mode**: detected tonal centre and scale type\n"
        "- **roman_numerals**: each chord event labelled with scale degree, root, "
        "quality, and tonal function (tonic / subdominant / dominant)\n"
        "- **cadences**: detected phrase-ending cadence types and their beat positions\n"
        "- **modulations**: key-area changes with from/to key and pivot chord\n"
        "- **harmonic_rhythm_bpm**: rate of chord changes in chords per minute\n\n"
        "Agents use this to compose harmonically coherent continuations that respect "
        "existing tonal narrative, cadence structure, and phrase boundaries. "
        "Use ``?track=<instrument>`` or ``?section=<label>`` to narrow the scope."
    ),
)
async def get_harmony_analysis(
    repo_id: str,
    ref: str,
    response: Response,
    track: str | None = Query(None, description="Instrument track filter, e.g. 'bass', 'keys'"),
    section: str | None = Query(None, description="Section filter, e.g. 'chorus', 'verse_1'"),
    db: AsyncSession = Depends(get_db),
    claims: TokenClaims | None = Depends(optional_token),
) -> HarmonyAnalysisResponse:
    """Return dedicated harmonic analysis for a Muse repo ref.

    The analysis is produced by ``musehub_analysis.compute_harmony_analysis``.
    Public repos are readable anonymously; any other visibility needs a token.

    Args:
        repo_id: Repo identifier from the path.
        ref: Muse commit ref from the path.
        response: Outgoing response, used to set the cache headers.
        track: Optional instrument-track filter forwarded to the service.
        section: Optional section filter forwarded to the service.
        db: Database session used for the repo lookup.
        claims: Token claims, or ``None`` for anonymous requests.

    Returns:
        The harmonic analysis produced by the service layer.

    Raises:
        HTTPException: 404 when the repo lookup returns nothing; 401 when the
            repo is not public and the request carries no token.
    """
    repo = await musehub_repository.get_repo(db, repo_id)
    if repo is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Repo not found")
    if repo.visibility != "public" and claims is None:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Authentication required to access private repos.",
            headers={"WWW-Authenticate": "Bearer"},
        )

    result = musehub_analysis.compute_harmony_analysis(
        repo_id=repo_id,
        ref=ref,
        track=track,
        section=section,
    )

    # The analysis is narrowed by ?track= / ?section=, so the validator must
    # reflect them. Unfiltered requests keep the original "harmony" key.
    filters = "".join(
        f":{name}={value}"
        for name, value in (("track", track), ("section", section))
        if value is not None
    )
    response.headers["ETag"] = _etag(repo_id, ref, f"harmony{filters}")
    response.headers["Last-Modified"] = _LAST_MODIFIED
    response.headers["Cache-Control"] = "private, max-age=60"
    return result