"""Muse Hub Analysis Service — structured musical analysis for agent consumption.

This module is the single orchestration point for all 13 analysis dimensions.
Route handlers delegate here; no business logic lives in routes.

Why this exists
---------------
AI agents need structured, typed JSON data to make informed composition
decisions. HTML analysis pages are not machine-readable. This service
bridges the gap by returning fully-typed Pydantic models for every musical
dimension of a Muse commit.

Stub implementation
-------------------
Full MIDI content analysis will be wired in once Storpheus exposes a
per-dimension introspection route. Until then, the service returns
deterministic stub data keyed on the ``ref`` value — deterministic so that
agents receive consistent responses across retries and across sessions.

The stub data is musically realistic: values are drawn from realistic ranges
for jazz/soul/pop production and are internally consistent within each
dimension (e.g. the key reported by ``harmony`` matches the key reported by
``key``).

Boundary rules
--------------
- Pure data — no side effects, no external I/O beyond reading ``ref``.
- Must NOT import StateStore, EntityRegistry, or executor modules.
- Must NOT import LLM handlers or muse_* pipeline modules.
"""
from __future__ import annotations

import hashlib
import logging
from collections import defaultdict
from collections.abc import Callable
from datetime import datetime, timezone
from typing import Literal

from sqlalchemy.ext.asyncio import AsyncSession

from musehub.models.musehub import (
    ArrangementCellData,
    ArrangementColumnSummary,
    ArrangementMatrixResponse,
    ArrangementRowSummary,
)
from musehub.models.musehub_analysis import (
    ALL_DIMENSIONS,
    AggregateAnalysisResponse,
    AlternateKey,
    AnalysisFilters,
    AnalysisResponse,
    CadenceEvent,
    ChangedDimension,
    ChordEvent,
    ChordMapData,
    CommitEmotionSnapshot,
    CompareDimension,
    CompareResult,
    ContourData,
    ContextResult,
    DimensionData,
    DivergenceData,
    DivergenceDimension,
    DivergenceResult,
    DynamicArc,
    DynamicsData,
    DynamicsPageData,
    EmotionData,
    EmotionDelta8D,
    EmotionDiffResponse,
    EmotionDrift,
    EmotionMapPoint,
    EmotionMapResponse,
    EmotionVector,
    EmotionVector8D,
    FormData,
    FormStructureResponse,
    GrooveData,
    HarmonyAnalysisResponse,
    HarmonyData,
    HarmonyModulationEvent,
    IrregularSection,
    KeyData,
    MeterData,
    ModulationPoint,
    MotifEntry,
    MotifRecurrenceCell,
    MotifTransformation,
    MotifsData,
    RecallMatch,
    RecallResponse,
    RepetitionEntry,
    RomanNumeralEvent,
    SectionEntry,
    SectionMapEntry,
    SectionSimilarityHeatmap,
    RefSimilarityDimensions,
    RefSimilarityResponse,
    SimilarCommit,
    SimilarityData,
    TempoChange,
    TempoData,
    TrackDynamicsProfile,
    VelocityEvent,
)

logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Stub data constants — musically realistic, deterministic by ref hash
# ---------------------------------------------------------------------------

_MODES = ["major", "minor", "dorian", "mixolydian", "lydian", "phrygian"]
_EMOTIONS = ["joyful", "melancholic", "tense", "serene", "energetic", "brooding"]
_FORMS = ["AABA", "verse-chorus", "through-composed", "rondo", "binary", "ternary"]
_GROOVES = ["straight", "swing", "shuffled", "half-time", "double-time"]
_TONICS = ["C", "F", "G", "D", "Bb", "Eb"]
_DYNAMIC_ARCS: list[DynamicArc] = [
    "flat", "terraced", "crescendo", "decrescendo", "swell", "hairpin",
]
_DEFAULT_TRACKS = ["bass", "keys", "drums", "melody", "pads"]


def _ref_hash(ref: str) -> int:
    """Derive a stable integer seed from a ref string for deterministic stubs."""
    return int(hashlib.md5(ref.encode()).hexdigest(), 16) # noqa: S324 — non-crypto use


def _pick(seed: int, items: list[str], offset: int = 0) -> str:
    return items[(seed + offset) % len(items)]


def _utc_now() -> datetime:
    return datetime.now(tz=timezone.utc)


# ---------------------------------------------------------------------------
# Per-dimension stub builders
# ---------------------------------------------------------------------------


def _build_harmony(ref: str, track: str | None, section: str | None) -> HarmonyData:
    """Build stub harmonic analysis. Deterministic for a given ref."""
    seed = _ref_hash(ref)
    tonic = _pick(seed, _TONICS)
    mode = _pick(seed, _MODES)
    total_beats = 32

    progression = [
        ChordEvent(beat=0.0, chord=f"{tonic}maj7", function="Imaj7", tension=0.1),
        ChordEvent(beat=4.0, chord="Am7", function="VIm7", tension=0.2),
        ChordEvent(beat=8.0, chord="Dm7", function="IIm7", tension=0.25),
        ChordEvent(beat=12.0, chord="G7", function="V7", tension=0.6),
        ChordEvent(beat=16.0, chord=f"{tonic}maj7", function="Imaj7", tension=0.1),
        ChordEvent(beat=20.0, chord="Em7b5", function="VIIm7b5", tension=0.7),
        ChordEvent(beat=24.0, chord="A7", function="V7/IIm", tension=0.65),
        ChordEvent(beat=28.0, chord="Dm7", function="IIm7", tension=0.25),
    ]

    tension_curve = [
        round(0.1 + 0.5 * abs((i - total_beats / 2) / (total_beats / 2)) * (seed % 3 + 1) / 3, 4)
        for i in range(total_beats)
    ]

    modulation_points = (
        [ModulationPoint(beat=16.0, from_key=f"{tonic} {mode}", to_key=f"G {mode}", confidence=0.72)]
        if seed % 3 == 0
        else []
    )

    return HarmonyData(
        tonic=tonic,
        mode=mode,
        key_confidence=round(0.7 + (seed % 30) / 100, 4),
        chord_progression=progression,
        tension_curve=tension_curve,
        modulation_points=modulation_points,
        total_beats=total_beats,
    )


def _build_dynamics(ref: str, track: str | None, section: str | None) -> DynamicsData:
    seed = _ref_hash(ref)
    base_vel = 64 + (seed % 32)
    peak = min(127, base_vel + 30)
    low = max(20, base_vel - 20)

    curve = [
        VelocityEvent(
            beat=float(i * 2),
            velocity=min(127, max(20, base_vel + (seed >> i) % 20 - 10)),
        )
        for i in range(16)
    ]

    events = ["crescendo@8", "sfz@16"] if seed % 2 == 0 else ["diminuendo@12", "fp@0"]

    return DynamicsData(
        peak_velocity=peak,
        mean_velocity=round(float(base_vel), 2),
        min_velocity=low,
        dynamic_range=peak - low,
        velocity_curve=curve,
        dynamic_events=events,
    )


_CONTOUR_LABELS = [
    "ascending-step",
    "descending-step",
    "arch",
    "valley",
    "oscillating",
    "static",
]
_TRANSFORMATION_TYPES = ["inversion", "retrograde", "retrograde-inversion", "transposition"]
_MOTIF_TRACKS = ["melody", "bass", "keys", "strings", "brass"]
_MOTIF_SECTIONS = ["intro", "verse_1", "chorus", "verse_2", "outro"]


def _invert_intervals(intervals: list[int]) -> list[int]:
    """Return the melodic inversion (negate all semitone intervals)."""
    return [-x for x in intervals]


def _retrograde_intervals(intervals: list[int]) -> list[int]:
    """Return the retrograde (reversed interval sequence)."""
    return list(reversed(intervals))


def _build_motifs(ref: str, track: str | None, section: str | None) -> MotifsData:
    """Build stub motif analysis with transformations, contour, and recurrence grid.

    Deterministic for a given ``ref`` value. Produces 2–4 motifs, each with:
    - Original interval sequence and occurrence beats
    - Melodic contour label (arch, valley, oscillating, etc.)
    - All tracks where the motif or its transformations appear
    - Up to 3 transformations (inversion, retrograde, transposition)
    - Flat track×section recurrence grid for heatmap rendering
    """
    seed = _ref_hash(ref)
    n_motifs = 2 + (seed % 3)
    all_tracks = _MOTIF_TRACKS[: 2 + (seed % 3)]
    sections = _MOTIF_SECTIONS

    motifs: list[MotifEntry] = []
    for i in range(n_motifs):
        intervals = [2, -1, 3, -2][: 2 + i]
        occurrences = [float(j * 8 + i * 2) for j in range(2 + (seed % 2))]
        contour_label = _pick(seed, _CONTOUR_LABELS, offset=i)
        primary_track = track or all_tracks[i % len(all_tracks)]

        # Cross-track sharing: motif appears in 1–3 tracks
        n_sharing_tracks = 1 + (seed + i) % min(3, len(all_tracks))
        sharing_tracks = [all_tracks[(i + k) % len(all_tracks)] for k in range(n_sharing_tracks)]
        if primary_track not in sharing_tracks:
            sharing_tracks = [primary_track] + sharing_tracks[: n_sharing_tracks - 1]

        # Build transformations
        transformations: list[MotifTransformation] = []
        inv_occurrences = [float(j * 8 + i * 2 + 4) for j in range(1 + (seed % 2))]
        transformations.append(
            MotifTransformation(
                transformation_type="inversion",
                intervals=_invert_intervals(intervals),
                transposition_semitones=0,
                occurrences=inv_occurrences,
                track=sharing_tracks[0],
            )
        )
        if len(intervals) >= 2:
            retro_occurrences = [float(j * 8 + i * 2 + 2) for j in range(1 + (seed % 2))]
            transformations.append(
                MotifTransformation(
                    transformation_type="retrograde",
                    intervals=_retrograde_intervals(intervals),
                    transposition_semitones=0,
                    occurrences=retro_occurrences,
                    track=sharing_tracks[-1],
                )
            )
        if (seed + i) % 2 == 0:
            transpose_by = 5 if (seed % 2 == 0) else 7
            transpo_occurrences = [float(j * 16 + i * 2) for j in range(1 + (seed % 2))]
            transformations.append(
                MotifTransformation(
                    transformation_type="transposition",
                    intervals=[x for x in intervals],
                    transposition_semitones=transpose_by,
                    occurrences=transpo_occurrences,
                    track=sharing_tracks[min(1, len(sharing_tracks) - 1)],
                )
            )

        # Build recurrence grid: track × section
        recurrence_grid: list[MotifRecurrenceCell] = []
        for t in all_tracks:
            for s in sections:
                # Original present in primary track, first two sections
                if t == primary_track and s in sections[:2]:
                    recurrence_grid.append(
                        MotifRecurrenceCell(
                            track=t,
                            section=s,
                            present=True,
                            occurrence_count=1 + (seed % 2),
                            transformation_types=["original"],
                        )
                    )
                # Inversion in sharing tracks at chorus
                elif t in sharing_tracks and s == "chorus":
                    recurrence_grid.append(
                        MotifRecurrenceCell(
                            track=t,
                            section=s,
                            present=True,
                            occurrence_count=1,
                            transformation_types=["inversion"],
                        )
                    )
                # Transposition in bridge / outro for certain motifs
                elif (seed + i) % 2 == 0 and t in sharing_tracks and s == "outro":
                    recurrence_grid.append(
                        MotifRecurrenceCell(
                            track=t,
                            section=s,
                            present=True,
                            occurrence_count=1,
                            transformation_types=["transposition"],
                        )
                    )
                else:
                    recurrence_grid.append(
                        MotifRecurrenceCell(
                            track=t,
                            section=s,
                            present=False,
                            occurrence_count=0,
                            transformation_types=[],
                        )
                    )

        motifs.append(
            MotifEntry(
                motif_id=f"M{i + 1:02d}",
                intervals=intervals,
                length_beats=float(2 + i),
                occurrence_count=len(occurrences),
                occurrences=occurrences,
                track=primary_track,
                contour_label=contour_label,
                tracks=sharing_tracks,
                transformations=transformations,
                recurrence_grid=recurrence_grid,
            )
        )

    return MotifsData(
        total_motifs=len(motifs),
        motifs=motifs,
        sections=sections,
        all_tracks=all_tracks,
    )


def _build_form(ref: str, track: str | None, section: str | None) -> FormData:
    seed = _ref_hash(ref)
    form_label = _pick(seed, _FORMS)
    sections = [
        SectionEntry(label="intro", function="exposition", start_beat=0.0, end_beat=8.0, length_beats=8.0),
        SectionEntry(label="verse_1", function="statement", start_beat=8.0, end_beat=24.0, length_beats=16.0),
        SectionEntry(label="chorus", function="climax", start_beat=24.0, end_beat=40.0, length_beats=16.0),
        SectionEntry(label="verse_2", function="restatement", start_beat=40.0, end_beat=56.0, length_beats=16.0),
        SectionEntry(label="outro", function="resolution", start_beat=56.0, end_beat=64.0, length_beats=8.0),
    ]
    return FormData(form_label=form_label, total_beats=64, sections=sections)


def _build_groove(ref: str, track: str | None, section: str | None) -> GrooveData:
    seed = _ref_hash(ref)
    style = _pick(seed, _GROOVES)
    swing = 0.5 if style == "straight" else round(0.55 + (seed % 20) / 100, 4)
    bpm = round(80.0 + (seed % 80), 1)
    return GrooveData(
        swing_factor=swing,
        grid_resolution="1/16" if style == "straight" else "1/8T",
        onset_deviation=round(0.01 + (seed % 10) / 200, 4),
        groove_score=round(0.6 + (seed % 40) / 100, 4),
        style=style,
        bpm=bpm,
    )


def _build_emotion(ref: str, track: str | None, section: str | None) -> EmotionData:
    seed = _ref_hash(ref)
    emotion = _pick(seed, _EMOTIONS)
    valence_map: dict[str, float] = {
        "joyful": 0.8, "melancholic": -0.5, "tense": -0.3,
        "serene": 0.4, "energetic": 0.6, "brooding": -0.7,
    }
    arousal_map: dict[str, float] = {
        "joyful": 0.7, "melancholic": 0.3, "tense": 0.8,
        "serene": 0.2, "energetic": 0.9, "brooding": 0.5,
    }
    return EmotionData(
        valence=valence_map[emotion],
        arousal=arousal_map[emotion],
        tension=round(0.1 + (seed % 60) / 100, 4),
        primary_emotion=emotion,
        confidence=round(0.65 + (seed % 35) / 100, 4),
    )


def _build_chord_map(ref: str, track: str | None, section: str | None) -> ChordMapData:
    harmony = _build_harmony(ref, track, section)
    return ChordMapData(
        progression=harmony.chord_progression,
        total_chords=len(harmony.chord_progression),
        total_beats=harmony.total_beats,
    )


def _build_contour(ref: str, track: str | None, section: str | None) -> ContourData:
    seed = _ref_hash(ref)
    shapes = ["arch", "ascending", "descending", "flat", "wave"]
    shape = _pick(seed, shapes)
    base_pitch = 60 + (seed % 12)
    pitch_curve = [
        round(base_pitch + 5 * (i / 16) * (1 if seed % 2 == 0 else -1) + (seed >> i) % 3, 1)
        for i in range(16)
    ]
    return ContourData(
        shape=shape,
        direction_changes=1 + (seed % 4),
        peak_beat=float(4 + (seed % 12)),
        valley_beat=float(seed % 8),
        overall_direction="up" if seed % 3 == 0 else ("down" if seed % 3 == 1 else "flat"),
        pitch_curve=pitch_curve,
    )


def _build_key(ref: str, track: str | None, section: str | None) -> KeyData:
    seed = _ref_hash(ref)
    tonic = _pick(seed, _TONICS)
    mode = _pick(seed, _MODES[:2])
    rel_choices = ["A", "D", "E", "B", "G", "C"]
    relative = f"{_pick(seed + 3, rel_choices)}m" if mode == "major" else f"{tonic}m"
    alternates = [
        AlternateKey(
            tonic=_pick(seed + 2, ["G", "D", "A", "E", "Bb"]),
            mode="dorian",
            confidence=round(0.3 + (seed % 20) / 100, 4),
        )
    ]
    return KeyData(
        tonic=tonic,
        mode=mode,
        confidence=round(0.75 + (seed % 25) / 100, 4),
        relative_key=relative,
        alternate_keys=alternates,
    )


def _build_tempo(ref: str, track: str | None, section: str | None) -> TempoData:
    seed = _ref_hash(ref)
    bpm = round(80.0 + (seed % 80), 1)
    stability = round(0.7 + (seed % 30) / 100, 4)
    feels = ["straight", "laid-back", "rushing"]
    feel = _pick(seed, feels)
    changes = (
        [TempoChange(beat=32.0, bpm=round(bpm * 1.05, 1))]
        if seed % 4 == 0
        else []
    )
    return TempoData(bpm=bpm, stability=stability, time_feel=feel, tempo_changes=changes)


def _build_meter(ref: str, track: str | None, section: str | None) -> MeterData:
    seed = _ref_hash(ref)
    sigs = ["4/4", "3/4", "6/8", "5/4", "7/8"]
    sig = _pick(seed, sigs[:2])
    is_compound = sig in ("6/8", "12/8")
    profile_44 = [1.0, 0.2, 0.6, 0.2]
    profile_34 = [1.0, 0.3, 0.5]
    profile = profile_44 if sig == "4/4" else profile_34
    irregular: list[IrregularSection] = (
        [IrregularSection(start_beat=24.0, end_beat=25.0, time_signature="5/4")]
        if seed % 5 == 0
        else []
    )
    return MeterData(
        time_signature=sig,
        irregular_sections=irregular,
        beat_strength_profile=profile,
        is_compound=is_compound,
    )


def _build_similarity(ref: str, track: str | None, section: str | None) -> SimilarityData:
    seed = _ref_hash(ref)
    n = 1 + (seed % 3)
    similar = [
        SimilarCommit(
            ref=f"commit_{hashlib.md5(f'{ref}{i}'.encode()).hexdigest()[:8]}", # noqa: S324
            score=round(0.5 + (seed >> i) % 50 / 100, 4),
            shared_motifs=[f"M{j + 1:02d}" for j in range(1 + i % 2)],
            commit_message=f"Add {'bridge' if i == 0 else 'variation'} section",
        )
        for i in range(n)
    ]
    return SimilarityData(similar_commits=similar, embedding_dimensions=128)


def _build_divergence(ref: str, track: str | None, section: str | None) -> DivergenceData:
    seed = _ref_hash(ref)
    score = round((seed % 60) / 100, 4)
    changed = [
        ChangedDimension(
            dimension="harmony",
            change_magnitude=round(0.2 + (seed % 40) / 100, 4),
            description="Key shifted from C major to F major",
        ),
        ChangedDimension(
            dimension="tempo",
            change_magnitude=round(0.1 + (seed % 20) / 100, 4),
            description="BPM increased by ~8%",
        ),
    ]
    return DivergenceData(
        divergence_score=score,
        base_ref=f"parent:{ref[:8]}",
        changed_dimensions=changed,
    )


# ---------------------------------------------------------------------------
# Dimension dispatch table
# ---------------------------------------------------------------------------

# Each builder has signature (ref: str, track: str | None, section: str | None) -> DimensionData
_DimBuilder = Callable[[str, str | None, str | None], DimensionData]

_BUILDERS: dict[str, _DimBuilder] = {
    "harmony": _build_harmony,
    "dynamics": _build_dynamics,
    "motifs": _build_motifs,
    "form": _build_form,
    "groove": _build_groove,
    "emotion": _build_emotion,
    "chord-map": _build_chord_map,
    "contour": _build_contour,
    "key": _build_key,
    "tempo": _build_tempo,
    "meter": _build_meter,
    "similarity": _build_similarity,
    "divergence": _build_divergence,
}


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------


def compute_dimension(
    dimension: str,
    ref: str,
    track: str | None = None,
    section: str | None = None,
) -> DimensionData:
    """Compute analysis data for a single musical dimension.

    Dispatches to the appropriate stub builder based on ``dimension``.
    Returns a fully-typed Pydantic model for the given dimension.

    Args:
        dimension: One of the 13 supported dimension names.
        ref: Muse commit ref (branch name, commit ID, or tag).
        track: Optional instrument track filter.
        section: Optional musical section filter.

    Returns:
        Dimension-specific Pydantic data model.

    Raises:
        ValueError: If ``dimension`` is not a supported analysis dimension.
    """
    builder = _BUILDERS.get(dimension)
    if builder is None:
        raise ValueError(f"Unknown analysis dimension: {dimension!r}")
    return builder(ref, track, section)


def compute_analysis_response(
    *,
    repo_id: str,
    dimension: str,
    ref: str,
    track: str | None = None,
    section: str | None = None,
) -> AnalysisResponse:
    """Build a complete :class:`AnalysisResponse` envelope for one dimension.

    This is the primary entry point for the single-dimension endpoint.

    Args:
        repo_id: Muse Hub repo UUID.
        dimension: Analysis dimension name.
        ref: Muse commit ref.
        track: Optional track filter.
        section: Optional section filter.

    Returns:
        :class:`AnalysisResponse` with typed ``data`` and filter metadata.
    """
    data = compute_dimension(dimension, ref, track, section)
    response = AnalysisResponse(
        dimension=dimension,
        ref=ref,
        computed_at=_utc_now(),
        data=data,
        filters_applied=AnalysisFilters(track=track, section=section),
    )
    logger.info("✅ analysis/%s repo=%s ref=%s", dimension, repo_id[:8], ref)
    return response


def compute_form_structure(*, repo_id: str, ref: str) -> FormStructureResponse:
    """Build a combined form and structure response for the UI visualisation.

    Derives three complementary structural views from the ``form`` and ``meter``
    stubs so the form-structure page can render a section map, a repetition
    panel, and a section-comparison heatmap in a single API call.

    Stub implementation: values are deterministic for a given ``ref`` and
    musically consistent with the results of the ``form`` and ``meter``
    analysis dimensions.

    Args:
        repo_id: Muse Hub repo UUID (used for logging only).
        ref: Muse commit ref.

    Returns:
        :class:`FormStructureResponse` with section_map, repetition_structure,
        and section_comparison fields populated.
    """
    seed = _ref_hash(ref)
    form_data = _build_form(ref, None, None)
    meter_data = _build_meter(ref, None, None)

    time_sig = meter_data.time_signature
    beats_per_bar = 3 if time_sig == "3/4" else (6 if time_sig == "6/8" else 4)

    # Colour palette for section types — stable mapping
    _SECTION_COLORS: dict[str, str] = {
        "intro": "#1f6feb",
        "verse": "#3fb950",
        "chorus": "#f0883e",
        "bridge": "#bc8cff",
        "outro": "#8b949e",
        "pre-chorus": "#ff7b72",
        "breakdown": "#56d364",
    }

    def _section_color(label: str) -> str:
        for key, color in _SECTION_COLORS.items():
            if key in label.lower():
                return color
        return "#58a6ff"

    # Build section map (convert beats → bars, 1-indexed)
    section_map: list[SectionMapEntry] = []
    for sec in form_data.sections:
        start_bar = max(1, int(sec.start_beat / beats_per_bar) + 1)
        end_bar = max(start_bar, int((sec.end_beat - 1) / beats_per_bar) + 1)
        bar_count = end_bar - start_bar + 1
        section_map.append(
            SectionMapEntry(
                label=sec.label,
                function=sec.function,
                start_bar=start_bar,
                end_bar=end_bar,
                bar_count=bar_count,
                color_hint=_section_color(sec.label),
            )
        )

    total_bars = max(1, int(form_data.total_beats / beats_per_bar))

    # Build repetition structure — group sections by base label (strip _N suffix)
    def _base_label(label: str) -> str:
        """Strip numeric suffix from section label, e.g. 'verse_1' → 'verse'."""
        parts = label.rsplit("_", 1)
        if len(parts) == 2 and parts[1].isdigit():
            return parts[0]
        return label

    groups: dict[str, list[int]] = defaultdict(list)
    for entry in section_map:
        groups[_base_label(entry.label)].append(entry.start_bar)

    repetition_structure = [
        RepetitionEntry(
            pattern_label=pattern,
            occurrences=bars,
            occurrence_count=len(bars),
            similarity_score=round(0.85 + (seed % 15) / 100, 4) if len(bars) > 1 else 1.0,
        )
        for pattern, bars in groups.items()
        if len(bars) >= 1
    ]

    # Build section-comparison heatmap — symmetric cosine-similarity stub
    labels = [s.label for s in section_map]
    n = len(labels)
    matrix: list[list[float]] = []
    for i in range(n):
        row: list[float] = []
        for j in range(n):
            if i == j:
                row.append(1.0)
            else:
                # Sections with the same base label score higher
                same_base = _base_label(labels[i]) == _base_label(labels[j])
                base_sim = 0.75 if same_base else round(0.1 + (seed >> (i + j)) % 50 / 100, 4)
                row.append(min(1.0, base_sim))
        matrix.append(row)

    section_comparison = SectionSimilarityHeatmap(labels=labels, matrix=matrix)

    logger.info("✅ form-structure repo=%s ref=%s sections=%d", repo_id[:8], ref, len(section_map))
    return FormStructureResponse(
        repo_id=repo_id,
        ref=ref,
        form_label=form_data.form_label,
        time_signature=time_sig,
        beats_per_bar=beats_per_bar,
        total_bars=total_bars,
        section_map=section_map,
        repetition_structure=repetition_structure,
        section_comparison=section_comparison,
    )


def _build_track_dynamics_profile(
    ref: str,
    track: str,
    track_index: int,
) -> TrackDynamicsProfile:
    """Build a deterministic per-track dynamic profile for the dynamics page.

    Seed is derived from ``ref`` XOR ``track_index`` so each track gets a
    distinct but reproducible curve for the same ref.
    """
    seed = _ref_hash(ref) ^ (track_index * 0x9E3779B9)
    base_vel = 50 + (seed % 50)
    peak = min(127, base_vel + 20 + (seed % 30))
    low = max(10, base_vel - 20 - (seed % 20))
    mean = round(float((peak + low) / 2), 2)

    curve = [
        VelocityEvent(
            beat=float(i * 2),
            velocity=min(127, max(10, base_vel + (seed >> (i % 16)) % 25 - 12)),
        )
        for i in range(16)
    ]

    arc: DynamicArc = _DYNAMIC_ARCS[(seed + track_index) % len(_DYNAMIC_ARCS)]

    return TrackDynamicsProfile(
        track=track,
        peak_velocity=peak,
        min_velocity=low,
        mean_velocity=mean,
        velocity_range=peak - low,
        arc=arc,
        velocity_curve=curve,
    )


def compute_dynamics_page_data(
    *,
    repo_id: str,
    ref: str,
    track: str | None = None,
    section: str | None = None,
) -> DynamicsPageData:
    """Build per-track dynamics data for the Dynamics Analysis page.

    Returns one :class:`TrackDynamicsProfile` per active track, or a single
    entry when ``track`` filter is applied. Each profile includes a velocity
    curve suitable for rendering a profile graph, an arc classification badge,
    and peak/range metrics for the loudness comparison bar chart.

    Args:
        repo_id: Muse Hub repo UUID.
        ref: Muse commit ref (branch name, commit ID, or tag).
        track: Optional track filter — if set, only that track is returned.
        section: Optional section filter (recorded in ``filters_applied``).

    Returns:
        :class:`DynamicsPageData` with per-track profiles.
    """
    tracks_to_include = [track] if track else _DEFAULT_TRACKS
    profiles = [
        _build_track_dynamics_profile(ref, t, i)
        for i, t in enumerate(tracks_to_include)
    ]
    now = _utc_now()
    logger.info(
        "✅ dynamics/page repo=%s ref=%s tracks=%d",
        repo_id[:8], ref, len(profiles),
    )
    return DynamicsPageData(
        ref=ref,
        repo_id=repo_id,
        computed_at=now,
        tracks=profiles,
        filters_applied=AnalysisFilters(track=track, section=section),
    )


def compute_emotion_map(
    *,
    repo_id: str,
    ref: str,
    track: str | None = None,
    section: str | None = None,
) -> EmotionMapResponse:
    """Build a complete :class:`EmotionMapResponse` for an emotion map page.

    Returns per-beat intra-ref evolution, cross-commit trajectory, drift
    distances between consecutive commits, a generated narrative, and source
    attribution. All data is deterministic for a given ``ref`` so agents
    receive consistent results across retries.

    Why separate from ``compute_dimension('emotion', ...)``
    -------------------------------------------------------
    The generic emotion dimension returns a single aggregate snapshot. This
    function returns the *temporal* and *cross-commit* shape of the emotional
    arc — the information needed to render line charts and trajectory plots.

    Args:
        repo_id: Muse Hub repo UUID (used for logging).
        ref: Head Muse commit ref (branch name or commit ID).
        track: Optional instrument track filter.
        section: Optional musical section filter.

    Returns:
        :class:`EmotionMapResponse` with evolution, trajectory, drift, and narrative.
    """
    seed = _ref_hash(ref)
    now = _utc_now()

    # ── Per-beat evolution within this ref ─────────────────────────────────
    total_beats = 32
    evolution: list[EmotionMapPoint] = []
    for i in range(total_beats):
        phase = i / total_beats
        # Each axis follows a gentle sinusoidal arc seeded by ref hash
        energy = round(0.4 + 0.4 * abs((phase - 0.5) * 2) * (1 + (seed >> (i % 16)) % 3) / 4, 4)
        valence = round(max(0.0, min(1.0, 0.5 + 0.3 * ((seed + i) % 7 - 3) / 3)), 4)
        tension_val = round(min(1.0, 0.2 + 0.6 * phase * (1 + (seed % 3) / 3)), 4)
        darkness = round(max(0.0, min(1.0, 1.0 - valence * 0.6 - energy * 0.2)), 4)
        evolution.append(
            EmotionMapPoint(
                beat=float(i),
                vector=EmotionVector(
                    energy=energy,
                    valence=valence,
                    tension=tension_val,
                    darkness=darkness,
                ),
            )
        )

    # Summary vector = mean across all evolution points
    n = len(evolution)
    summary_vector = EmotionVector(
        energy=round(sum(p.vector.energy for p in evolution) / n, 4),
        valence=round(sum(p.vector.valence for p in evolution) / n, 4),
        tension=round(sum(p.vector.tension for p in evolution) / n, 4),
        darkness=round(sum(p.vector.darkness for p in evolution) / n, 4),
    )

    # ── Cross-commit trajectory (5 synthetic ancestor snapshots + head) ───
    _COMMIT_EMOTIONS = ["serene", "tense", "brooding", "joyful", "melancholic", "energetic"]
    trajectory: list[CommitEmotionSnapshot] = []
    n_commits = 5
    for j in range(n_commits):
        commit_seed = _ref_hash(f"{ref}:{j}")
        em = _COMMIT_EMOTIONS[(seed + j) % len(_COMMIT_EMOTIONS)]
        valence_traj = round(max(0.0, min(1.0, 0.3 + (commit_seed % 70) / 100)), 4)
        energy_traj = round(max(0.0, min(1.0, 0.2 + (commit_seed % 80) / 100)), 4)
        tension_traj = round(max(0.0, min(1.0, 0.1 + (commit_seed % 90) / 100)), 4)
        darkness_traj = round(max(0.0, min(1.0, 1.0 - valence_traj * 0.7)), 4)
        trajectory.append(
            CommitEmotionSnapshot(
                commit_id=hashlib.md5(f"{ref}:{j}".encode()).hexdigest()[:16], # noqa: S324
                message=f"Ancestor commit {n_commits - j}: {em} passage",
                timestamp=f"2026-0{1 + j % 9}-{10 + j:02d}T12:00:00Z",
                vector=EmotionVector(
                    energy=energy_traj,
                    valence=valence_traj,
                    tension=tension_traj,
                    darkness=darkness_traj,
                ),
                primary_emotion=em,
            )
        )
    # Append head commit snapshot
    trajectory.append(
        CommitEmotionSnapshot(
            commit_id=ref[:16] if len(ref) >= 16 else ref,
            message="HEAD — current composition state",
            timestamp=now.strftime("%Y-%m-%dT%H:%M:%SZ"),
            vector=summary_vector,
            primary_emotion=_COMMIT_EMOTIONS[seed % len(_COMMIT_EMOTIONS)],
        )
    )

    # ── Drift between consecutive commits ──────────────────────────────────
    _AXES = ["energy", "valence", "tension", "darkness"]
    drift: list[EmotionDrift] = []
    for k in range(len(trajectory) - 1):
        a = trajectory[k].vector
        b = trajectory[k + 1].vector
        diff = [
            abs(b.energy - a.energy),
            abs(b.valence - a.valence),
            abs(b.tension - a.tension),
            abs(b.darkness - a.darkness),
        ]
        euclidean = round((sum(d**2 for d in diff) ** 0.5), 4)
        dominant_change = _AXES[diff.index(max(diff))]
        drift.append(
            EmotionDrift(
                from_commit=trajectory[k].commit_id,
                to_commit=trajectory[k + 1].commit_id,
                drift=euclidean,
                dominant_change=dominant_change,
            )
        )

    # ── Narrative generation ───────────────────────────────────────────────
    head_em = trajectory[-1].primary_emotion
    first_em = trajectory[0].primary_emotion
    max_drift_entry = max(drift, key=lambda d: d.drift) if drift else None
    narrative_parts = [
        f"This composition begins with a {first_em} character",
        f"and arrives at a {head_em} state at the head commit.",
    ]
    if max_drift_entry is not None:
        narrative_parts.append(
            f"The largest emotional shift occurs between commits "
            f"{max_drift_entry.from_commit[:8]} and {max_drift_entry.to_commit[:8]}, "
            f"with a {max_drift_entry.dominant_change} shift of {max_drift_entry.drift:.2f}."
        )
    narrative = " ".join(narrative_parts)

    # ── Source attribution ─────────────────────────────────────────────────
    source: Literal["explicit", "inferred", "mixed"] = "inferred" # Full implementation will check commit metadata for explicit tags

    logger.info("✅ emotion-map repo=%s ref=%s beats=%d commits=%d", repo_id[:8], ref, n, len(trajectory))
    return EmotionMapResponse(
        repo_id=repo_id,
        ref=ref,
        computed_at=now,
        filters_applied=AnalysisFilters(track=track, section=section),
        evolution=evolution,
        summary_vector=summary_vector,
        trajectory=trajectory,
        drift=drift,
        narrative=narrative,
        source=source,
    )


def compute_aggregate_analysis(
    *,
    repo_id: str,
    ref: str,
    track: str | None = None,
    section: str | None = None,
) -> AggregateAnalysisResponse:
    """Build a complete :class:`AggregateAnalysisResponse` for all 13 dimensions.

    This is the primary entry point for the aggregate endpoint. All 13
    dimensions are computed in a single call so agents can retrieve the full
    musical picture without issuing 13 sequential requests.

    Args:
        repo_id: Muse Hub repo UUID.
        ref: Muse commit ref.
        track: Optional track filter (applied to all dimensions).
        section: Optional section filter (applied to all dimensions).

    Returns:
        :class:`AggregateAnalysisResponse` with one entry per dimension.
    """
    now = _utc_now()
    dimensions = [
        AnalysisResponse(
            dimension=dim,
            ref=ref,
            computed_at=now,
            data=compute_dimension(dim, ref, track, section),
            filters_applied=AnalysisFilters(track=track, section=section),
        )
        for dim in ALL_DIMENSIONS
    ]
    logger.info("✅ analysis/aggregate repo=%s ref=%s dims=%d", repo_id[:8], ref, len(dimensions))
    return AggregateAnalysisResponse(
        ref=ref,
        repo_id=repo_id,
        computed_at=now,
        dimensions=dimensions,
        filters_applied=AnalysisFilters(track=track, section=section),
    )


# ---------------------------------------------------------------------------
# Dedicated harmony endpoint — muse harmony command
# ---------------------------------------------------------------------------

_ROMAN_NUMERALS_BY_MODE: dict[str, list[tuple[str, str, str, str]]] = {
    # (roman, quality, function, root-offset-label)
    # root-offset-label is relative; actual root derived from tonic + offset
    "major": [
        ("I", "major", "tonic", "P1"),
        ("IIm7", "minor", "pre-dominant", "M2"),
        ("IIIm", "minor", "tonic", "M3"),
        ("IV", "major", "subdominant", "P4"),
        ("V7", "dominant","dominant", "P5"),
        ("VIm", "minor", "tonic", "M6"),
        ("VIIø", "half-diminished", "dominant", "M7"),
    ],
    "minor": [
        ("Im", "minor", "tonic", "P1"),
        ("IIø", "half-diminished", "pre-dominant", "M2"),
        ("bIII", "major", "tonic", "m3"),
        ("IVm", "minor", "subdominant", "P4"),
        ("V7", "dominant","dominant", "P5"),
        ("bVI", "major", "subdominant", "m6"),
        ("bVII", "major", "subdominant", "m7"),
    ],
}

# Semitone offsets for scale degrees so we can compute the actual root pitch.
_SEMITONE_OFFSETS: dict[str, int] = {
    "P1": 0, "M2": 2, "M3": 4, "P4": 5, "P5": 7,
    "M6": 9, "M7": 11, "m3": 3, "m6": 8, "m7": 10,
}

_CHROMATIC_SCALE = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]

_CADENCE_TYPES = ["authentic", "half", "plagal", "deceptive", "perfect-authentic"]

_HARMONY_MODES = ["major", "minor"]


def _transpose_root(tonic: str, semitones: int) -> str:
    """Return the pitch class that is ``semitones`` above ``tonic``."""
    try:
        base_idx = _CHROMATIC_SCALE.index(tonic)
    except ValueError:
        # Fallback for flat spellings like Bb, Eb — map to sharp equivalent.
        _FLAT_TO_SHARP = {"Bb": "A#", "Eb": "D#", "Ab": "G#", "Db": "C#", "Gb": "F#"}
        base_idx = _CHROMATIC_SCALE.index(_FLAT_TO_SHARP.get(tonic, "C"))
    return _CHROMATIC_SCALE[(base_idx + semitones) % 12]


def compute_harmony_analysis(
    *,
    repo_id: str,
    ref: str,
    track: str | None = None,
    section: str | None = None,
) -> HarmonyAnalysisResponse:
    """Build a dedicated harmonic analysis for a Muse commit ref.

    Returns a Roman-numeral-centric view of the harmonic content. Unlike the
    generic ``harmony`` dimension (which returns :class:`HarmonyData` with raw
    chord symbols and a tension curve), this response is structured for tonal
    reasoning: Roman numerals with function labels, cadence positions, and
    detected modulations.

    Maps to the ``muse harmony --ref {ref}`` CLI command.

    The stub data is deterministic for a given ``ref`` so agents receive
    consistent responses across retries. Harmonic content is keyed on
    tonic/mode derived from the ref hash — the same tonic and mode that the
    generic harmony dimension uses, ensuring cross-endpoint consistency.

    Args:
        repo_id: Muse Hub repo UUID (used only for logging).
        ref: Muse commit ref (seeds the deterministic data).
        track: Optional track filter (recorded in response; stub ignores it).
        section: Optional section filter (recorded in response; stub ignores it).

    Returns:
        :class:`HarmonyAnalysisResponse` ready for the harmony endpoint.
    """
    seed = _ref_hash(ref)
    tonic = _pick(seed, _TONICS)
    mode = _pick(seed, _HARMONY_MODES, offset=1)
    key_label = f"{tonic} {mode}"

    # Build Roman numeral events — use the first 5 chords from the mode table.
    rn_table = _ROMAN_NUMERALS_BY_MODE.get(mode, _ROMAN_NUMERALS_BY_MODE["major"])
    chord_count = 4 + (seed % 3) # 4–6 chord events
    roman_numerals: list[RomanNumeralEvent] = []
    beat = 0.0
    for i in range(min(chord_count, len(rn_table))):
        roman, quality, function, offset_label = rn_table[i]
        semitones = _SEMITONE_OFFSETS.get(offset_label, 0)
        root = _transpose_root(tonic, semitones)
        roman_numerals.append(
            RomanNumeralEvent(
                beat=beat,
                chord=roman,
                root=root,
                quality=quality,
                function=function,
            )
        )
        beat += 4.0

    # Build cadences — one or two, positioned at phrase boundaries.
    cadence_type = _pick(seed, _CADENCE_TYPES, offset=2)
    cadence_beat = float((seed % 4) * 4 + 4)
    cadences: list[CadenceEvent] = [
        CadenceEvent.model_validate({"from": "V", "to": "I", "beat": cadence_beat, "type": cadence_type}),
    ]
    if seed % 3 == 0:
        cadences.append(
            CadenceEvent.model_validate({"from": "IV", "to": "I", "beat": cadence_beat + 16.0, "type": "plagal"}),
        )

    # Build modulations — 0 or 1, depending on ref seed.
    modulations: list[HarmonyModulationEvent] = []
    if seed % 4 == 0:
        dom_root = _transpose_root(tonic, 7) # dominant (P5)
        modulations.append(
            HarmonyModulationEvent(
                beat=32.0,
                from_key=key_label,
                to_key=f"{dom_root} {mode}",
                pivot_chord=dom_root,
            )
        )

    # Harmonic rhythm: chord changes per minute. Assumes ~120 BPM tempo.
    # With chords every 4 beats at 120 BPM → 30 chord changes per minute.
    # Varies slightly per ref to feel alive.
    base_rhythm = 2.0
    harmonic_rhythm_bpm = round(base_rhythm + (seed % 5) * 0.25, 2)

    logger.info("✅ harmony/analysis repo=%s ref=%s key=%s", repo_id[:8], ref[:8], key_label)
    return HarmonyAnalysisResponse(
        key=key_label,
        mode=mode,
        roman_numerals=roman_numerals,
        cadences=cadences,
        modulations=modulations,
        harmonic_rhythm_bpm=harmonic_rhythm_bpm,
    )


# ---------------------------------------------------------------------------
# Arrangement matrix
# ---------------------------------------------------------------------------

_ARRANGEMENT_INSTRUMENTS: list[str] = ["bass", "keys", "guitar", "drums", "lead", "pads"]
_ARRANGEMENT_SECTIONS: list[str] = ["intro", "verse_1", "chorus", "bridge", "outro"]

# Beat positions for each section (start, end). Realistic 4/4 structure with
# 8-bar sections at 120 BPM (32 beats per section).
_SECTION_BEATS: dict[str, tuple[float, float]] = {
    "intro": (0.0, 32.0),
    "verse_1": (32.0, 64.0),
    "chorus": (64.0, 96.0),
    "bridge": (96.0, 112.0),
    "outro": (112.0, 128.0),
}

# Probability that an instrument is active in a given section (realistic
# arrangement logic — drums always play, bass almost always, pads lighter).
_ACTIVE_PROBABILITY: dict[str, dict[str, float]] = {
    "bass": {"intro": 0.7, "verse_1": 1.0, "chorus": 1.0, "bridge": 0.8, "outro": 0.6},
    "keys": {"intro": 0.5, "verse_1": 0.8, "chorus": 1.0, "bridge": 0.7, "outro": 0.5},
    "guitar": {"intro": 0.3, "verse_1": 0.7, "chorus": 0.9, "bridge": 0.6, "outro": 0.3},
    "drums": {"intro": 0.5, "verse_1": 1.0, "chorus": 1.0, "bridge": 0.8, "outro": 0.4},
    "lead": {"intro": 0.2, "verse_1": 0.5, "chorus": 0.8, "bridge": 0.9, "outro": 0.3},
    "pads": {"intro": 0.8, "verse_1": 0.6, "chorus": 0.7, "bridge": 1.0, "outro": 0.9},
}


def compute_arrangement_matrix(*, repo_id: str, ref: str) -> ArrangementMatrixResponse:
    """Build a deterministic :class:`ArrangementMatrixResponse` for a Muse commit ref.

    Returns instrument × section density data so the arrangement matrix page can
    render a colour-coded grid without downloading any audio or MIDI files.

    The stub data is deterministically seeded by ``ref`` so that agents receive
    consistent responses across retries. Note counts and density values are
    drawn from realistic ranges for a 6-instrument soul/pop arrangement.

    Args:
        repo_id: Muse Hub repo UUID (used only for logging).
        ref: Muse commit ref (seeds the deterministic RNG).

    Returns:
        :class:`ArrangementMatrixResponse` ready for the arrange endpoint.
    """
    seed_int = int(hashlib.md5(ref.encode()).hexdigest()[:8], 16) # noqa: S324 — non-crypto

    instruments = _ARRANGEMENT_INSTRUMENTS
    sections = _ARRANGEMENT_SECTIONS

    cells: list[ArrangementCellData] = []
    raw_counts: dict[tuple[str, str], int] = {}

    # Generate note counts deterministically per (instrument, section).
    for i_idx, instrument in enumerate(instruments):
        for s_idx, section in enumerate(sections):
            prob = _ACTIVE_PROBABILITY.get(instrument, {}).get(section, 0.5)
            # Mix ref seed with cell position for per-cell variation.
            cell_seed = (seed_int + i_idx * 31 + s_idx * 97) % (2**32)
            # Deterministic "random" value in [0, 1) via cheap LCG step.
            lcg = (cell_seed * 1664525 + 1013904223) % (2**32)
            roll = lcg / (2**32)
            active = roll < prob
            if active:
                # Note count: 8–64, skewed toward busier instruments.
                note_count = 8 + int(roll * 56)
            else:
                note_count = 0
            raw_counts[(instrument, section)] = note_count

    # Normalise counts to [0, 1] density (max across the whole matrix).
    max_count = max(raw_counts.values()) or 1

    for i_idx, instrument in enumerate(instruments):
        for s_idx, section in enumerate(sections):
            note_count = raw_counts[(instrument, section)]
            beat_start, beat_end = _SECTION_BEATS[section]
            active = note_count > 0
            # Pitch range: realistic MIDI range per instrument.
            pitch_base = {"bass": 28, "keys": 48, "guitar": 40, "drums": 36, "lead": 60, "pads": 52}.get(
                instrument, 48
            )
            pitch_low = pitch_base if active else 0
            pitch_high = pitch_base + 24 if active else 0
            cells.append(
                ArrangementCellData(
                    instrument=instrument,
                    section=section,
                    note_count=note_count,
                    note_density=round(note_count / max_count, 4),
                    beat_start=beat_start,
                    beat_end=beat_end,
                    pitch_low=pitch_low,
                    pitch_high=pitch_high,
                    active=active,
                )
            )

    # Row summaries (per instrument).
    row_summaries: list[ArrangementRowSummary] = []
    for instrument in instruments:
        inst_cells = [c for c in cells if c.instrument == instrument]
        total = sum(c.note_count for c in inst_cells)
        active_secs = sum(1 for c in inst_cells if c.active)
        mean_d = round(sum(c.note_density for c in inst_cells) / len(inst_cells), 4) if inst_cells else 0.0
        row_summaries.append(
            ArrangementRowSummary(
                instrument=instrument,
                total_notes=total,
                active_sections=active_secs,
                mean_density=mean_d,
            )
        )

    # Column summaries (per section).
    column_summaries: list[ArrangementColumnSummary] = []
    for section in sections:
        sec_cells = [c for c in cells if c.section == section]
        total = sum(c.note_count for c in sec_cells)
        active_inst = sum(1 for c in sec_cells if c.active)
        beat_start, beat_end = _SECTION_BEATS[section]
        column_summaries.append(
            ArrangementColumnSummary(
                section=section,
                total_notes=total,
                active_instruments=active_inst,
                beat_start=beat_start,
                beat_end=beat_end,
            )
        )

    total_beats = max(end for _, end in _SECTION_BEATS.values())
    logger.info("✅ arrangement/matrix repo=%s ref=%s cells=%d", repo_id[:8], ref[:8], len(cells))
    return ArrangementMatrixResponse(
        repo_id=repo_id,
        ref=ref,
        instruments=instruments,
        sections=sections,
        cells=cells,
        row_summaries=row_summaries,
        column_summaries=column_summaries,
        total_beats=total_beats,
    )


# ---------------------------------------------------------------------------
# Semantic recall
# ---------------------------------------------------------------------------

_RECALL_DIMENSIONS: list[str] = ["harmony", "groove", "emotion", "motifs", "contour", "tempo"]

_RECALL_MESSAGES: list[str] = [
    "Add jazzy chord progression with swing feel",
    "Introduce minor-key bridge with tension build",
    "Refine melodic contour — ascending arch in chorus",
    "Adjust groove: add half-time feel in verse",
    "Add layered pad texture for emotional depth",
    "Modulate to dominant for climactic section",
    "Tighten rhythmic grid — straight feel throughout",
    "Add secondary dominant walkdown before chorus",
]

_RECALL_BRANCHES: list[str] = ["main", "feature/bridge", "feature/chorus", "experiment/jazz", "develop"]


def compute_recall(
    *,
    repo_id: str,
    ref: str,
    query: str,
    limit: int = 10,
) -> RecallResponse:
    """Query the musical feature vector space for commits semantically matching ``query``.

    Why this exists
    ---------------
    Agents and producers need to surface past commits that are musically relevant
    to a natural-language description (e.g. ``"a jazzy chord progression with swing
    groove"``). This endpoint bridges semantic intent and the vector index so that
    retrieval is based on musical meaning rather than exact keyword matching.

    Implementation note
    -------------------
    Returns deterministic stub results keyed on the XOR of the ref and query
    hashes so agents receive consistent responses across retries.

    Args:
        repo_id: Muse Hub repo UUID (used for scoping and logging).
        ref: Muse commit ref to scope the search to (only reachable commits).
        query: Natural-language search string, e.g. ``"swing groove with jazz harmony"``.
        limit: Maximum number of matches to return (default 10, max 50).

    Returns:
        :class:`RecallResponse` with a ranked list of :class:`RecallMatch` entries,
        sorted descending by cosine similarity score.
    """
    limit = max(1, min(limit, 50))
    q_seed = _ref_hash(query)
    r_seed = _ref_hash(ref)
    combined_seed = q_seed ^ r_seed

    # Deterministic total count — varies by query so results feel realistic.
    total_matches = 4 + (combined_seed % 12)
    n_to_return = min(limit, total_matches)

    matches: list[RecallMatch] = []
    for i in range(n_to_return):
        item_seed = combined_seed ^ (i * 0x9E3779B9)
        # Score: highest for i=0, decaying with rank (deterministic)
        base_score = 0.92 - i * 0.06
        noise = ((item_seed >> (i % 16)) % 8) / 100.0
        score = round(max(0.0, min(1.0, base_score - noise)), 4)

        commit_hash = hashlib.md5(f"{ref}:{query}:{i}".encode()).hexdigest()[:16] # noqa: S324
        message = _RECALL_MESSAGES[(combined_seed + i) % len(_RECALL_MESSAGES)]
        branch = _RECALL_BRANCHES[(combined_seed + i) % len(_RECALL_BRANCHES)]

        # Pick 1–3 matched dimensions — most relevant first.
        n_dims = 1 + i % 3
        dims = [
            _RECALL_DIMENSIONS[(combined_seed + i + k) % len(_RECALL_DIMENSIONS)]
            for k in range(n_dims)
        ]
        # Deduplicate while preserving order.
        seen: set[str] = set()
        unique_dims = [d for d in dims if not (d in seen or seen.add(d))] # type: ignore[func-returns-value]

        matches.append(
            RecallMatch(
                commit_id=commit_hash,
                commit_message=message,
                branch=branch,
                score=score,
                matched_dimensions=unique_dims,
            )
        )

    logger.info(
        "✅ recall repo=%s ref=%s query=%r matches=%d/%d",
        repo_id[:8], ref, query[:40], len(matches), total_matches,
    )
    return RecallResponse(
        repo_id=repo_id,
        ref=ref,
        query=query,
        matches=matches,
        total_matches=total_matches,
        embedding_dimensions=128,
    )


# ---------------------------------------------------------------------------
# Cross-ref similarity
# ---------------------------------------------------------------------------

# Interpretation thresholds: the weighted mean of 10 dimension scores maps
# to a qualitative label. Weights are equal (0.1 each) so the overall score
# is a simple mean, but kept in this lookup to support future re-weighting
# without changing the response shape.
_SIMILARITY_WEIGHTS: dict[str, float] = {
    "pitch_distribution": 0.10,
    "rhythm_pattern": 0.10,
    "tempo": 0.10,
    "dynamics": 0.10,
    "harmonic_content": 0.10,
    "form": 0.10,
    "instrument_blend": 0.10,
    "groove": 0.10,
    "contour": 0.10,
    "emotion": 0.10,
}


def _interpret_similarity(score: float, dims: RefSimilarityDimensions) -> str:
    """Generate a human-readable interpretation of a cross-ref similarity score.

    The interpretation names the dominant divergence axis when the overall
    score is below 0.9, giving agents and UIs actionable language without
    requiring further API calls.
    """
    dim_values = {
        "pitch distribution": dims.pitch_distribution,
        "rhythm pattern": dims.rhythm_pattern,
        "tempo": dims.tempo,
        "dynamics": dims.dynamics,
        "harmonic content": dims.harmonic_content,
        "form": dims.form,
        "instrument blend": dims.instrument_blend,
        "groove": dims.groove,
        "contour": dims.contour,
        "emotion": dims.emotion,
    }
    lowest_dim = min(dim_values, key=lambda k: dim_values[k])
    lowest_score = dim_values[lowest_dim]

    if score >= 0.90:
        return "Nearly identical arrangements — only subtle differences detected."
    if score >= 0.75:
        return (
            f"Highly similar arrangement with divergent {lowest_dim} choices "
            f"(score: {lowest_score:.2f})."
        )
    if score >= 0.55:
        return (
            f"Moderately similar — significant divergence in {lowest_dim} "
            f"(score: {lowest_score:.2f}) and related dimensions."
        )
    return (
        f"Low similarity — the two refs differ substantially, "
        f"especially in {lowest_dim} (score: {lowest_score:.2f})."
    )


def compute_ref_similarity(
    *,
    repo_id: str,
    base_ref: str,
    compare_ref: str,
) -> RefSimilarityResponse:
    """Compute cross-ref similarity between two Muse refs.

    Returns a :class:`~musehub.models.musehub_analysis.RefSimilarityResponse`
    with per-dimension scores and an overall weighted mean.

    Scores are deterministic stubs derived from both ref hashes so that:
    - The same pair always returns the same result (idempotent for agents).
    - Swapping base/compare yields scores of the same magnitude (symmetry).

    When real MIDI content analysis is available, replace the stub derivation
    below with actual per-dimension comparison logic while preserving this
    function's signature and return type.

    Args:
        repo_id: Muse repository identifier (used for log context only).
        base_ref: The baseline ref (branch name, tag, or commit hash).
        compare_ref: The ref to compare against ``base_ref``.

    Returns:
        :class:`~musehub.models.musehub_analysis.RefSimilarityResponse`
        containing 10 dimension scores, an overall similarity, and an
        auto-generated interpretation string.
    """
    base_seed = _ref_hash(base_ref)
    compare_seed = _ref_hash(compare_ref)

    def _dim_score(offset: int) -> float:
        """Derive a deterministic 0–1 similarity score for one dimension."""
        combined = (base_seed + compare_seed + offset) % (2**16)
        raw = (combined / (2**16 - 1))
        return round(0.50 + raw * 0.50, 4)

    dims = RefSimilarityDimensions(
        pitch_distribution=_dim_score(0),
        rhythm_pattern=_dim_score(1),
        tempo=_dim_score(2),
        dynamics=_dim_score(3),
        harmonic_content=_dim_score(4),
        form=_dim_score(5),
        instrument_blend=_dim_score(6),
        groove=_dim_score(7),
        contour=_dim_score(8),
        emotion=_dim_score(9),
    )

    overall = round(
        sum(
            getattr(dims, k.replace(" ", "_").replace("-", "_")) * w
            for k, w in {
                "pitch_distribution": 0.10,
                "rhythm_pattern": 0.10,
                "tempo": 0.10,
                "dynamics": 0.10,
                "harmonic_content": 0.10,
                "form": 0.10,
                "instrument_blend": 0.10,
                "groove": 0.10,
                "contour": 0.10,
                "emotion": 0.10,
            }.items()
        ),
        4,
    )

    interpretation = _interpret_similarity(overall, dims)
    logger.info(
        "✅ similarity repo=%s base=%s compare=%s overall=%.2f",
        repo_id[:8],
        base_ref[:8],
        compare_ref[:8],
        overall,
    )
    return RefSimilarityResponse(
        base_ref=base_ref,
        compare_ref=compare_ref,
        overall_similarity=overall,
        dimensions=dims,
        interpretation=interpretation,
    )


# ---------------------------------------------------------------------------
# Emotion diff
# ---------------------------------------------------------------------------

# Axis labels in declaration order — used for delta interpretation
_EMOTION_8D_AXES: list[str] = [
    "valence", "energy", "tension", "complexity",
    "warmth", "brightness", "darkness", "playfulness",
]


def _build_emotion_vector_8d(ref: str) -> EmotionVector8D:
    """Build a deterministic 8-axis emotion vector for a ref.

    All eight axes are derived from independent bit-slices of the ref hash so
    they vary independently across refs — avoids correlated stubs.

    Why 8D not 4D: the emotion-diff endpoint uses an extended radar chart that
    separates warmth/brightness/playfulness/complexity from the core
    valence/energy/tension/darkness axes used in the emotion-map endpoint.
    """
    seed = _ref_hash(ref)

    def _axis(shift: int, base: float = 0.1, spread: float = 0.8) -> float:
        return round(base + ((seed >> shift) % 100) * spread / 100, 4)

    return EmotionVector8D(
        valence=_axis(0),
        energy=_axis(8),
        tension=_axis(16),
        complexity=_axis(24),
        warmth=_axis(32),
        brightness=_axis(40),
        darkness=_axis(48),
        playfulness=_axis(56),
    )


def _clamp(value: float) -> float:
    """Clamp a delta value to [-1, 1] for the signed delta field."""
    return max(-1.0, min(1.0, round(value, 4)))


def compute_emotion_diff(
    *,
    repo_id: str,
    head_ref: str,
    base_ref: str,
) -> EmotionDiffResponse:
    """Compute an 8-axis emotional diff between two Muse commit refs.

    Returns the per-axis emotion vectors for ``base_ref`` and ``head_ref``,
    their signed delta (``head - base``), and a natural-language interpretation
    of the most significant shifts.

    Why this is separate from the generic emotion dimension
    -------------------------------------------------------
    The generic ``emotion`` dimension returns a single aggregate snapshot with
    a 2-axis (valence/arousal) model. This endpoint uses an extended 8-axis
    radar model and computes a *comparative* diff between two refs — the
    information the ``muse emotion-diff`` CLI command and the PR detail page
    need to answer "how did this commit change the emotional character?"

    Args:
        repo_id: Muse Hub repo UUID (used for logging).
        head_ref: The ref being evaluated (the head commit).
        base_ref: The ref used as comparison baseline (e.g. parent commit, ``main``).

    Returns:
        :class:`EmotionDiffResponse` with base, head, delta vectors, and interpretation.
    """
    base_vec = _build_emotion_vector_8d(base_ref)
    head_vec = _build_emotion_vector_8d(head_ref)

    delta = EmotionDelta8D(
        valence=_clamp(head_vec.valence - base_vec.valence),
        energy=_clamp(head_vec.energy - base_vec.energy),
        tension=_clamp(head_vec.tension - base_vec.tension),
        complexity=_clamp(head_vec.complexity - base_vec.complexity),
        warmth=_clamp(head_vec.warmth - base_vec.warmth),
        brightness=_clamp(head_vec.brightness - base_vec.brightness),
        darkness=_clamp(head_vec.darkness - base_vec.darkness),
        playfulness=_clamp(head_vec.playfulness - base_vec.playfulness),
    )

    raw_deltas: list[tuple[str, float]] = [
        ("valence", delta.valence),
        ("energy", delta.energy),
        ("tension", delta.tension),
        ("complexity", delta.complexity),
        ("warmth", delta.warmth),
        ("brightness", delta.brightness),
        ("darkness", delta.darkness),
        ("playfulness", delta.playfulness),
    ]
    sorted_deltas = sorted(raw_deltas, key=lambda x: abs(x[1]), reverse=True)
    dominant_axis, dominant_value = sorted_deltas[0]

    if abs(dominant_value) < 0.05:
        interpretation = (
            "This commit introduced minimal emotional change — the character of the "
            "piece is nearly identical to the base ref across all eight perceptual axes."
        )
    else:
        direction = "increased" if dominant_value > 0 else "decreased"
        secondary_parts: list[str] = []
        for axis, value in sorted_deltas[1:3]:
            if abs(value) >= 0.05:
                secondary_parts.append(
                    f"{axis} {'rose' if value > 0 else 'fell'} by {abs(value):.2f}"
                )
        secondary_text = (
            f" Notable secondary shifts: {', '.join(secondary_parts)}." if secondary_parts else ""
        )
        interpretation = (
            f"This commit {direction} {dominant_axis} by {abs(dominant_value):.2f} "
            f"(the dominant emotional shift).{secondary_text}"
        )

    logger.info(
        "✅ emotion-diff repo=%s head=%s base=%s dominant=%s delta=%.3f",
        repo_id[:8], head_ref, base_ref, dominant_axis, dominant_value,
    )
    return EmotionDiffResponse(
        repo_id=repo_id,
        base_ref=base_ref,
        head_ref=head_ref,
        computed_at=_utc_now(),
        base_emotion=base_vec,
        head_emotion=head_vec,
        delta=delta,
        interpretation=interpretation,
    )


# ---------------------------------------------------------------------------
# SSR aggregation functions — compare / divergence / context pages
# ---------------------------------------------------------------------------

_MUSICAL_DIMENSIONS = ["Melodic", "Harmonic", "Rhythmic", "Structural", "Dynamic"]


async def compare_refs(
    db: AsyncSession,
    repo_id: str,
    base: str,
    head: str,
) -> CompareResult:
    """Return a per-dimension comparison between two refs for SSR rendering.

    Produces deterministic stub scores keyed on the ref values.  Callers
    should treat this as a realistic approximation until Storpheus exposes
    per-ref MIDI introspection.

    The returned :class:`CompareResult` is consumed directly by
    ``pages/analysis/compare.html`` — no client-side fetch required.
    """
    base_seed = _ref_hash(base)
    head_seed = _ref_hash(head)
    dimensions: list[CompareDimension] = []
    for i, name in enumerate(_MUSICAL_DIMENSIONS):
        base_val = round(((base_seed + i * 31) % 100) / 100.0, 4)
        head_val = round(((head_seed + i * 31) % 100) / 100.0, 4)
        delta = round(head_val - base_val, 4)
        dimensions.append(CompareDimension(name=name, base_value=base_val, head_value=head_val, delta=delta))
    logger.info("✅ compare-refs repo=%s base=%s head=%s", repo_id[:8], base[:8], head[:8])
    return CompareResult(base=base, head=head, dimensions=dimensions)


async def compute_divergence(
    db: AsyncSession,
    repo_id: str,
    fork_repo_id: str | None = None,
) -> DivergenceResult:
    """Return a musical divergence score between a repo and its fork for SSR rendering.

    Produces deterministic stub scores.  When ``fork_repo_id`` is ``None`` the
    divergence is computed relative to the repo's own HEAD (self-comparison → score=0).

    Consumed directly by ``pages/analysis/divergence.html``.
    """
    seed = _ref_hash(fork_repo_id or repo_id)
    repo_seed = _ref_hash(repo_id)
    dimensions: list[DivergenceDimension] = []
    for i, name in enumerate(_MUSICAL_DIMENSIONS):
        raw = abs(((seed + i * 37) % 100) - ((repo_seed + i * 37) % 100)) / 100.0
        divergence = round(min(raw, 1.0), 4)
        dimensions.append(DivergenceDimension(name=name, divergence=divergence))
    overall = round(sum(d.divergence for d in dimensions) / len(dimensions), 4)
    logger.info(
        "✅ compute-divergence repo=%s fork=%s score=%.3f",
        repo_id[:8], (fork_repo_id or "self")[:8], overall,
    )
    return DivergenceResult(score=overall, dimensions=dimensions)


async def get_context(
    db: AsyncSession,
    repo_id: str,
    ref: str,
) -> ContextResult:
    """Return a musical context summary for the given ref for SSR rendering.

    Produces deterministic stub data keyed on the ref value.  Full LLM-generated
    summaries will replace this once the Muse pipeline is wired to the context
    endpoint.

    Consumed directly by ``pages/analysis/context.html``.
    """
    seed = _ref_hash(ref)
    missing_pool = [
        "bass line",
        "kick drum",
        "reverb tail",
        "chord voicings",
        "melodic counter-line",
        "dynamic variation",
    ]
    suggestion_pool = {
        "Groove": "Introduce a 16th-note hi-hat pattern to add rhythmic density.",
        "Harmony": "Extend the chord to a 9th to enrich the harmonic texture.",
        "Melody": "Add a pentatonic counter-melody in the upper register.",
        "Dynamics": "Apply a decrescendo into the final bar for a softer landing.",
    }
    n_missing = (seed % 3) + 1
    missing = missing_pool[: n_missing]
    n_suggestions = (seed % 3) + 2
    suggestions = dict(list(suggestion_pool.items())[:n_suggestions])
    summary = (
        f"Ref {ref[:8]} establishes a {_pick(seed, _MODES)}-mode foundation "
        f"at {60 + (seed % 60)} BPM with a {_pick(seed + 1, _GROOVES)} groove. "
        f"The arrangement currently features {5 - n_missing} of the expected core elements. "
        f"Muse suggests {n_suggestions} compositional refinements."
    )
    logger.info("✅ get-context repo=%s ref=%s", repo_id[:8], ref[:8])
    return ContextResult(summary=summary, missing_elements=missing, suggestions=suggestions)