musehub_credits.py
python
| 1 | """Credits aggregation service for MuseHub repos. |
| 2 | |
| 3 | Aggregates contributor information from commit history — think dynamic album |
| 4 | liner notes that update as the composition evolves. Every pushed commit |
| 5 | contributes an author name, a timestamp, and a message whose keywords are |
| 6 | used to infer contribution types (composer, arranger, producer, etc.). |
| 7 | |
| 8 | Design decisions: |
| 9 | - Pure DB read — no mutations, no side effects. |
| 10 | - Contribution types are inferred from commit message keywords, not stored |
| 11 | explicitly, so they evolve as musicians describe their work more richly. |
| 12 | - Sort options mirror what a label credit page would offer: by contribution |
| 13 | count (most prolific first), by recency (most recently active first), and |
| 14 | alphabetical (predictable scanning order). |
| 15 | """ |
| 16 | |
| 17 | import logging |
| 18 | from collections import defaultdict |
| 19 | from datetime import datetime |
| 20 | |
| 21 | from sqlalchemy import select |
| 22 | from sqlalchemy.ext.asyncio import AsyncSession |
| 23 | |
| 24 | from musehub.db import musehub_models as db |
| 25 | from musehub.models.musehub import ContributorCredits, CreditsResponse |
| 26 | |
| 27 | logger = logging.getLogger(__name__) |
| 28 | |
# ---------------------------------------------------------------------------
# Role inference keyword map
# Keys are contribution-type labels; values are substrings searched for in
# the lower-cased commit message. A role is credited as soon as any one of
# its substrings occurs, and every matching role is reported, so a single
# message can yield several roles.
#
# Matching is plain substring containment, which has two visible effects:
# - stems such as "compos" or "arrang" cover several word forms;
# - overlapping entries co-fire: a message containing "mix session" matches
#   both "producer" ("mix session") and "mixer" ("mix").
# NOTE(review): "eq " keeps its trailing space — presumably to avoid matching
# inside longer words; confirm before normalising it.
# ---------------------------------------------------------------------------

_ROLE_KEYWORDS: dict[str, list[str]] = {
    "composer": ["compos", "wrote", "writing", "melody", "theme", "motif"],
    "arranger": ["arrang", "orchestrat", "voicing", "reharmoni"],
    "producer": ["produc", "session", "master", "mix session", "track layout"],
    "performer": ["perform", "record", "played", "guitar", "piano", "bass", "drum"],
    "mixer": ["mix", "blend", "balance", "eq ", "equaliz", "compressor"],
    "editor": ["edit", "cut", "splice", "trim", "clip"],
    # Each keyword is listed once; a repeated entry adds nothing to the scan.
    "lyricist": ["lyric", "word", "verse", "chorus", "hook"],
    "sound designer": ["synth", "sound design", "patch", "preset", "timbre"],
}
| 45 | |
| 46 | |
def _infer_roles(message: str) -> list[str]:
    """Map a commit message to the contribution-type labels it mentions.

    The message is lower-cased and every role's keyword substrings are looked
    up in it; a role is included when at least one of its keywords occurs.
    Roles are returned in ``_ROLE_KEYWORDS`` iteration order. When nothing
    matches, the result is ``["contributor"]``, so the list is never empty.
    """
    text = message.lower()
    matched = [
        role
        for role, needles in _ROLE_KEYWORDS.items()
        if any(needle in text for needle in needles)
    ]
    # An empty match list is falsy, which selects the generic fallback label.
    return matched or ["contributor"]
| 59 | |
| 60 | |
def _sort_contributors(
    contributors: list[ContributorCredits], sort: str
) -> list[ContributorCredits]:
    """Return a new list of contributors arranged in the requested order.

    Recognised ``sort`` values:
    - ``"recency"`` — latest ``last_active`` first
    - ``"alpha"`` — ascending by lower-cased author name
    - anything else, including the default ``"count"`` — highest
      ``session_count`` first, ties broken by lower-cased author name

    The input list is not modified; ``sorted`` builds a fresh list.
    """

    def by_name(credit):
        # Case-insensitive name key shared by the alpha and count orderings.
        return credit.author.lower()

    if sort == "alpha":
        return sorted(contributors, key=by_name)

    if sort == "recency":
        newest_first = sorted(
            contributors,
            key=lambda credit: credit.last_active,
            reverse=True,
        )
        return newest_first

    # Fallback ordering: negate the count so a single ascending sort puts the
    # most prolific author first while names still ascend within a tie.
    return sorted(
        contributors,
        key=lambda credit: (-credit.session_count, by_name(credit)),
    )
| 77 | |
| 78 | |
async def aggregate_credits(
    session: AsyncSession,
    repo_id: str,
    *,
    sort: str = "count",
) -> CreditsResponse:
    """Build the credits roll for one repo from its commit history.

    Selects every commit row whose ``repo_id`` matches (no limit is applied),
    ordered by timestamp, and folds the rows into one entry per distinct
    author string: number of commits, the union of roles inferred from each
    commit message, and the earliest and latest commit timestamps. The only
    database operation issued here is that single ``SELECT``.

    Args:
        session: Async DB session used to run the query.
        repo_id: ID of the repo whose commits are aggregated.
        sort: Ordering passed to ``_sort_contributors`` — ``"count"``
            (default), ``"recency"``, or ``"alpha"``. The value is echoed
            back unchanged in the response.

    Returns:
        ``CreditsResponse`` carrying the ordered contributor list, the
        requested ``sort`` value, and the number of contributors.
    """
    query = (
        select(db.MusehubCommit)
        .where(db.MusehubCommit.repo_id == repo_id)
        .order_by(db.MusehubCommit.timestamp)
    )
    result = await session.execute(query)
    commits = result.scalars().all()

    # Per-author tallies, keyed by the raw author string. Insertion order of
    # ``commit_totals`` (first appearance in the query result) is the order
    # in which contributors are handed to the sorter.
    commit_totals: dict[str, int] = defaultdict(int)
    roles_by_author: dict[str, set[str]] = defaultdict(set)
    active_span: dict[str, tuple[datetime, datetime]] = {}

    for commit in commits:
        who = commit.author
        commit_totals[who] += 1
        roles_by_author[who].update(_infer_roles(commit.message))
        when = commit.timestamp
        if who not in active_span:
            # First commit seen for this author opens the span.
            active_span[who] = (when, when)
            continue
        first_seen, last_seen = active_span[who]
        # Widen the span explicitly rather than relying on query ordering.
        if when < first_seen:
            first_seen = when
        if when > last_seen:
            last_seen = when
        active_span[who] = (first_seen, last_seen)

    contributors = []
    for who, total in commit_totals.items():
        first_seen, last_seen = active_span[who]
        contributors.append(
            ContributorCredits(
                author=who,
                session_count=total,
                # Sorted so the role list is stable regardless of set order.
                contribution_types=sorted(roles_by_author[who]),
                first_active=first_seen,
                last_active=last_seen,
            )
        )

    ordered = _sort_contributors(contributors, sort)
    contributor_total = len(ordered)
    logger.debug(
        "✅ Credits aggregated for repo %s: %d contributor(s), sort=%s",
        repo_id,
        contributor_total,
        sort,
    )
    return CreditsResponse(
        repo_id=repo_id,
        contributors=ordered,
        sort=sort,
        total_contributors=contributor_total,
    )