gabriel / musehub public
musehub_credits.py python
147 lines 5.4 KB
6b53f1af feat: supercharge all pages, full SOC refactor, and Python 3.14 upgrade (#7) Gabriel Cardona <cgcardona@gmail.com> 5d ago
1 """Credits aggregation service for MuseHub repos.
2
3 Aggregates contributor information from commit history — think dynamic album
4 liner notes that update as the composition evolves. Every pushed commit
5 contributes an author name, a timestamp, and a message whose keywords are
6 used to infer contribution types (composer, arranger, producer, etc.).
7
8 Design decisions:
9 - Pure DB read — no mutations, no side effects.
10 - Contribution types are inferred from commit message keywords, not stored
11 explicitly, so they evolve as musicians describe their work more richly.
12 - Sort options mirror what a label credit page would offer: by contribution
13 count (most prolific first), by recency (most recently active first), and
14 alphabetical (predictable scanning order).
15 """
16
17 import logging
18 from collections import defaultdict
19 from datetime import datetime
20
21 from sqlalchemy import select
22 from sqlalchemy.ext.asyncio import AsyncSession
23
24 from musehub.db import musehub_models as db
25 from musehub.models.musehub import ContributorCredits, CreditsResponse
26
27 logger = logging.getLogger(__name__)
28
# ---------------------------------------------------------------------------
# Role inference keyword map
# Keys are contribution-type labels; values are substrings to search for in
# the lower-cased commit message. A role is credited when ANY of its
# substrings occurs in the message, and a single message may credit several
# roles at once. Dict insertion order determines the order in which matched
# roles are reported.
# ---------------------------------------------------------------------------

_ROLE_KEYWORDS: dict[str, list[str]] = {
    "composer": ["compos", "wrote", "writing", "melody", "theme", "motif"],
    "arranger": ["arrang", "orchestrat", "voicing", "reharmoni"],
    "producer": ["produc", "session", "master", "mix session", "track layout"],
    "performer": ["perform", "record", "played", "guitar", "piano", "bass", "drum"],
    "mixer": ["mix", "blend", "balance", "eq ", "equaliz", "compressor"],
    "editor": ["edit", "cut", "splice", "trim", "clip"],
    # "lyric" was previously listed twice; the duplicate added no matches.
    "lyricist": ["lyric", "word", "verse", "chorus", "hook"],
    "sound designer": ["synth", "sound design", "patch", "preset", "timbre"],
}
45
46
def _infer_roles(message: str) -> list[str]:
    """Map a commit message to the contribution-type labels it mentions.

    The message is lower-cased and each role in ``_ROLE_KEYWORDS`` is
    credited when at least one of its keyword substrings occurs anywhere in
    the text. Roles are returned in the map's iteration order.

    Args:
        message: Raw commit message text.

    Returns:
        The matched role labels, or ``["contributor"]`` when nothing matched,
        so the result is never empty.
    """
    haystack = message.lower()
    matched = [
        label
        for label, needles in _ROLE_KEYWORDS.items()
        if any(needle in haystack for needle in needles)
    ]
    # An empty list is falsy, which triggers the generic fallback role.
    return matched or ["contributor"]
59
60
def _sort_contributors(
    contributors: list[ContributorCredits], sort: str
) -> list[ContributorCredits]:
    """Return a new list of contributors in the requested order.

    The input list is left untouched; a copy is sorted and returned.

    Args:
        contributors: Contributor records to order.
        sort: One of:
            - ``"recency"`` — latest ``last_active`` first
            - ``"alpha"`` — case-insensitive author name, ascending
            - anything else (including ``"count"``) — highest
              ``session_count`` first, ties broken by case-insensitive
              author name

    Returns:
        A freshly sorted list.
    """
    ordered = list(contributors)
    if sort == "recency":
        ordered.sort(key=lambda entry: entry.last_active, reverse=True)
    elif sort == "alpha":
        ordered.sort(key=lambda entry: entry.author.lower())
    else:
        # Negating the count gives descending order while the name
        # tie-break stays ascending.
        ordered.sort(
            key=lambda entry: (-entry.session_count, entry.author.lower())
        )
    return ordered
77
78
async def aggregate_credits(
    session: AsyncSession,
    repo_id: str,
    *,
    sort: str = "count",
) -> CreditsResponse:
    """Build the credits roll for a repo from its full commit history.

    Every commit row for ``repo_id`` is fetched in timestamp order with no
    limit. Commits are grouped by their ``author`` value; for each author
    the function tallies the number of commits, unions the roles inferred
    from each commit message, and tracks the earliest and latest commit
    timestamps.

    Args:
        session: Active async DB session used to run the query.
        repo_id: Target repo ID.
        sort: Ordering of the contributor list — ``"count"`` (default),
            ``"recency"``, or ``"alpha"``.

    Returns:
        ``CreditsResponse`` carrying the sorted contributor list, the echoed
        ``sort`` value, and the number of distinct contributors.
    """
    query = (
        select(db.MusehubCommit)
        .where(db.MusehubCommit.repo_id == repo_id)
        .order_by(db.MusehubCommit.timestamp)
    )
    result = await session.execute(query)
    commits = result.scalars().all()

    # Accumulators keyed by author string.
    session_counts: dict[str, int] = defaultdict(int)
    role_labels: dict[str, set[str]] = defaultdict(set)
    earliest: dict[str, datetime] = {}
    latest: dict[str, datetime] = {}

    for commit in commits:
        who = commit.author
        session_counts[who] += 1
        role_labels[who].update(_infer_roles(commit.message))

        when = commit.timestamp
        # Track the activity window explicitly rather than relying on the
        # query's ordering.
        if who in earliest:
            if when < earliest[who]:
                earliest[who] = when
        else:
            earliest[who] = when
        if who in latest:
            if when > latest[who]:
                latest[who] = when
        else:
            latest[who] = when

    contributors: list[ContributorCredits] = []
    for who, tally in session_counts.items():
        contributors.append(
            ContributorCredits(
                author=who,
                session_count=tally,
                # Sorted so the role list is deterministic across calls.
                contribution_types=sorted(role_labels[who]),
                first_active=earliest[who],
                last_active=latest[who],
            )
        )

    sorted_contributors = _sort_contributors(contributors, sort)
    total = len(sorted_contributors)
    logger.debug(
        "✅ Credits aggregated for repo %s: %d contributor(s), sort=%s",
        repo_id,
        total,
        sort,
    )
    return CreditsResponse(
        repo_id=repo_id,
        contributors=sorted_contributors,
        sort=sort,
        total_contributors=total,
    )