musehub/services/musehub_credits.py · gabriel/musehub

musehub_credits.py python

147 lines 5.4 KB

6b53f1af feat: supercharge all pages, full SOC refactor, and Python 3.14 upgrade (#7) Gabriel Cardona <cgcardona@gmail.com> 5d ago

1	"""Credits aggregation service for MuseHub repos.
2
3	Aggregates contributor information from commit history — think dynamic album
4	liner notes that update as the composition evolves. Every pushed commit
5	contributes an author name, a timestamp, and a message whose keywords are
6	used to infer contribution types (composer, arranger, producer, etc.).
7
8	Design decisions:
9	- Pure DB read — no mutations, no side effects.
10	- Contribution types are inferred from commit message keywords, not stored
11	explicitly, so they evolve as musicians describe their work more richly.
12	- Sort options mirror what a label credit page would offer: by contribution
13	count (most prolific first), by recency (most recently active first), and
14	alphabetical (predictable scanning order).
15	"""
16
17	import logging
18	from collections import defaultdict
19	from datetime import datetime
20
21	from sqlalchemy import select
22	from sqlalchemy.ext.asyncio import AsyncSession
23
24	from musehub.db import musehub_models as db
25	from musehub.models.musehub import ContributorCredits, CreditsResponse
26
27	logger = logging.getLogger(__name__)
28
29	# ---------------------------------------------------------------------------
30	# Role inference keyword map
31	# Keys are contribution-type labels; values are substrings to search for in
32	# the lower-cased commit message. Every role with a matching keyword is kept.
33	# ---------------------------------------------------------------------------
34
# Maps each contribution-type label to the substrings that signal it in a
# lower-cased commit message. Matching is plain substring containment, so the
# stems below ("compos", "arrang", ...) also match their inflected forms.
#
# Notes:
# - A message can satisfy several roles at once; e.g. anything containing
#   "mix session" (producer) necessarily also contains "mix" (mixer).
# - "eq " carries a trailing space on purpose, presumably to avoid matching
#   inside longer words such as "sequence" — confirm before changing it.
_ROLE_KEYWORDS: dict[str, list[str]] = {
    "composer": ["compos", "wrote", "writing", "melody", "theme", "motif"],
    "arranger": ["arrang", "orchestrat", "voicing", "reharmoni"],
    "producer": ["produc", "session", "master", "mix session", "track layout"],
    "performer": ["perform", "record", "played", "guitar", "piano", "bass", "drum"],
    "mixer": ["mix", "blend", "balance", "eq ", "equaliz", "compressor"],
    "editor": ["edit", "cut", "splice", "trim", "clip"],
    # "lyric" used to be listed twice here; the duplicate added nothing.
    "lyricist": ["lyric", "word", "verse", "chorus", "hook"],
    "sound designer": ["synth", "sound design", "patch", "preset", "timbre"],
}
45
46
def _infer_roles(message: str) -> list[str]:
    """Map a commit message to the contribution-type labels it mentions.

    The message is lower-cased and each role in ``_ROLE_KEYWORDS`` is kept
    when at least one of its keywords occurs as a substring. Roles come back
    in the order they are declared in ``_ROLE_KEYWORDS``. When nothing
    matches, the result is ``["contributor"]`` so a commit never ends up
    without a role.
    """
    text = message.lower()
    matched = [
        label
        for label, needles in _ROLE_KEYWORDS.items()
        if any(needle in text for needle in needles)
    ]
    # An empty list is falsy, so this yields the generic fallback role.
    return matched or ["contributor"]
59
60
61	def _sort_contributors(
62	contributors: list[ContributorCredits], sort: str
63	) -> list[ContributorCredits]:
64	"""Apply the requested sort order to the contributor list.
65
66	Supported values:
67	- ``"count"`` — most prolific contributor first (default)
68	- ``"recency"`` — most recently active contributor first
69	- ``"alpha"`` — alphabetical by author name
70	"""
71	if sort == "recency":
72	return sorted(contributors, key=lambda c: c.last_active, reverse=True)
73	if sort == "alpha":
74	return sorted(contributors, key=lambda c: c.author.lower())
75	# Default: sort by session count descending, then alpha for ties
76	return sorted(contributors, key=lambda c: (-c.session_count, c.author.lower()))
77
78
async def aggregate_credits(
    session: AsyncSession,
    repo_id: str,
    *,
    sort: str = "count",
) -> CreditsResponse:
    """Build the credits roll for one repo from its commit history.

    Fetches every ``MusehubCommit`` row whose ``repo_id`` matches, ordered by
    timestamp, and folds them into one record per distinct author string:
    how many commits they made, the union of roles inferred from their commit
    messages, and their earliest and latest commit timestamps.

    Args:
        session: Async DB session used to run the single SELECT.
        repo_id: ID of the repo whose commits are aggregated.
        sort: Ordering handed to ``_sort_contributors`` — ``"count"``
            (default), ``"recency"``, or ``"alpha"``.

    Returns:
        A ``CreditsResponse`` carrying the ordered contributor list, the
        ``sort`` value as passed in, and the contributor count.
    """
    query = (
        select(db.MusehubCommit)
        .where(db.MusehubCommit.repo_id == repo_id)
        .order_by(db.MusehubCommit.timestamp)
    )
    result = await session.execute(query)
    commits = result.scalars().all()

    # One accumulator per author. Dict insertion order (first appearance in
    # the timestamp-ordered rows) is the order contributors are built in,
    # which is what the stable sorts later fall back to on ties.
    per_author: dict[str, dict] = {}
    for commit in commits:
        who = commit.author
        detected = _infer_roles(commit.message)
        when = commit.timestamp

        stats = per_author.get(who)
        if stats is None:
            # First commit seen for this author seeds every field.
            per_author[who] = {
                "sessions": 1,
                "roles": set(detected),
                "first": when,
                "last": when,
            }
            continue

        stats["sessions"] += 1
        stats["roles"].update(detected)
        if when < stats["first"]:
            stats["first"] = when
        if when > stats["last"]:
            stats["last"] = when

    contributors = []
    for who, stats in per_author.items():
        contributors.append(
            ContributorCredits(
                author=who,
                session_count=stats["sessions"],
                contribution_types=sorted(stats["roles"]),
                first_active=stats["first"],
                last_active=stats["last"],
            )
        )

    ranked = _sort_contributors(contributors, sort)
    total = len(ranked)
    logger.debug(
        "✅ Credits aggregated for repo %s: %d contributor(s), sort=%s",
        repo_id,
        total,
        sort,
    )
    return CreditsResponse(
        repo_id=repo_id,
        contributors=ranked,
        sort=sort,
        total_contributors=total,
    )

Content Address

Object ID (SHA-256)

15c62742cb94b1326452dd0c9c5f9352da8893b474618ef723e257a67f90d2ce

This file is immutable and content-addressed. The same SHA always refers to the same bytes, across every clone and every time.

File Info

Path musehub/services/musehub_credits.py

Lines 147

Size 5.4 KB

Language python

Ref 865974cc

Snapshot c4b01e509e92…

Last Modified

6b53f1af

feat: supercharge all pages, full SOC refactor, and Python 3.14 upgrade (#7)

Gabriel Cardona <cgcardona@gmail.com> 5d ago

View commit →

Links

Browse tree at 865974cc All commits View raw