gabriel / musehub public
seed_v2.py python
1444 lines 77.4 KB
7f1d07e8 feat: domains, MCP expansion, MIDI player, and production hardening (#8) Gabriel Cardona <cgcardona@gmail.com> 4d ago
1 """MuseHub V2 seed script — domain-agnostic showcase data.
2
3 Generates a rich, realistic dataset that demonstrates Muse's domain-agnostic
4 multi-dimensional state versioning across both MIDI and Code domains.
5
6 New in V2 (run this AFTER the base seed_musehub.py):
7 - Domain registry: @cgcardona/midi (21 dims), @cgcardona/code (10 langs)
8 - 8 MIDI showcase repos with REAL PLAYABLE .mid files on disk
9 (Bach WTC, Satie Gymnopédie, Chopin Nocturne, Beethoven Moonlight,
10 + 4 original multi-track grooves)
11 - 3 real GitHub repos cloned + ported to the Muse code domain:
12 • github.com/cgcardona/muse — VCS engine, ~198 commits
      • github.com/cgcardona/agentception — multi-agent orchestration, first 250 of 1175 commits
14 • github.com/cgcardona/musehub — this platform itself, ~37 commits
15 Every real git commit is converted to a Muse commit with the original
16 author, timestamp, message, and parent DAG. HEAD file trees are stored
17 as content-addressed MusehubObjects on disk.
18 - Full MuseHub social layer: stars, issues, PRs, reviews, releases
19 - All repos linked via domain_id to the domain registry
20
21 Requires network access during seeding to clone the three GitHub repos.
22 If a clone fails the repo is skipped with a warning (MIDI repos are unaffected).
23
24 Run inside the container:
25 docker compose exec musehub python3 /app/scripts/seed_v2.py
26
27 Idempotent: pass --force to wipe V2 rows and re-insert.
28
29 Prerequisites:
30 - seed_musehub.py must have run first (users + base repos exist)
31 - OR pass --standalone to seed users + domains without the base data
32 """
33 from __future__ import annotations
34
35 import asyncio
36 import hashlib
37 import json
38 import os
39 import subprocess
40 import sys
41 import tempfile
42 import uuid
43 from datetime import datetime, timedelta, timezone
44 from pathlib import Path
45 from typing import Any, Callable
46
47 from sqlalchemy import text
48 from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
49 from sqlalchemy.orm import sessionmaker
50
51 # Add parent so we can import musehub.*
52 sys.path.insert(0, str(Path(__file__).parent.parent))
53
54 from musehub.config import settings
55 from musehub.db.models import AccessToken, User
56 from musehub.db.musehub_collaborator_models import MusehubCollaborator
57 from musehub.db.musehub_domain_models import MusehubDomain, MusehubDomainInstall
58 from musehub.db.musehub_label_models import (
59 MusehubIssueLabel,
60 MusehubLabel,
61 MusehubPRLabel,
62 )
63 from musehub.db.musehub_models import (
64 MusehubBranch,
65 MusehubCommit,
66 MusehubEvent,
67 MusehubFollow,
68 MusehubIssue,
69 MusehubIssueComment,
70 MusehubMilestone,
71 MusehubNotification,
72 MusehubObject,
73 MusehubPRComment,
74 MusehubPRReview,
75 MusehubProfile,
76 MusehubPullRequest,
77 MusehubReaction,
78 MusehubRelease,
79 MusehubReleaseAsset,
80 MusehubRepo,
81 MusehubStar,
82 MusehubViewEvent,
83 MusehubWatch,
84 )
85
86 # Import our MIDI generator
87 sys.path.insert(0, str(Path(__file__).parent))
88 from midi_generator import MIDI_GENERATORS
89
90 # ---------------------------------------------------------------------------
91 # Constants
92 # ---------------------------------------------------------------------------
93
# Shorthand for the UTC tzinfo used when building timestamps.
UTC = timezone.utc

# Command-line switches, detected once at import time by a plain membership
# test on sys.argv. Per the module docstring: --force wipes V2 rows and
# re-inserts them; --standalone seeds users + domains without the base data.
FORCE = "--force" in sys.argv
STANDALONE = "--standalone" in sys.argv
98
99
def _now(days: int = 0, hours: int = 0) -> datetime:
    """Return the current UTC time moved back by *days* and *hours*.

    Called with no arguments this is simply the timezone-aware "now";
    positive arguments yield a timestamp in the past.
    """
    offset = timedelta(days=days, hours=hours)
    return datetime.now(tz=UTC) - offset
102
103
def _sha(seed: str) -> str:
    """Return the SHA-256 digest of *seed* as a lowercase hex string."""
    hasher = hashlib.sha256()
    hasher.update(seed.encode())
    return hasher.hexdigest()
106
107
def _uid(seed: str) -> str:
    """Derive a deterministic UUID-formatted string from *seed*.

    The 128-bit MD5 digest of the seed is used verbatim as the UUID value,
    so equal seeds always map to equal IDs. MD5 acts purely as a stable
    fingerprint here, not as a security measure.
    """
    fingerprint = hashlib.md5(seed.encode()).hexdigest()
    return str(uuid.UUID(hex=fingerprint))
110
111
112 # ---------------------------------------------------------------------------
113 # Stable IDs — never change between re-seeds so URLs stay valid
114 # ---------------------------------------------------------------------------
115
116 # Domain IDs
DOMAIN_MIDI = "domain-midi-cgcardona-0001"
DOMAIN_CODE = "domain-code-cgcardona-0001"

# Users (reuse from seed_musehub.py — must already exist unless --standalone)
GABRIEL = "user-gabriel-001"
SOFIA = "user-sofia-002"
MARCUS = "user-marcus-003"
YUKI = "user-yuki-004"
AALIYA = "user-aaliya-005"
CHEN = "user-chen-006"
FATOU = "user-fatou-007"
PIERRE = "user-pierre-008"

# Historical composer users (also from base seed)
BACH = "user-bach-000000009"
CHOPIN = "user-chopin-00000010"
BEETHOVEN = "user-beethoven-000014"
SATIE = "user-satie-00000017"

# V2 MIDI showcase repos
REPO_V2_WTC = "repo-v2-wtc-prelude-001"
REPO_V2_GYMNO = "repo-v2-gymnopedie-0001"
REPO_V2_NOCTURNE = "repo-v2-nocturne-00001"
REPO_V2_MOONLIGHT = "repo-v2-moonlight-0001"
REPO_V2_NEO_SOUL = "repo-v2-neo-soul-00001"
REPO_V2_MODAL = "repo-v2-modal-jazz-0001"
REPO_V2_AFRO = "repo-v2-afrobeat-00001"
REPO_V2_CHANSON = "repo-v2-chanson-00001"

# V2 Code domain repos — real GitHub repos ported to Muse.
# Commit counts below match the max_commits settings in CODE_REPOS.
REPO_V2_MUSE = "repo-v2-muse-vcs-00001"  # cgcardona/muse (all ~198 commits)
REPO_V2_AGENTCEPTION = "repo-v2-agentcept-00001"  # cgcardona/agentception (first 250 of 1175 commits)
REPO_V2_MUSEHUB_SRC = "repo-v2-musehub-src-001"  # cgcardona/musehub (all ~37 commits)

# Handles of the eight contributor users; these match the author names used
# in the _MIDI_COMMITS templates.
ALL_CONTRIBUTORS = [
    "gabriel", "sofia", "marcus", "yuki", "aaliya", "chen", "fatou", "pierre",
]
154
155 # ---------------------------------------------------------------------------
156 # Domain capability definitions
157 # ---------------------------------------------------------------------------
158
# Capability manifest for the MIDI domain: 21 dimensions plus viewer, merge,
# artifact and CLI metadata.
_MIDI_CAPS = {
    # Every row below is expanded into a dict with the keys
    # id / name / description / unit / token, in exactly that order.
    "dimensions": [
        dict(zip(("id", "name", "description", "unit", "token"), row))
        for row in (
            ("harmonic", "Harmonic", "Pitch classes, chord progressions, key areas", "cents", "--dim-a"),
            ("rhythmic", "Rhythmic", "Note timing, groove, quantisation", "ms", "--dim-b"),
            ("melodic", "Melodic", "Melodic contour, intervals, ornaments", "semitones", "--dim-c"),
            ("structural", "Structural", "Section boundaries, form, repetition", "bars", "--dim-d"),
            ("dynamic", "Dynamic", "Velocity, expression, articulation", "MIDI vel", "--dim-e"),
            ("timbral", "Timbral", "Instrument choice, program, sound design", "GM no.", "--dim-a"),
            ("spatial", "Spatial", "Pan position, stereo field", "degrees", "--dim-b"),
            ("textural", "Textural", "Density, polyphony, voice independence", "voices", "--dim-c"),
            ("pedal", "Pedal", "Sustain, sostenuto, una corda events", "bool", "--dim-d"),
            ("microtonal", "Microtonal", "Pitch bend, microtonality, tuning", "cents", "--dim-e"),
            ("tempo", "Tempo", "BPM, rubato, tempo changes", "BPM", "--dim-a"),
            ("time_sig", "Meter", "Time signature, metric modulation", "n/d", "--dim-b"),
            ("phrase", "Phrase", "Phrase lengths, breath marks", "beats", "--dim-c"),
            ("ornament", "Ornament", "Trills, mordents, turns, grace notes", "enum", "--dim-d"),
            ("chord_type", "Chord Type", "Triad, seventh, extended, suspended", "type", "--dim-e"),
            ("voice_lead", "Voice Lead", "Smoothness, parallel motion, voice crossing", "cents", "--dim-a"),
            ("modulation", "Modulation", "Key change, pivot chord, tonicisation", "keys", "--dim-b"),
            ("register", "Register", "Pitch range, tessitura", "MIDI range", "--dim-c"),
            ("counterpoint", "Counterpoint", "Independence of voices, imitation, canon", "score", "--dim-d"),
            ("form", "Form", "ABA, rondo, sonata, through-composed", "enum", "--dim-e"),
            ("genre", "Genre", "Style tag: jazz, baroque, ambient, afrobeat", "tag", "--dim-a"),
        )
    ],
    "viewer_type": "piano_roll",
    "merge_semantics": "ot",
    "artifact_types": ["audio/midi", "audio/mpeg", "image/webp"],
    "supported_commands": [
        "muse piano-roll",
        "muse listen",
        "muse arrange",
        "muse analyze",
        "muse groove-check",
        "muse export midi",
        "muse export mp3",
        "muse diff --dim harmonic",
        "muse render",
        "muse session start",
    ],
    "cli_help": "Muse MIDI domain — 21-dimensional musical state versioning",
}
194
# Capability manifest for the Code domain: 10 dimensions, 10 supported
# languages, plus viewer, merge, artifact and CLI metadata.
_CODE_CAPS = {
    # Every row below is expanded into a dict with the keys
    # id / name / description / unit / token, in exactly that order.
    "dimensions": [
        dict(zip(("id", "name", "description", "unit", "token"), row))
        for row in (
            ("syntax", "Syntax", "AST structure, token diff", "nodes", "--dim-a"),
            ("semantics", "Semantics", "Type correctness, binding, scope", "errors", "--dim-b"),
            ("types", "Types", "Type signatures, generics, inference", "types", "--dim-c"),
            ("tests", "Tests", "Test coverage, assertions, pass rate", "%", "--dim-d"),
            ("docs", "Docs", "Docstring coverage, API docs", "%", "--dim-e"),
            ("complexity", "Complexity", "Cyclomatic complexity, depth", "score", "--dim-a"),
            ("deps", "Deps", "Import graph, dependency count", "edges", "--dim-b"),
            ("security", "Security", "Vulnerability scan, CVE score", "CVE", "--dim-c"),
            ("perf", "Perf", "Runtime benchmarks, big-O", "ms", "--dim-d"),
            ("style", "Style", "Linter score, format compliance", "score", "--dim-e"),
        )
    ],
    "viewer_type": "symbol_graph",
    "merge_semantics": "three_way",
    "artifact_types": [
        "text/plain",
        "application/json",
        "text/x-python",
        "text/typescript",
        "text/x-rustsrc",
    ],
    "supported_commands": [
        "muse diff --dim syntax",
        "muse diff --dim types",
        "muse analyze --lang python",
        "muse graph",
        "muse test-coverage",
        "muse lint",
        "muse security-scan",
        "muse perf-profile",
    ],
    "supported_languages": [
        "python",
        "typescript",
        "javascript",
        "rust",
        "go",
        "java",
        "c",
        "cpp",
        "ruby",
        "swift",
    ],
    "cli_help": "Muse Code domain — multi-dimensional symbol-graph versioning",
}
223
224
225 # ---------------------------------------------------------------------------
226 # MIDI repo definitions
227 # ---------------------------------------------------------------------------
228
# One dict per MIDI showcase repo. Every entry carries the same keys:
#   repo_id / owner / slug / owner_user_id / name / visibility — identity
#   star_count / fork_count / days_ago                        — display numbers
#   description / tags / domain_meta                          — descriptive metadata
#   midi_key / midi_files — midi_files is a list of (filename, key) pairs.
# NOTE(review): the keys presumably index MIDI_GENERATORS, and multi-file
# repos reuse one key for every file — confirm the generator yields
# per-track output for each filename.
MIDI_REPOS: list[dict[str, Any]] = [
    # --- Public-domain classical pieces ---
    dict(
        repo_id=REPO_V2_WTC, owner="bach", slug="wtc-prelude-v2",
        owner_user_id=BACH, name="Well-Tempered Clavier — Prelude No.1",
        visibility="public", star_count=142, fork_count=18, days_ago=400,
        description=(
            "Bach's Prelude No. 1 in C major from The Well-Tempered Clavier (BWV 846). "
            "The definitive demonstration of equal temperament: 35 bars of pure arpeggiated harmony. "
            "Full 4/4 at 72 BPM — rendered from Urtext (Public Domain)."
        ),
        tags=["genre:baroque", "key:C", "instrument:piano", "stage:released", "emotion:serene", "complexity:high"],
        domain_meta={"key_signature": "C major", "tempo_bpm": 72, "time_signature": "4/4",
                     "composer": "J.S. Bach", "opus": "BWV 846", "period": "Baroque"},
        midi_key="wtc_prelude_c",
        midi_files=[("piano.mid", "wtc_prelude_c")],
    ),
    dict(
        repo_id=REPO_V2_GYMNO, owner="satie", slug="gymnopedie-no1",
        owner_user_id=SATIE, name="Gymnopédie No. 1",
        visibility="public", star_count=238, fork_count=31, days_ago=380,
        description=(
            "Erik Satie's most iconic work. D major, 3/4 at 52 BPM — 'Lent et douloureux'. "
            "Floating waltz chords in the left hand, a melody that seems to hover above time. "
            "Two-track: Melody (RH) + Accompaniment (LH). Public Domain."
        ),
        tags=["genre:impressionism", "key:D", "instrument:piano", "stage:released",
              "emotion:melancholic", "emotion:tender", "complexity:medium"],
        domain_meta={"key_signature": "D major", "tempo_bpm": 52, "time_signature": "3/4",
                     "composer": "Erik Satie", "opus": "Gymnopédie No. 1", "period": "Impressionism"},
        midi_key="gymnopedie_no1",
        midi_files=[("melody.mid", "gymnopedie_no1"), ("accompaniment.mid", "gymnopedie_no1")],
    ),
    dict(
        repo_id=REPO_V2_NOCTURNE, owner="chopin", slug="nocturne-op9-no2",
        owner_user_id=CHOPIN, name="Nocturne Op. 9 No. 2 in Eb major",
        visibility="public", star_count=195, fork_count=22, days_ago=350,
        description=(
            "Chopin's most celebrated nocturne. Eb major, 12/8 at 66 BPM. "
            "Wide arpeggiated left hand over a singing cantabile melody. "
            "Ornate RH rubato throughout. Two-track. Public Domain."
        ),
        tags=["genre:romantic", "key:Eb", "instrument:piano", "stage:released",
              "emotion:tender", "emotion:melancholic", "complexity:high"],
        domain_meta={"key_signature": "Eb major", "tempo_bpm": 66, "time_signature": "12/8",
                     "composer": "Frédéric Chopin", "opus": "Op. 9 No. 2", "period": "Romantic"},
        midi_key="chopin_nocturne_op9",
        midi_files=[("melody.mid", "chopin_nocturne_op9"), ("accompaniment.mid", "chopin_nocturne_op9")],
    ),
    dict(
        repo_id=REPO_V2_MOONLIGHT, owner="beethoven", slug="moonlight-sonata-mvt1",
        owner_user_id=BEETHOVEN, name="Moonlight Sonata — Mvt. I (Adagio)",
        visibility="public", star_count=312, fork_count=45, days_ago=360,
        description=(
            "Beethoven's 'Moonlight' Sonata, Op. 27 No. 2, Mvt. I — Adagio sostenuto. "
            "C# minor, 4/4 at 54 BPM. The iconic triplet arpeggios emerge from silence. "
            "Melody floats above the churning accompaniment. Public Domain."
        ),
        tags=["genre:classical", "key:C# minor", "instrument:piano", "stage:released",
              "emotion:melancholic", "emotion:mysterious", "complexity:high"],
        domain_meta={"key_signature": "C# minor", "tempo_bpm": 54, "time_signature": "4/4",
                     "composer": "Ludwig van Beethoven", "opus": "Op. 27 No. 2", "period": "Classical"},
        midi_key="moonlight_mvt1",
        midi_files=[("melody.mid", "moonlight_mvt1"), ("triplet_arpeggio.mid", "moonlight_mvt1")],
    ),
    # --- Original compositions owned by contributor users ---
    dict(
        repo_id=REPO_V2_NEO_SOUL, owner="gabriel", slug="neo-soul-groove-v2",
        owner_user_id=GABRIEL, name="Neo-Soul Groove in F# minor",
        visibility="public", star_count=87, fork_count=11, days_ago=45,
        description=(
            "Original neo-soul composition in F# minor at 92 BPM. "
            "Three-track: Rhodes comping with lush 9th voicings, syncopated bassline, "
            "and a pocket-groove drum pattern. 16 bars. Domain-native original."
        ),
        tags=["genre:neo-soul", "key:F# minor", "instrument:rhodes", "instrument:bass",
              "instrument:drums", "stage:draft", "emotion:tender", "complexity:medium"],
        domain_meta={"key_signature": "F# minor", "tempo_bpm": 92, "time_signature": "4/4",
                     "composer": "gabriel", "tracks": 3},
        midi_key="neo_soul",
        midi_files=[("rhodes.mid", "neo_soul"), ("bass.mid", "neo_soul"), ("drums.mid", "neo_soul")],
    ),
    dict(
        repo_id=REPO_V2_MODAL, owner="marcus", slug="modal-jazz-sketch",
        owner_user_id=MARCUS, name="Modal Jazz Sketch in D Dorian",
        visibility="public", star_count=63, fork_count=8, days_ago=30,
        description=(
            "Original modal jazz sketch in D Dorian at 120 BPM. "
            "Piano shell voicings (3rd + 7th), walking bass, brushed snare. "
            "12 bars. Classic Coltrane/Miles modal approach."
        ),
        tags=["genre:jazz", "key:D Dorian", "instrument:piano", "instrument:bass",
              "instrument:drums", "stage:wip", "emotion:complex", "complexity:medium"],
        domain_meta={"key_signature": "D Dorian", "tempo_bpm": 120, "time_signature": "4/4",
                     "composer": "marcus", "tracks": 3},
        midi_key="modal_jazz",
        midi_files=[("piano.mid", "modal_jazz"), ("bass.mid", "modal_jazz"), ("drums.mid", "modal_jazz")],
    ),
    dict(
        repo_id=REPO_V2_AFRO, owner="aaliya", slug="afrobeat-pulse-g",
        owner_user_id=AALIYA, name="Afrobeat Pulse in G major",
        visibility="public", star_count=74, fork_count=9, days_ago=20,
        description=(
            "Original afrobeat groove in G major, 12/8 at 120 BPM. "
            "Interlocking piano offbeats, anchored bass, and a djembe pattern "
            "drawing from West African traditions. 8 bars, 3 tracks."
        ),
        tags=["genre:afrobeat", "key:G", "instrument:piano", "instrument:djembe",
              "instrument:bass", "stage:draft", "emotion:energetic", "complexity:medium"],
        domain_meta={"key_signature": "G major", "tempo_bpm": 120, "time_signature": "12/8",
                     "composer": "aaliya", "tracks": 3},
        midi_key="afrobeat",
        midi_files=[("piano.mid", "afrobeat"), ("bass.mid", "afrobeat"), ("djembe.mid", "afrobeat")],
    ),
    dict(
        repo_id=REPO_V2_CHANSON, owner="pierre", slug="chanson-minimale-v2",
        owner_user_id=PIERRE, name="Chanson Minimale in A major",
        visibility="public", star_count=29, fork_count=3, days_ago=14,
        description=(
            "Original chanson minimale in A major, 3/4 at 52 BPM. "
            "Waltz LH ostinato, folk-like RH melody. Solo piano, 48 bars. "
            "Satie would approve."
        ),
        tags=["genre:chanson", "key:A", "instrument:piano", "stage:wip",
              "emotion:tender", "complexity:low"],
        domain_meta={"key_signature": "A major", "tempo_bpm": 52, "time_signature": "3/4",
                     "composer": "pierre", "tracks": 1},
        midi_key="chanson",
        midi_files=[("piano.mid", "chanson")],
    ),
]
358
359 # ---------------------------------------------------------------------------
360 # Code repo definitions — real GitHub repos, ported to the Muse code domain.
361 # Each entry includes a github_url; the seeder clones it, walks the full git
362 # DAG, and converts every commit + HEAD file tree to Muse objects/commits.
363 # max_commits caps the import for very large repos.
364 # ---------------------------------------------------------------------------
365
# Entries share the identity/display/metadata keys used by MIDI_REPOS, but
# replace midi_key/midi_files with:
#   github_url  — clone source for the real repository
#   max_commits — import cap; None means no cap (the whole history)
CODE_REPOS: list[dict[str, Any]] = [
    dict(
        repo_id=REPO_V2_MUSE,
        owner="gabriel", slug="muse",
        owner_user_id=GABRIEL,
        name="Muse — domain-agnostic VCS engine",
        visibility="public", star_count=214, fork_count=19, days_ago=180,
        description=(
            "A domain-agnostic version control system for multidimensional state. "
            "Plugin architecture: snapshot, diff, merge, drift, apply, schema. "
            "MIDI (21 dimensions) and Code (10 languages) are the reference implementations. "
            "CRDT mode enables convergent multi-agent writes with no conflict state. "
            "Ported from github.com/cgcardona/muse — full git history converted to Muse."
        ),
        tags=["python", "vcs", "domain-agnostic", "crdt", "midi", "code", "agents", "paradigm-shift"],
        domain_meta={
            "primary_language": "python", "license": "proprietary",
            "test_framework": "pytest", "python_version": "3.14",
            "source_repo": "github.com/cgcardona/muse",
        },
        github_url="https://github.com/cgcardona/muse",
        max_commits=None,  # import all ~198 commits
    ),
    dict(
        repo_id=REPO_V2_AGENTCEPTION,
        owner="gabriel", slug="agentception",
        owner_user_id=GABRIEL,
        name="AgentCeption — multi-agent orchestration",
        visibility="public", star_count=312, fork_count=47, days_ago=240,
        description=(
            "Multi-agent orchestration system for AI-powered development workflows. "
            "Brain dump → structured plan → GitHub issues → agent org tree → PRs → merged. "
            "Each agent has a cognitive architecture (historical figures + archetypes + skill domains). "
            "Supports Anthropic Claude and any Ollama-compatible local model. "
            "Ported from github.com/cgcardona/agentception — first 250 commits of 1175."
        ),
        tags=["python", "typescript", "agents", "orchestration", "fastapi", "htmx", "ai", "llm"],
        domain_meta={
            "primary_language": "python", "secondary_language": "typescript",
            "license": "MIT", "test_framework": "pytest",
            "source_repo": "github.com/cgcardona/agentception",
        },
        github_url="https://github.com/cgcardona/agentception",
        max_commits=250,  # sample of the full 1175-commit history
    ),
    dict(
        repo_id=REPO_V2_MUSEHUB_SRC,
        owner="gabriel", slug="musehub",
        owner_user_id=GABRIEL,
        name="MuseHub — the platform (self-hosted)",
        visibility="public", star_count=89, fork_count=12, days_ago=90,
        description=(
            "MuseHub itself — the domain-agnostic collaboration platform powered by Muse VCS. "
            "FastAPI + SQLAlchemy + Jinja2 + HTMX + TypeScript. "
            "Full MCP 2025-11-25 implementation: 32 tools, 20 resources, 8 prompts. "
            "This very codebase, versioned on itself using the Muse code domain. "
            "Ported from github.com/cgcardona/musehub — full git history."
        ),
        tags=["python", "typescript", "fastapi", "htmx", "mcp", "vcs", "platform", "meta"],
        domain_meta={
            "primary_language": "python", "secondary_language": "typescript",
            "license": "proprietary", "test_framework": "pytest",
            "source_repo": "github.com/cgcardona/musehub",
        },
        github_url="https://github.com/cgcardona/musehub",
        max_commits=None,  # import all ~37 commits
    ),
]
434
435 # ---------------------------------------------------------------------------
436 # Commit message templates
437 # ---------------------------------------------------------------------------
438
# Maps each MIDI repo ID to its ordered list of (commit message, author handle)
# pairs. Author handles are drawn from the names in ALL_CONTRIBUTORS.
# NOTE(review): list order presumably is oldest-first (each list opens with an
# "init:" message) — confirm against the code that consumes this table.
_MIDI_COMMITS: dict[str, list[tuple[str, str]]] = {
    REPO_V2_WTC: [
        ("init: Bach WTC Prelude No. 1 in C major — 4/4 at 72 BPM", "gabriel"),
        ("feat(harmony): arpeggiated C major — 16 sixteenth notes per bar", "gabriel"),
        ("feat(bars-1-8): complete first 8 bars — chord progression established", "gabriel"),
        ("feat(bars-9-16): Dm7 and G7 approaches — circle-of-5ths descent", "sofia"),
        ("feat(bars-17-24): secondary dominant seventh chain", "gabriel"),
        ("refactor(velocity): humanize 16th note accents — beat 1 +12vel", "gabriel"),
        ("feat(bars-25-35): climactic progression and final C major resolution", "chen"),
        ("fix(bar-3): correct Bdim7 chord voicing — was missing the B1 bass", "gabriel"),
        ("feat(pedal): add sustain pedal changes every 2 bars for resonance", "sofia"),
        ("refactor(tempo): adjust to 72 BPM from 80 — more stately", "pierre"),
        ("feat(dynamics): pp opening, gradual mf crescendo to bar 20", "gabriel"),
        ("feat(ornamentation): add grace notes to inner voice bar 14", "sofia"),
        ("fix(bar-24): V7 resolution voice-leading — remove parallel octaves", "gabriel"),
        ("refactor(phrasing): 4-bar groupings marked with diminuendo signs", "chen"),
        ("feat(tag): v1.0 — canonical WTC Prelude release", "gabriel"),
        ("feat(transposition): add Ab major variant — bars 1-12 only", "sofia"),
        ("fix(ab-variant): correct treble clef note B♭4→A♭4 in bar 3", "gabriel"),
        ("feat(analysis): add harmonic Roman-numeral annotations in metadata", "yuki"),
        ("refactor(notation): normalize all note durations to nearest 1/64", "gabriel"),
        ("feat(v2): Book I Prelude — second encoding pass from Bärenreiter", "pierre"),
        ("fix(v2): correct bar 23 chord — was Am/C, should be F/C", "gabriel"),
        ("feat(visualization): add metadata for piano roll colour-by-dimension", "gabriel"),
        ("refactor(merge): incorporate sofia's dynamics into main branch", "gabriel"),
        ("fix(ledger-lines): correct MIDI pitch for bass voice in bars 29-30", "chen"),
        ("feat(release-v2): final reviewed version — bar 35 fermata included", "gabriel"),
    ],
    REPO_V2_GYMNO: [
        ("init: Gymnopédie No. 1 — D major, 3/4, 52 BPM (Lent et douloureux)", "pierre"),
        ("feat(lh): waltz chord pattern — D major root / G major on beat 2-3", "pierre"),
        ("feat(rh): opening melody — pickup A4 into bar 2 E♭4 descent", "sofia"),
        ("feat(bars-1-8): first phrase — gentle rise and fall on A4", "pierre"),
        ("refactor(velocity): reduce chord loudness — melody must sing above", "pierre"),
        ("feat(bars-9-16): second phrase — higher register, slight variation", "sofia"),
        ("feat(bars-17-24): development phrase — mode inflection on bar 21", "pierre"),
        ("fix(bar-5): correct LH — G major chord, was seeding Gm", "pierre"),
        ("feat(bars-25-32): restatement — pppp, dissolving into silence", "pierre"),
        ("refactor(articulation): add tenuto marks on RH long notes", "pierre"),
        ("feat(arpeggio-roll): LH chord rolls — 8ms stagger between voices", "sofia"),
        ("fix(bar-20): melody note — F#4 was missing in RH track", "pierre"),
        ("feat(pedal): una corda from bar 25 onward — ppp texture", "pierre"),
        ("refactor(dynamics): mark mm.1-8 piano, mm.9-24 mp, mm.25-32 ppp", "sofia"),
        ("feat(v1.0): first complete version — 32 bars fully rendered", "pierre"),
        ("feat(rubato-marks): add tempo flexibility annotations to metadata", "gabriel"),
        ("feat(orchestration): add strings variant — melody doubled by flute", "chen"),
        ("fix(strings-var): correct flute range — moved octave up from bar 12", "chen"),
        ("feat(orchestra): add harp arpeggio in strings arrangement", "sofia"),
        ("refactor(strings): balance strings against piano in arrangement", "pierre"),
        ("feat(tag-v2): v2.0 — orchestrated version complete", "pierre"),
    ],
    REPO_V2_NOCTURNE: [
        ("init: Nocturne Op. 9 No. 2 — Eb major, 12/8, 66 BPM", "aaliya"),
        ("feat(lh): arpeggiated Eb major chord — 8 notes per beat", "aaliya"),
        ("feat(rh): iconic opening melody — bars 1-4 established", "aaliya"),
        ("feat(lh-bass): wide-span bass — Eb2 to upper chord tones", "gabriel"),
        ("feat(bars-5-8): ornate RH variations — turns and mordents added", "aaliya"),
        ("refactor(lh-roll): humanize chord roll — random 5-15ms stagger", "aaliya"),
        ("feat(bars-9-12): closing statement — pppp fade", "aaliya"),
        ("fix(bar-6): melody note D♭ was missing F at top of chord", "gabriel"),
        ("feat(dynamics): pp opening, cresc to mf at bar 5 climax", "aaliya"),
        ("refactor(tempo): ritardando in last 2 bars — molto rubato", "pierre"),
        ("feat(ornamentation): trills on bar 2 beat 3 — 6-note trill", "sofia"),
        ("fix(ornament): trill starts on upper note per Chopin convention", "aaliya"),
        ("feat(pedal): broad sustain throughout — clearing only at LH change", "aaliya"),
        ("feat(tag-v1): Op. 9 No. 2 — v1.0 complete", "gabriel"),
        ("feat(extended): extend to Op. 9 No. 1 — Bb minor added", "aaliya"),
        ("fix(ext): Bb minor LH voicing corrected — was Bb3 not Bb2", "aaliya"),
        ("refactor(extended): unify dynamics across both nocturnes", "sofia"),
        ("feat(tag-v2): Nocturnes v2.0 — two nocturnes complete", "aaliya"),
    ],
    REPO_V2_MOONLIGHT: [
        ("init: Moonlight Sonata Mvt. I — C# minor, 4/4, 54 BPM (Adagio)", "sofia"),
        ("feat(lh): C# minor triplet arpeggio — 12 triplet 8ths per bar", "gabriel"),
        ("feat(rh): bare RH entry on bar 4 — long sustained Cs4+12", "sofia"),
        ("feat(bars-1-4): atmospheric opening — arpeggio only, no melody", "gabriel"),
        ("feat(bars-5-8): melody enters — F# minor region", "sofia"),
        ("refactor(lh): even triplet spacing — 1/3 beat per note", "gabriel"),
        ("feat(bars-9-12): A major colouring — shift to relative brightness", "sofia"),
        ("feat(bars-13-16): final resolution back to C# minor", "gabriel"),
        ("fix(bar-7): LH pitch — C#dim7 was missing the G#", "sofia"),
        ("feat(dynamics): ppp opening, gradual swell to mf at bar 9", "gabriel"),
        ("feat(pedal): sostenuto indication throughout — Beethoven instruction", "pierre"),
        ("refactor(tempo): Adagio quarter = 54 confirmed — no rallentando", "sofia"),
        ("fix(bar-13): RH Cs4+12 held note — was cut short by 1 beat", "gabriel"),
        ("feat(ornament): accent marks on triplet beat 1 of each bar", "sofia"),
        ("feat(tag-v1): Moonlight Mvt. I — v1.0 complete (16 bars)", "gabriel"),
        ("feat(mvt2): Mvt. II (Allegretto) — 8 bars stub added", "sofia"),
        ("feat(mvt2): Mvt. II melody — Ab major, graceful dance character", "sofia"),
        ("fix(mvt2): Allegretto tempo marker — set to 100 BPM not 60", "gabriel"),
        ("feat(tag-v2): v2.0 — two movements complete", "sofia"),
        ("refactor(unified): merge Mvt. I and II into single multi-track file", "gabriel"),
    ],
    REPO_V2_NEO_SOUL: [
        ("init: Neo-Soul Groove in F# minor — 92 BPM, 4/4", "gabriel"),
        ("feat(rhodes): F# minor 9 shell voicings — rootless A-C#-E-G#", "gabriel"),
        ("feat(bass): syncopated bassline — root-to-5th-to-octave movement", "gabriel"),
        ("feat(drums): kick-snare-hat pocket groove — ghost notes on snare", "gabriel"),
        ("feat(comp): offbeat chord stabs — beats 2.5 and 3.75", "marcus"),
        ("refactor(rhodes): velocity humanization — ±12 vel on beat 1", "gabriel"),
        ("feat(bars-5-8): harmonic shift — A major colour for 2 bars", "gabriel"),
        ("feat(bass-fill): 16th note fill into bar 9 — chromatic approach", "marcus"),
        ("fix(drums): hi-hat pattern — was double-hitting on beat 3.5", "gabriel"),
        ("feat(dynamics): verse pp → chorus mf build over 4 bars", "gabriel"),
        ("feat(bars-9-12): D major escape chord — neo-soul characteristic", "marcus"),
        ("refactor(bass): add sub-bass doublings on root notes", "gabriel"),
        ("feat(bars-13-16): return to F# minor — resolution and outro", "gabriel"),
        ("fix(outro): rhodes chord on bar 15 — was E minor, corrected to Cs", "gabriel"),
        ("feat(v1): 16-bar loop complete — groove locked", "gabriel"),
    ],
    REPO_V2_MODAL: [
        ("init: Modal Jazz in D Dorian — 120 BPM, shell voicings", "marcus"),
        ("feat(piano): Dm7 shell — F + C as 3rd and 7th, no root", "marcus"),
        ("feat(bass): walking bass — D2-E2-F2-G2 ascending Dorian", "marcus"),
        ("feat(drums): brushed snare ride pattern — jazz feel", "marcus"),
        ("feat(comp): reharmonize bar 3 — Gm7sus for colour", "gabriel"),
        ("refactor(piano): vary voicings each 4 bars — avoid static texture", "marcus"),
        ("feat(bars-5-8): add Am7 colour — vi degree of Dorian", "marcus"),
        ("fix(bass): bar 6 walking line was chromatically wrong — corrected", "marcus"),
        ("feat(bars-9-12): climax — higher register piano voicings", "gabriel"),
        ("refactor(drums): add ride cymbal pings on beats 2 and 4", "marcus"),
        ("fix(piano): bar 11 top voice — Bb was out of Dorian mode", "marcus"),
        ("feat(v1): 12-bar modal sketch complete", "gabriel"),
    ],
    REPO_V2_AFRO: [
        ("init: Afrobeat Pulse in G major — 12/8, 120 BPM", "aaliya"),
        ("feat(piano): offbeat chord stabs on .5 of each dotted-quarter beat", "aaliya"),
        ("feat(bass): one-drop bass — G2 on beat 1, D3 on beat 1.5", "fatou"),
        ("feat(djembe): bass tone-slap-tone-slap West African pattern", "fatou"),
        ("refactor(piano): interlocking — must NOT clash with bass rhythm", "aaliya"),
        ("feat(bars-3-4): add Ab colour chord — Lagos jazz inflection", "aaliya"),
        ("fix(djembe): slap note velocity — was too quiet at 40, boosted to 80", "fatou"),
        ("feat(bars-5-8): call-and-response between piano and djembe", "aaliya"),
        ("refactor(bass): add octave jump at bar 5 — huge groove moment", "fatou"),
        ("fix(piano): bar 6 chord — F# was wrong note for G Dorian context", "aaliya"),
        ("feat(v1): 8-bar loop ready for extension", "gabriel"),
    ],
    REPO_V2_CHANSON: [
        ("init: Chanson Minimale — A major, 3/4, 52 BPM", "pierre"),
        ("feat(lh): waltz ostinato — A2-E3-A3 arpeggiated pattern", "pierre"),
        ("feat(rh): folk melody opening — E4 to Cs4 descent", "pierre"),
        ("feat(bars-1-8): first verse phrase — simple, direct", "pierre"),
        ("refactor(melody): add passing tones — Cs4→B3 smooth leading", "pierre"),
        ("feat(bars-9-16): development — reach up to A4 climax", "pierre"),
        ("fix(bar-12): RH note was Bb4 — should be A4 in A major", "pierre"),
        ("feat(bars-17-24): secondary phrase — rhythmic augmentation", "sofia"),
        ("feat(bars-25-32): return to opening — now marked ppp", "pierre"),
        ("refactor(lh): reduce LH velocity in bars 25-32 to pppp", "pierre"),
        ("feat(coda): sustained A3 chord — six bars, fade to silence", "pierre"),
        ("fix(coda): chord sustain was 2 bars — extended to 6 per intent", "pierre"),
        ("feat(v1): solo piano version complete — 48 bars", "pierre"),
    ],
}
592
593 # ---------------------------------------------------------------------------
594 # Issue templates per repo
595 # ---------------------------------------------------------------------------
596
# Seed issues, keyed by repo ID constant. Each entry is a dict with:
#   "title"  — issue title
#   "body"   — issue description text
#   "state"  — "open" or "closed"
#   "labels" — list of label strings
# The issue insert in seed() assigns issue numbers from list position (index + 1)
# and writes title/body/state; it does not write "labels" in that statement.
# Repos with no entry here simply get no seeded issues (looked up via .get()).
_ISSUES: dict[str, list[dict[str, Any]]] = {
    REPO_V2_WTC: [
        {"title": "Add Book II preludes (BWV 870-893)", "body": "Currently only Book I is encoded. Book II should be added systematically.", "state": "open", "labels": ["enhancement"]},
        {"title": "Incorrect trill in Prelude No. 7 bar 3", "body": "The ornament should be a mordent (lower), not a trill starting from above.", "state": "closed", "labels": ["bug"]},
        {"title": "Add MIDI velocity curve analysis", "body": "The dynamic profile across 35 bars could be visualized as a dimension plot.", "state": "open", "labels": ["enhancement", "analysis"]},
        {"title": "Export to MusicXML format", "body": "Some users need MusicXML for notation software import.", "state": "open", "labels": ["feature-request"]},
        {"title": "Parallel octaves in Fugue No. 5 bar 14", "body": "Voice-leading violation in the subject entry needs correction.", "state": "closed", "labels": ["bug"]},
    ],
    REPO_V2_GYMNO: [
        {"title": "Add Gymnopédie No. 2 and No. 3", "body": "Complete the set — No. 2 in C major and No. 3 in G major.", "state": "open", "labels": ["enhancement"]},
        {"title": "Slow the tempo slightly — 48 BPM feels more authentic", "body": "Period recordings by Reinbert de Leeuw suggest 48-50 BPM.", "state": "open", "labels": ["discussion"]},
        {"title": "Staccato on LH beat 1 breaking waltz feel", "body": "Beat 1 should be legatissimo, not staccato — reverting.", "state": "closed", "labels": ["bug"]},
        {"title": "Add orchestral arrangement by Debussy", "body": "Debussy famously orchestrated Gymnopédie 1 and 3 — add as variant.", "state": "open", "labels": ["enhancement"]},
    ],
    REPO_V2_NEO_SOUL: [
        {"title": "Extend to 32 bars with bridge section", "body": "The groove needs a contrasting B-section — consider D major area.", "state": "open", "labels": ["enhancement"]},
        {"title": "Add percussion variations — hi-hat pattern too repetitive", "body": "Bar 4 and 8 could have open hi-hat fills for variety.", "state": "open", "labels": ["enhancement"]},
        {"title": "Rhodes voicing clashes with bass on bar 9", "body": "The A3 in the chord conflicts with the A2 bass note — revoice.", "state": "closed", "labels": ["bug"]},
        {"title": "Export stems as individual tracks", "body": "Need separate MIDI files per instrument for DAW import.", "state": "open", "labels": ["feature-request"]},
        {"title": "BPM feels rushed — try 88 BPM", "body": "Classic neo-soul sits at 85-90 BPM. 92 is slightly fast for this groove.", "state": "open", "labels": ["discussion"]},
    ],
    REPO_V2_MUSE: [
        {"title": "Add WebAssembly domain plugin", "body": "A Wasm plugin would let browser-native state versioning work without a server round-trip. The plugin interface is clean enough to support this.", "state": "open", "labels": ["enhancement", "performance"]},
        {"title": "MIDI merge conflict on simultaneous pitch-bend edits in same track", "body": "When two branches each modify pitch_bend in the same bar, the three-way merge should use the OT delta algebra, but currently falls back to 'ours'. Steps to reproduce: muse checkout -b a; edit pitch bend bar 4; muse checkout main; edit pitch bend bar 4 differently; muse merge a.", "state": "open", "labels": ["bug"]},
        {"title": "muse log --graph rendering breaks at 100+ commits", "body": "The ASCII graph renderer overflows the terminal at wide histories. This is a known issue in the graph module.", "state": "closed", "labels": ["bug"]},
        {"title": "CRDT join is O(n²) for large note sequences", "body": "RGA merge in _crdt_notes.py iterates the full tombstone list for every insert. Should switch to a skip-list or B-tree for the tombstone index.", "state": "open", "labels": ["performance"]},
        {"title": "Add support for ABC music notation format as a third domain", "body": "ABC notation is widely used in folk music. A domain plugin would let folk musicians use Muse.", "state": "open", "labels": ["feature-request"]},
        {"title": "muse revert fails when parent commit has a CRDT merge", "body": "Reverting a commit that was produced by a CRDT join panics in the merge engine because the inverse delta cannot be computed for tombstoned notes.", "state": "closed", "labels": ["bug"]},
        {"title": "Document the MuseDomainPlugin.schema() return type", "body": "The DomainSchema TypedDict is not fully documented — what keys are required vs optional? The plugin authoring guide is incomplete here.", "state": "open", "labels": ["docs"]},
    ],
    REPO_V2_AGENTCEPTION: [
        {"title": "Agent tree hangs when coordinator returns an empty plan", "body": "If the coordinator agent's LLM response parses to an empty PlanSpec (no phases), the orchestration loop spins indefinitely waiting for work items that never arrive.", "state": "closed", "labels": ["bug"]},
        {"title": "Mission Control board doesn't update when PR is merged outside AgentCeption", "body": "If a PR is merged manually on GitHub (not via the MC merge button), the issue card stays in PR_OPEN state indefinitely until the next polling cycle (5 min).", "state": "open", "labels": ["bug"]},
        {"title": "Add support for Claude claude-opus-4-6 as coordinator model", "body": "claude-opus-4-6 has a much larger context window which helps coordinators reason about full codebases. Should be selectable per-node in the org designer.", "state": "open", "labels": ["enhancement"]},
        {"title": "Local LLM connection drops after 30 minutes of inactivity", "body": "When using LOCAL_LLM_PROVIDER=ollama, long-running worker agents lose their HTTP connection after idle timeout. Should retry with exponential backoff.", "state": "open", "labels": ["bug", "local-llm"]},
        {"title": "Worker agents don't respect .gitattributes merge strategy", "body": "Workers create PRs that sometimes contain merge conflicts in files marked with 'merge=ours' in .gitattributes. The worktree setup doesn't configure the merge driver.", "state": "closed", "labels": ["bug"]},
        {"title": "Add Muse VCS as an alternative backend to Git worktrees", "body": "Since AgentCeption and Muse are from the same author, it would be a natural fit to let agents commit to a Muse repo instead of a Git worktree. Would unlock multi-domain agent collaboration.", "state": "open", "labels": ["feature-request", "muse-integration"]},
        {"title": "Cognitive architecture presets not persisted across container restarts", "body": "Custom org presets saved in the UI are stored in memory only. They should be written to the org-presets.yaml file or to the DB.", "state": "closed", "labels": ["bug"]},
    ],
    REPO_V2_MUSEHUB_SRC: [
        {"title": "Piano roll doesn't render MIDI files larger than 500KB", "body": "The /parse-midi endpoint times out on dense orchestral MIDI. Need streaming parse or a size-gated fast path.", "state": "open", "labels": ["bug", "performance"]},
        {"title": "MCP resource musehub://trending returns 500 when no repos are starred", "body": "The trending query performs ORDER BY star_count DESC — if the result set is empty, the serializer raises AttributeError on a None row.", "state": "closed", "labels": ["bug"]},
        {"title": "Domain install count doesn't decrement on uninstall", "body": "Calling DELETE /domains/{id}/install correctly removes the MusehubDomainInstall row but never decrements install_count on the domain row.", "state": "open", "labels": ["bug"]},
        {"title": "Code domain symbol graph viewer not yet implemented", "body": "The @cgcardona/code domain declares viewer_type='symbol_graph' but the frontend only has piano_roll.html. Need a symbol_graph.html template + TypeScript renderer.", "state": "open", "labels": ["enhancement", "frontend"]},
        {"title": "Search results don't include repo descriptions in the match", "body": "Full-text search only indexes repo name and tags. Descriptions should be included in the tsvector so searching 'multidimensional' finds relevant repos.", "state": "open", "labels": ["enhancement"]},
        {"title": "elicitation tools silently fail when Mcp-Session-Id is missing", "body": "Interactive tools (compose planner, PR review) call ctx.elicit() which raises if no session exists, but the error is swallowed and the tool returns an empty response.", "state": "closed", "labels": ["bug"]},
    ],
}
645
646 # ---------------------------------------------------------------------------
647 # Git → Muse importer
648 # ---------------------------------------------------------------------------
649
650 # Source-code file extensions we store as Muse objects. Lock files, generated
651 # output, and binaries are excluded.
652 _CODE_INCLUDE_EXT = {
653 ".py", ".pyi", ".ts", ".tsx", ".js", ".jsx", ".mjs",
654 ".rs", ".go", ".rb", ".java", ".c", ".cpp", ".h", ".hpp", ".cs",
655 ".kt", ".swift", ".sh", ".bash", ".zsh",
656 ".md", ".rst", ".txt",
657 ".toml", ".yaml", ".yml", ".json", ".ini", ".cfg", ".env.example",
658 ".html", ".css", ".scss", ".sql",
659 }
660
661 _CODE_EXCLUDE_NAMES = {
662 "package-lock.json", "poetry.lock", "Cargo.lock", "yarn.lock",
663 "pnpm-lock.yaml", "Pipfile.lock", "composer.lock",
664 }
665
666 _CODE_EXCLUDE_DIRS = {
667 "node_modules", "__pycache__", ".git", "dist", "build",
668 ".venv", "venv", ".mypy_cache", ".pytest_cache", ".ruff_cache",
669 "target", # Rust build artefacts
670 }
671
672
673 def _git_code_filter(path: str) -> bool:
674 """Return True if this repo path should be stored as a Muse object."""
675 p = Path(path)
676 if p.name in _CODE_EXCLUDE_NAMES:
677 return False
678 if any(part in _CODE_EXCLUDE_DIRS for part in p.parts):
679 return False
680 return p.suffix in _CODE_INCLUDE_EXT
681
682
async def _import_github_repo(
    db: AsyncSession,
    r: dict[str, Any],
    domain_id: str,
) -> tuple[int, int]:
    """Clone a GitHub repo, walk its git DAG, and import it into Muse.

    For each git commit we create one MusehubCommit, preserving the real
    message, author name, timestamp, and parent graph.  File contents at HEAD
    and at every other branch tip are written to disk and linked as
    MusehubObjects.

    Args:
        db: Session used for all inserts; this function does not flush or commit.
        r: Repo spec.  Reads ``repo_id``, ``github_url``, ``slug`` and the
            optional ``max_commits`` cap.
        domain_id: Accepted for interface compatibility; not used in the body.

    Returns:
        ``(commits_inserted, objects_inserted)``.  ``(0, 0)`` when the clone
        fails, times out, or the commit log cannot be read — the repo is then
        skipped with a warning rather than aborting the whole seed run.
    """
    repo_id = r["repo_id"]
    github_url = r["github_url"]
    max_commits: int | None = r.get("max_commits")
    clone_timeout = 180  # seconds

    print(f" 🌐 Cloning {github_url}…", flush=True)

    with tempfile.TemporaryDirectory() as tmpdir:
        repo_path = Path(tmpdir) / "repo"

        # Shallow-ish clone for big repos, full for small ones
        depth_args = ["--depth", str(max_commits + 50)] if max_commits else []
        try:
            clone = subprocess.run(
                ["git", "clone", "--quiet", "--no-single-branch", *depth_args, github_url, str(repo_path)],
                capture_output=True, text=True, timeout=clone_timeout,
            )
        except subprocess.TimeoutExpired:
            # A slow network must skip this repo, not crash the seed run.
            print(f" ⚠️ Clone timed out after {clone_timeout}s — skipping {r['slug']}")
            return 0, 0
        except OSError as exc:
            # e.g. the git executable is not installed in the container.
            print(f" ⚠️ Clone failed ({exc}) — skipping {r['slug']}")
            return 0, 0
        if clone.returncode != 0:
            print(f" ⚠️ Clone failed ({clone.stderr[:120].strip()}) — skipping {r['slug']}")
            return 0, 0

        def _git(*args: str) -> str:
            """Run a git command in the clone; raise CalledProcessError on failure."""
            return subprocess.run(
                ["git", *args],
                cwd=repo_path, capture_output=True, text=True, check=True,
            ).stdout.strip()

        # ── Commit log (oldest → newest) + parent map ───────────────────────
        # Fields are joined with a literal U+001F (ASCII unit separator), which
        # is a safe delimiter inside commit subjects and author names.
        try:
            raw_log = _git("log", "--format=%H\x1f%an\x1f%at\x1f%s", "--all", "--reverse")
            parent_lines = _git("log", "--format=%H %P", "--all", "--reverse").split("\n")
        except subprocess.CalledProcessError as exc:
            print(f" ⚠️ git log failed ({(exc.stderr or '')[:120].strip()}) — skipping {r['slug']}")
            return 0, 0

        commits_raw = [entry for entry in raw_log.split("\n") if entry.strip()]
        if max_commits:
            # NOTE(review): with --reverse this keeps the *oldest* max_commits
            # entries, so branch tips may fall outside the imported range and
            # fall back to the last imported commit below — confirm intent.
            commits_raw = commits_raw[:max_commits]

        parent_map: dict[str, list[str]] = {}
        for line in parent_lines:
            parts = line.strip().split()
            if parts:
                parent_map[parts[0]] = parts[1:]

        # ── Branch names + commit-to-branch map ─────────────────────────────
        # Use full ref names so that stripping the prefix is unambiguous and
        # the symbolic origin/HEAD pointer can be dropped by exact match
        # (a substring test on "HEAD" would also drop real branches).
        refs_raw = _git(
            "for-each-ref", "--format=%(refname)", "refs/heads", "refs/remotes/origin",
        ).split("\n")
        branch_names_set: set[str] = set()
        for full_ref in refs_raw:
            full_ref = full_ref.strip()
            for prefix in ("refs/heads/", "refs/remotes/origin/"):
                if full_ref.startswith(prefix):
                    short = full_ref[len(prefix):]
                    if short and short != "HEAD":
                        branch_names_set.add(short)
                    break
        # Sorted so the import is deterministic from run to run.
        branch_names = sorted(branch_names_set) or ["main"]

        try:
            head_branch = _git("rev-parse", "--abbrev-ref", "HEAD") or "main"
        except subprocess.CalledProcessError:
            head_branch = branch_names[0]

        # Build a map: git_hash → branch name.
        # For each branch, "git log origin/<branch> ^origin/<other>..." yields
        # the commits exclusive to that branch; shared commits are left out of
        # the map and fall back to head_branch during the commit import, so
        # every commit gets exactly one label.  The excludes must use the
        # remote-tracking refs: after a clone only the default branch has a
        # local ref, so a bare "^<name>" is an unknown revision for the rest.
        print(f" 🔀 Mapping {len(branch_names)} branch(es)…", flush=True)
        commit_branch_map: dict[str, str] = {}
        for bname in branch_names:
            other_excludes = [f"^origin/{ob}" for ob in branch_names if ob != bname]
            try:
                branch_log = _git("log", f"origin/{bname}", "--format=%H", *other_excludes)
            except subprocess.CalledProcessError:
                continue  # branch has no remote counterpart — fall through
            for gh in branch_log.split("\n"):
                gh = gh.strip()
                if gh and gh not in commit_branch_map:
                    commit_branch_map[gh] = bname

        # ── File objects — store HEAD files for main + all branch tips ──────
        # We store the main HEAD plus unique files from each branch tip so that
        # the diff viewer can show code for any branch.
        print(" 📁 Storing files (HEAD + branch tips)…", flush=True)
        objects_count = 0
        refs_to_store = ["HEAD"] + [f"origin/{b}" for b in branch_names if b != head_branch]

        async def _store_files_at_ref(ref: str) -> int:
            """Store every eligible file at *ref*; return how many inserts were issued."""
            stored = 0
            try:
                file_list = _git("ls-tree", "-r", "--name-only", ref).split("\n")
            except subprocess.CalledProcessError:
                return 0
            for fpath in file_list:
                fpath = fpath.strip()
                if not fpath or not _git_code_filter(fpath):
                    continue
                try:
                    # Raw bytes (no text=True) so hashing matches file content.
                    blob = subprocess.run(
                        ["git", "show", f"{ref}:{fpath}"],
                        cwd=repo_path, capture_output=True, timeout=10,
                    )
                    if blob.returncode != 0:
                        continue
                    content_bytes = blob.stdout
                    if len(content_bytes) > 300_000:
                        continue  # skip oversized files

                    obj_id = "sha256:" + hashlib.sha256(content_bytes).hexdigest()
                    ext = Path(fpath).suffix or ".txt"
                    dest = _objects_dir() / f"{obj_id.replace(':', '_')}{ext}"
                    if not dest.exists():
                        dest.write_bytes(content_bytes)

                    await db.execute(
                        text("""
                            INSERT INTO musehub_objects
                              (object_id, repo_id, path, size_bytes, disk_path, created_at)
                            VALUES (:oid, :rid, :path, :size, :dpath, now())
                            ON CONFLICT (object_id) DO NOTHING
                        """),
                        {
                            "oid": obj_id, "rid": repo_id,
                            "path": fpath,
                            "size": len(content_bytes),
                            "dpath": str(dest),
                        },
                    )
                    stored += 1
                except Exception as exc:
                    # Best-effort: one bad file must not abort the import, but
                    # say so instead of hiding the failure.
                    print(f" ⚠️ Skipped {fpath} at {ref}: {exc}")
            return stored

        for ref in refs_to_store:
            objects_count += await _store_files_at_ref(ref)

        # ── Commit import ────────────────────────────────────────────────────
        print(f" 📜 Importing {len(commits_raw)} commits…", flush=True)
        git_to_muse: dict[str, str] = {}
        commits_count = 0
        last_commit_id: str | None = None

        for line in commits_raw:
            parts = line.split("\x1f", 3)
            if len(parts) < 4:
                continue
            git_hash, author_name, timestamp_str, subject = parts

            try:
                ts = datetime.fromtimestamp(int(timestamp_str), tz=UTC)
            except (ValueError, OverflowError, OSError):
                ts = datetime.now(tz=UTC)

            # Parents outside the imported range (shallow boundary or the
            # max_commits cut) are dropped, which makes this commit a root.
            parent_git = parent_map.get(git_hash, [])
            parent_muse = [git_to_muse[p] for p in parent_git if p in git_to_muse]

            muse_cid = _sha(f"git-import-{repo_id}-{git_hash}")
            git_to_muse[git_hash] = muse_cid

            # Use the branch we mapped for this commit; fall back to head_branch
            commit_branch = commit_branch_map.get(git_hash, head_branch)

            await db.execute(
                text("""
                    INSERT INTO musehub_commits
                      (commit_id, repo_id, branch, parent_ids, message, author, timestamp, snapshot_id)
                    VALUES (:cid, :rid, :branch, :parents, :msg, :author, :ts, :snap)
                    ON CONFLICT (commit_id) DO NOTHING
                """),
                {
                    "cid": muse_cid,
                    "rid": repo_id,
                    "branch": commit_branch,
                    "parents": json.dumps(parent_muse),
                    "msg": subject[:1000],
                    "author": author_name[:120],
                    "ts": ts,
                    "snap": _sha(f"snap-git-{repo_id}-{git_hash}"),
                },
            )
            last_commit_id = muse_cid
            commits_count += 1

        # ── Branches ─────────────────────────────────────────────────────────
        for bname in branch_names:
            # Resolve the tip via the remote-tracking ref first (the only ref
            # that exists for non-default branches), then the local branch.
            branch_head_muse = last_commit_id
            for candidate in (f"origin/{bname}", bname):
                try:
                    branch_head_git = _git("rev-parse", "--verify", "--quiet", candidate)
                except subprocess.CalledProcessError:
                    continue
                branch_head_muse = git_to_muse.get(branch_head_git, last_commit_id)
                break

            if branch_head_muse:
                await db.execute(
                    text("""
                        INSERT INTO musehub_branches
                          (branch_id, repo_id, name, head_commit_id)
                        VALUES (:bid, :rid, :name, :hcid)
                        ON CONFLICT (branch_id) DO UPDATE
                          SET head_commit_id = EXCLUDED.head_commit_id
                    """),
                    {
                        "bid": _uid(f"branch-git-{repo_id}-{bname}"),
                        "rid": repo_id,
                        "name": bname,
                        "hcid": branch_head_muse,
                    },
                )

        print(
            f" ✅ {commits_count} commits, {objects_count} objects "
            f"({len(branch_names)} branch{'es' if len(branch_names) != 1 else ''})"
        )
        return commits_count, objects_count
908
909
910 # ---------------------------------------------------------------------------
911 # Disk storage helpers
912 # ---------------------------------------------------------------------------
913
def _objects_dir() -> Path:
    """Return the on-disk object store directory, creating it when missing.

    The location is read from ``settings.musehub_objects_dir`` and defaults to
    ``/data/objects`` when that attribute is absent.
    """
    configured = getattr(settings, "musehub_objects_dir", "/data/objects")
    store_root = Path(configured)
    store_root.mkdir(parents=True, exist_ok=True)
    return store_root
918
919
def _write_midi_object(midi_bytes: bytes, repo_id: str, fname: str) -> tuple[str, Path]:
    """Persist MIDI bytes under a content-addressed name.

    The file is named ``sha256_<hexdigest>.mid`` inside the objects directory
    and is only written if it is not already there.  ``repo_id`` and ``fname``
    are accepted but not used here.

    Returns:
        ``(object_id, disk_path)`` where ``object_id`` is ``sha256:<hexdigest>``.
    """
    digest = hashlib.sha256(midi_bytes).hexdigest()
    target = _objects_dir() / f"sha256_{digest}.mid"
    if target.exists():
        return f"sha256:{digest}", target
    target.write_bytes(midi_bytes)
    return f"sha256:{digest}", target
927
928
def _write_code_object(content: str, repo_id: str, fname: str) -> tuple[str, Path]:
    """Persist source text (UTF-8 encoded) under a content-addressed name.

    The file is named ``sha256_<hexdigest><ext>``, where ``<ext>`` is the
    suffix of ``fname`` or ``.txt`` when it has none.  Existing files are left
    untouched.  ``repo_id`` is accepted but not used here.

    Returns:
        ``(object_id, disk_path)`` where ``object_id`` is ``sha256:<hexdigest>``.
    """
    payload = content.encode("utf-8")
    digest = hashlib.sha256(payload).hexdigest()
    suffix = Path(fname).suffix
    if not suffix:
        suffix = ".txt"
    target = _objects_dir() / f"sha256_{digest}{suffix}"
    if not target.exists():
        target.write_bytes(payload)
    return f"sha256:{digest}", target
938
939
940 # ---------------------------------------------------------------------------
941 # Main seed function
942 # ---------------------------------------------------------------------------
943
async def _table_exists(db: AsyncSession, table: str) -> bool:
    """Report whether *table* is present in the ``public`` schema.

    Looks the name up in ``information_schema.tables`` using a bound parameter.
    """
    lookup = text(
        "SELECT 1 FROM information_schema.tables "
        "WHERE table_schema = 'public' AND table_name = :t"
    )
    outcome = await db.execute(lookup, {"t": table})
    first_row = outcome.fetchone()
    return first_row is not None
954
955
async def _safe_delete(db: AsyncSession, table: str, where_sql: str, params: dict) -> None:
    """Run ``DELETE FROM <table> WHERE <where_sql>`` if the table exists.

    Does nothing when the table is missing.  ``table`` and ``where_sql`` are
    interpolated into the statement text, so they must come from trusted
    code; only ``params`` are bound.
    """
    table_present = await _table_exists(db, table)
    if not table_present:
        return
    statement = text(f"DELETE FROM {table} WHERE {where_sql}")
    await db.execute(statement, params)
960
961
962 async def seed(db: AsyncSession) -> None:
963 print("\n🌱 MuseHub V2 seed — domain-agnostic multi-dimensional showcase")
964 print("=" * 60)
965
966 # ── 0. Pre-flight: verify the V2 migration has been applied ───────────
967 if not await _table_exists(db, "musehub_domains"):
968 print()
969 print(" ❌ ERROR: musehub_domains table does not exist.")
970 print(" Run the V2 Alembic migration first:")
971 print()
972 print(" docker compose exec musehub alembic upgrade head")
973 print()
974 print(" Then re-run this script.")
975 print()
976 raise SystemExit(1)
977
978 # ── 1. Wipe V2 data if --force ────────────────────────────────────────
979 if FORCE:
980 print(" ⚠️ --force: wiping V2 rows…")
981 for repo_id in ([r["repo_id"] for r in MIDI_REPOS] +
982 [r["repo_id"] for r in CODE_REPOS]):
983 await _safe_delete(db, "musehub_repos", "repo_id = :rid", {"rid": repo_id})
984 # Delete by stable author/slug rather than potentially-mismatched IDs
985 await _safe_delete(
986 db, "musehub_domain_installs",
987 "domain_id IN (SELECT domain_id FROM musehub_domains "
988 "WHERE author_slug='cgcardona' AND slug IN ('midi','code'))",
989 {},
990 )
991 await _safe_delete(
992 db, "musehub_domains",
993 "author_slug='cgcardona' AND slug IN ('midi','code')",
994 {},
995 )
996 await db.flush()
997 print(" ✅ V2 rows cleared")
998
999 # ── 2. Domain registry ────────────────────────────────────────────────
1000 print("\n 📦 Domain registry…")
1001 _midi_caps_json = json.dumps(_MIDI_CAPS, sort_keys=True)
1002 _code_caps_json = json.dumps(_CODE_CAPS, sort_keys=True)
1003 midi_hash = hashlib.sha256(_midi_caps_json.encode()).hexdigest()
1004 code_hash = hashlib.sha256(_code_caps_json.encode()).hexdigest()
1005
1006 for stable_id, author_slug, slug, display, desc, caps, hash_, viewer, install_count in [
1007 (DOMAIN_MIDI, "cgcardona", "midi", "MIDI",
1008 "21-dimensional MIDI state versioning. Tracks harmonic, rhythmic, melodic, structural, "
1009 "dynamic, timbral, spatial, textural, pedal, microtonal, and 11 more dimensions. "
1010 "Piano roll viewer. OT merge semantics. Supports piano, orchestra, electronic, and any MIDI instrument.",
1011 _MIDI_CAPS, midi_hash, "piano_roll", len(MIDI_REPOS)),
1012 (DOMAIN_CODE, "cgcardona", "code", "Code",
1013 "10-language symbol-graph code versioning. Tracks syntax, semantics, types, tests, docs, "
1014 "complexity, dependencies, security, performance, and style dimensions. "
1015 "Symbol graph viewer. Three-way merge. Python, TypeScript, Rust, Go, and more.",
1016 _CODE_CAPS, code_hash, "symbol_graph", len(CODE_REPOS)),
1017 ]:
1018 # Try inserting with the stable ID; if the migration already seeded a row
1019 # with a different ID, the ON CONFLICT on (author_slug, slug) will update
1020 # the metadata but keep whatever ID is already there.
1021 await db.execute(
1022 text("""
1023 INSERT INTO musehub_domains
1024 (domain_id, author_slug, slug, display_name, description, version,
1025 manifest_hash, capabilities, viewer_type, install_count, is_verified, created_at, updated_at)
1026 VALUES
1027 (:did, :author, :slug, :name, :desc, '2.0.0',
1028 :hash, CAST(:caps AS jsonb), :viewer, :installs, true, now(), now())
1029 ON CONFLICT (author_slug, slug) DO UPDATE
1030 SET manifest_hash = EXCLUDED.manifest_hash,
1031 capabilities = EXCLUDED.capabilities,
1032 install_count = EXCLUDED.install_count,
1033 version = EXCLUDED.version,
1034 updated_at = now()
1035 """),
1036 {
1037 "did": stable_id, "author": author_slug, "slug": slug,
1038 "name": display, "desc": desc, "hash": hash_,
1039 "caps": json.dumps(caps), "viewer": viewer, "installs": install_count,
1040 },
1041 )
1042 await db.flush()
1043
1044 # Resolve the actual domain_ids from the DB — the migration may have seeded
1045 # with different UUIDs; we join by (author_slug, slug) which is the stable key.
1046 row_midi = (await db.execute(
1047 text("SELECT domain_id FROM musehub_domains WHERE author_slug='cgcardona' AND slug='midi'")
1048 )).fetchone()
1049 row_code = (await db.execute(
1050 text("SELECT domain_id FROM musehub_domains WHERE author_slug='cgcardona' AND slug='code'")
1051 )).fetchone()
1052
1053 if not row_midi or not row_code:
1054 raise RuntimeError("Domain registry insert failed — rows not found after upsert.")
1055
1056 actual_midi_id = row_midi[0]
1057 actual_code_id = row_code[0]
1058
1059 print(f" ✅ @cgcardona/midi → {actual_midi_id}")
1060 print(f" ✅ @cgcardona/code → {actual_code_id}")
1061
1062 # ── 3. MIDI showcase repos ────────────────────────────────────────────
1063 print("\n 🎹 MIDI showcase repos…")
1064 midi_repo_count = 0
1065 midi_object_count = 0
1066 midi_commit_count = 0
1067
1068 for r in MIDI_REPOS:
1069 repo_id = r["repo_id"]
1070
1071 # Check idempotency
1072 existing = await db.execute(
1073 text("SELECT 1 FROM musehub_repos WHERE repo_id = :rid"), {"rid": repo_id}
1074 )
1075 if existing.fetchone() and not FORCE:
1076 print(f" ⏭ {r['owner']}/{r['slug']} exists — skipping")
1077 continue
1078
1079 # Insert repo
1080 await db.execute(
1081 text("""
1082 INSERT INTO musehub_repos
1083 (repo_id, owner, owner_user_id, slug, name, description, visibility,
1084 tags, domain_id, domain_meta, created_at)
1085 VALUES
1086 (:rid, :owner, :uid, :slug, :name, :desc, :vis,
1087 CAST(:tags AS json), :did, CAST(:dmeta AS json), :created)
1088 ON CONFLICT (repo_id) DO NOTHING
1089 """),
1090 {
1091 "rid": repo_id, "owner": r["owner"], "uid": r["owner_user_id"],
1092 "slug": r["slug"], "name": r["name"], "desc": r["description"],
1093 "vis": r["visibility"],
1094 "did": actual_midi_id, "dmeta": json.dumps(r["domain_meta"]),
1095 "tags": json.dumps(r["tags"]),
1096 "created": _now(days=r["days_ago"]),
1097 },
1098 )
1099 midi_repo_count += 1
1100
1101 # Generate and write MIDI files
1102 for fname, gen_key in r["midi_files"]:
1103 try:
1104 midi_bytes = MIDI_GENERATORS[gen_key]()
1105 except Exception as exc:
1106 print(f" ⚠️ MIDI generation failed for {gen_key}: {exc}")
1107 midi_bytes = b""
1108
1109 if midi_bytes:
1110 obj_id, disk_path = _write_midi_object(midi_bytes, repo_id, fname)
1111 await db.execute(
1112 text("""
1113 INSERT INTO musehub_objects (object_id, repo_id, path, size_bytes, disk_path, created_at)
1114 VALUES (:oid, :rid, :path, :size, :dpath, now())
1115 ON CONFLICT (object_id) DO NOTHING
1116 """),
1117 {
1118 "oid": obj_id, "rid": repo_id,
1119 "path": f"tracks/{fname}",
1120 "size": len(midi_bytes),
1121 "dpath": str(disk_path),
1122 },
1123 )
1124 midi_object_count += 1
1125
1126 # Commits
1127 commit_templates = _MIDI_COMMITS.get(repo_id, [])
1128 prev_commit: str | None = None
1129 for i, (msg, author) in enumerate(commit_templates):
1130 cid = _sha(f"v2-midi-commit-{repo_id}-{i}")
1131 days = max(0, r["days_ago"] - i * 3)
1132 await db.execute(
1133 text("""
1134 INSERT INTO musehub_commits
1135 (commit_id, repo_id, branch, parent_ids, message, author, timestamp, snapshot_id)
1136 VALUES (:cid, :rid, 'main', :parents, :msg, :author, :ts, :snap)
1137 ON CONFLICT (commit_id) DO NOTHING
1138 """),
1139 {
1140 "cid": cid, "rid": repo_id,
1141 "parents": json.dumps([prev_commit] if prev_commit else []),
1142 "msg": msg, "author": author,
1143 "ts": _now(days=days),
1144 "snap": _sha(f"snap-v2-{repo_id}-{i}"),
1145 },
1146 )
1147 prev_commit = cid
1148 midi_commit_count += 1
1149
1150 # Main branch
1151 if prev_commit:
1152 await db.execute(
1153 text("""
1154 INSERT INTO musehub_branches (branch_id, repo_id, name, head_commit_id)
1155 VALUES (:bid, :rid, 'main', :hcid)
1156 ON CONFLICT (branch_id) DO UPDATE SET head_commit_id = EXCLUDED.head_commit_id
1157 """),
1158 {"bid": _uid(f"branch-v2-{repo_id}-main"), "rid": repo_id, "hcid": prev_commit},
1159 )
1160
1161 # Stars from community
1162 for j, uname in enumerate(ALL_CONTRIBUTORS[:min(r["star_count"] % 8 + 3, 8)]):
1163 uid = f"user-{uname.lower()}-00" + str(j + 1).zfill(4)[-2:]
1164 await db.execute(
1165 text("INSERT INTO musehub_stars (star_id, repo_id, user_id, created_at) VALUES (:sid, :rid, :uid, now()) ON CONFLICT (star_id) DO NOTHING"),
1166 {"sid": _uid(f"star-v2-{repo_id}-{uname}"), "rid": repo_id, "uid": uid},
1167 )
1168
1169 print(f" ✅ {r['owner']}/{r['slug']}")
1170
1171 await db.flush()
1172 print(f"\n ✅ MIDI: {midi_repo_count} repos, {midi_object_count} .mid files, {midi_commit_count} commits")
1173
1174 # ── 4. Code domain repos — imported from real GitHub repos ───────────
1175 print("\n 💻 Code domain repos (cloning from GitHub…)")
1176 code_repo_count = 0
1177 code_object_count = 0
1178 code_commit_count = 0
1179
1180 for r in CODE_REPOS:
1181 repo_id = r["repo_id"]
1182 print(f"\n ▶ {r['owner']}/{r['slug']}", flush=True)
1183
1184 existing = await db.execute(
1185 text("SELECT 1 FROM musehub_repos WHERE repo_id = :rid"), {"rid": repo_id}
1186 )
1187 if existing.fetchone() and not FORCE:
1188 print(f" ⏭ exists — skipping")
1189 continue
1190
1191 # Create the repo row
1192 await db.execute(
1193 text("""
1194 INSERT INTO musehub_repos
1195 (repo_id, owner, owner_user_id, slug, name, description, visibility,
1196 tags, domain_id, domain_meta, created_at)
1197 VALUES
1198 (:rid, :owner, :uid, :slug, :name, :desc, :vis,
1199 CAST(:tags AS json), :did, CAST(:dmeta AS json), :created)
1200 ON CONFLICT (repo_id) DO NOTHING
1201 """),
1202 {
1203 "rid": repo_id, "owner": r["owner"], "uid": r["owner_user_id"],
1204 "slug": r["slug"], "name": r["name"], "desc": r["description"],
1205 "vis": r["visibility"],
1206 "did": actual_code_id, "dmeta": json.dumps(r["domain_meta"]),
1207 "tags": json.dumps(r["tags"]),
1208 "created": _now(days=r["days_ago"]),
1209 },
1210 )
1211 code_repo_count += 1
1212
1213 # Clone the real GitHub repo and import its full git history
1214 n_commits, n_objects = await _import_github_repo(db, r, actual_code_id)
1215 code_commit_count += n_commits
1216 code_object_count += n_objects
1217
1218 # Stars
1219 for j, uname in enumerate(ALL_CONTRIBUTORS[:min(r["star_count"] % 8 + 2, 8)]):
1220 uid_num = str(j + 1).zfill(3)
1221 await db.execute(
1222 text("INSERT INTO musehub_stars (star_id, repo_id, user_id, created_at) VALUES (:sid, :rid, :uid, now()) ON CONFLICT (star_id) DO NOTHING"),
1223 {"sid": _uid(f"star-v2-code-{repo_id}-{uname}"), "rid": repo_id,
1224 "uid": _uid(f"user-{uname}-{uid_num}")},
1225 )
1226
1227 await db.flush()
1228 print(f"\n ✅ Code: {code_repo_count} repos, {code_object_count} source files, {code_commit_count} commits")
1229
1230 # ── 5. Issues for all V2 repos ────────────────────────────────────────
1231 print("\n 🐛 Issues…")
1232 issue_count = 0
1233 all_v2_repos = MIDI_REPOS + CODE_REPOS
1234
1235 for r in all_v2_repos:
1236 repo_id = r["repo_id"]
1237 issue_list = _ISSUES.get(repo_id, [])
1238 for i, iss in enumerate(issue_list):
1239 issue_id = _uid(f"issue-v2-{repo_id}-{i}")
1240 await db.execute(
1241 text("""
1242 INSERT INTO musehub_issues
1243 (issue_id, repo_id, number, title, body, state, author, created_at, updated_at)
1244 VALUES (:iid, :rid, :num, :title, :body, :state, :author, :ts, :ts)
1245 ON CONFLICT (issue_id) DO NOTHING
1246 """),
1247 {
1248 "iid": issue_id, "rid": repo_id,
1249 "num": i + 1, "title": iss["title"],
1250 "body": iss["body"], "state": iss["state"],
1251 "author": r["owner"],
1252 "ts": _now(days=r["days_ago"] - i * 5),
1253 },
1254 )
1255 issue_count += 1
1256
1257 await db.flush()
1258 print(f" ✅ Issues: {issue_count}")
1259
1260 # ── 6. Pull requests ──────────────────────────────────────────────────
1261 print("\n 🔀 Pull requests…")
1262 pr_count = 0
1263
1264 _PR_TEMPLATES: list[dict[str, str]] = [
1265 {"title": "feat: add extended 32-bar variation", "body": "Extends the main loop with a contrasting B-section.", "state": "open"},
1266 {"title": "fix: velocity humanization — beat 1 accent corrected", "body": "Beat 1 was not receiving the +12 velocity boost.", "state": "merged"},
1267 {"title": "feat: orchestral arrangement variant", "body": "Full orchestra version derived from the solo piano base.", "state": "merged"},
1268 {"title": "refactor: split into individual track objects", "body": "Separates each instrument into its own .mid file for DAW import.", "state": "open"},
1269 ]
1270 for r in all_v2_repos:
1271 for i, pr_tmpl in enumerate(_PR_TEMPLATES[:3]):
1272 pr_id = _uid(f"pr-v2-{r['repo_id']}-{i}")
1273 author = ALL_CONTRIBUTORS[(i + hash(r["repo_id"])) % len(ALL_CONTRIBUTORS)]
1274 ts = _now(days=r["days_ago"] - i * 7 - 3)
1275 is_merged = pr_tmpl["state"] == "merged"
1276 await db.execute(
1277 text("""
1278 INSERT INTO musehub_pull_requests
1279 (pr_id, repo_id, title, body, state, author,
1280 from_branch, to_branch, created_at, merged_at)
1281 VALUES (:pid, :rid, :title, :body, :state, :author,
1282 :head, 'main', :ts, :merged_at)
1283 ON CONFLICT (pr_id) DO NOTHING
1284 """),
1285 {
1286 "pid": pr_id, "rid": r["repo_id"],
1287 "title": pr_tmpl["title"],
1288 "body": pr_tmpl["body"], "state": pr_tmpl["state"],
1289 "author": author,
1290 "head": f"feat/v2-extension-{i}",
1291 "ts": ts,
1292 "merged_at": ts if is_merged else None,
1293 },
1294 )
1295 pr_count += 1
1296
1297 await db.flush()
1298 print(f" ✅ Pull requests: {pr_count}")
1299
1300 # ── 7. Releases ───────────────────────────────────────────────────────
1301 print("\n 🏷 Releases…")
1302 rel_count = 0
1303 for r in all_v2_repos:
1304 for v_major, v_minor, days_offset in [(1, 0, 10), (1, 1, 5), (2, 0, 1)]:
1305 rel_id = _uid(f"release-v2-{r['repo_id']}-{v_major}-{v_minor}")
1306 tag = f"v{v_major}.{v_minor}"
1307 await db.execute(
1308 text("""
1309 INSERT INTO musehub_releases
1310 (release_id, repo_id, tag, title, body, author,
1311 is_prerelease, is_draft, created_at)
1312 VALUES (:rid, :repo, :tag, :title, :body, :author,
1313 false, false, :ts)
1314 ON CONFLICT (release_id) DO NOTHING
1315 """),
1316 {
1317 "rid": rel_id, "repo": r["repo_id"],
1318 "tag": tag, "title": f"{r['name']} {tag}",
1319 "body": f"Release {tag} — see commit log for changes.",
1320 "author": r["owner"],
1321 "ts": _now(days=days_offset),
1322 },
1323 )
1324 rel_count += 1
1325
1326 await db.flush()
1327 print(f" ✅ Releases: {rel_count}")
1328
1329 # ── 8. Muse VCS layer (muse_objects + muse_snapshots + muse_commits) ──
1330 print("\n 🔗 Muse VCS layer…")
1331 vcs_commit_count = 0
1332
1333 for r in all_v2_repos:
1334 repo_id = r["repo_id"]
1335 commits = _MIDI_COMMITS.get(repo_id) or []
1336 prev_muse_id: str | None = None
1337
1338 for i, (msg, author) in enumerate(commits):
1339 snap_seed = f"muse-snap-v2-{repo_id}-{i}"
1340 snap_id = _sha(snap_seed)
1341 committed_at = _now(days=max(0, r["days_ago"] - i * 2))
1342
1343 # Snapshot manifest — minimal for VCS graph
1344 manifest = {f"state/{i:04d}.dat": _sha(f"obj-v2-{repo_id}-{i}")}
1345 await db.execute(
1346 text("""
1347 INSERT INTO muse_snapshots (snapshot_id, manifest, created_at)
1348 VALUES (:sid, :manifest, :ca)
1349 ON CONFLICT (snapshot_id) DO NOTHING
1350 """),
1351 {"sid": snap_id, "manifest": json.dumps(manifest), "ca": committed_at},
1352 )
1353
1354 parent2: str | None = None
1355 if i >= 7 and i % 7 == 0 and prev_muse_id:
1356 # Merge commit — grab commit 6 back
1357 parent2 = _sha(f"muse-c-v2-{_sha(f'muse-snap-v2-{repo_id}-{max(0, i-6)}')}")
1358
1359 commit_id = _sha(f"muse-c-v2-{snap_id}-{prev_muse_id or ''}-{msg}")
1360 await db.execute(
1361 text("""
1362 INSERT INTO muse_commits
1363 (commit_id, repo_id, branch, parent_commit_id, parent2_commit_id,
1364 snapshot_id, message, author, committed_at, created_at, metadata)
1365 VALUES
1366 (:cid, :rid, 'main', :pid, :p2id,
1367 :sid, :msg, :author, :cat, :cat, :meta)
1368 ON CONFLICT (commit_id) DO NOTHING
1369 """),
1370 {
1371 "cid": commit_id, "rid": repo_id, "pid": prev_muse_id, "p2id": parent2,
1372 "sid": snap_id, "msg": msg, "author": author, "cat": committed_at,
1373 "meta": json.dumps(r.get("domain_meta", {})),
1374 },
1375 )
1376 prev_muse_id = commit_id
1377 vcs_commit_count += 1
1378
1379 await db.flush()
1380 print(f" ✅ Muse commits: {vcs_commit_count}")
1381
1382 # ── 9. Domain installs ────────────────────────────────────────────────
1383 print("\n 📥 Domain installs…")
1384 install_count = 0
1385 user_ids = {
1386 "gabriel": GABRIEL, "sofia": SOFIA, "marcus": MARCUS, "yuki": YUKI,
1387 "aaliya": AALIYA, "chen": CHEN, "fatou": FATOU, "pierre": PIERRE,
1388 }
1389 for r in MIDI_REPOS:
1390 uid = user_ids.get(r["owner"])
1391 if uid:
1392 await db.execute(
1393 text("""
1394 INSERT INTO musehub_domain_installs (install_id, user_id, domain_id, created_at)
1395 VALUES (:iid, :uid, :did, now())
1396 ON CONFLICT (install_id) DO NOTHING
1397 """),
1398 {"iid": _uid(f"install-v2-midi-{uid}"), "uid": uid, "did": actual_midi_id},
1399 )
1400 install_count += 1
1401 for r in CODE_REPOS:
1402 uid = user_ids.get(r["owner"])
1403 if uid:
1404 await db.execute(
1405 text("""
1406 INSERT INTO musehub_domain_installs (install_id, user_id, domain_id, created_at)
1407 VALUES (:iid, :uid, :did, now())
1408 ON CONFLICT (install_id) DO NOTHING
1409 """),
1410 {"iid": _uid(f"install-v2-code-{uid}"), "uid": uid, "did": actual_code_id},
1411 )
1412 install_count += 1
1413
1414 await db.flush()
1415 print(f" ✅ Domain installs: {install_count}")
1416
1417 print("\n" + "=" * 60)
1418 print("🎉 MuseHub V2 seed complete!")
1419 print(f" Domains: 2 (@cgcardona/midi, @cgcardona/code)")
1420 print(f" MIDI repos: {midi_repo_count} (with playable .mid files)")
1421 print(f" Code repos: {code_repo_count} (with real source files)")
1422 print(f" VCS commits: {vcs_commit_count}")
1423 print(f" Issues/PRs: {issue_count}/{pr_count}")
1424 print(f" Releases: {rel_count}")
1425 print()
1426
1427
1428 # ---------------------------------------------------------------------------
1429 # Entry point
1430 # ---------------------------------------------------------------------------
1431
async def main() -> None:
    """Run the V2 seed inside a single database transaction.

    Builds an async engine from ``settings.database_url``, opens one
    ``AsyncSession`` and awaits :func:`seed` inside ``session.begin()``, so
    the transaction is committed when ``seed`` returns and rolled back if it
    raises.  The engine is disposed in a ``finally`` clause so pooled
    connections are released even when seeding fails part-way through.
    """
    engine = create_async_engine(settings.database_url, echo=False)
    try:
        async_session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)

        async with async_session() as session:
            # One transaction for the whole seed: all-or-nothing.
            async with session.begin():
                await seed(session)
    finally:
        # Always release the connection pool, including on error paths.
        await engine.dispose()


if __name__ == "__main__":
    # Script entry point: drive the async seed to completion on a fresh event loop.
    asyncio.run(main())