musehub_piano_roll_renderer.py
python
| 1 | """Server-side MIDI-to-PNG piano roll renderer for MuseHub. |
| 2 | |
| 3 | Converts raw MIDI bytes into a static piano roll image (PNG) without any |
| 4 | browser or external image library dependency. Uses ``mido`` (already a |
| 5 | project dependency) to parse MIDI and stdlib ``zlib``/``struct`` to encode |
| 6 | a minimal PNG. |
| 7 | |
| 8 | The piano roll image layout: |
| 9 | - Width : ``target_width`` clamped to ``[MIN_WIDTH_PX, MAX_WIDTH_PX]``, representing the MIDI timeline. |
| 10 | - Height: ``IMAGE_HEIGHT`` (``NOTE_ROWS`` × ``NOTE_ROW_HEIGHT`` = 256 px), one band per MIDI pitch (0 = bottom, 127 = top). |
| 11 | - Background: dark charcoal (``BG_COLOR``). |
| 12 | - Octave boundary lines: slightly lighter horizontal rule at every C note. |
| 13 | - Notes: colored rectangles, colour-coded by MIDI channel. |
| 14 | |
| 15 | Design constraints: |
| 16 | - Zero external image dependencies (no Pillow, no cairo, no Node). |
| 17 | - Deterministic: same MIDI bytes → same PNG bytes for a given render width. |
| 18 | - Graceful degradation: a blank canvas is returned when the MIDI has no |
| 19 | note events, so callers always receive a valid PNG. |
| 20 | |
| 21 | Result type: ``PianoRollRenderResult`` — registered in docs/reference/type_contracts.md. |
| 22 | """ |
| 23 | |
| 24 | import io |
| 25 | import logging |
| 26 | import struct |
| 27 | import zlib |
| 28 | from dataclasses import dataclass |
| 29 | from pathlib import Path |
| 30 | |
| 31 | import mido |
| 32 | |
| 33 | logger = logging.getLogger(__name__) |
| 34 | |
| 35 | # --------------------------------------------------------------------------- |
| 36 | # Constants |
| 37 | # --------------------------------------------------------------------------- |
| 38 | |
# Vertical axis: one band per MIDI pitch (0–127), each band NOTE_ROW_HEIGHT px tall.
NOTE_ROWS: int = 128
NOTE_ROW_HEIGHT: int = 2
IMAGE_HEIGHT: int = NOTE_ROW_HEIGHT * NOTE_ROWS  # total image height in pixels

# Horizontal axis: the requested render width is clamped into this range.
MIN_WIDTH_PX: int = 64  # lower bound (very short clips)
MAX_WIDTH_PX: int = 1920  # upper bound

# Canvas background (dark charcoal).
BG_COLOR: tuple[int, int, int] = (28, 28, 34)

# Slightly lighter colour used for the rule drawn at every octave C.
BOUNDARY_COLOR: tuple[int, int, int] = (60, 60, 72)

# Note colours indexed by MIDI channel (0–15); the index wraps around for
# any channel number beyond the end of the table.
_CHANNEL_COLORS: list[tuple[int, int, int]] = [
    (100, 220, 130),  # ch 0  — green (bass)
    (100, 160, 220),  # ch 1  — blue (keys)
    (220, 140, 100),  # ch 2  — orange (lead)
    (200, 100, 220),  # ch 3  — purple
    (220, 220, 100),  # ch 4  — yellow
    (100, 220, 220),  # ch 5  — cyan
    (220, 100, 100),  # ch 6  — red
    (140, 220, 100),  # ch 7  — lime
    (180, 120, 220),  # ch 8  — lavender
    (220, 180, 100),  # ch 9  — gold (often drums — coloured differently)
    (100, 200, 180),  # ch 10 — teal
    (220, 120, 180),  # ch 11 — rose
    (180, 200, 100),  # ch 12 — olive
    (120, 180, 220),  # ch 13 — sky
    (220, 160, 120),  # ch 14 — peach
    (160, 120, 200),  # ch 15 — indigo
]

# Narrowest a rendered note may be, so very short notes stay visible.
_MIN_NOTE_PX: int = 1
| 71 | |
| 72 | |
| 73 | # --------------------------------------------------------------------------- |
| 74 | # Public result type |
| 75 | # --------------------------------------------------------------------------- |
| 76 | |
| 77 | |
| 78 | @dataclass(frozen=True, slots=True) |
| 79 | class PianoRollRenderResult: |
| 80 | """Outcome of a single piano roll render operation. |
| 81 | |
| 82 | Attributes: |
| 83 | output_path: Absolute path of the PNG file written to disk. |
| 84 | width_px: Actual render width in pixels. |
| 85 | note_count: Number of MIDI note events rendered. |
| 86 | track_index: Zero-based MIDI track index that was rendered. |
| 87 | stubbed: True when the MIDI contained no note events and a blank |
| 88 | canvas was returned. |
| 89 | """ |
| 90 | |
| 91 | output_path: Path |
| 92 | width_px: int |
| 93 | note_count: int |
| 94 | track_index: int |
| 95 | stubbed: bool |
| 96 | |
| 97 | |
| 98 | # --------------------------------------------------------------------------- |
| 99 | # MIDI parsing helpers |
| 100 | # --------------------------------------------------------------------------- |
| 101 | |
| 102 | |
| 103 | @dataclass |
| 104 | class _NoteEvent: |
| 105 | """A resolved note-on / note-off pair in absolute tick time.""" |
| 106 | |
| 107 | pitch: int |
| 108 | channel: int |
| 109 | start_tick: int |
| 110 | end_tick: int |
| 111 | |
| 112 | |
def _parse_note_events(midi: mido.MidiFile) -> list[_NoteEvent]:
    """Collect every note in ``midi`` as an absolute-tick ``_NoteEvent``.

    Each track is walked independently with its own running tick counter.
    A note-on (velocity > 0) opens a note for its (pitch, channel) pair unless
    one is already open; a note-off, or a note-on with velocity 0, closes the
    open note for that pair. Notes still open when a track ends are closed at
    the track's final tick, or one tick after their start if that would not be
    later than the start.

    Args:
        midi: Parsed ``mido.MidiFile`` object.

    Returns:
        List of ``_NoteEvent`` objects with resolved start/end tick positions.
    """
    resolved: list[_NoteEvent] = []

    for track in midi.tracks:
        open_notes: dict[tuple[int, int], int] = {}  # (pitch, channel) → start tick
        now = 0  # running absolute tick for this track

        for msg in track:
            now += msg.time
            kind = msg.type
            is_note_on = kind == "note_on"

            if is_note_on and msg.velocity > 0:
                # A second note-on for an already-open key keeps the first start.
                open_notes.setdefault((msg.note, msg.channel), now)
            elif kind == "note_off" or (is_note_on and msg.velocity == 0):
                onset = open_notes.pop((msg.note, msg.channel), None)
                if onset is not None:
                    resolved.append(
                        _NoteEvent(
                            pitch=msg.note,
                            channel=msg.channel,
                            start_tick=onset,
                            end_tick=now,
                        )
                    )

        # Whatever is still open never received a note-off: end it at the
        # last tick seen on this track.
        track_end = now
        resolved.extend(
            _NoteEvent(
                pitch=pitch,
                channel=channel,
                start_tick=onset,
                end_tick=track_end if track_end > onset else onset + 1,
            )
            for (pitch, channel), onset in open_notes.items()
        )

    return resolved
| 168 | |
| 169 | |
| 170 | # --------------------------------------------------------------------------- |
| 171 | # PNG encoder (pure stdlib — no Pillow) |
| 172 | # --------------------------------------------------------------------------- |
| 173 | |
# Eight-byte signature that opens every PNG file, spelled out piece by piece.
_PNG_SIGNATURE: bytes = (
    b"\x89"  # non-ASCII lead byte
    b"PNG"  # format name
    b"\r\n"  # CR LF
    b"\x1a"  # Ctrl-Z
    b"\n"  # LF
)
| 176 | |
| 177 | |
def _png_chunk(chunk_type: bytes, data: bytes) -> bytes:
    """Serialise one PNG chunk.

    The layout is a big-endian 32-bit length of ``data``, then ``chunk_type``,
    then ``data``, then a big-endian CRC-32 computed over type plus data.

    Args:
        chunk_type: Chunk type code (e.g. ``b"IHDR"``).
        data: Chunk payload; may be empty.

    Returns:
        The encoded chunk bytes.
    """
    body = chunk_type + data
    checksum = zlib.crc32(body) & 0xFFFFFFFF
    length_field = struct.pack(">I", len(data))
    crc_field = struct.pack(">I", checksum)
    return b"".join((length_field, body, crc_field))
| 182 | |
| 183 | |
def _encode_png(pixels: list[bytearray], width: int, height: int) -> bytes:
    """Build a minimal PNG (signature + IHDR + one IDAT + IEND) from RGB rows.

    Args:
        pixels: Scanlines in top-to-bottom order; expected to be ``height``
            bytearrays of ``width * 3`` bytes each (RGB). Not validated here.
        width: Image width in pixels, written to the IHDR chunk.
        height: Image height in pixels, written to the IHDR chunk.

    Returns:
        Complete PNG file bytes.
    """
    # IHDR payload: width, height, bit depth 8, colour type 2 (RGB),
    # compression 0, filter method 0, interlace 0.
    header = struct.pack(">IIBBBBB", width, height, 8, 2, 0, 0, 0)

    # Every scanline is preceded by a filter-type byte; 0 means "None".
    scanlines = bytearray()
    for row in pixels:
        scanlines.append(0)
        scanlines += row
    compressed = zlib.compress(bytes(scanlines), 6)

    return b"".join(
        (
            _PNG_SIGNATURE,
            _png_chunk(b"IHDR", header),
            _png_chunk(b"IDAT", compressed),
            _png_chunk(b"IEND", b""),
        )
    )
| 207 | |
| 208 | |
| 209 | # --------------------------------------------------------------------------- |
| 210 | # Render logic |
| 211 | # --------------------------------------------------------------------------- |
| 212 | |
| 213 | |
def _build_canvas(width: int) -> list[bytearray]:
    """Allocate a blank RGB canvas of size ``width × IMAGE_HEIGHT``.

    Rows are stored bottom-first (MIDI pitch 0 at index 0) and are expected to
    be reversed by the caller before PNG encoding. Every pitch occupies
    ``NOTE_ROW_HEIGHT`` consecutive rows, so pitch ``p`` lives at row indices
    ``p * NOTE_ROW_HEIGHT`` … ``p * NOTE_ROW_HEIGHT + NOTE_ROW_HEIGHT - 1`` —
    the same mapping ``_draw_note`` uses.

    Previously only one row per pitch was allocated (``NOTE_ROWS`` rows in
    total), which left the canvas half as tall as ``IMAGE_HEIGHT``: notes with
    pitch >= 64 fell outside the canvas and the PNG header declared more
    scanlines than were encoded.

    Args:
        width: Canvas width in pixels.

    Returns:
        ``NOTE_ROWS * NOTE_ROW_HEIGHT`` (= ``IMAGE_HEIGHT``) independent
        bytearrays of ``width * 3`` bytes each. Rows belonging to a pitch that
        is a multiple of 12 (octave boundary) are filled with
        ``BOUNDARY_COLOR``; all others with ``BG_COLOR``.
    """
    background = bytes(BG_COLOR) * width
    boundary = bytes(BOUNDARY_COLOR) * width

    rows: list[bytearray] = []
    for pitch in range(NOTE_ROWS):
        # Octave rule every 12 semitones, counted from pitch 0.
        template = boundary if pitch % 12 == 0 else background
        # Each row must be its own bytearray: notes are painted in place.
        rows.extend(bytearray(template) for _ in range(NOTE_ROW_HEIGHT))

    return rows
| 235 | |
| 236 | |
def _draw_note(
    rows: list[bytearray],
    note: _NoteEvent,
    total_ticks: int,
    width: int,
) -> None:
    """Paint a single note event onto the canvas (in-place).

    The note is drawn as a solid rectangle in its channel's colour, spanning
    the pixel columns that correspond to ``start_tick`` … ``end_tick`` and the
    ``NOTE_ROW_HEIGHT`` rows that belong to its pitch. It is always at least
    ``_MIN_NOTE_PX`` wide (canvas width permitting), including a zero-length
    note sitting exactly at the end of the timeline.

    Nothing is drawn when ``total_ticks`` or ``width`` is not positive, when
    the pitch is negative, when the note starts after ``total_ticks``, or for
    rows that lie beyond the end of ``rows``.

    Args:
        rows: Canvas from ``_build_canvas`` (bottom-first ordering).
        note: Note event with absolute tick positions.
        total_ticks: Total MIDI duration in ticks (used to map ticks → pixels).
        width: Canvas width in pixels.
    """
    if total_ticks <= 0 or width <= 0:
        return
    if note.pitch < 0 or note.start_tick > total_ticks:
        return

    color = bytes(_CHANNEL_COLORS[note.channel % len(_CHANNEL_COLORS)])

    # Map tick → pixel column with integer arithmetic. Float division can land
    # just below an exact boundary (29 / 100 * 100 == 28.999…) and would then
    # be truncated to the wrong column.
    x_start = note.start_tick * width // total_ticks
    # A note starting on the very last tick maps to column == width; pull it
    # back so the minimum-width guarantee still holds at the right edge.
    x_start = max(0, min(x_start, width - _MIN_NOTE_PX))
    x_end = max(x_start + _MIN_NOTE_PX, note.end_tick * width // total_ticks)
    x_end = min(x_end, width)

    # One pre-built run of RGB triples, copied into each row by slice
    # assignment instead of a per-pixel loop.
    span = color * (x_end - x_start)
    lo, hi = x_start * 3, x_end * 3

    # Pitch band (bottom-first). Slicing silently yields fewer rows when the
    # band extends past the canvas, so out-of-range pitches are skipped.
    row_base = note.pitch * NOTE_ROW_HEIGHT
    for row in rows[row_base : row_base + NOTE_ROW_HEIGHT]:
        row[lo:hi] = span
| 273 | |
| 274 | |
def _write_canvas_png(canvas: list[bytearray], width: int, output_path: Path) -> None:
    """Encode a bottom-first canvas as a PNG and write it to ``output_path``.

    The image height is taken from the canvas itself so the PNG header always
    agrees with the number of scanlines written. Missing parent directories
    are created.
    """
    # PNG rows are top-first; MIDI pitches are bottom-first, so reverse.
    png_bytes = _encode_png(canvas[::-1], width, len(canvas))
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_bytes(png_bytes)


def render_piano_roll(
    midi_bytes: bytes,
    output_path: Path,
    track_index: int = 0,
    target_width: int = MAX_WIDTH_PX,
) -> PianoRollRenderResult:
    """Render raw MIDI bytes as a piano roll PNG image.

    Parses all tracks from the MIDI file, paints each note as a coloured
    rectangle proportional to its duration, and writes a PNG file. When the
    bytes cannot be parsed, or the file contains no note events, a blank
    canvas is written instead and the result is flagged as ``stubbed``.

    Args:
        midi_bytes: Raw bytes of a Standard MIDI File (.mid).
        output_path: Destination path for the output PNG file.
        track_index: Logical track index for the result metadata (informational
            only — all tracks are rendered into a single composite image).
        target_width: Desired render width in pixels. Clamped to
            ``[MIN_WIDTH_PX, MAX_WIDTH_PX]``.

    Returns:
        ``PianoRollRenderResult`` describing what was written.

    Raises:
        OSError: If the output directory cannot be created or the file written.
    """
    width = max(MIN_WIDTH_PX, min(target_width, MAX_WIDTH_PX))

    note_events: list[_NoteEvent] = []
    try:
        midi = mido.MidiFile(file=io.BytesIO(midi_bytes))
    except Exception as exc:
        # Deliberately broad: any parse failure degrades to a blank canvas so
        # callers always receive a valid PNG.
        logger.warning("⚠️ Failed to parse MIDI for piano roll: %s", exc)
    else:
        note_events = _parse_note_events(midi)
        if not note_events:
            logger.info("ℹ️ MIDI has no note events — writing blank piano roll at %s", output_path)

    canvas = _build_canvas(width)

    if note_events:
        # The timeline ends where the last note ends.
        total_ticks = max(ev.end_tick for ev in note_events)
        for ev in note_events:
            _draw_note(canvas, ev, total_ticks, width)

    _write_canvas_png(canvas, width, output_path)

    if note_events:
        logger.info(
            "✅ Piano roll rendered: %d notes → %s (%dx%d px)",
            len(note_events),
            output_path,
            width,
            len(canvas),
        )

    return PianoRollRenderResult(
        output_path=output_path,
        width_px=width,
        note_count=len(note_events),
        track_index=track_index,
        stubbed=not note_events,
    )