# muse/plugins/music/services/muse_validate.py · cgcardona/muse
"""muse validate — musical integrity checks for the working tree.

This module provides the core validation logic that ``muse validate`` invokes.
It is intentionally kept separate from the CLI layer so the checks can be
called from tests and future automation pipelines without spawning a subprocess.

Named result types registered in ``docs/reference/type_contracts.md``:
- ``ValidationSeverity``
- ``ValidationIssue``
- ``ValidationCheckResult``
- ``MuseValidateResult``

Exit-code contract (mirrors git-fsck conventions):
- 0 — all checks passed (no errors, no warnings)
- 1 — one or more ERROR issues found
- 2 — one or more WARN issues found and ``--strict`` was requested
"""

from __future__ import annotations

import dataclasses

import enum

import json

import logging

import pathlib

import re

import struct

# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)

29

30

# ---------------------------------------------------------------------------
# Types
# ---------------------------------------------------------------------------

33

34

#: Closed vocabulary of lowercase emotion tags. ``check_emotion_tags`` lowercases
#: each tag it reads and warns about any that is not a member of this set.
ALLOWED_EMOTION_TAGS: frozenset[str] = frozenset(
    {
        "aggressive",
        "bright",
        "calm",
        "dark",
        "energetic",
        "happy",
        "melancholic",
        "mysterious",
        "peaceful",
        "playful",
        "relaxed",
        "romantic",
        "sad",
        "tense",
        "triumphant",
    }
)

#: Regex for well-formed section directory names: e.g. "verse", "chorus-01", "bridge_02".
#: The end anchor is ``\Z`` rather than ``$`` because ``$`` also matches just
#: before a trailing newline, which would let a name ending in a line break
#: pass ``match()``.
_SECTION_NAME_RE = re.compile(r"^[a-z][a-z0-9_-]*\Z")

56

57

58

class ValidationSeverity(str, enum.Enum):
    """How serious a single validation finding is.

    Members also subclass ``str``, so each one compares equal to its
    lowercase string value (for example ``ValidationSeverity.WARN == "warn"``).
    """

    ERROR = "error"
    WARN = "warn"
    INFO = "info"

@dataclasses.dataclass
class ValidationIssue:
    """One finding reported by a validation check.

    Agents should treat ERROR severity as a blocker for ``muse commit``.
    WARN severity is informational unless ``--strict`` mode is active.
    """

    severity: ValidationSeverity  # how serious the finding is
    check: str  # name of the check that produced the finding
    path: str  # location the finding refers to
    message: str  # human-readable description of the problem

    def to_dict(self) -> dict[str, str]:
        """Return the issue as a flat mapping, with severity as its enum value."""
        return dict(
            severity=self.severity.value,
            check=self.check,
            path=self.path,
            message=self.message,
        )

@dataclasses.dataclass
class ValidationCheckResult:
    """Result of running one named check category.

    ``passed`` is True only when ``issues`` is empty for this check. The
    class stores whatever the caller supplies; it does not enforce that
    relationship itself.
    """

    name: str  # check category name, e.g. "midi_integrity"
    passed: bool  # whether the check passed
    issues: list[ValidationIssue]  # findings produced by this check

    def to_dict(self) -> dict[str, object]:
        """Return a plain-dict form with every issue serialised via ``to_dict``."""
        serialised_issues = [issue.to_dict() for issue in self.issues]
        return {
            "name": self.name,
            "passed": self.passed,
            "issues": serialised_issues,
        }

@dataclasses.dataclass
class MuseValidateResult:
    """Combined outcome of every validation check run against the working tree.

    ``has_errors`` is True iff at least one ERROR-severity issue was found.
    ``has_warnings`` is True iff at least one WARN-severity issue was found.
    ``clean`` is True when validation found nothing to act on; as populated
    by ``run_validate`` that means neither errors nor warnings.
    """

    clean: bool
    has_errors: bool
    has_warnings: bool
    checks: list[ValidationCheckResult]  # one entry per check category
    fixes_applied: list[str]  # descriptions of auto-fixes that were applied

    def to_dict(self) -> dict[str, object]:
        """Return a plain-dict form with each check serialised via ``to_dict``."""
        serialised_checks = [check.to_dict() for check in self.checks]
        return {
            "clean": self.clean,
            "has_errors": self.has_errors,
            "has_warnings": self.has_warnings,
            "checks": serialised_checks,
            # The list is passed through as-is (not copied).
            "fixes_applied": self.fixes_applied,
        }

# ---------------------------------------------------------------------------
# MIDI integrity check
# ---------------------------------------------------------------------------

135

136

def _is_valid_midi(path: pathlib.Path) -> bool:
    """Report whether *path* starts with a Standard MIDI File header chunk.

    Only the first eight bytes are inspected: the 4-byte ``MThd`` magic and
    the big-endian 32-bit header-chunk length, which must equal 6. This is a
    cheap structural check meant to reject obviously corrupt or truncated
    files; it does not parse the rest of the file.

    Args:
        path: File to inspect.

    Returns:
        True when both the magic and the declared length are as expected;
        False otherwise, including when the file cannot be opened or read.
    """
    try:
        with path.open("rb") as stream:
            header = stream.read(8)
    except OSError:
        return False

    # Fewer than eight bytes means the magic or the length field is truncated.
    if len(header) < 8 or header[:4] != b"MThd":
        return False

    (declared_len,) = struct.unpack_from(">I", header, 4)
    return declared_len == 6

def check_midi_integrity(
    workdir: pathlib.Path,
    track_filter: str | None = None,
) -> ValidationCheckResult:
    """Check every ``.mid``/``.midi`` file under *workdir* for a valid MIDI header.

    Files are discovered recursively. Each regular file that survives the
    optional filter is handed to ``_is_valid_midi``; a file that fails yields
    one ERROR issue and a warning log line. Agents use this to catch
    corruption from partial writes, failed exports, or bit-rot before the
    file is committed to Muse VCS history.

    Args:
        workdir: The ``muse-work/`` directory to scan.
        track_filter: If given, only MIDI files whose relative path contains
            this string (case-insensitive) are validated.

    Returns:
        ValidationCheckResult with check name ``"midi_integrity"``. A missing
        *workdir* counts as a pass with no issues.
    """
    check_name = "midi_integrity"
    if not workdir.exists():
        return ValidationCheckResult(name=check_name, passed=True, issues=[])

    # An empty filter string behaves like no filter at all.
    needle = track_filter.lower() if track_filter else None

    # All ``.mid`` files (sorted) come first, then all ``.midi`` files (sorted).
    candidates = [*sorted(workdir.rglob("*.mid")), *sorted(workdir.rglob("*.midi"))]

    found: list[ValidationIssue] = []
    for candidate in candidates:
        if not candidate.is_file():
            continue
        rel = candidate.relative_to(workdir).as_posix()
        if needle is not None and needle not in rel.lower():
            continue
        if _is_valid_midi(candidate):
            continue
        found.append(
            ValidationIssue(
                severity=ValidationSeverity.ERROR,
                check=check_name,
                path=rel,
                message="Invalid or corrupted MIDI file: missing or malformed MThd header.",
            )
        )
        logger.warning("❌ MIDI integrity failure: %s", rel)

    return ValidationCheckResult(name=check_name, passed=not found, issues=found)

# ---------------------------------------------------------------------------
# Manifest consistency check
# ---------------------------------------------------------------------------

206

207

def check_manifest_consistency(
    root: pathlib.Path,
    track_filter: str | None = None,
) -> ValidationCheckResult:
    """Compare the cached committed snapshot manifest against the working tree.

    Detects orphaned files (in the manifest but missing from disk) and
    unregistered files (on disk but absent from the manifest). These indicate
    that the working tree has drifted from the last commit — potentially from
    manual edits or a failed ``muse checkout``. Only the *sets of paths* are
    compared; file contents are not hashed or compared here.

    The check passes with no issues (nothing to compare against) when
    ``.muse/HEAD`` is missing, when the ref it names is missing, empty, or
    not a regular file, or when ``.muse/snapshot_manifest.json`` does not
    exist.

    Args:
        root: Repository root (contains ``.muse/`` and ``muse-work/``).
        track_filter: Scope validation to paths containing this string
            (case-insensitive).

    Returns:
        ValidationCheckResult with check name ``"manifest_consistency"``.
        Orphaned files are ERROR issues; unregistered files are WARN issues.
        An unreadable or malformed manifest cache is a single ERROR issue.
    """
    check_name = "manifest_consistency"
    manifest_rel_path = ".muse/snapshot_manifest.json"
    issues: list[ValidationIssue] = []
    muse_dir = root / ".muse"
    workdir = root / "muse-work"

    # Resolve HEAD commit and its snapshot manifest.
    head_path = muse_dir / "HEAD"
    if not head_path.exists():
        return ValidationCheckResult(name=check_name, passed=True, issues=[])

    head_ref = head_path.read_text().strip()
    ref_file = muse_dir / pathlib.Path(head_ref)
    # ``is_file`` rather than ``exists``: an empty HEAD resolves ``ref_file``
    # to the ``.muse`` directory itself, and reading a directory would raise.
    if not ref_file.is_file() or not ref_file.read_text().strip():
        # No commits yet — nothing to compare against.
        return ValidationCheckResult(name=check_name, passed=True, issues=[])

    # Commits reference snapshots stored in the DB; this check only reads the
    # on-disk snapshot cache, if available (written by ``muse commit``).
    snapshot_cache = muse_dir / "snapshot_manifest.json"
    if not snapshot_cache.exists():
        # No cached manifest — check is not possible without DB access.
        return ValidationCheckResult(name=check_name, passed=True, issues=[])

    try:
        # ValueError covers both json.JSONDecodeError and the
        # UnicodeDecodeError that read_text() raises on undecodable bytes.
        committed_manifest: object = json.loads(snapshot_cache.read_text())
    except (ValueError, OSError) as exc:
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.ERROR,
                check=check_name,
                path=manifest_rel_path,
                message=f"Cannot read cached snapshot manifest: {exc}",
            )
        )
        return ValidationCheckResult(name=check_name, passed=False, issues=issues)

    if not isinstance(committed_manifest, dict):
        # Valid JSON of the wrong shape (list, null, number, ...) would
        # otherwise crash below when its keys are taken.
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.ERROR,
                check=check_name,
                path=manifest_rel_path,
                message=(
                    "Cached snapshot manifest is malformed: expected a JSON object "
                    f"keyed by file path, got {type(committed_manifest).__name__}."
                ),
            )
        )
        return ValidationCheckResult(name=check_name, passed=False, issues=issues)

    # An empty filter string behaves like no filter at all.
    needle = track_filter.lower() if track_filter else None

    def in_scope(path: str) -> bool:
        """Return True when *path* passes the optional track filter."""
        return needle is None or needle in path.lower()

    if not workdir.exists():
        # All committed files are orphaned.
        for path in sorted(committed_manifest):
            if not in_scope(path):
                continue
            issues.append(
                ValidationIssue(
                    severity=ValidationSeverity.ERROR,
                    check=check_name,
                    path=path,
                    message="File is in committed manifest but muse-work/ does not exist.",
                )
            )
        return ValidationCheckResult(name=check_name, passed=not issues, issues=issues)

    # Build current working-tree manifest. Imported locally, as in the
    # original code; only ``walk_workdir`` is needed.
    from muse.core.snapshot import walk_workdir

    # NOTE(review): walk_workdir is assumed to return a mapping keyed by
    # workdir-relative path — only its keys are used here.
    current_manifest = walk_workdir(workdir)

    committed_paths = set(committed_manifest)
    current_paths = set(current_manifest)

    for path in sorted(committed_paths - current_paths):
        if not in_scope(path):
            continue
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.ERROR,
                check=check_name,
                path=path,
                message="File in committed manifest is missing from working tree (orphaned).",
            )
        )

    for path in sorted(current_paths - committed_paths):
        if not in_scope(path):
            continue
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.WARN,
                check=check_name,
                path=path,
                message="File in working tree is not recorded in committed manifest (unregistered).",
            )
        )

    return ValidationCheckResult(name=check_name, passed=not issues, issues=issues)

# ---------------------------------------------------------------------------
# Duplicate tracks check
# ---------------------------------------------------------------------------

323

324

def check_no_duplicate_tracks(
    workdir: pathlib.Path,
    track_filter: str | None = None,
) -> ValidationCheckResult:
    """Detect duplicate instrument-role definitions in the working tree.

    A duplicate is defined as two or more MIDI files sharing the same
    instrument role name: the lowercased filename stem with any trailing
    numeric suffix (optionally preceded by ``_`` or ``-``) removed. For
    example ``bass.mid``, ``bass_2.mid`` and ``bass-02.midi`` all define the
    ``bass`` role, whereas ``bass_alt.mid`` defines a separate ``bass_alt``
    role. A stem that consists only of such a suffix (e.g. ``808.mid``) keeps
    its full stem as the role, so unrelated numeric filenames are not lumped
    together under an empty role name.

    Agents use this to prevent ambiguous track assignments that would cause
    Storpheus to generate for the wrong instrument during composition.

    Args:
        workdir: The ``muse-work/`` directory to scan.
        track_filter: If given, only MIDI files whose relative path contains
            this string (case-insensitive) are evaluated.

    Returns:
        ValidationCheckResult with check name ``"no_duplicate_tracks"``. Each
        duplicated role yields one WARN issue whose ``path`` is the
        comma-separated list of files involved. A missing *workdir* counts as
        a pass with no issues.
    """
    check_name = "no_duplicate_tracks"
    issues: list[ValidationIssue] = []
    if not workdir.exists():
        return ValidationCheckResult(name=check_name, passed=True, issues=[])

    from collections import defaultdict

    # An empty filter string behaves like no filter at all.
    needle = track_filter.lower() if track_filter else None
    role_to_paths: dict[str, list[str]] = defaultdict(list)

    for midi_path in sorted(workdir.rglob("*.mid")) + sorted(workdir.rglob("*.midi")):
        if not midi_path.is_file():
            continue
        rel = midi_path.relative_to(workdir).as_posix()
        if needle is not None and needle not in rel.lower():
            continue
        # Derive role: drop the extension, then a trailing run of digits
        # together with one optional "_" or "-" separator before it.
        stem = midi_path.stem.lower()
        # Fall back to the stem when stripping leaves nothing ("808", "_01"),
        # otherwise every all-numeric filename would share the role "".
        role = re.sub(r"[_\-]?\d+$", "", stem) or stem
        role_to_paths[role].append(rel)

    for role, paths in sorted(role_to_paths.items()):
        if len(paths) > 1:
            issues.append(
                ValidationIssue(
                    severity=ValidationSeverity.WARN,
                    check=check_name,
                    path=", ".join(paths),
                    message=f"Duplicate instrument role '{role}' defined by {len(paths)} files.",
                )
            )
            logger.warning("⚠️ Duplicate track role: %s → %s", role, paths)

    return ValidationCheckResult(name=check_name, passed=not issues, issues=issues)

# ---------------------------------------------------------------------------
# Section naming convention check
# ---------------------------------------------------------------------------

386

387

def check_section_naming(
    workdir: pathlib.Path,
    section_filter: str | None = None,
) -> ValidationCheckResult:
    """Verify that section subdirectories follow the expected naming convention.

    Section directories must match ``[a-z][a-z0-9_-]*`` (lowercase, starting
    with a letter, using only alphanumeric chars, hyphens, or underscores).
    This constraint ensures consistent referencing by AI agents and avoids
    shell quoting issues. Only the immediate child directories of *workdir*
    are inspected; nested directories and plain files are ignored.

    Args:
        workdir: The ``muse-work/`` directory to scan.
        section_filter: If given, only directories whose name contains this
            string (case-insensitive) are evaluated.

    Returns:
        ValidationCheckResult with check name ``"section_naming"``. Each
        offending directory yields one WARN issue. A missing *workdir* counts
        as a pass with no issues.
    """
    check_name = "section_naming"
    issues: list[ValidationIssue] = []
    if not workdir.exists():
        return ValidationCheckResult(name=check_name, passed=True, issues=[])

    # An empty filter string behaves like no filter at all.
    needle = section_filter.lower() if section_filter else None

    for entry in sorted(workdir.iterdir()):
        if not entry.is_dir():
            continue
        name = entry.name
        if needle is not None and needle not in name.lower():
            continue
        # fullmatch, not match: with a ``$``-anchored pattern, match() would
        # accept a name that ends in a newline, because ``$`` also matches
        # just before a trailing line break.
        if _SECTION_NAME_RE.fullmatch(name):
            continue
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.WARN,
                check=check_name,
                path=name,
                message=(
                    f"Section directory '{name}' does not follow naming convention "
                    "[a-z][a-z0-9_-]* (lowercase, no spaces or uppercase letters)."
                ),
            )
        )
        logger.warning("⚠️ Section naming violation: %s", name)

    return ValidationCheckResult(name=check_name, passed=not issues, issues=issues)

# ---------------------------------------------------------------------------
# Emotion tags check
# ---------------------------------------------------------------------------

440

441

def check_emotion_tags(
    root: pathlib.Path,
    track_filter: str | None = None,
) -> ValidationCheckResult:
    """Verify that emotion tags in the tag cache are from the allowed vocabulary.

    Reads ``.muse/tags.json`` if present. The file is expected to hold a JSON
    list of objects carrying a string ``"tag"`` field; entries of any other
    shape are skipped, and a top-level value that is not a list is treated as
    "nothing to check". Any tag whose lowercased form is not in
    :data:`ALLOWED_EMOTION_TAGS` is flagged as a warning so agents know they
    may be working with an unrecognised emotional label that Maestro's mood
    model has not been trained on.

    Args:
        root: Repository root.
        track_filter: Unused for this check (included for API symmetry).

    Returns:
        ValidationCheckResult with check name ``"emotion_tags"``. Unknown
        tags and an unreadable tag cache are reported as WARN issues.
    """
    check_name = "emotion_tags"
    tag_cache_rel_path = ".muse/tags.json"
    issues: list[ValidationIssue] = []
    tag_cache = root / ".muse" / "tags.json"

    if not tag_cache.exists():
        return ValidationCheckResult(name=check_name, passed=True, issues=[])

    try:
        # ValueError covers both json.JSONDecodeError and the
        # UnicodeDecodeError that read_text() raises on undecodable bytes.
        tags_data: object = json.loads(tag_cache.read_text())
    except (ValueError, OSError) as exc:
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.WARN,
                check=check_name,
                path=tag_cache_rel_path,
                message=f"Cannot read tag cache: {exc}",
            )
        )
        return ValidationCheckResult(name=check_name, passed=False, issues=issues)

    if not isinstance(tags_data, list):
        return ValidationCheckResult(name=check_name, passed=True, issues=[])

    # Loop-invariant: the vocabulary listing appended to every warning.
    allowed_listing = ", ".join(sorted(ALLOWED_EMOTION_TAGS))

    for entry in tags_data:
        if not isinstance(entry, dict):
            continue
        tag_name: object = entry.get("tag")
        if not isinstance(tag_name, str):
            continue
        # Membership is case-insensitive; the message keeps the original casing.
        if tag_name.lower() in ALLOWED_EMOTION_TAGS:
            continue
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.WARN,
                check=check_name,
                path=tag_cache_rel_path,
                message=(
                    f"Emotion tag '{tag_name}' is not in the allowed vocabulary. "
                    f"Allowed: {allowed_listing}"
                ),
            )
        )
        logger.warning("⚠️ Unknown emotion tag: %s", tag_name)

    return ValidationCheckResult(name=check_name, passed=not issues, issues=issues)

# ---------------------------------------------------------------------------
# Auto-fix: quantise slightly off-grid notes (stub — full impl requires mido)
# ---------------------------------------------------------------------------

513

514

def apply_fixes(
    workdir: pathlib.Path,
    issues: list[ValidationIssue],
) -> list[str]:
    """Apply automatic corrections for fixable issues.

    This is currently a stub: no correction is performed and the returned
    list is always empty. The categories below are deliberately left alone:

    - Malformed MIDI files: not auto-fixable (data-loss risk).
    - Section naming: no auto-rename (would break references in other files).
    - Duplicate tracks: no auto-remove (ambiguous which to keep).

    The function is intentionally conservative — it is meant to fix only
    issues that cannot cause data loss and where the correct fix is
    unambiguous. When issues from a category marked as fixable
    (``manifest_consistency``) are present, an informational log line records
    that no fix logic exists for them yet.

    Args:
        workdir: The ``muse-work/`` working tree directory (not used yet).
        issues: The full list of issues found during validation.

    Returns:
        List of human-readable strings describing each fix applied.
    """
    # Future: quantise off-grid MIDI notes using mido when mido is available.
    fixable_checks = {"manifest_consistency"}
    pending_checks = [issue.check for issue in issues if issue.check in fixable_checks]

    if pending_checks:
        logger.info(
            "⚠️ --fix: %d fixable issue(s) found but no auto-fix logic is "
            "implemented yet for check categories: %s",
            len(pending_checks),
            set(pending_checks),
        )

    # Nothing is applied yet, so there is nothing to report.
    return []

# ---------------------------------------------------------------------------
# Orchestrator
# ---------------------------------------------------------------------------

def run_validate(
    root: pathlib.Path,
    *,
    strict: bool = False,
    track_filter: str | None = None,
    section_filter: str | None = None,
    auto_fix: bool = False,
) -> MuseValidateResult:
    """Run every integrity check against the working tree at *root*.

    This is the single entry point for the validate subsystem. The checks run
    in a fixed order (MIDI integrity, manifest consistency, duplicate tracks,
    section naming, emotion tags) and their outcomes are aggregated into one
    :class:`MuseValidateResult`.

    Args:
        root: Repository root (contains ``.muse/`` and ``muse-work/``).
        strict: Treat WARN-severity issues as fatal (exit 2 in CLI). The flag
            is accepted here but not consulted by this function.
        track_filter: Restrict checks to files/paths containing this string.
        section_filter: Restrict section-naming check to dirs matching this.
        auto_fix: When True and at least one issue was found, call
            ``apply_fixes`` with the collected issues. The checks are not
            re-run afterwards.

    Returns:
        MuseValidateResult with all check outcomes and any fixes applied.
    """
    workdir = root / "muse-work"

    check_results: list[ValidationCheckResult] = [
        check_midi_integrity(workdir, track_filter=track_filter),
        check_manifest_consistency(root, track_filter=track_filter),
        check_no_duplicate_tracks(workdir, track_filter=track_filter),
        check_section_naming(workdir, section_filter=section_filter),
        check_emotion_tags(root, track_filter=track_filter),
    ]

    # Flatten the per-check issue lists, preserving check order.
    all_issues: list[ValidationIssue] = []
    for result in check_results:
        all_issues.extend(result.issues)

    if auto_fix and all_issues:
        fixes_applied: list[str] = apply_fixes(workdir, all_issues)
    else:
        fixes_applied = []

    has_errors = any(
        issue.severity == ValidationSeverity.ERROR for issue in all_issues
    )
    has_warnings = any(
        issue.severity == ValidationSeverity.WARN for issue in all_issues
    )
    # INFO-severity issues alone do not make the result unclean.
    clean = not (has_errors or has_warnings)

    logger.info(
        "✅ muse validate: %d check(s), errors=%s, warnings=%s",
        len(check_results),
        has_errors,
        has_warnings,
    )

    return MuseValidateResult(
        clean=clean,
        has_errors=has_errors,
        has_warnings=has_warnings,
        checks=check_results,
        fixes_applied=fixes_applied,
    )