musehub/services/musehub_mcp_executor.py · gabriel/musehub

1

"""MuseHub MCP tool executor — server-side logic for all musehub_* MCP tools.

This module is the execution backend for MuseHub browsing tools exposed via
MCP. Each public function opens its own DB session via ``AsyncSessionLocal``,
delegates to ``musehub_repository`` for persistence access, and returns a
typed ``MusehubToolResult``.

Design contract
---------------
- All functions are async and return ``MusehubToolResult`` on both success
  and failure (no exceptions propagate to the MCP server).
- ``MusehubToolResult.ok`` distinguishes success from failure.
- ``MusehubToolResult.error_code`` is one of: ``"not_found"``,
  ``"invalid_dimension"``, ``"invalid_mode"``, ``"db_unavailable"``.
- Callers (``MuseMCPServer._execute_musehub_tool``) pattern-match on
  these codes to build appropriate ``MCPContentBlock`` responses.
- This module must NOT import MCP protocol types — it is pure service layer.

``AsyncSessionLocal`` is imported at module level so tests can patch it as
``musehub.services.musehub_mcp_executor.AsyncSessionLocal``.

"""

22

from __future__ import annotations

import logging

import mimetypes

import os

from dataclasses import dataclass, field

28

from typing import Literal

29

30

from musehub.contracts.json_types import JSONValue

31

from musehub.db.database import AsyncSessionLocal

32

from musehub.services import musehub_repository

33

34

logger = logging.getLogger(__name__)

35

36

# ---------------------------------------------------------------------------

37

# Result types

38

# ---------------------------------------------------------------------------

39

40

MusehubErrorCode = Literal["not_found", "invalid_dimension", "invalid_mode", "db_unavailable"]
"""Closed set of error codes a MuseHub MCP executor may report.

The routing layer branches on these values when composing error messages:

- ``not_found`` — the repo or object does not exist
- ``invalid_dimension`` — the requested analysis dimension is not recognised
- ``invalid_mode`` — the requested search mode is not recognised
- ``db_unavailable`` — the DB session factory is not initialised (startup race)
"""

@dataclass(frozen=True)
class MusehubToolResult:
    """Outcome of one musehub_* MCP tool invocation.

    Executor functions hand this value to the MCP server's routing layer
    rather than raising, so it carries both the success payload and the
    failure description. Instances are immutable (``frozen=True``).

    Attributes:
        ok: Primary success/failure flag.
        data: JSON-serialisable payload for the MCP content block. Filled in
            on success; defaults to a fresh empty dict per instance.
        error_code: Machine-readable failure code, ``None`` unless set.
        error_message: Human-readable failure description, ``None`` unless set.
    """

    ok: bool
    data: dict[str, JSONValue] = field(default_factory=dict)
    error_code: MusehubErrorCode | None = None
    error_message: str | None = None

72

73

74

# ---------------------------------------------------------------------------

75

# Internal helpers

76

# ---------------------------------------------------------------------------

77

78

79

def _check_db_available() -> MusehubToolResult | None:
    """Report whether the DB session factory can be used yet.

    The MCP stdio server runs outside the FastAPI lifespan, so ``init_db()``
    may not have been called by the time a tool is invoked. Executors call
    this first so the caller gets a structured error instead of an unhandled
    ``RuntimeError``.

    Returns:
        ``None`` when ``database._async_session_factory`` is set; otherwise a
        failed ``MusehubToolResult`` with ``error_code="db_unavailable"``.
    """
    from musehub.db import database  # local import to avoid circular reference

    if database._async_session_factory is not None:
        return None

    message = (
        "Database session factory is not initialised. "
        "Ensure DATABASE_URL is set and the service has started up."
    )
    return MusehubToolResult(
        ok=False,
        error_code="db_unavailable",
        error_message=message,
    )

_EXTRA_MIME: dict[str, str] = {

102

".mid": "audio/midi",

103

".midi": "audio/midi",

104

".mp3": "audio/mpeg",

105

".webp": "image/webp",

}

def _mime_for_path(path: str) -> str:

110

"""Resolve MIME type from a file path extension."""

111

ext = os.path.splitext(path)[1].lower()

112

if ext in _EXTRA_MIME:

113

return _EXTRA_MIME[ext]

114

guessed, _ = mimetypes.guess_type(path)

115

return guessed or "application/octet-stream"

116

117

118

# ---------------------------------------------------------------------------

119

# Tool executors

120

# ---------------------------------------------------------------------------

121

122

123

async def execute_browse_repo(repo_id: str) -> MusehubToolResult:
    """Return repo metadata, its branches, and up to 10 recent commits.

    Intended as the first call an agent makes on an unfamiliar repository:
    three repository queries (repo, branches, commits) are bundled into one
    response to save round-trips.

    Args:
        repo_id: UUID of the target MuseHub repository.

    Returns:
        ``MusehubToolResult`` whose ``data`` has the keys ``repo``,
        ``branches``, ``recent_commits``, ``total_commits`` and
        ``branch_count``; ``error_code="not_found"`` if the repo does not
        exist, or ``"db_unavailable"`` if the session factory is not ready.
    """
    unavailable = _check_db_available()
    if unavailable is not None:
        return unavailable

    async with AsyncSessionLocal() as db:
        repo = await musehub_repository.get_repo(db, repo_id)
        if repo is None:
            return MusehubToolResult(
                ok=False,
                error_code="not_found",
                error_message=f"Repository '{repo_id}' not found.",
            )

        branches = await musehub_repository.list_branches(db, repo_id)
        commits, total = await musehub_repository.list_commits(db, repo_id, limit=10)

        repo_summary: dict[str, JSONValue] = {
            "repo_id": repo.repo_id,
            "name": repo.name,
            "visibility": repo.visibility,
            "owner_user_id": repo.owner_user_id,
            "clone_url": repo.clone_url,
            "created_at": repo.created_at.isoformat(),
        }
        branch_rows: list[JSONValue] = [
            {
                "branch_id": branch.branch_id,
                "name": branch.name,
                "head_commit_id": branch.head_commit_id,
            }
            for branch in branches
        ]
        commit_rows: list[JSONValue] = [
            {
                "commit_id": commit.commit_id,
                "branch": commit.branch,
                "message": commit.message,
                "author": commit.author,
                "timestamp": commit.timestamp.isoformat(),
            }
            for commit in commits
        ]

        payload: dict[str, JSONValue] = {
            "repo": repo_summary,
            "branches": branch_rows,
            "recent_commits": commit_rows,
            "total_commits": total,
            "branch_count": len(branches),
        }
        return MusehubToolResult(ok=True, data=payload)

186

187

188

async def execute_list_branches(repo_id: str) -> MusehubToolResult:
    """List every branch of a MuseHub repository.

    Agents typically call this ahead of ``execute_list_commits`` to learn
    which branch names exist and what each branch's head commit ID is.

    Args:
        repo_id: UUID of the target MuseHub repository.

    Returns:
        ``MusehubToolResult`` whose ``data`` holds ``repo_id``, ``branches``
        (a list of branch dicts) and ``branch_count``;
        ``error_code="not_found"`` if the repo does not exist, or
        ``"db_unavailable"`` if the session factory is not ready.
    """
    unavailable = _check_db_available()
    if unavailable is not None:
        return unavailable

    async with AsyncSessionLocal() as db:
        repo = await musehub_repository.get_repo(db, repo_id)
        if repo is None:
            return MusehubToolResult(
                ok=False,
                error_code="not_found",
                error_message=f"Repository '{repo_id}' not found.",
            )

        branches = await musehub_repository.list_branches(db, repo_id)
        branch_rows: list[JSONValue] = [
            {
                "branch_id": branch.branch_id,
                "name": branch.name,
                "head_commit_id": branch.head_commit_id,
            }
            for branch in branches
        ]
        payload: dict[str, JSONValue] = {
            "repo_id": repo_id,
            "branches": branch_rows,
            "branch_count": len(branches),
        }
        return MusehubToolResult(ok=True, data=payload)

227

228

229

async def execute_list_commits(
    repo_id: str,
    branch: str | None = None,
    limit: int = 20,
) -> MusehubToolResult:
    """Return a page of commits for a MuseHub repository, newest first.

    Args:
        repo_id: UUID of the target MuseHub repository.
        branch: Optional branch name filter; ``None`` returns commits across
            all branches.
        limit: Maximum number of commits to return (clamped to 1–100).

    Returns:
        ``MusehubToolResult`` whose ``data`` holds ``repo_id``,
        ``branch_filter``, ``commits``, ``returned`` and ``total``;
        ``error_code="not_found"`` if the repo does not exist, or
        ``"db_unavailable"`` if the session factory is not ready.
    """
    # Clamp the page size into [1, 100] before any DB work.
    capped = min(limit, 100)
    limit = max(1, capped)

    unavailable = _check_db_available()
    if unavailable is not None:
        return unavailable

    async with AsyncSessionLocal() as db:
        repo = await musehub_repository.get_repo(db, repo_id)
        if repo is None:
            return MusehubToolResult(
                ok=False,
                error_code="not_found",
                error_message=f"Repository '{repo_id}' not found.",
            )

        commits, total = await musehub_repository.list_commits(
            db, repo_id, branch=branch, limit=limit
        )

        commit_rows: list[JSONValue] = []
        for commit in commits:
            # parent_ids is list[str]; copy it into a list[JSONValue] because
            # list is invariant and the typed payload would reject list[str].
            parents: list[JSONValue] = list(commit.parent_ids)
            commit_rows.append(
                {
                    "commit_id": commit.commit_id,
                    "branch": commit.branch,
                    "parent_ids": parents,
                    "message": commit.message,
                    "author": commit.author,
                    "timestamp": commit.timestamp.isoformat(),
                    "snapshot_id": commit.snapshot_id,
                }
            )

        payload: dict[str, JSONValue] = {
            "repo_id": repo_id,
            "branch_filter": branch,
            "commits": commit_rows,
            "returned": len(commits),
            "total": total,
        }
        return MusehubToolResult(ok=True, data=payload)

287

288

289

async def execute_read_file(repo_id: str, object_id: str) -> MusehubToolResult:
    """Return metadata for one stored artifact in a MuseHub repo.

    Only metadata is returned (object ID, path, size, MIME type, creation
    time). Binary content is deliberately left out because MCP tool responses
    must be text-safe JSON; agents needing the raw bytes should use the HTTP
    objects endpoint.

    Args:
        repo_id: UUID of the target MuseHub repository.
        object_id: Content-addressed ID (e.g. ``sha256:abc...``).

    Returns:
        ``MusehubToolResult`` with the file metadata;
        ``error_code="not_found"`` if the repo or the object does not exist,
        or ``"db_unavailable"`` if the session factory is not ready.
    """
    unavailable = _check_db_available()
    if unavailable is not None:
        return unavailable

    async with AsyncSessionLocal() as db:
        repo = await musehub_repository.get_repo(db, repo_id)
        if repo is None:
            return MusehubToolResult(
                ok=False,
                error_code="not_found",
                error_message=f"Repository '{repo_id}' not found.",
            )

        artifact = await musehub_repository.get_object_row(db, repo_id, object_id)
        if artifact is None:
            return MusehubToolResult(
                ok=False,
                error_code="not_found",
                error_message=f"Object '{object_id}' not found in repository '{repo_id}'.",
            )

        payload: dict[str, JSONValue] = {
            "object_id": artifact.object_id,
            "repo_id": repo_id,
            "path": artifact.path,
            "size_bytes": artifact.size_bytes,
            "mime_type": _mime_for_path(artifact.path),
            "created_at": artifact.created_at.isoformat(),
        }
        return MusehubToolResult(ok=True, data=payload)

333

334

335

async def execute_get_analysis(
    repo_id: str,
    dimension: str = "overview",
) -> MusehubToolResult:
    """Return structured analysis for a MuseHub repository.

    Supported dimensions:

    - ``overview`` — repo name and visibility, branch/commit/object counts,
      plus ``last_commit_at`` and ``most_recent_author``. Both come from the
      single commit fetched with ``limit=1`` (assumed to be the newest commit
      — confirm ``list_commits`` ordering); this is the author of that
      commit, not the most active author overall.
    - ``commits`` — total commit count, per-branch and per-author tallies,
      and the first five messages. The tallies are computed over the sample
      fetched with ``limit=100`` (its size is reported as
      ``commits_in_sample``), so they may not cover the whole history.
    - ``objects`` — artifact inventory: total size, per-MIME-type counts
      and sizes, and the first ten object paths.

    MIDI audio analysis (key, tempo, harmonic content) requires Storpheus
    integration and is not yet available; ``midi_analysis`` is ``null``.

    Args:
        repo_id: UUID of the target MuseHub repository.
        dimension: Analysis dimension — one of ``overview``, ``commits``,
            ``objects``.

    Returns:
        ``MusehubToolResult`` with the analysis data. On failure
        ``error_code`` is ``"invalid_dimension"`` (checked before any DB
        access), ``"db_unavailable"``, or ``"not_found"``.
    """
    valid_dimensions = {"overview", "commits", "objects"}
    if dimension not in valid_dimensions:
        return MusehubToolResult(
            ok=False,
            error_code="invalid_dimension",
            error_message=(
                f"Unknown dimension '{dimension}'. "
                f"Valid values: {', '.join(sorted(valid_dimensions))}."
            ),
        )

    if (err := _check_db_available()) is not None:
        return err

    async with AsyncSessionLocal() as session:
        repo = await musehub_repository.get_repo(session, repo_id)
        if repo is None:
            return MusehubToolResult(
                ok=False,
                error_code="not_found",
                error_message=f"Repository '{repo_id}' not found.",
            )

        data: dict[str, JSONValue]

        if dimension == "overview":
            branches = await musehub_repository.list_branches(session, repo_id)
            # Only one commit is needed here: its timestamp/author plus the total.
            commits, total_commits = await musehub_repository.list_commits(
                session, repo_id, limit=1
            )
            objects = await musehub_repository.list_objects(session, repo_id)

            last_commit_at: JSONValue = None
            most_recent_author: JSONValue = None
            if commits:
                last_commit_at = commits[0].timestamp.isoformat()
                most_recent_author = commits[0].author

            data = {
                "repo_id": repo_id,
                "dimension": "overview",
                "repo_name": repo.name,
                "visibility": repo.visibility,
                "branch_count": len(branches),
                "commit_count": total_commits,
                "object_count": len(objects),
                "last_commit_at": last_commit_at,
                "most_recent_author": most_recent_author,
                "midi_analysis": None,
            }
            return MusehubToolResult(ok=True, data=data)

        if dimension == "commits":
            # The breakdown below is based on this sample, not the full history.
            all_commits, total = await musehub_repository.list_commits(
                session, repo_id, limit=100
            )

            by_branch: dict[str, int] = {}
            by_author: dict[str, int] = {}
            for c in all_commits:
                by_branch[c.branch] = by_branch.get(c.branch, 0) + 1
                by_author[c.author] = by_author.get(c.author, 0) + 1

            data = {
                "repo_id": repo_id,
                "dimension": "commits",
                "total_commits": total,
                "commits_in_sample": len(all_commits),
                # Rebuilt as fresh dicts so they type-check as JSONValue
                # (dict[str, int] is not accepted directly: dict is invariant).
                "by_branch": {k: v for k, v in by_branch.items()},
                "by_author": {k: v for k, v in by_author.items()},
                "recent_messages": [c.message for c in all_commits[:5]],
            }
            return MusehubToolResult(ok=True, data=data)

        # dimension == "objects"
        objects = await musehub_repository.list_objects(session, repo_id)

        by_mime: dict[str, int] = {}
        size_by_mime: dict[str, int] = {}
        total_size = 0
        for obj in objects:
            mime = _mime_for_path(obj.path)
            by_mime[mime] = by_mime.get(mime, 0) + 1
            size_by_mime[mime] = size_by_mime.get(mime, 0) + obj.size_bytes
            total_size += obj.size_bytes

        data = {
            "repo_id": repo_id,
            "dimension": "objects",
            "total_objects": len(objects),
            "total_size_bytes": total_size,
            "by_mime_type": {k: v for k, v in by_mime.items()},
            "size_by_mime_type": {k: v for k, v in size_by_mime.items()},
            "sample_paths": [obj.path for obj in objects[:10]],
        }
        return MusehubToolResult(ok=True, data=data)

454

455

456

async def execute_search(
    repo_id: str,
    query: str,
    mode: str = "path",
) -> MusehubToolResult:
    """Search within a MuseHub repository by substring match.

    Matching is a case-insensitive substring test done in Python (no
    DB-level LIKE query), so results are consistent across database
    backends. Two modes are supported:

    - ``path`` — matches object file paths (e.g. ``tracks/jazz_4b.mid``)
      against everything returned by ``list_objects``.
    - ``commit`` — matches commit messages (e.g. ``add bass intro``), but
      only among the commits returned by ``list_commits(limit=100)``; older
      commits beyond that window are not examined.

    An empty ``query`` is a substring of every string, so it matches every
    candidate row. For very large repos (>10 k objects) the in-memory scan
    may be slow — index-backed search is a planned future enhancement.

    Args:
        repo_id: UUID of the target MuseHub repository.
        query: Case-insensitive substring to search for.
        mode: ``"path"`` or ``"commit"``.

    Returns:
        ``MusehubToolResult`` whose ``data`` holds ``repo_id``, ``query``,
        ``mode``, ``result_count`` and ``results``. On failure ``error_code``
        is ``"invalid_mode"`` (checked before any DB access),
        ``"db_unavailable"``, or ``"not_found"``.
    """
    valid_modes = {"path", "commit"}
    if mode not in valid_modes:
        return MusehubToolResult(
            ok=False,
            error_code="invalid_mode",
            error_message=(
                f"Unknown search mode '{mode}'. "
                f"Valid values: {', '.join(sorted(valid_modes))}."
            ),
        )

    if (err := _check_db_available()) is not None:
        return err

    # Lower-case once; each candidate is lower-cased at comparison time.
    q = query.lower()

    async with AsyncSessionLocal() as session:
        repo = await musehub_repository.get_repo(session, repo_id)
        if repo is None:
            return MusehubToolResult(
                ok=False,
                error_code="not_found",
                error_message=f"Repository '{repo_id}' not found.",
            )

        results: list[JSONValue]
        if mode == "path":
            objects = await musehub_repository.list_objects(session, repo_id)
            results = [
                {
                    "object_id": obj.object_id,
                    "path": obj.path,
                    "size_bytes": obj.size_bytes,
                    "mime_type": _mime_for_path(obj.path),
                }
                for obj in objects
                if q in obj.path.lower()
            ]
        else:  # mode == "commit"
            # Only this window of commits is searched; see the docstring.
            commits, _ = await musehub_repository.list_commits(
                session, repo_id, limit=100
            )
            results = [
                {
                    "commit_id": c.commit_id,
                    "branch": c.branch,
                    "message": c.message,
                    "author": c.author,
                    "timestamp": c.timestamp.isoformat(),
                }
                for c in commits
                if q in c.message.lower()
            ]

        data: dict[str, JSONValue] = {
            "repo_id": repo_id,
            "query": query,
            "mode": mode,
            "result_count": len(results),
            "results": results,
        }
        return MusehubToolResult(ok=True, data=data)

541

542

543

async def execute_get_context(repo_id: str) -> MusehubToolResult:
    """Return the full AI context document for a MuseHub repository.

    This is the primary read-side interface for music generation agents. It
    bundles repo metadata, all branches, up to 10 recent commits across all
    branches, and the full artifact inventory into one structured document
    that can be placed in an agent's context window before it generates new
    music, helping keep new material coherent with existing work.

    Musical analysis fields (key, tempo, time_signature) are ``null`` until
    Storpheus MIDI analysis integration is complete.

    Args:
        repo_id: UUID of the target MuseHub repository.

    Returns:
        ``MusehubToolResult`` whose ``data`` holds ``repo_id`` and
        ``context`` (the full context document);
        ``error_code="not_found"`` if the repo does not exist, or
        ``"db_unavailable"`` if the session factory is not ready.
    """
    unavailable = _check_db_available()
    if unavailable is not None:
        return unavailable

    async with AsyncSessionLocal() as db:
        repo = await musehub_repository.get_repo(db, repo_id)
        if repo is None:
            return MusehubToolResult(
                ok=False,
                error_code="not_found",
                error_message=f"Repository '{repo_id}' not found.",
            )

        branches = await musehub_repository.list_branches(db, repo_id)
        commits, total_commits = await musehub_repository.list_commits(
            db, repo_id, limit=10
        )
        artifacts = await musehub_repository.list_objects(db, repo_id)

        # Tally artifacts per MIME type.
        mime_counts: dict[str, int] = {}
        for artifact in artifacts:
            mime_type = _mime_for_path(artifact.path)
            mime_counts[mime_type] = mime_counts.get(mime_type, 0) + 1

        repo_section: dict[str, JSONValue] = {
            "repo_id": repo.repo_id,
            "name": repo.name,
            "visibility": repo.visibility,
            "owner_user_id": repo.owner_user_id,
            "created_at": repo.created_at.isoformat(),
        }
        branch_section: list[JSONValue] = [
            {
                "name": branch.name,
                "head_commit_id": branch.head_commit_id,
            }
            for branch in branches
        ]
        commit_section: list[JSONValue] = [
            {
                "commit_id": commit.commit_id,
                "branch": commit.branch,
                "message": commit.message,
                "author": commit.author,
                "timestamp": commit.timestamp.isoformat(),
            }
            for commit in commits
        ]
        artifact_section: dict[str, JSONValue] = {
            "total_count": len(artifacts),
            "by_mime_type": {name: count for name, count in mime_counts.items()},
            "paths": [artifact.path for artifact in artifacts],
        }
        analysis_section: dict[str, JSONValue] = {
            "key": None,
            "tempo": None,
            "time_signature": None,
            "note": (
                "Musical analysis requires Storpheus MIDI integration "
                "(not yet available — fields will be populated in a future release)."
            ),
        }

        context: dict[str, JSONValue] = {
            "repo": repo_section,
            "branches": branch_section,
            "recent_commits": commit_section,
            "commit_stats": {
                "total": total_commits,
                "shown": len(commits),
            },
            "artifacts": artifact_section,
            "musical_analysis": analysis_section,
        }

        payload: dict[str, JSONValue] = {
            "repo_id": repo_id,
            "context": context,
        }
        return MusehubToolResult(ok=True, data=payload)