gabriel / musehub public
musehub_mcp_executor.py python
636 lines 22.5 KB
e6fad116 Remove all Stori, Maestro, and AgentCeption references; rebrand to Muse VCS Gabriel Cardona <gabriel@tellurstori.com> 6d ago
1 """MuseHub MCP tool executor — server-side logic for all musehub_* MCP tools.
2
3 This module is the execution backend for MuseHub browsing tools exposed via
4 MCP. Each public function opens its own DB session via ``AsyncSessionLocal``,
5 delegates to ``musehub_repository`` for persistence access, and returns a
6 typed ``MusehubToolResult``.
7
8 Design contract
9 ---------------
10 - All functions are async and return ``MusehubToolResult`` on both success
11 and failure (no exceptions propagate to the MCP server).
12 - ``MusehubToolResult.ok`` distinguishes success from failure.
13 - ``MusehubToolResult.error_code`` is one of: ``"not_found"``,
14 ``"invalid_dimension"``, ``"invalid_mode"``, ``"db_unavailable"``.
15 - Callers (``MuseMCPServer._execute_musehub_tool``) pattern-match on
16 these codes to build appropriate ``MCPContentBlock`` responses.
17 - This module must NOT import MCP protocol types — it is pure service layer.
18
19 ``AsyncSessionLocal`` is imported at module level so tests can patch it as
20 ``musehub.services.musehub_mcp_executor.AsyncSessionLocal``.
21 """
22 from __future__ import annotations
23
24 import logging
25 import mimetypes
26 import os
27 from dataclasses import dataclass, field
28 from typing import Literal
29
30 from musehub.contracts.json_types import JSONValue
31 from musehub.db.database import AsyncSessionLocal
32 from musehub.services import musehub_repository
33
34 logger = logging.getLogger(__name__)
35
36 # ---------------------------------------------------------------------------
37 # Result types
38 # ---------------------------------------------------------------------------
39
MusehubErrorCode = Literal[
    "not_found",
    "invalid_dimension",
    "invalid_mode",
    "db_unavailable",
]
"""Closed set of error codes a MuseHub MCP executor can report.

Callers pattern-match on these codes to build an appropriate error message:

- ``not_found`` — the repo or object does not exist
- ``invalid_dimension`` — the requested analysis dimension is not recognised
- ``invalid_mode`` — the requested search mode is not recognised
- ``db_unavailable`` — the DB session factory is not initialised (startup race)
"""
54
55
@dataclass(frozen=True)
class MusehubToolResult:
    """Outcome of a single musehub_* MCP tool invocation.

    Executor functions hand this value to the MCP server's routing layer
    instead of raising, so it is the whole contract between the two layers —
    do not bypass it with raw exceptions.

    Inspect ``ok`` first. When it is true, ``data`` carries the
    JSON-serialisable payload for the MCP content block. When it is false,
    ``error_code`` and ``error_message`` describe what went wrong.
    """

    # Primary success/failure signal.
    ok: bool
    # Success payload; every instance gets its own fresh empty dict by default.
    data: dict[str, JSONValue] = field(default_factory=dict)
    # Machine-readable failure category; stays None unless the creator sets it.
    error_code: MusehubErrorCode | None = None
    # Human-readable failure description; stays None unless the creator sets it.
    error_message: str | None = None
72
73
74 # ---------------------------------------------------------------------------
75 # Internal helpers
76 # ---------------------------------------------------------------------------
77
78
def _check_db_available() -> MusehubToolResult | None:
    """Report whether the DB session factory is ready for use.

    The MCP stdio server runs outside the FastAPI lifespan, so ``init_db()``
    may not have been called by the time a tool is invoked. Executors call
    this before opening a session so the caller gets a structured
    ``db_unavailable`` result rather than an unhandled ``RuntimeError``.

    Returns:
        ``None`` when the session factory has been initialised, otherwise a
        failed ``MusehubToolResult`` with ``error_code="db_unavailable"``.
    """
    from musehub.db import database  # local import to avoid circular reference

    # Happy path first: a non-None factory means sessions can be opened.
    if database._async_session_factory is not None:
        return None

    message = (
        "Database session factory is not initialised. "
        "Ensure DATABASE_URL is set and the service has started up."
    )
    return MusehubToolResult(
        ok=False,
        error_code="db_unavailable",
        error_message=message,
    )
99
100
# Extension -> MIME type overrides consulted before the ``mimetypes`` module.
# Keys are lower-case extensions including the leading dot.
_EXTRA_MIME: dict[str, str] = {
    ".mid": "audio/midi",
    ".midi": "audio/midi",
    ".mp3": "audio/mpeg",
    ".webp": "image/webp",
}


def _mime_for_path(path: str) -> str:
    """Return the MIME type implied by the extension of *path*.

    The extension is lower-cased and looked up in ``_EXTRA_MIME`` first; if
    it is not listed there, ``mimetypes.guess_type`` decides. Paths that
    neither source recognises map to ``"application/octet-stream"``.
    """
    _, suffix = os.path.splitext(path)
    override = _EXTRA_MIME.get(suffix.lower())
    if override is not None:
        return override

    guessed_type = mimetypes.guess_type(path)[0]
    if guessed_type:
        return guessed_type
    return "application/octet-stream"
116
117
118 # ---------------------------------------------------------------------------
119 # Tool executors
120 # ---------------------------------------------------------------------------
121
122
async def execute_browse_repo(repo_id: str) -> MusehubToolResult:
    """Give an agent a one-call overview of a MuseHub repository.

    Intended as the first tool an agent calls on an unfamiliar repo: it
    combines the repo lookup, the branch list, and a ``limit=10`` commit
    query into a single response so the common "explore a new repo" case
    needs only one round-trip.

    Args:
        repo_id: UUID of the target MuseHub repository.

    Returns:
        ``MusehubToolResult`` whose ``data`` has the keys ``repo``,
        ``branches``, ``recent_commits``, ``total_commits`` and
        ``branch_count``; ``error_code="not_found"`` if the repo does not
        exist; or the ``db_unavailable`` result from
        ``_check_db_available``.
    """
    unavailable = _check_db_available()
    if unavailable is not None:
        return unavailable

    async with AsyncSessionLocal() as session:
        repo = await musehub_repository.get_repo(session, repo_id)
        if repo is None:
            return MusehubToolResult(
                ok=False,
                error_code="not_found",
                error_message=f"Repository '{repo_id}' not found.",
            )

        branches = await musehub_repository.list_branches(session, repo_id)
        commits, total = await musehub_repository.list_commits(
            session, repo_id, limit=10
        )

        repo_summary: dict[str, JSONValue] = {
            "repo_id": repo.repo_id,
            "name": repo.name,
            "visibility": repo.visibility,
            "owner_user_id": repo.owner_user_id,
            "clone_url": repo.clone_url,
            "created_at": repo.created_at.isoformat(),
        }
        branch_summaries: list[JSONValue] = [
            {
                "branch_id": branch.branch_id,
                "name": branch.name,
                "head_commit_id": branch.head_commit_id,
            }
            for branch in branches
        ]
        commit_summaries: list[JSONValue] = [
            {
                "commit_id": commit.commit_id,
                "branch": commit.branch,
                "message": commit.message,
                "author": commit.author,
                "timestamp": commit.timestamp.isoformat(),
            }
            for commit in commits
        ]

        payload: dict[str, JSONValue] = {
            "repo": repo_summary,
            "branches": branch_summaries,
            "recent_commits": commit_summaries,
            "total_commits": total,
            "branch_count": len(branches),
        }
        return MusehubToolResult(ok=True, data=payload)
186
187
async def execute_list_branches(repo_id: str) -> MusehubToolResult:
    """List every branch of a MuseHub repository.

    Agents typically call this ahead of ``execute_list_commits`` to learn
    which branch names exist and which commit each one currently points at.

    Args:
        repo_id: UUID of the target MuseHub repository.

    Returns:
        ``MusehubToolResult`` whose ``data`` holds ``repo_id``, ``branches``
        (one dict per branch with ``branch_id``, ``name`` and
        ``head_commit_id``) and ``branch_count``;
        ``error_code="not_found"`` if the repo does not exist; or the
        ``db_unavailable`` result from ``_check_db_available``.
    """
    unavailable = _check_db_available()
    if unavailable is not None:
        return unavailable

    async with AsyncSessionLocal() as session:
        repo = await musehub_repository.get_repo(session, repo_id)
        if repo is None:
            return MusehubToolResult(
                ok=False,
                error_code="not_found",
                error_message=f"Repository '{repo_id}' not found.",
            )

        branches = await musehub_repository.list_branches(session, repo_id)
        branch_summaries: list[JSONValue] = [
            {
                "branch_id": branch.branch_id,
                "name": branch.name,
                "head_commit_id": branch.head_commit_id,
            }
            for branch in branches
        ]

        payload: dict[str, JSONValue] = {
            "repo_id": repo_id,
            "branches": branch_summaries,
            "branch_count": len(branches),
        }
        return MusehubToolResult(ok=True, data=payload)
227
228
async def execute_list_commits(
    repo_id: str,
    branch: str | None = None,
    limit: int = 20,
) -> MusehubToolResult:
    """Return paginated commits for a MuseHub repository, newest first.

    Args:
        repo_id: UUID of the target MuseHub repository.
        branch: Optional branch name filter; ``None`` returns commits across
            all branches.
        limit: Maximum number of commits to return. Values outside 1–100 are
            clamped into that range before the query runs.

    Returns:
        ``MusehubToolResult`` whose ``data`` holds ``repo_id``,
        ``branch_filter``, ``commits``, ``returned`` and ``total``;
        ``error_code="not_found"`` if the repo does not exist; or the
        ``db_unavailable`` result from ``_check_db_available``.
    """
    # Clamp into [1, 100]: raise too-small values first, then cap the top end.
    limit = min(max(limit, 1), 100)

    unavailable = _check_db_available()
    if unavailable is not None:
        return unavailable

    async with AsyncSessionLocal() as session:
        repo = await musehub_repository.get_repo(session, repo_id)
        if repo is None:
            return MusehubToolResult(
                ok=False,
                error_code="not_found",
                error_message=f"Repository '{repo_id}' not found.",
            )

        commits, total = await musehub_repository.list_commits(
            session, repo_id, branch=branch, limit=limit
        )

        serialized: list[JSONValue] = []
        for commit in commits:
            # Copy into an explicitly annotated list[JSONValue]: a list[str]
            # is not accepted where list[JSONValue] is expected (list
            # invariance).
            parents: list[JSONValue] = list(commit.parent_ids)
            serialized.append(
                {
                    "commit_id": commit.commit_id,
                    "branch": commit.branch,
                    "parent_ids": parents,
                    "message": commit.message,
                    "author": commit.author,
                    "timestamp": commit.timestamp.isoformat(),
                    "snapshot_id": commit.snapshot_id,
                }
            )

        payload: dict[str, JSONValue] = {
            "repo_id": repo_id,
            "branch_filter": branch,
            "commits": serialized,
            "returned": len(commits),
            "total": total,
        }
        return MusehubToolResult(ok=True, data=payload)
287
288
async def execute_read_file(repo_id: str, object_id: str) -> MusehubToolResult:
    """Describe a stored artifact in a MuseHub repo without returning its bytes.

    The payload contains ``object_id``, ``repo_id``, ``path``,
    ``size_bytes``, ``mime_type`` and ``created_at``. Binary content is
    deliberately left out — MCP tool responses must be text-safe JSON — so
    agents that need the raw bytes should use the HTTP objects endpoint.

    Args:
        repo_id: UUID of the target MuseHub repository.
        object_id: Content-addressed ID (e.g. ``sha256:abc...``).

    Returns:
        ``MusehubToolResult`` with the file metadata;
        ``error_code="not_found"`` if either the repo or the object does not
        exist (the message says which); or the ``db_unavailable`` result
        from ``_check_db_available``.
    """
    unavailable = _check_db_available()
    if unavailable is not None:
        return unavailable

    async with AsyncSessionLocal() as session:
        # Resolve the repo first so a missing repo and a missing object
        # produce distinct error messages.
        repo = await musehub_repository.get_repo(session, repo_id)
        if repo is None:
            return MusehubToolResult(
                ok=False,
                error_code="not_found",
                error_message=f"Repository '{repo_id}' not found.",
            )

        stored = await musehub_repository.get_object_row(
            session, repo_id, object_id
        )
        if stored is None:
            return MusehubToolResult(
                ok=False,
                error_code="not_found",
                error_message=f"Object '{object_id}' not found in repository '{repo_id}'.",
            )

        metadata: dict[str, JSONValue] = {
            "object_id": stored.object_id,
            "repo_id": repo_id,
            "path": stored.path,
            "size_bytes": stored.size_bytes,
            "mime_type": _mime_for_path(stored.path),
            "created_at": stored.created_at.isoformat(),
        }
        return MusehubToolResult(ok=True, data=metadata)
333
334
async def execute_get_analysis(
    repo_id: str,
    dimension: str = "overview",
) -> MusehubToolResult:
    """Return structured analysis for a MuseHub repository.

    Supported dimensions:
    - ``overview`` — repo stats: name, visibility, branch count, commit
      count, object count, plus the timestamp and author of the most recent
      commit (both ``null`` when the repo has no commits).
    - ``commits`` — commit activity: the total count, per-branch and
      per-author counts, and the first five messages. The breakdowns cover
      only the commits fetched here (at most 100), reported as
      ``commits_in_sample``.
    - ``objects`` — artifact inventory: total size, per-MIME-type counts
      and sizes, and the first ten object paths.

    MIDI audio analysis (key, tempo, harmonic content) requires Storpheus
    integration and is not yet available; ``midi_analysis`` is ``null``.

    Args:
        repo_id: UUID of the target MuseHub repository.
        dimension: Analysis dimension — one of ``overview``, ``commits``,
            ``objects``.

    Returns:
        ``MusehubToolResult`` with the analysis data;
        ``error_code="invalid_dimension"`` for an unknown dimension (checked
        before any DB access); ``error_code="not_found"`` if the repo does
        not exist; or the ``db_unavailable`` result from
        ``_check_db_available``.
    """
    known_dimensions = frozenset(("overview", "commits", "objects"))
    if dimension not in known_dimensions:
        choices = ", ".join(sorted(known_dimensions))
        return MusehubToolResult(
            ok=False,
            error_code="invalid_dimension",
            error_message=(
                f"Unknown dimension '{dimension}'. "
                f"Valid values: {choices}."
            ),
        )

    unavailable = _check_db_available()
    if unavailable is not None:
        return unavailable

    async with AsyncSessionLocal() as session:
        repo = await musehub_repository.get_repo(session, repo_id)
        if repo is None:
            return MusehubToolResult(
                ok=False,
                error_code="not_found",
                error_message=f"Repository '{repo_id}' not found.",
            )

        payload: dict[str, JSONValue]

        if dimension == "overview":
            branches = await musehub_repository.list_branches(session, repo_id)
            # limit=1: only the first commit is read; the total comes back
            # alongside it.
            newest_commits, commit_total = await musehub_repository.list_commits(
                session, repo_id, limit=1
            )
            stored_objects = await musehub_repository.list_objects(
                session, repo_id
            )

            last_commit_at: JSONValue = None
            most_recent_author: JSONValue = None
            if newest_commits:
                newest = newest_commits[0]
                last_commit_at = newest.timestamp.isoformat()
                most_recent_author = newest.author

            payload = {
                "repo_id": repo_id,
                "dimension": "overview",
                "repo_name": repo.name,
                "visibility": repo.visibility,
                "branch_count": len(branches),
                "commit_count": commit_total,
                "object_count": len(stored_objects),
                "last_commit_at": last_commit_at,
                "most_recent_author": most_recent_author,
                "midi_analysis": None,
            }
        elif dimension == "commits":
            sample, commit_total = await musehub_repository.list_commits(
                session, repo_id, limit=100
            )

            per_branch: dict[str, int] = {}
            per_author: dict[str, int] = {}
            for commit in sample:
                per_branch[commit.branch] = per_branch.get(commit.branch, 0) + 1
                per_author[commit.author] = per_author.get(commit.author, 0) + 1

            payload = {
                "repo_id": repo_id,
                "dimension": "commits",
                "total_commits": commit_total,
                "commits_in_sample": len(sample),
                # Copied via comprehension — presumably so the type checker
                # infers dict[str, JSONValue] rather than dict[str, int].
                "by_branch": {name: count for name, count in per_branch.items()},
                "by_author": {name: count for name, count in per_author.items()},
                "recent_messages": [commit.message for commit in sample[:5]],
            }
        else:
            # Validation above leaves "objects" as the only remaining value.
            stored_objects = await musehub_repository.list_objects(
                session, repo_id
            )

            count_by_mime: dict[str, int] = {}
            bytes_by_mime: dict[str, int] = {}
            total_bytes = 0
            for stored in stored_objects:
                mime_type = _mime_for_path(stored.path)
                count_by_mime[mime_type] = count_by_mime.get(mime_type, 0) + 1
                bytes_by_mime[mime_type] = (
                    bytes_by_mime.get(mime_type, 0) + stored.size_bytes
                )
                total_bytes += stored.size_bytes

            payload = {
                "repo_id": repo_id,
                "dimension": "objects",
                "total_objects": len(stored_objects),
                "total_size_bytes": total_bytes,
                "by_mime_type": {
                    mime: count for mime, count in count_by_mime.items()
                },
                "size_by_mime_type": {
                    mime: size for mime, size in bytes_by_mime.items()
                },
                "sample_paths": [stored.path for stored in stored_objects[:10]],
            }

        return MusehubToolResult(ok=True, data=payload)
454
455
async def execute_search(
    repo_id: str,
    query: str,
    mode: str = "path",
) -> MusehubToolResult:
    """Search within a MuseHub repository by substring match.

    Search is case-insensitive substring matching (both sides are
    lower-cased). An empty ``query`` matches every scanned item. Two modes
    are supported:
    - ``path`` — matches object file paths (e.g. ``tracks/jazz_4b.mid``).
      Every object returned by ``list_objects`` is scanned.
    - ``commit`` — matches commit messages (e.g. ``add bass intro``). Only
      the commits returned by one ``list_commits`` call with ``limit=100``
      are scanned, so matches in commits beyond that window are not found.

    Matching is done in memory (no DB-level LIKE query) so results are
    consistent across database backends. For very large repos
    (>10 k objects) path search may be slow — index-backed search is a
    planned future enhancement.

    Because commit search can be partial, the payload states how much was
    scanned: ``searched_count`` is the number of items examined and
    ``truncated`` is true when the repository reports more commits than
    were examined. ``truncated`` is always false in ``path`` mode.

    Args:
        repo_id: UUID of the target MuseHub repository.
        query: Case-insensitive substring to search for.
        mode: ``"path"`` or ``"commit"``.

    Returns:
        ``MusehubToolResult`` whose ``data`` holds ``repo_id``, ``query``,
        ``mode``, ``result_count``, ``results``, ``searched_count`` and
        ``truncated``; ``error_code="invalid_mode"`` for an unknown mode
        (checked before any DB access); ``error_code="not_found"`` if the
        repo does not exist; or the ``db_unavailable`` result from
        ``_check_db_available``.
    """
    valid_modes = {"path", "commit"}
    if mode not in valid_modes:
        return MusehubToolResult(
            ok=False,
            error_code="invalid_mode",
            error_message=(
                f"Unknown search mode '{mode}'. "
                f"Valid values: {', '.join(sorted(valid_modes))}."
            ),
        )

    if (err := _check_db_available()) is not None:
        return err

    # Upper bound on how many commits are fetched for commit-mode search.
    commit_scan_limit = 100
    needle = query.lower()

    async with AsyncSessionLocal() as session:
        repo = await musehub_repository.get_repo(session, repo_id)
        if repo is None:
            return MusehubToolResult(
                ok=False,
                error_code="not_found",
                error_message=f"Repository '{repo_id}' not found.",
            )

        results: list[JSONValue]
        if mode == "path":
            objects = await musehub_repository.list_objects(session, repo_id)
            searched_count = len(objects)
            truncated = False  # the whole object list was scanned
            results = [
                {
                    "object_id": obj.object_id,
                    "path": obj.path,
                    "size_bytes": obj.size_bytes,
                    "mime_type": _mime_for_path(obj.path),
                }
                for obj in objects
                if needle in obj.path.lower()
            ]
        else:  # mode == "commit"
            commits, total_commits = await musehub_repository.list_commits(
                session, repo_id, limit=commit_scan_limit
            )
            searched_count = len(commits)
            # Surface a partial scan instead of silently dropping matches
            # that live in commits outside the fetched window.
            truncated = bool(total_commits > searched_count)
            results = [
                {
                    "commit_id": c.commit_id,
                    "branch": c.branch,
                    "message": c.message,
                    "author": c.author,
                    "timestamp": c.timestamp.isoformat(),
                }
                for c in commits
                if needle in c.message.lower()
            ]

        data: dict[str, JSONValue] = {
            "repo_id": repo_id,
            "query": query,
            "mode": mode,
            "result_count": len(results),
            "results": results,
            "searched_count": searched_count,
            "truncated": truncated,
        }
        return MusehubToolResult(ok=True, data=data)
541
542
async def execute_get_context(repo_id: str) -> MusehubToolResult:
    """Return the full AI context document for a MuseHub repository.

    This is the primary read-side interface for music generation agents.
    One call gathers the repo metadata, every branch, the commits from a
    ``limit=10`` query, and the complete artifact inventory into a single
    structured document that can be placed in an agent's context window.

    Feed the document to the agent before it generates new music so the
    output stays harmonically and structurally coherent with what is
    already in the repository.

    Musical analysis fields (``key``, ``tempo``, ``time_signature``) are
    ``null`` until Storpheus MIDI analysis integration is complete.

    Args:
        repo_id: UUID of the target MuseHub repository.

    Returns:
        ``MusehubToolResult`` whose ``data`` holds ``repo_id`` and
        ``context`` (the full context document);
        ``error_code="not_found"`` if the repo does not exist; or the
        ``db_unavailable`` result from ``_check_db_available``.
    """
    unavailable = _check_db_available()
    if unavailable is not None:
        return unavailable

    async with AsyncSessionLocal() as session:
        repo = await musehub_repository.get_repo(session, repo_id)
        if repo is None:
            return MusehubToolResult(
                ok=False,
                error_code="not_found",
                error_message=f"Repository '{repo_id}' not found.",
            )

        branches = await musehub_repository.list_branches(session, repo_id)
        commits, commit_total = await musehub_repository.list_commits(
            session, repo_id, limit=10
        )
        stored_objects = await musehub_repository.list_objects(session, repo_id)

        # Count artifacts per MIME type as derived from each object's path.
        count_by_mime: dict[str, int] = {}
        for stored in stored_objects:
            mime_type = _mime_for_path(stored.path)
            count_by_mime[mime_type] = count_by_mime.get(mime_type, 0) + 1

        repo_section: dict[str, JSONValue] = {
            "repo_id": repo.repo_id,
            "name": repo.name,
            "visibility": repo.visibility,
            "owner_user_id": repo.owner_user_id,
            "created_at": repo.created_at.isoformat(),
        }
        branch_section: list[JSONValue] = [
            {
                "name": branch.name,
                "head_commit_id": branch.head_commit_id,
            }
            for branch in branches
        ]
        commit_section: list[JSONValue] = [
            {
                "commit_id": commit.commit_id,
                "branch": commit.branch,
                "message": commit.message,
                "author": commit.author,
                "timestamp": commit.timestamp.isoformat(),
            }
            for commit in commits
        ]
        artifact_section: dict[str, JSONValue] = {
            "total_count": len(stored_objects),
            "by_mime_type": {mime: count for mime, count in count_by_mime.items()},
            "paths": [stored.path for stored in stored_objects],
        }
        # Placeholder block: every analysis field is None for now.
        analysis_section: dict[str, JSONValue] = {
            "key": None,
            "tempo": None,
            "time_signature": None,
            "note": (
                "Musical analysis requires Storpheus MIDI integration "
                "(not yet available — fields will be populated in a future release)."
            ),
        }

        context: dict[str, JSONValue] = {
            "repo": repo_section,
            "branches": branch_section,
            "recent_commits": commit_section,
            "commit_stats": {
                "total": commit_total,
                "shown": len(commits),
            },
            "artifacts": artifact_section,
            "musical_analysis": analysis_section,
        }

        payload: dict[str, JSONValue] = {
            "repo_id": repo_id,
            "context": context,
        }
        return MusehubToolResult(ok=True, data=payload)