gabriel / muse public
plugin.py python
760 lines 28.5 KB
59a915a4 refactor: repo root is the working tree — remove state/ subdirectory Gabriel Cardona <gabriel@tellurstori.com> 4d ago
1 """Code domain plugin — semantic version control for source code.
2
3 This plugin implements :class:`~muse.domain.MuseDomainPlugin` and
4 :class:`~muse.domain.StructuredMergePlugin` for software repositories.
5
6 Philosophy
7 ----------
8 Git models files as sequences of lines. The code plugin models them as
9 **collections of named symbols** — functions, classes, methods, variables.
10 Two commits that only reformat a Python file (no semantic change) produce
11 identical symbol ``content_id`` values and therefore *no* structured delta.
12 Two commits that rename a function produce a ``ReplaceOp`` annotated
13 ``"renamed to bar"`` rather than a red/green line diff.
14
15 Live State
16 ----------
17 ``LiveState`` is either a ``pathlib.Path`` pointing to the repository root or a
18 ``SnapshotManifest`` dict. The path form is used by the CLI; the dict form
19 is used by in-memory merge and diff operations.
20
21 Snapshot Format
22 ---------------
23 A code snapshot is a ``SnapshotManifest``:
24
25 .. code-block:: json
26
27 {
28 "files": {
29 "src/utils.py": "<sha256-of-raw-bytes>",
30 "README.md": "<sha256-of-raw-bytes>"
31 },
32 "domain": "code"
33 }
34
35 The ``files`` values are **raw-bytes SHA-256 hashes** (not AST hashes).
36 This ensures the object store can correctly restore files verbatim on
37 ``muse checkout``. Semantic identity (AST-based hashing) is used only
38 inside ``diff()`` when constructing the structured delta.
39
40 Delta Format
41 ------------
42 ``diff()`` returns a ``StructuredDelta``. For Python files (and other
43 languages with adapters) it produces ``PatchOp`` entries whose ``child_ops``
44 carry symbol-level operations:
45
46 - ``InsertOp`` — a symbol was added (address ``"src/utils.py::my_func"``).
47 - ``DeleteOp`` — a symbol was removed.
48 - ``ReplaceOp`` — a symbol changed. The ``new_summary`` field describes the
49 change: ``"renamed to bar"``, ``"implementation changed"``, etc.
50
51 Non-Python files produce coarse ``InsertOp`` / ``DeleteOp`` / ``ReplaceOp``
52 at the file level.
53
54 Merge Semantics
55 ---------------
56 The plugin implements :class:`~muse.domain.StructuredMergePlugin` so that
57 OT-aware merges detect conflicts at *symbol* granularity:
58
59 - Agent A modifies ``foo()`` and Agent B modifies ``bar()`` in the same
60 file → **auto-merge** (ops commute).
61 - Both agents modify ``foo()`` → **symbol-level conflict** at address
62 ``"src/utils.py::foo"`` rather than a coarse file conflict.
63
64 Schema
65 ------
66 The code domain schema declares five dimensions:
67
68 ``structure``
69 The module/file tree — ``TreeSchema`` with GumTree diff.
70
71 ``symbols``
72 The AST symbol tree — ``TreeSchema`` with GumTree diff.
73
74 ``imports``
75 The import set — ``SetSchema`` with ``by_content`` identity.
76
77 ``variables``
78 Top-level variable assignments — ``SetSchema``.
79
80 ``metadata``
81 Configuration and non-code files — ``SetSchema``.
82 """
83
84 from __future__ import annotations
85
86 import hashlib
87 import logging
88 import pathlib
89
90 from muse.core.attributes import load_attributes, resolve_strategy
91 from muse.core.diff_algorithms import snapshot_diff
92 from muse.core.ignore import is_ignored, load_ignore_config, resolve_patterns
93 from muse.core.object_store import read_object
94 from muse.core.op_transform import merge_op_lists, ops_commute
95 from muse.core.schema import (
96 DimensionSpec,
97 DomainSchema,
98 SetSchema,
99 TreeSchema,
100 )
101 from muse.domain import (
102 DeleteOp,
103 DomainOp,
104 DriftReport,
105 InsertOp,
106 LiveState,
107 MergeResult,
108 PatchOp,
109 ReplaceOp,
110 SnapshotManifest,
111 StateDelta,
112 StateSnapshot,
113 StructuredDelta,
114 )
115 from muse.plugins.code.ast_parser import (
116 SymbolTree,
117 adapter_for_path,
118 parse_symbols,
119 )
120 from muse.plugins.code.symbol_diff import (
121 build_diff_ops,
122 delta_summary,
123 )
124
logger = logging.getLogger(__name__)

# Domain identifier stamped into every manifest and delta this plugin emits.
_DOMAIN_NAME = "code"

# Path components that are never versioned, whatever .museignore says.
# These implicit ignores apply to every code repository.  Matching is done
# per path component, so a plain file name such as ".DS_Store" matches too.
_ALWAYS_IGNORE_DIRS: frozenset[str] = frozenset((
    # version-control metadata
    ".git",
    ".muse",
    # Python tool caches
    "__pycache__",
    ".mypy_cache",
    ".pytest_cache",
    ".ruff_cache",
    # dependency trees and virtual environments
    "node_modules",
    ".venv",
    "venv",
    ".tox",
    # build artefacts
    "dist",
    "build",
    ".eggs",
    # OS residue
    ".DS_Store",
))
147
148
class CodePlugin:
    """Muse domain plugin for software source code repositories.

    Implements all six core protocol methods plus the optional
    :class:`~muse.domain.StructuredMergePlugin` OT extension. The plugin
    does not implement :class:`~muse.domain.CRDTPlugin` — source code is
    human-authored and benefits from explicit conflict resolution rather
    than automatic convergence.

    The plugin is stateless. The module-level singleton :data:`plugin` is
    the standard entry point.
    """

    # ------------------------------------------------------------------
    # 1. snapshot
    # ------------------------------------------------------------------

    def snapshot(self, live_state: LiveState) -> StateSnapshot:
        """Capture the current working tree as a snapshot dict.

        Walks all regular files under *live_state*, hashing each one with
        SHA-256 (raw bytes). Skips hidden files and directories, honours
        ``.museignore`` and always ignores known tool-generated directories
        (``__pycache__``, ``node_modules``, etc.).

        Args:
            live_state: A ``pathlib.Path`` pointing to the repository root, or
                an existing ``SnapshotManifest`` dict (returned as-is).

        Returns:
            A ``SnapshotManifest`` mapping workspace-relative POSIX paths to
            their SHA-256 raw-bytes digests.
        """
        if not isinstance(live_state, pathlib.Path):
            # Already a manifest — nothing to walk.
            return live_state

        # The working tree IS the repository root; .museignore lives here.
        workdir = live_state
        patterns = resolve_patterns(load_ignore_config(workdir), _DOMAIN_NAME)

        files: dict[str, str] = {}
        for p in sorted(workdir.rglob("*")):
            if not p.is_file():
                continue
            rel_path = p.relative_to(workdir)
            rel_parts = rel_path.parts
            # Skip hidden files and files inside hidden directories (e.g. .muse/).
            if any(part.startswith(".") for part in rel_parts):
                continue
            # Skip always-ignored directories.  Only the workspace-relative
            # components are inspected: testing the absolute path would drop
            # every file of a repository that merely lives *under* a
            # directory named e.g. "build" or "venv".
            if any(part in _ALWAYS_IGNORE_DIRS for part in rel_parts):
                continue
            rel = rel_path.as_posix()
            if is_ignored(rel, patterns):
                continue
            files[rel] = _hash_file(p)

        return SnapshotManifest(files=files, domain=_DOMAIN_NAME)

    # ------------------------------------------------------------------
    # 2. diff
    # ------------------------------------------------------------------

    def diff(
        self,
        base: StateSnapshot,
        target: StateSnapshot,
        *,
        repo_root: pathlib.Path | None = None,
    ) -> StateDelta:
        """Compute the structured delta between two snapshots.

        Without ``repo_root``
            Produces coarse file-level ops (``InsertOp`` / ``DeleteOp`` /
            ``ReplaceOp``). Used by ``muse checkout`` which only needs file
            paths.

        With ``repo_root``
            Reads source bytes from the object store, parses AST for
            supported languages (Python), and produces ``PatchOp`` entries
            with symbol-level ``child_ops``. Used by ``muse commit`` (to
            store the structured delta) and ``muse show`` / ``muse diff``.

        Args:
            base: Base snapshot (older state).
            target: Target snapshot (newer state).
            repo_root: Repository root for object-store access and symbol
                extraction. ``None`` → file-level ops only.

        Returns:
            A ``StructuredDelta`` with ``domain="code"``.
        """
        if repo_root is None:
            # snapshot_diff provides the free file-level diff promised by the
            # DomainSchema architecture: any plugin that declares a schema can
            # call this instead of writing file-set algebra from scratch.
            return snapshot_diff(self.schema(), base, target)

        ops = _semantic_ops(base["files"], target["files"], repo_root)
        return StructuredDelta(
            domain=_DOMAIN_NAME,
            ops=ops,
            summary=delta_summary(ops),
        )

    # ------------------------------------------------------------------
    # 3. merge
    # ------------------------------------------------------------------

    @staticmethod
    def _store_or_remove(
        merged: dict[str, str],
        path: str,
        blob: str | None,
    ) -> None:
        """Record *blob* for *path* in *merged*, or drop *path* when *blob* is ``None``."""
        if blob is None:
            merged.pop(path, None)
        else:
            merged[path] = blob

    def merge(
        self,
        base: StateSnapshot,
        left: StateSnapshot,
        right: StateSnapshot,
        *,
        repo_root: pathlib.Path | None = None,
    ) -> MergeResult:
        """Three-way merge at file granularity, respecting ``.museattributes``.

        Standard three-way logic, augmented by per-path strategy overrides
        declared in ``.museattributes``:

        - Both sides agree → consensus wins (including both deleted).
        - Only one side changed → take that side.
        - Both sides changed differently → consult ``.museattributes``:

          - ``ours`` — take left (a deletion on the left stays a deletion).
          - ``theirs`` — take right (a deletion on the right stays a deletion).
          - ``base`` — revert to the common ancestor.
          - ``union`` — prefer left, falling back to right when left deleted
            the file; a true union is not representable for opaque blobs.
          - ``manual`` — force into the conflict list.
          - ``auto`` (or anything else) — default three-way conflict.

        A ``manual`` rule also forces cleanly resolved paths into the
        conflict list.

        This is the fallback used by ``muse cherry-pick`` and contexts where
        the OT merge path is not available. :meth:`merge_ops` provides
        symbol-level conflict detection when both sides have structured deltas.

        Args:
            base: Common ancestor snapshot.
            left: Our branch snapshot.
            right: Their branch snapshot.
            repo_root: Repository root; when provided, ``.museattributes`` is
                consulted for per-path strategy overrides.

        Returns:
            A ``MergeResult`` with the reconciled snapshot, any file-level
            conflicts, and ``applied_strategies`` recording which rules fired.
        """
        attrs = load_attributes(repo_root, domain=_DOMAIN_NAME) if repo_root else []

        base_files = base["files"]
        left_files = left["files"]
        right_files = right["files"]

        merged: dict[str, str] = dict(base_files)
        conflicts: list[str] = []
        applied_strategies: dict[str, str] = {}

        for path in sorted(set(base_files) | set(left_files) | set(right_files)):
            ancestor = base_files.get(path)
            ours = left_files.get(path)
            theirs = right_files.get(path)
            # Without attributes every path uses the default strategy.
            strategy = resolve_strategy(attrs, path) if attrs else "auto"

            # ── Clean resolutions: the three-way rule picks a single winner.
            if ours == theirs:
                clean, winner = True, ours        # both agree (or both deleted)
            elif ancestor == ours:
                clean, winner = True, theirs      # only right changed
            elif ancestor == theirs:
                clean, winner = True, ours        # only left changed
            else:
                clean, winner = False, None

            if clean:
                self._store_or_remove(merged, path, winner)
                # Honour "manual" override even on clean paths.
                # NOTE(review): this also fires for paths neither side
                # touched — confirm that is intended.
                if strategy == "manual":
                    conflicts.append(path)
                    applied_strategies[path] = "manual"
                continue

            # ── Both sides changed differently — consult attributes.
            if strategy == "ours":
                # ``ours is None`` means we deleted the file: keep it deleted
                # rather than resurrecting the ancestor's blob.
                self._store_or_remove(merged, path, ours)
                applied_strategies[path] = "ours"
            elif strategy == "theirs":
                self._store_or_remove(merged, path, theirs)
                applied_strategies[path] = "theirs"
            elif strategy == "base":
                self._store_or_remove(merged, path, ancestor)
                applied_strategies[path] = "base"
            elif strategy == "union":
                # For file-level blobs, full union is not representable —
                # prefer left and keep all additions from both branches.
                merged[path] = ours or theirs or ancestor or ""
                applied_strategies[path] = "union"
            else:
                # "manual" and "auto" both surface a conflict; the manifest
                # keeps a placeholder blob (left if present, else right).
                conflicts.append(path)
                merged[path] = ours or theirs or ancestor or ""
                if strategy == "manual":
                    applied_strategies[path] = "manual"

        return MergeResult(
            merged=SnapshotManifest(files=merged, domain=_DOMAIN_NAME),
            conflicts=conflicts,
            applied_strategies=applied_strategies,
        )

    # ------------------------------------------------------------------
    # 4. drift
    # ------------------------------------------------------------------

    def drift(self, committed: StateSnapshot, live: LiveState) -> DriftReport:
        """Report how much the working tree has drifted from the last commit.

        Called by ``muse status``. Takes a snapshot of the current live
        state and diffs it (file-level, no ``repo_root``) against the
        committed snapshot.

        Args:
            committed: The last committed snapshot.
            live: Current live state (path or snapshot manifest).

        Returns:
            A ``DriftReport`` describing what has changed since the last commit.
        """
        current = self.snapshot(live)
        delta = self.diff(committed, current)
        return DriftReport(
            has_drift=len(delta["ops"]) > 0,
            summary=delta["summary"],
            delta=delta,
        )

    # ------------------------------------------------------------------
    # 5. apply
    # ------------------------------------------------------------------

    def apply(self, delta: StateDelta, live_state: LiveState) -> LiveState:
        """Apply a delta to the working tree.

        Called by ``muse checkout`` after the core engine has already
        restored file-level objects from the object store. The code plugin
        has no domain-specific post-processing to perform, so this is a
        pass-through.

        Args:
            delta: The typed operation list (unused at post-checkout time).
            live_state: Current live state (returned unchanged).

        Returns:
            *live_state* unchanged.
        """
        return live_state

    # ------------------------------------------------------------------
    # 6. schema
    # ------------------------------------------------------------------

    def schema(self) -> DomainSchema:
        """Declare the structural schema of the code domain.

        Returns:
            A ``DomainSchema`` with five semantic dimensions:
            ``structure``, ``symbols``, ``imports``, ``variables``,
            and ``metadata``.
        """
        return DomainSchema(
            domain=_DOMAIN_NAME,
            description=(
                "Semantic version control for source code. "
                "Treats code as a structured system of named symbols "
                "(functions, classes, methods) rather than lines of text. "
                "Two commits that only reformat a file produce no delta. "
                "Renames and moves are detected via content-addressed "
                "symbol identity."
            ),
            top_level=TreeSchema(
                kind="tree",
                node_type="module",
                diff_algorithm="gumtree",
            ),
            dimensions=[
                DimensionSpec(
                    name="structure",
                    description=(
                        "Module / file tree. Tracks which files exist and "
                        "how they relate to each other."
                    ),
                    schema=TreeSchema(
                        kind="tree",
                        node_type="file",
                        diff_algorithm="gumtree",
                    ),
                    independent_merge=False,
                ),
                DimensionSpec(
                    name="symbols",
                    description=(
                        "AST symbol tree. Functions, classes, methods, and "
                        "variables — the primary unit of semantic change."
                    ),
                    schema=TreeSchema(
                        kind="tree",
                        node_type="symbol",
                        diff_algorithm="gumtree",
                    ),
                    independent_merge=True,
                ),
                DimensionSpec(
                    name="imports",
                    description=(
                        "Import set. Tracks added / removed import statements "
                        "as an unordered set — order is semantically irrelevant."
                    ),
                    schema=SetSchema(
                        kind="set",
                        element_type="import",
                        identity="by_content",
                    ),
                    independent_merge=True,
                ),
                DimensionSpec(
                    name="variables",
                    description=(
                        "Top-level variable and constant assignments. "
                        "Tracked as an unordered set."
                    ),
                    schema=SetSchema(
                        kind="set",
                        element_type="variable",
                        identity="by_content",
                    ),
                    independent_merge=True,
                ),
                DimensionSpec(
                    name="metadata",
                    description=(
                        "Non-code files: configuration, documentation, "
                        "build scripts, etc. Tracked at file granularity."
                    ),
                    schema=SetSchema(
                        kind="set",
                        element_type="file",
                        identity="by_content",
                    ),
                    independent_merge=True,
                ),
            ],
            merge_mode="three_way",
            schema_version=1,
        )

    # ------------------------------------------------------------------
    # StructuredMergePlugin — OT extension
    # ------------------------------------------------------------------

    def merge_ops(
        self,
        base: StateSnapshot,
        ours_snap: StateSnapshot,
        theirs_snap: StateSnapshot,
        ours_ops: list[DomainOp],
        theirs_ops: list[DomainOp],
        *,
        repo_root: pathlib.Path | None = None,
    ) -> MergeResult:
        """Operation-level three-way merge using Operational Transformation.

        For ``PatchOp`` entries at the same file address, every pair of
        ``child_ops`` is checked with
        :func:`~muse.core.op_transform.ops_commute` — so two agents modifying
        *different* functions in the same file auto-merge, while concurrent
        modifications to the *same* function produce a symbol-level conflict
        address. Non-patch ops (file-level inserts / deletes / replaces) go
        through :func:`~muse.core.op_transform.merge_op_lists`.

        The reconciled ``merged`` snapshot is produced by the file-level
        three-way :meth:`merge` fallback (we cannot reconstruct merged source
        bytes without a text-merge pass). This is correct for all cases where
        the two sides touched *different* files. For the same-file-different-
        symbol case the merged manifest holds the *ours* version of the file —
        annotated as a conflict-free merge — which may require the user to
        re-apply the theirs changes manually. This limitation is documented
        and will be lifted in a future release that implements source-level
        patching.

        Args:
            base: Common ancestor snapshot.
            ours_snap: Our branch's final snapshot.
            theirs_snap: Their branch's final snapshot.
            ours_ops: Our branch's typed operation list.
            theirs_ops: Their branch's typed operation list.
            repo_root: Repository root for ``.museattributes`` lookup.

        Returns:
            A ``MergeResult`` where ``conflicts`` contains symbol-level
            addresses (e.g. ``"src/utils.py::calculate_total"``) rather than
            bare file paths.
        """
        # The core OT engine's _op_key for PatchOp hashes only the file path
        # and child_domain — not the child_ops themselves. This means two
        # PatchOps for the same file are treated as "consensus" regardless of
        # whether they touch the same or different symbols. We therefore
        # implement symbol-level conflict detection directly here.

        attrs = load_attributes(repo_root, domain=_DOMAIN_NAME) if repo_root else []

        # ── Step 1: symbol-level conflict detection for PatchOps ──────────
        ours_patches: dict[str, PatchOp] = {
            op["address"]: op for op in ours_ops if op["op"] == "patch"
        }
        theirs_patches: dict[str, PatchOp] = {
            op["address"]: op for op in theirs_ops if op["op"] == "patch"
        }

        conflict_addresses: set[str] = set()
        for path, our_patch in ours_patches.items():
            their_patch = theirs_patches.get(path)
            if their_patch is None:
                # Only our side patched this file — nothing to collide with.
                continue
            for our_child in our_patch["child_ops"]:
                for their_child in their_patch["child_ops"]:
                    if not ops_commute(our_child, their_child):
                        conflict_addresses.add(our_child["address"])

        # ── Step 2: coarse OT for non-PatchOp ops (file-level inserts/deletes) ──
        non_patch_ours: list[DomainOp] = [op for op in ours_ops if op["op"] != "patch"]
        non_patch_theirs: list[DomainOp] = [op for op in theirs_ops if op["op"] != "patch"]
        file_result = merge_op_lists(
            base_ops=[],
            ours_ops=non_patch_ours,
            theirs_ops=non_patch_theirs,
        )
        for our_op, _ in file_result.conflict_ops:
            conflict_addresses.add(our_op["address"])

        # ── Step 3: apply .museattributes to symbol-level conflicts ──────
        # Symbol addresses are of the form "src/utils.py::function_name".
        # We resolve strategy against the file path portion so that a
        # path = "src/**/*.py" / strategy = "ours" rule suppresses symbol
        # conflicts in those files, not just file-level manifest conflicts.
        op_applied_strategies: dict[str, str] = {}
        resolved_conflicts: list[str] = []
        if attrs:
            for addr in sorted(conflict_addresses):
                # Everything before the first "::" is the file path; an
                # address without "::" is already a bare file path.
                file_path = addr.partition("::")[0]
                strategy = resolve_strategy(attrs, file_path)
                if strategy in ("ours", "theirs", "base", "union"):
                    # Auto-resolving strategies suppress the conflict.
                    op_applied_strategies[addr] = strategy
                    continue
                resolved_conflicts.append(addr)
                if strategy == "manual":
                    op_applied_strategies[addr] = "manual"
        else:
            resolved_conflicts = sorted(conflict_addresses)

        # NOTE(review): if merge_op_lists already echoes our non-patch ops in
        # ``merged_ops``, appending all of ``ours_ops`` duplicates them in the
        # op log — confirm against merge_op_lists before changing.
        merged_ops: list[DomainOp] = list(file_result.merged_ops) + list(ours_ops)

        # Fall back to file-level merge for the manifest (carries its own
        # applied_strategies from file-level attribute resolution).
        fallback = self.merge(base, ours_snap, theirs_snap, repo_root=repo_root)
        # Symbol-level entries win over file-level ones on key collisions.
        combined_strategies = {**fallback.applied_strategies, **op_applied_strategies}
        return MergeResult(
            merged=fallback.merged,
            conflicts=resolved_conflicts,
            applied_strategies=combined_strategies,
            dimension_reports=fallback.dimension_reports,
            op_log=merged_ops,
        )
636
637
638 # ---------------------------------------------------------------------------
639 # Private helpers
640 # ---------------------------------------------------------------------------
641
642
def _hash_file(path: pathlib.Path) -> str:
    """Compute the SHA-256 hex digest of the raw bytes stored at *path*.

    The file is consumed in 64 KiB pieces so that large files never have to
    be held in memory in full.
    """
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            piece = stream.read(65_536)
            if not piece:
                # An empty read signals end-of-file.
                break
            digest.update(piece)
    return digest.hexdigest()
650
651
def _file_level_ops(
    base_files: dict[str, str],
    target_files: dict[str, str],
) -> list[DomainOp]:
    """Build coarse, whole-file ops between two manifests (no AST parsing).

    Ops are emitted in three groups — inserts, then deletes, then
    replaces — each group ordered by path.
    """
    added = sorted(target_files.keys() - base_files.keys())
    removed = sorted(base_files.keys() - target_files.keys())
    # Paths present on both sides whose blob hash differs.
    modified = [
        name
        for name in sorted(base_files.keys() & target_files.keys())
        if base_files[name] != target_files[name]
    ]

    ops: list[DomainOp] = []
    ops.extend(
        InsertOp(
            op="insert",
            address=name,
            position=None,
            content_id=target_files[name],
            content_summary=f"added {name}",
        )
        for name in added
    )
    ops.extend(
        DeleteOp(
            op="delete",
            address=name,
            position=None,
            content_id=base_files[name],
            content_summary=f"removed {name}",
        )
        for name in removed
    )
    ops.extend(
        ReplaceOp(
            op="replace",
            address=name,
            position=None,
            old_content_id=base_files[name],
            new_content_id=target_files[name],
            old_summary=f"{name} (before)",
            new_summary=f"{name} (after)",
        )
        for name in modified
    )
    return ops
689
690
def _semantic_ops(
    base_files: dict[str, str],
    target_files: dict[str, str],
    repo_root: pathlib.Path,
) -> list[DomainOp]:
    """Build symbol-level ops, loading file contents from the object store.

    Only paths that were added, removed, or whose blob hash changed are read
    and parsed. A blob the object store cannot supply simply has no symbol
    tree for that side.
    """
    base_paths = set(base_files)
    target_paths = set(target_files)

    added = target_paths - base_paths
    removed = base_paths - target_paths
    modified = {
        name
        for name in base_paths & target_paths
        if base_files[name] != target_files[name]
    }
    changed_paths = added | removed | modified

    base_trees: dict[str, SymbolTree] = {}
    target_trees: dict[str, SymbolTree] = {}

    # Each side is handled identically: look the blob up, parse it if found.
    sides = ((base_files, base_trees), (target_files, target_trees))
    for name in changed_paths:
        for manifest, trees in sides:
            if name not in manifest:
                continue
            blob = read_object(repo_root, manifest[name])
            if blob is None:
                continue
            trees[name] = _parse_with_fallback(blob, name)

    return build_diff_ops(base_files, target_files, base_trees, target_trees)
723
724
def _parse_with_fallback(source: bytes, file_path: str) -> SymbolTree:
    """Extract the symbol tree of *source*; yield an empty tree if parsing fails.

    Any exception raised by the parser is logged at debug level and
    swallowed, so callers get an empty tree instead of an error.
    """
    try:
        tree = parse_symbols(source, file_path)
    except Exception:
        # Deliberately best-effort: an unparseable file must not abort a diff.
        logger.debug("Symbol parsing failed for %s — falling back to file-level.", file_path)
        tree = {}
    return tree
732
733
def _load_symbol_trees_from_workdir(
    workdir: pathlib.Path,
    manifest: dict[str, str],
) -> dict[str, SymbolTree]:
    """Parse symbol trees for the *manifest* entries that exist under *workdir*.

    Entries that are missing on disk, cannot be read, or whose extension is
    not among the extensions the path's adapter supports are left out.
    """
    collected: dict[str, SymbolTree] = {}
    for rel_path in manifest:
        candidate = workdir / rel_path
        if not candidate.is_file():
            continue
        try:
            payload = candidate.read_bytes()
        except OSError:
            # The file vanished or is unreadable — skip it silently.
            continue
        extension = pathlib.PurePosixPath(rel_path).suffix.lower()
        supported = adapter_for_path(rel_path).supported_extensions()
        if not supported.intersection({extension}):
            continue
        collected[rel_path] = _parse_with_fallback(payload, rel_path)
    return collected
753
754
755 # ---------------------------------------------------------------------------
756 # Module-level singleton
757 # ---------------------------------------------------------------------------
758
#: The singleton plugin instance registered in ``muse/plugins/registry.py``.
#: Created once at import time.
plugin = CodePlugin()