plugin.py
python
| 1 | """Code domain plugin — semantic version control for source code. |
| 2 | |
| 3 | This plugin implements :class:`~muse.domain.MuseDomainPlugin` and |
| 4 | :class:`~muse.domain.StructuredMergePlugin` for software repositories. |
| 5 | |
| 6 | Philosophy |
| 7 | ---------- |
| 8 | Git models files as sequences of lines. The code plugin models them as |
| 9 | **collections of named symbols** — functions, classes, methods, variables. |
| 10 | Two commits that only reformat a Python file (no semantic change) produce |
| 11 | identical symbol ``content_id`` values and therefore *no* structured delta. |
| 12 | Two commits that rename a function produce a ``ReplaceOp`` annotated |
| 13 | ``"renamed to bar"`` rather than a red/green line diff. |
| 14 | |
| 15 | Live State |
| 16 | ---------- |
| 17 | ``LiveState`` is either a ``pathlib.Path`` pointing to ``muse-work/`` or a |
| 18 | ``SnapshotManifest`` dict. The path form is used by the CLI; the dict form |
| 19 | is used by in-memory merge and diff operations. |
| 20 | |
| 21 | Snapshot Format |
| 22 | --------------- |
| 23 | A code snapshot is a ``SnapshotManifest``: |
| 24 | |
| 25 | .. code-block:: json |
| 26 | |
| 27 | { |
| 28 | "files": { |
| 29 | "src/utils.py": "<sha256-of-raw-bytes>", |
| 30 | "README.md": "<sha256-of-raw-bytes>" |
| 31 | }, |
| 32 | "domain": "code" |
| 33 | } |
| 34 | |
| 35 | The ``files`` values are **raw-bytes SHA-256 hashes** (not AST hashes). |
| 36 | This ensures the object store can correctly restore files verbatim on |
| 37 | ``muse checkout``. Semantic identity (AST-based hashing) is used only |
| 38 | inside ``diff()`` when constructing the structured delta. |
| 39 | |
| 40 | Delta Format |
| 41 | ------------ |
| 42 | ``diff()`` returns a ``StructuredDelta``. For Python files (and other |
| 43 | languages with adapters) it produces ``PatchOp`` entries whose ``child_ops`` |
| 44 | carry symbol-level operations: |
| 45 | |
| 46 | - ``InsertOp`` — a symbol was added (address ``"src/utils.py::my_func"``). |
| 47 | - ``DeleteOp`` — a symbol was removed. |
| 48 | - ``ReplaceOp`` — a symbol changed. The ``new_summary`` field describes the |
| 49 | change: ``"renamed to bar"``, ``"implementation changed"``, etc. |
| 50 | |
| 51 | Files in languages without an adapter produce coarse ``InsertOp`` / |
| 52 | ``DeleteOp`` / ``ReplaceOp`` entries at the file level. |
| 53 | |
| 54 | Merge Semantics |
| 55 | --------------- |
| 56 | The plugin implements :class:`~muse.domain.StructuredMergePlugin` so that |
| 57 | OT-aware merges detect conflicts at *symbol* granularity: |
| 58 | |
| 59 | - Agent A modifies ``foo()`` and Agent B modifies ``bar()`` in the same |
| 60 | file → **auto-merge** (ops commute). |
| 61 | - Both agents modify ``foo()`` → **symbol-level conflict** at address |
| 62 | ``"src/utils.py::foo"`` rather than a coarse file conflict. |
| 63 | |
| 64 | Schema |
| 65 | ------ |
| 66 | The code domain schema declares five dimensions: |
| 67 | |
| 68 | ``structure`` |
| 69 | The module/file tree — ``TreeSchema`` with GumTree diff. |
| 70 | |
| 71 | ``symbols`` |
| 72 | The AST symbol tree — ``TreeSchema`` with GumTree diff. |
| 73 | |
| 74 | ``imports`` |
| 75 | The import set — ``SetSchema`` with ``by_content`` identity. |
| 76 | |
| 77 | ``variables`` |
| 78 | Top-level variable assignments — ``SetSchema``. |
| 79 | |
| 80 | ``metadata`` |
| 81 | Configuration and non-code files — ``SetSchema``. |
| 82 | """ |
| 83 | |
| 84 | from __future__ import annotations |
| 85 | |
| 86 | import hashlib |
| 87 | import logging |
| 88 | import pathlib |
| 89 | |
| 90 | from muse.core.attributes import load_attributes, resolve_strategy |
| 91 | from muse.core.diff_algorithms import snapshot_diff |
| 92 | from muse.core.ignore import is_ignored, load_ignore_config, resolve_patterns |
| 93 | from muse.core.object_store import read_object |
| 94 | from muse.core.op_transform import merge_op_lists, ops_commute |
| 95 | from muse.core.schema import ( |
| 96 | DimensionSpec, |
| 97 | DomainSchema, |
| 98 | SetSchema, |
| 99 | TreeSchema, |
| 100 | ) |
| 101 | from muse.domain import ( |
| 102 | DeleteOp, |
| 103 | DomainOp, |
| 104 | DriftReport, |
| 105 | InsertOp, |
| 106 | LiveState, |
| 107 | MergeResult, |
| 108 | PatchOp, |
| 109 | ReplaceOp, |
| 110 | SnapshotManifest, |
| 111 | StateDelta, |
| 112 | StateSnapshot, |
| 113 | StructuredDelta, |
| 114 | ) |
| 115 | from muse.plugins.code.ast_parser import ( |
| 116 | SymbolTree, |
| 117 | adapter_for_path, |
| 118 | parse_symbols, |
| 119 | ) |
| 120 | from muse.plugins.code.symbol_diff import ( |
| 121 | build_diff_ops, |
| 122 | delta_summary, |
| 123 | ) |
| 124 | |
| 125 | logger = logging.getLogger(__name__) |
| 126 | |
| 127 | _DOMAIN_NAME = "code" |
| 128 | |
| 129 | # Directories that are never versioned regardless of .museignore. |
| 130 | # These are implicit ignores that apply to all code repositories. |
| 131 | _ALWAYS_IGNORE_DIRS: frozenset[str] = frozenset({ |
| 132 | ".git", |
| 133 | ".muse", |
| 134 | "__pycache__", |
| 135 | ".mypy_cache", |
| 136 | ".pytest_cache", |
| 137 | ".ruff_cache", |
| 138 | "node_modules", |
| 139 | ".venv", |
| 140 | "venv", |
| 141 | ".tox", |
| 142 | "dist", |
| 143 | "build", |
| 144 | ".eggs", |
| 145 | ".DS_Store", |
| 146 | }) |
| 147 | |
| 148 | |
class CodePlugin:
    """Muse domain plugin for software source code repositories.

    Implements all six core protocol methods plus the optional
    :class:`~muse.domain.StructuredMergePlugin` OT extension.  The plugin
    does not implement :class:`~muse.domain.CRDTPlugin` — source code is
    human-authored and benefits from explicit conflict resolution rather
    than automatic convergence.

    The plugin is stateless.  The module-level singleton :data:`plugin` is
    the standard entry point.
    """

    # ------------------------------------------------------------------
    # 1. snapshot
    # ------------------------------------------------------------------

    def snapshot(self, live_state: LiveState) -> StateSnapshot:
        """Capture the current ``muse-work/`` directory as a snapshot dict.

        Walks all regular files under *live_state*, hashing each one with
        SHA-256 (raw bytes).  Honours ``.museignore`` and always ignores
        known tool-generated directories (``__pycache__``, ``.git``, etc.).

        The always-ignored names are matched against the
        *workspace-relative* path only, so the location of the repository
        on disk (e.g. a checkout under ``~/build/``) never affects which
        files are captured.

        Args:
            live_state: A ``pathlib.Path`` pointing to ``muse-work/``, or an
                existing ``SnapshotManifest`` dict (returned as-is).

        Returns:
            A ``SnapshotManifest`` mapping workspace-relative POSIX paths to
            their SHA-256 raw-bytes digests.
        """
        if not isinstance(live_state, pathlib.Path):
            return live_state

        workdir = live_state
        # .museignore lives in the repo root (parent of muse-work/).
        repo_root = workdir.parent
        patterns = resolve_patterns(load_ignore_config(repo_root), _DOMAIN_NAME)

        files: dict[str, str] = {}
        for p in sorted(workdir.rglob("*")):
            if not p.is_file():
                continue
            rel_path = p.relative_to(workdir)
            # Check only components *below* the workdir: testing the absolute
            # path would drop every file whenever an ancestor directory
            # happens to be called "build", "dist", "venv", ...
            if not _ALWAYS_IGNORE_DIRS.isdisjoint(rel_path.parts):
                continue
            rel = rel_path.as_posix()
            if is_ignored(rel, patterns):
                continue
            files[rel] = _hash_file(p)

        return SnapshotManifest(files=files, domain=_DOMAIN_NAME)

    # ------------------------------------------------------------------
    # 2. diff
    # ------------------------------------------------------------------

    def diff(
        self,
        base: StateSnapshot,
        target: StateSnapshot,
        *,
        repo_root: pathlib.Path | None = None,
    ) -> StateDelta:
        """Compute the structured delta between two snapshots.

        Without ``repo_root``
            Produces coarse file-level ops (``InsertOp`` / ``DeleteOp`` /
            ``ReplaceOp``).  Used by ``muse checkout`` which only needs file
            paths.

        With ``repo_root``
            Reads source bytes from the object store, parses AST for
            supported languages (Python), and produces ``PatchOp`` entries
            with symbol-level ``child_ops``.  Used by ``muse commit`` (to
            store the structured delta) and ``muse show`` / ``muse diff``.

        Args:
            base: Base snapshot (older state).
            target: Target snapshot (newer state).
            repo_root: Repository root for object-store access and symbol
                extraction.  ``None`` → file-level ops only.

        Returns:
            A ``StructuredDelta`` with ``domain="code"``.
        """
        base_files = base["files"]
        target_files = target["files"]

        if repo_root is None:
            # snapshot_diff provides the free file-level diff promised by the
            # DomainSchema architecture: any plugin that declares a schema can
            # call this instead of writing file-set algebra from scratch.
            return snapshot_diff(self.schema(), base, target)

        ops = _semantic_ops(base_files, target_files, repo_root)
        summary = delta_summary(ops)
        return StructuredDelta(domain=_DOMAIN_NAME, ops=ops, summary=summary)

    # ------------------------------------------------------------------
    # 3. merge
    # ------------------------------------------------------------------

    def merge(
        self,
        base: StateSnapshot,
        left: StateSnapshot,
        right: StateSnapshot,
        *,
        repo_root: pathlib.Path | None = None,
    ) -> MergeResult:
        """Three-way merge at file granularity, respecting ``.museattributes``.

        Standard three-way logic, augmented by per-path strategy overrides
        declared in ``.museattributes``:

        - Both sides agree → consensus wins (including both deleted).
        - Only one side changed → take that side.
        - Both sides changed differently → consult ``.museattributes``:

          - ``ours`` — take left, including a left-side deletion; no conflict.
          - ``theirs`` — take right, including a right-side deletion; no
            conflict.
          - ``base`` — revert to the common ancestor; no conflict.
          - ``union`` — keep all additions from both sides; prefer left for
            conflicting blobs; no conflict.
          - ``manual`` — force into conflict list regardless of auto
            resolution (also applied to paths that merged cleanly).
          - ``auto`` — default three-way conflict.

        This is the fallback used by ``muse cherry-pick`` and contexts where
        the OT merge path is not available.  :meth:`merge_ops` provides
        symbol-level conflict detection when both sides have structured deltas.

        Args:
            base: Common ancestor snapshot.
            left: Our branch snapshot.
            right: Their branch snapshot.
            repo_root: Repository root; when provided, ``.museattributes`` is
                consulted for per-path strategy overrides.

        Returns:
            A ``MergeResult`` with the reconciled snapshot, any file-level
            conflicts, and ``applied_strategies`` recording which rules fired.
        """
        attrs = load_attributes(repo_root, domain=_DOMAIN_NAME) if repo_root else []

        base_files = base["files"]
        left_files = left["files"]
        right_files = right["files"]

        # Start from the ancestor and overwrite / drop entries path by path.
        merged: dict[str, str] = dict(base_files)
        conflicts: list[str] = []
        applied_strategies: dict[str, str] = {}

        def settle(path: str, blob_id: str | None) -> None:
            """Record *blob_id* for *path*; ``None`` means the file is deleted."""
            if blob_id is None:
                merged.pop(path, None)
            else:
                merged[path] = blob_id

        all_paths = set(base_files) | set(left_files) | set(right_files)
        for path in sorted(all_paths):
            base_id = base_files.get(path)
            left_id = left_files.get(path)
            right_id = right_files.get(path)
            # Without attributes every path uses the default strategy.
            strategy = resolve_strategy(attrs, path) if attrs else "auto"

            if left_id == right_id:
                # Both sides agree — or both deleted.
                clean_id = left_id
            elif base_id == left_id:
                # Only right changed.
                clean_id = right_id
            elif base_id == right_id:
                # Only left changed.
                clean_id = left_id
            else:
                # Both sides changed differently — the strategy decides.
                if strategy == "ours":
                    # A deletion on our side is honoured, not replaced by base.
                    settle(path, left_id)
                    applied_strategies[path] = "ours"
                elif strategy == "theirs":
                    # A deletion on their side is honoured, not replaced by base.
                    settle(path, right_id)
                    applied_strategies[path] = "theirs"
                elif strategy == "base":
                    settle(path, base_id)
                    applied_strategies[path] = "base"
                elif strategy == "union":
                    # For file-level blobs, full union is not representable —
                    # prefer left and keep all additions from both branches.
                    merged[path] = left_id or right_id or base_id or ""
                    applied_strategies[path] = "union"
                elif strategy == "manual":
                    conflicts.append(path)
                    merged[path] = left_id or right_id or base_id or ""
                    applied_strategies[path] = "manual"
                else:
                    # "auto" — standard three-way conflict.  The manifest
                    # keeps a surviving blob (left first) as a placeholder.
                    conflicts.append(path)
                    merged[path] = left_id or right_id or base_id or ""
                continue

            settle(path, clean_id)
            # Honour "manual" override even on clean paths.
            if strategy == "manual":
                conflicts.append(path)
                applied_strategies[path] = "manual"

        return MergeResult(
            merged=SnapshotManifest(files=merged, domain=_DOMAIN_NAME),
            conflicts=conflicts,
            applied_strategies=applied_strategies,
        )

    # ------------------------------------------------------------------
    # 4. drift
    # ------------------------------------------------------------------

    def drift(self, committed: StateSnapshot, live: LiveState) -> DriftReport:
        """Report how much the working tree has drifted from the last commit.

        Called by ``muse status``.  Takes a snapshot of the current live
        state and diffs it against the committed snapshot.  No ``repo_root``
        is passed to :meth:`diff`, so the report is file-level.

        Args:
            committed: The last committed snapshot.
            live: Current live state (path or snapshot manifest).

        Returns:
            A ``DriftReport`` describing what has changed since the last commit.
        """
        current = self.snapshot(live)
        delta = self.diff(committed, current)
        return DriftReport(
            has_drift=len(delta["ops"]) > 0,
            summary=delta["summary"],
            delta=delta,
        )

    # ------------------------------------------------------------------
    # 5. apply
    # ------------------------------------------------------------------

    def apply(self, delta: StateDelta, live_state: LiveState) -> LiveState:
        """Apply a delta to the working tree.

        Called by ``muse checkout`` after the core engine has already
        restored file-level objects from the object store.  The code plugin
        has no domain-specific post-processing to perform, so this is a
        pass-through.

        Args:
            delta: The typed operation list (unused at post-checkout time).
            live_state: Current live state (returned unchanged).

        Returns:
            *live_state* unchanged.
        """
        return live_state

    # ------------------------------------------------------------------
    # 6. schema
    # ------------------------------------------------------------------

    def schema(self) -> DomainSchema:
        """Declare the structural schema of the code domain.

        Returns:
            A ``DomainSchema`` with five semantic dimensions:
            ``structure``, ``symbols``, ``imports``, ``variables``,
            and ``metadata``.
        """
        return DomainSchema(
            domain=_DOMAIN_NAME,
            description=(
                "Semantic version control for source code. "
                "Treats code as a structured system of named symbols "
                "(functions, classes, methods) rather than lines of text. "
                "Two commits that only reformat a file produce no delta. "
                "Renames and moves are detected via content-addressed "
                "symbol identity."
            ),
            top_level=TreeSchema(
                kind="tree",
                node_type="module",
                diff_algorithm="gumtree",
            ),
            dimensions=[
                DimensionSpec(
                    name="structure",
                    description=(
                        "Module / file tree. Tracks which files exist and "
                        "how they relate to each other."
                    ),
                    schema=TreeSchema(
                        kind="tree",
                        node_type="file",
                        diff_algorithm="gumtree",
                    ),
                    independent_merge=False,
                ),
                DimensionSpec(
                    name="symbols",
                    description=(
                        "AST symbol tree. Functions, classes, methods, and "
                        "variables — the primary unit of semantic change."
                    ),
                    schema=TreeSchema(
                        kind="tree",
                        node_type="symbol",
                        diff_algorithm="gumtree",
                    ),
                    independent_merge=True,
                ),
                DimensionSpec(
                    name="imports",
                    description=(
                        "Import set. Tracks added / removed import statements "
                        "as an unordered set — order is semantically irrelevant."
                    ),
                    schema=SetSchema(
                        kind="set",
                        element_type="import",
                        identity="by_content",
                    ),
                    independent_merge=True,
                ),
                DimensionSpec(
                    name="variables",
                    description=(
                        "Top-level variable and constant assignments. "
                        "Tracked as an unordered set."
                    ),
                    schema=SetSchema(
                        kind="set",
                        element_type="variable",
                        identity="by_content",
                    ),
                    independent_merge=True,
                ),
                DimensionSpec(
                    name="metadata",
                    description=(
                        "Non-code files: configuration, documentation, "
                        "build scripts, etc. Tracked at file granularity."
                    ),
                    schema=SetSchema(
                        kind="set",
                        element_type="file",
                        identity="by_content",
                    ),
                    independent_merge=True,
                ),
            ],
            merge_mode="three_way",
            schema_version=1,
        )

    # ------------------------------------------------------------------
    # StructuredMergePlugin — OT extension
    # ------------------------------------------------------------------

    def merge_ops(
        self,
        base: StateSnapshot,
        ours_snap: StateSnapshot,
        theirs_snap: StateSnapshot,
        ours_ops: list[DomainOp],
        theirs_ops: list[DomainOp],
        *,
        repo_root: pathlib.Path | None = None,
    ) -> MergeResult:
        """Operation-level three-way merge using Operational Transformation.

        Symbol-level conflicts are detected here by testing every pair of
        ``child_ops`` from two ``PatchOp`` entries at the same file address
        with :func:`~muse.core.op_transform.ops_commute` — so two agents
        modifying *different* functions in the same file auto-merge, while
        concurrent modifications to the *same* function produce a
        symbol-level conflict address.  Non-patch ops (file-level inserts,
        deletes, replaces) go through
        :func:`~muse.core.op_transform.merge_op_lists`.

        The reconciled ``merged`` snapshot is produced by the file-level
        three-way :meth:`merge` fallback (we cannot reconstruct merged source
        bytes without a text-merge pass).  This is correct for all cases where
        the two sides touched *different* files.  For the same-file-different-
        symbol case the merged manifest holds the *ours* version of the file —
        annotated as a conflict-free merge — which may require the user to
        re-apply the theirs changes manually.  This limitation is documented
        and will be lifted in a future release that implements source-level
        patching.

        Args:
            base: Common ancestor snapshot.
            ours_snap: Our branch's final snapshot.
            theirs_snap: Their branch's final snapshot.
            ours_ops: Our branch's typed operation list.
            theirs_ops: Their branch's typed operation list.
            repo_root: Repository root for ``.museattributes`` lookup.

        Returns:
            A ``MergeResult`` where ``conflicts`` contains symbol-level
            addresses (e.g. ``"src/utils.py::calculate_total"``) rather than
            bare file paths.  File-level conflicts reported by the
            :meth:`merge` fallback are not carried over; only its manifest,
            strategies and dimension reports are.
        """
        # The core OT engine's _op_key for PatchOp hashes only the file path
        # and child_domain — not the child_ops themselves.  This means two
        # PatchOps for the same file are treated as "consensus" regardless of
        # whether they touch the same or different symbols.  We therefore
        # implement symbol-level conflict detection directly here.

        attrs = load_attributes(repo_root, domain=_DOMAIN_NAME) if repo_root else []

        # ── Step 1: symbol-level conflict detection for PatchOps ──────────
        ours_patches: dict[str, PatchOp] = {
            op["address"]: op for op in ours_ops if op["op"] == "patch"
        }
        theirs_patches: dict[str, PatchOp] = {
            op["address"]: op for op in theirs_ops if op["op"] == "patch"
        }

        conflict_addresses: set[str] = set()
        for path, our_patch in ours_patches.items():
            their_patch = theirs_patches.get(path)
            if their_patch is None:
                # Only we patched this file — nothing to collide with.
                continue
            for our_child in our_patch["child_ops"]:
                for their_child in their_patch["child_ops"]:
                    if not ops_commute(our_child, their_child):
                        conflict_addresses.add(our_child["address"])

        # ── Step 2: coarse OT for non-PatchOp ops (file-level inserts/deletes) ──
        non_patch_ours: list[DomainOp] = [op for op in ours_ops if op["op"] != "patch"]
        non_patch_theirs: list[DomainOp] = [op for op in theirs_ops if op["op"] != "patch"]
        file_result = merge_op_lists(
            base_ops=[],
            ours_ops=non_patch_ours,
            theirs_ops=non_patch_theirs,
        )
        for our_op, _ in file_result.conflict_ops:
            conflict_addresses.add(our_op["address"])

        # ── Step 3: apply .museattributes to symbol-level conflicts ──────
        # Symbol addresses are of the form "src/utils.py::function_name".
        # We resolve strategy against the file path portion so that a
        # path = "src/**/*.py" / strategy = "ours" rule suppresses symbol
        # conflicts in those files, not just file-level manifest conflicts.
        op_applied_strategies: dict[str, str] = {}
        resolved_conflicts: list[str] = []
        for addr in sorted(conflict_addresses):
            if not attrs:
                # No attributes loaded: every detected conflict stands.
                resolved_conflicts.append(addr)
                continue
            file_path = addr.partition("::")[0]
            strategy = resolve_strategy(attrs, file_path)
            if strategy in ("ours", "theirs", "base", "union"):
                # An automatic strategy covers this file — record and suppress.
                op_applied_strategies[addr] = strategy
                continue
            if strategy == "manual":
                op_applied_strategies[addr] = "manual"
            resolved_conflicts.append(addr)

        # NOTE(review): ours' non-patch ops are passed to merge_op_lists above
        # and are appended again here via ours_ops, while theirs' patch ops
        # are not included — confirm against merge_op_lists' contract whether
        # op_log is meant to contain duplicates / omit theirs' patches.
        merged_ops: list[DomainOp] = list(file_result.merged_ops) + list(ours_ops)

        # Fall back to file-level merge for the manifest (carries its own
        # applied_strategies from file-level attribute resolution).  Symbol-
        # level entries win on key collisions.
        fallback = self.merge(base, ours_snap, theirs_snap, repo_root=repo_root)
        combined_strategies = {**fallback.applied_strategies, **op_applied_strategies}
        return MergeResult(
            merged=fallback.merged,
            conflicts=resolved_conflicts,
            applied_strategies=combined_strategies,
            dimension_reports=fallback.dimension_reports,
            op_log=merged_ops,
        )
| 632 | |
| 633 | |
| 634 | # --------------------------------------------------------------------------- |
| 635 | # Private helpers |
| 636 | # --------------------------------------------------------------------------- |
| 637 | |
| 638 | |
| 639 | def _hash_file(path: pathlib.Path) -> str: |
| 640 | """Return the SHA-256 hex digest of *path*'s raw bytes.""" |
| 641 | h = hashlib.sha256() |
| 642 | with path.open("rb") as fh: |
| 643 | for chunk in iter(lambda: fh.read(65_536), b""): |
| 644 | h.update(chunk) |
| 645 | return h.hexdigest() |
| 646 | |
| 647 | |
| 648 | def _file_level_ops( |
| 649 | base_files: dict[str, str], |
| 650 | target_files: dict[str, str], |
| 651 | ) -> list[DomainOp]: |
| 652 | """Produce coarse file-level ops (no AST parsing).""" |
| 653 | base_paths = set(base_files) |
| 654 | target_paths = set(target_files) |
| 655 | ops: list[DomainOp] = [] |
| 656 | |
| 657 | for path in sorted(target_paths - base_paths): |
| 658 | ops.append(InsertOp( |
| 659 | op="insert", |
| 660 | address=path, |
| 661 | position=None, |
| 662 | content_id=target_files[path], |
| 663 | content_summary=f"added {path}", |
| 664 | )) |
| 665 | for path in sorted(base_paths - target_paths): |
| 666 | ops.append(DeleteOp( |
| 667 | op="delete", |
| 668 | address=path, |
| 669 | position=None, |
| 670 | content_id=base_files[path], |
| 671 | content_summary=f"removed {path}", |
| 672 | )) |
| 673 | for path in sorted(base_paths & target_paths): |
| 674 | if base_files[path] != target_files[path]: |
| 675 | ops.append(ReplaceOp( |
| 676 | op="replace", |
| 677 | address=path, |
| 678 | position=None, |
| 679 | old_content_id=base_files[path], |
| 680 | new_content_id=target_files[path], |
| 681 | old_summary=f"{path} (before)", |
| 682 | new_summary=f"{path} (after)", |
| 683 | )) |
| 684 | return ops |
| 685 | |
| 686 | |
def _semantic_ops(
    base_files: dict[str, str],
    target_files: dict[str, str],
    repo_root: pathlib.Path,
) -> list[DomainOp]:
    """Build symbol-level ops from blobs read out of the object store.

    Only paths that were added, removed, or whose content id changed are
    loaded.  A blob that the object store cannot supply (``None``) gets no
    symbol tree on that side.

    Args:
        base_files: Path → content id mapping of the older state.
        target_files: Path → content id mapping of the newer state.
        repo_root: Repository root handed to ``read_object``.

    Returns:
        Whatever ``build_diff_ops`` produces for the two manifests and the
        symbol trees collected for the changed paths.
    """
    before = set(base_files)
    after = set(target_files)
    added = after - before
    removed = before - after
    modified = {
        path for path in before & after
        if base_files[path] != target_files[path]
    }
    touched = added | removed | modified

    base_trees: dict[str, SymbolTree] = {}
    target_trees: dict[str, SymbolTree] = {}
    # Base side first, then target side, for every touched path.
    sides = ((base_files, base_trees), (target_files, target_trees))

    for path in touched:
        for manifest, trees in sides:
            if path not in manifest:
                continue
            blob = read_object(repo_root, manifest[path])
            if blob is not None:
                trees[path] = _parse_with_fallback(blob, path)

    return build_diff_ops(base_files, target_files, base_trees, target_trees)
| 719 | |
| 720 | |
def _parse_with_fallback(source: bytes, file_path: str) -> SymbolTree:
    """Parse symbols from *source*, returning an empty tree on any error.

    Parsing is best-effort: any exception raised by ``parse_symbols`` is
    caught here so one unparsable file does not abort the caller.  The
    failure is logged at DEBUG level together with its traceback so that
    parser problems remain diagnosable.

    Args:
        source: Raw file contents.
        file_path: Path passed through to ``parse_symbols`` and used in the
            log message.

    Returns:
        The parsed ``SymbolTree``, or an empty dict when parsing failed.
    """
    try:
        return parse_symbols(source, file_path)
    except Exception:
        # Deliberately broad: the input is arbitrary user source.  exc_info
        # keeps the swallowed exception visible in debug logs.
        logger.debug(
            "Symbol parsing failed for %s — falling back to file-level.",
            file_path,
            exc_info=True,
        )
        return {}
| 728 | |
| 729 | |
| 730 | def _load_symbol_trees_from_workdir( |
| 731 | workdir: pathlib.Path, |
| 732 | manifest: dict[str, str], |
| 733 | ) -> dict[str, SymbolTree]: |
| 734 | """Build symbol trees for all files in *manifest* that live in *workdir*.""" |
| 735 | trees: dict[str, SymbolTree] = {} |
| 736 | for rel_path in manifest: |
| 737 | file_path = workdir / rel_path |
| 738 | if not file_path.is_file(): |
| 739 | continue |
| 740 | try: |
| 741 | source = file_path.read_bytes() |
| 742 | except OSError: |
| 743 | continue |
| 744 | suffix = pathlib.PurePosixPath(rel_path).suffix.lower() |
| 745 | adapter = adapter_for_path(rel_path) |
| 746 | if adapter.supported_extensions().intersection({suffix}): |
| 747 | trees[rel_path] = _parse_with_fallback(source, rel_path) |
| 748 | return trees |
| 749 | |
| 750 | |
| 751 | # --------------------------------------------------------------------------- |
| 752 | # Module-level singleton |
| 753 | # --------------------------------------------------------------------------- |
| 754 | |
#: The singleton plugin instance registered in ``muse/plugins/registry.py``.
#: ``CodePlugin`` is documented as stateless, so this one instance is the
#: standard entry point for every caller.
plugin = CodePlugin()