gabriel / muse public
merge_engine.py python
351 lines 12.2 KB
a55ffe59 docs: fill docstring gaps identified in Phase 3 audit Gabriel Cardona <gabriel@tellurstori.com> 6d ago
1 """Muse VCS merge engine — fast-forward, 3-way path-level, and structured op-level merge.
2
3 Public API
4 ----------
5 Pure functions (no I/O):
6
7 - :func:`diff_snapshots` — paths that changed between two snapshot manifests.
8 - :func:`detect_conflicts` — paths changed on *both* branches since the base.
9 - :func:`apply_merge` — build merged manifest for a conflict-free 3-way merge.
10
11 Structured (operation-level) merge — Phase 3:
12
13 - :mod:`muse.core.op_transform` — ``ops_commute``, ``transform``, ``merge_op_lists``,
14 ``merge_structured``, and :class:`~muse.core.op_transform.MergeOpsResult`.
15 Plugins that implement :class:`~muse.domain.StructuredMergePlugin` use these
16 functions to auto-merge non-conflicting ``DomainOp`` lists.
17
18 File-based helpers:
19
20 - :func:`find_merge_base` — lowest common ancestor (LCA) of two commits.
21 - :func:`read_merge_state` — detect and load an in-progress merge.
22 - :func:`write_merge_state` — persist conflict state before exiting.
23 - :func:`clear_merge_state` — remove MERGE_STATE.json after resolution.
24 - :func:`apply_resolution` — restore a specific object version to muse-work/.
25
26 ``MERGE_STATE.json`` schema
27 ---------------------------
28
29 .. code-block:: json
30
31 {
32 "base_commit": "abc123...",
33 "ours_commit": "def456...",
34 "theirs_commit": "789abc...",
35 "conflict_paths": ["beat.mid", "lead.mp3"],
36 "other_branch": "feature/experiment"
37 }
38
39 ``other_branch`` is optional; all other fields are required when conflicts exist.
40 """
41 from __future__ import annotations
42
43 import json
44 import logging
45 import pathlib
46 from collections import deque
47 from dataclasses import dataclass, field
48 from typing import TypedDict
49
50 logger = logging.getLogger(__name__)
51
52 _MERGE_STATE_FILENAME = "MERGE_STATE.json"
53
54
55 # ---------------------------------------------------------------------------
56 # Wire-format TypedDict
57 # ---------------------------------------------------------------------------
58
59
60 class MergeStatePayload(TypedDict, total=False):
61 """JSON-serialisable form of an in-progress merge state."""
62
63 base_commit: str
64 ours_commit: str
65 theirs_commit: str
66 conflict_paths: list[str]
67 other_branch: str
68
69
70 # ---------------------------------------------------------------------------
71 # MergeState dataclass
72 # ---------------------------------------------------------------------------
73
74
75 @dataclass(frozen=True)
76 class MergeState:
77 """Describes an in-progress merge with unresolved conflicts."""
78
79 conflict_paths: list[str] = field(default_factory=list)
80 base_commit: str | None = None
81 ours_commit: str | None = None
82 theirs_commit: str | None = None
83 other_branch: str | None = None
84
85
86 # ---------------------------------------------------------------------------
87 # Filesystem helpers
88 # ---------------------------------------------------------------------------
89
90
91 def read_merge_state(root: pathlib.Path) -> MergeState | None:
92 """Return :class:`MergeState` if a merge is in progress, otherwise ``None``."""
93 merge_state_path = root / ".muse" / _MERGE_STATE_FILENAME
94 if not merge_state_path.exists():
95 return None
96 try:
97 data = json.loads(merge_state_path.read_text())
98 except (json.JSONDecodeError, OSError) as exc:
99 logger.warning("⚠️ Failed to read %s: %s", _MERGE_STATE_FILENAME, exc)
100 return None
101
102 raw_conflicts = data.get("conflict_paths", [])
103 conflict_paths: list[str] = (
104 [str(c) for c in raw_conflicts] if isinstance(raw_conflicts, list) else []
105 )
106
107 def _str_or_none(key: str) -> str | None:
108 val = data.get(key)
109 return str(val) if val is not None else None
110
111 return MergeState(
112 conflict_paths=conflict_paths,
113 base_commit=_str_or_none("base_commit"),
114 ours_commit=_str_or_none("ours_commit"),
115 theirs_commit=_str_or_none("theirs_commit"),
116 other_branch=_str_or_none("other_branch"),
117 )
118
119
120 def write_merge_state(
121 root: pathlib.Path,
122 *,
123 base_commit: str,
124 ours_commit: str,
125 theirs_commit: str,
126 conflict_paths: list[str],
127 other_branch: str | None = None,
128 ) -> None:
129 """Write ``.muse/MERGE_STATE.json`` to signal an in-progress conflicted merge.
130
131 Called by the ``muse merge`` command when the merge produces at least one
132 conflict that cannot be auto-resolved. The file is read back by
133 :func:`read_merge_state` on subsequent ``muse status`` and ``muse commit``
134 invocations to surface conflict state to the user.
135
136 Args:
137 root: Repository root (parent of ``.muse/``).
138 base_commit: Commit ID of the merge base (common ancestor).
139 ours_commit: Commit ID of the current branch (HEAD) at merge time.
140 theirs_commit: Commit ID of the branch being merged in.
141 conflict_paths: Sorted list of workspace-relative POSIX paths with
142 unresolvable conflicts.
143 other_branch: Name of the branch being merged in; stored for
144 informational display but not required for resolution.
145 """
146 merge_state_path = root / ".muse" / _MERGE_STATE_FILENAME
147 payload: MergeStatePayload = {
148 "base_commit": base_commit,
149 "ours_commit": ours_commit,
150 "theirs_commit": theirs_commit,
151 "conflict_paths": sorted(conflict_paths),
152 }
153 if other_branch is not None:
154 payload["other_branch"] = other_branch
155 merge_state_path.write_text(json.dumps(payload, indent=2))
156 logger.info("✅ Wrote MERGE_STATE.json with %d conflict(s)", len(conflict_paths))
157
158
159 def clear_merge_state(root: pathlib.Path) -> None:
160 """Remove ``.muse/MERGE_STATE.json`` after a successful merge or resolution."""
161 merge_state_path = root / ".muse" / _MERGE_STATE_FILENAME
162 if merge_state_path.exists():
163 merge_state_path.unlink()
164 logger.debug("✅ Cleared MERGE_STATE.json")
165
166
167 def apply_resolution(
168 root: pathlib.Path,
169 rel_path: str,
170 object_id: str,
171 ) -> None:
172 """Restore a specific object version to ``muse-work/<rel_path>``.
173
174 Used by the ``muse merge --resolve`` workflow: after a user has chosen
175 which version of a conflicting file to keep, this function writes that
176 version into the working directory so ``muse commit`` can snapshot it.
177
178 Args:
179 root: Repository root (parent of ``.muse/``).
180 rel_path: Workspace-relative POSIX path of the conflicting file.
181 object_id: SHA-256 of the chosen resolution content in the object store.
182
183 Raises:
184 FileNotFoundError: When *object_id* is not present in the local store.
185 """
186 from muse.core.object_store import read_object
187
188 content = read_object(root, object_id)
189 if content is None:
190 raise FileNotFoundError(
191 f"Object {object_id[:8]} for '{rel_path}' not found in local store."
192 )
193 dest = root / "muse-work" / rel_path
194 dest.parent.mkdir(parents=True, exist_ok=True)
195 dest.write_bytes(content)
196 logger.debug("✅ Restored '%s' from object %s", rel_path, object_id[:8])
197
198
199 def is_conflict_resolved(merge_state: MergeState, rel_path: str) -> bool:
200 """Return ``True`` if *rel_path* is NOT listed as a conflict in *merge_state*."""
201 return rel_path not in merge_state.conflict_paths
202
203
204 # ---------------------------------------------------------------------------
205 # Pure merge functions (no I/O)
206 # ---------------------------------------------------------------------------
207
208
209 def diff_snapshots(
210 base_manifest: dict[str, str],
211 other_manifest: dict[str, str],
212 ) -> set[str]:
213 """Return the set of paths that differ between *base_manifest* and *other_manifest*.
214
215 A path is "different" if it was added (in *other* but not *base*), deleted
216 (in *base* but not *other*), or modified (present in both with different
217 content hashes).
218
219 Args:
220 base_manifest: Path → content-hash map for the ancestor snapshot.
221 other_manifest: Path → content-hash map for the other snapshot.
222
223 Returns:
224 Set of workspace-relative POSIX paths that differ.
225 """
226 base_paths = set(base_manifest.keys())
227 other_paths = set(other_manifest.keys())
228 added = other_paths - base_paths
229 deleted = base_paths - other_paths
230 common = base_paths & other_paths
231 modified = {p for p in common if base_manifest[p] != other_manifest[p]}
232 return added | deleted | modified
233
234
235 def detect_conflicts(
236 ours_changed: set[str],
237 theirs_changed: set[str],
238 ) -> set[str]:
239 """Return paths changed on *both* branches since the merge base."""
240 return ours_changed & theirs_changed
241
242
243 def apply_merge(
244 base_manifest: dict[str, str],
245 ours_manifest: dict[str, str],
246 theirs_manifest: dict[str, str],
247 ours_changed: set[str],
248 theirs_changed: set[str],
249 conflict_paths: set[str],
250 ) -> dict[str, str]:
251 """Build the merged snapshot manifest for a conflict-free 3-way merge.
252
253 Starts from *base_manifest* and applies non-conflicting changes from both
254 branches:
255
256 - Ours-only changes (in *ours_changed* but not *conflict_paths*) are taken
257 from *ours_manifest*. Deletions are handled by the absence of the path
258 in *ours_manifest*.
259 - Theirs-only changes (in *theirs_changed* but not *conflict_paths*) are
260 taken from *theirs_manifest* by the same logic.
261 - Paths in *conflict_paths* are excluded — callers must resolve them
262 separately before producing a final merged snapshot.
263
264 Args:
265 base_manifest: Path → content-hash for the common ancestor.
266 ours_manifest: Path → content-hash for our branch.
267 theirs_manifest: Path → content-hash for their branch.
268 ours_changed: Paths changed by our branch (from :func:`diff_snapshots`).
269 theirs_changed: Paths changed by their branch.
270 conflict_paths: Paths with concurrent changes — excluded from output.
271
272 Returns:
273 Merged path → content-hash mapping; conflict paths are absent.
274 """
275 merged: dict[str, str] = dict(base_manifest)
276 for path in ours_changed - conflict_paths:
277 if path in ours_manifest:
278 merged[path] = ours_manifest[path]
279 else:
280 merged.pop(path, None)
281 for path in theirs_changed - conflict_paths:
282 if path in theirs_manifest:
283 merged[path] = theirs_manifest[path]
284 else:
285 merged.pop(path, None)
286 return merged
287
288
289 # ---------------------------------------------------------------------------
290 # File-based merge base finder
291 # ---------------------------------------------------------------------------
292
293
294 def find_merge_base(
295 repo_root: pathlib.Path,
296 commit_id_a: str,
297 commit_id_b: str,
298 ) -> str | None:
299 """Find the Lowest Common Ancestor (LCA) of two commits.
300
301 Uses BFS to collect all ancestors of *commit_id_a* (inclusive), then
302 walks *commit_id_b*'s ancestor graph (BFS) until the first node found
303 in *a*'s ancestor set is reached.
304
305 Args:
306 repo_root: The repository root directory.
307 commit_id_a: First commit ID (e.g., current branch HEAD).
308 commit_id_b: Second commit ID (e.g., target branch HEAD).
309
310 Returns:
311 The LCA commit ID, or ``None`` if the commits share no common ancestor.
312 """
313 from muse.core.store import read_commit
314
315 def _all_ancestors(start: str) -> set[str]:
316 visited: set[str] = set()
317 queue: deque[str] = deque([start])
318 while queue:
319 cid = queue.popleft()
320 if cid in visited:
321 continue
322 visited.add(cid)
323 commit = read_commit(repo_root, cid)
324 if commit is None:
325 continue
326 if commit.parent_commit_id:
327 queue.append(commit.parent_commit_id)
328 if commit.parent2_commit_id:
329 queue.append(commit.parent2_commit_id)
330 return visited
331
332 a_ancestors = _all_ancestors(commit_id_a)
333
334 visited_b: set[str] = set()
335 queue_b: deque[str] = deque([commit_id_b])
336 while queue_b:
337 cid = queue_b.popleft()
338 if cid in visited_b:
339 continue
340 visited_b.add(cid)
341 if cid in a_ancestors:
342 return cid
343 commit = read_commit(repo_root, cid)
344 if commit is None:
345 continue
346 if commit.parent_commit_id:
347 queue_b.append(commit.parent_commit_id)
348 if commit.parent2_commit_id:
349 queue_b.append(commit.parent2_commit_id)
350
351 return None