gabriel / muse public
indices.py python
252 lines 7.9 KB
6647e2c9 feat(code): Phase 3 — .muse/indices/ infrastructure and muse index command Gabriel Cardona <gabriel@tellurstori.com> 5d ago
1 """Optional local index layer for Muse repositories.
2
3 Indexes live under ``.muse/indices/`` and are derived, versioned, and fully
4 rebuildable from the commit history. **No index is required for repository
5 correctness** — all commands work without them; indexes only accelerate
6 repeated queries.
7
8 Available indexes
9 -----------------
10
11 ``symbol_history``
12 Maps symbol addresses to their event timeline across all commits.
13 Enables O(1) ``muse symbol-log``, ``muse lineage``, and ``muse query-history``
14 instead of O(commits × files) scans.
15
16 Schema v1::
17
18 {
19 "schema_version": 1,
20 "index": "symbol_history",
21 "updated_at": "2026-03-18T12:00:00+00:00",
22 "entries": {
23 "src/billing.py::compute_total": [
24 {
25 "commit_id": "<sha256>",
26 "committed_at": "2026-01-01T00:00:00+00:00",
27 "op": "insert",
28 "content_id": "<sha256>",
29 "body_hash": "<sha256>",
30 "signature_id": "<sha256>"
31 },
32 ...
33 ],
34 ...
35 }
36 }
37
38 ``hash_occurrence``
39 Maps ``body_hash`` values to the list of symbol addresses that share them.
40 Enables O(1) ``muse clones`` and ``muse find-symbol hash=``.
41
42 Schema v1::
43
44 {
45 "schema_version": 1,
46 "index": "hash_occurrence",
47 "updated_at": "2026-03-18T12:00:00+00:00",
48 "entries": {
49 "<body_hash>": ["src/billing.py::compute_total", ...]
50 }
51 }
52
53 Rebuild
54 -------
55
56 Indexes are rebuilt by ``muse index rebuild``. They can also be built
57 incrementally: the ``update_*_index`` functions accept an existing index
58 dict and patch it rather than rebuilding from scratch.
59 """
60 from __future__ import annotations
61
62 import datetime
63 import json
64 import logging
65 import pathlib
66
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Repo-relative location of the index directory.
# NOTE(review): nothing in this module reads this constant — ``_indices_dir()``
# spells the same path out by hand; confirm whether external code imports it.
_INDICES_DIR = pathlib.PurePosixPath(".muse") / "indices"

# Value written to the ``schema_version`` field of every saved index file.
_SCHEMA_VERSION = 1
72
73
74 # ---------------------------------------------------------------------------
# Typed index entry shapes (slotted record classes)
76 # ---------------------------------------------------------------------------
77
78
class SymbolHistoryEntry:
    """A single recorded event on one symbol's timeline.

    Instances are plain slotted records holding six string fields. They
    convert to and from the JSON object shape stored under each address in
    the ``symbol_history`` index file.
    """

    __slots__ = (
        "commit_id",
        "committed_at",
        "op",
        "content_id",
        "body_hash",
        "signature_id",
    )

    def __init__(
        self,
        commit_id: str,
        committed_at: str,
        op: str,
        content_id: str,
        body_hash: str,
        signature_id: str,
    ) -> None:
        """Store the six fields verbatim; no validation is performed."""
        self.commit_id, self.committed_at, self.op = commit_id, committed_at, op
        self.content_id, self.body_hash, self.signature_id = (
            content_id,
            body_hash,
            signature_id,
        )

    def to_dict(self) -> dict[str, str]:
        """Return a new dict of the six fields, keyed by name in slot order."""
        return {
            field: getattr(self, field)
            for field in SymbolHistoryEntry.__slots__
        }

    @classmethod
    def from_dict(cls, d: dict[str, str]) -> "SymbolHistoryEntry":
        """Build an entry from a mapping holding all six field keys.

        Keys other than the six fields are ignored.

        Raises:
            KeyError: if any of the six field keys is missing from *d*.
        """
        # Look the keys up in slot order so the first missing field is the
        # one reported by the KeyError.
        fields = {name: d[name] for name in SymbolHistoryEntry.__slots__}
        return cls(**fields)
123
124
125 # ---------------------------------------------------------------------------
126 # Index I/O helpers
127 # ---------------------------------------------------------------------------
128
129
def _indices_dir(root: pathlib.Path) -> pathlib.Path:
    """Return the ``.muse/indices`` directory path beneath *root*."""
    return root.joinpath(".muse", "indices")
132
133
def _index_path(root: pathlib.Path, name: str) -> pathlib.Path:
    """Return the path of the ``<name>.json`` index file beneath *root*."""
    filename = f"{name}.json"
    return _indices_dir(root).joinpath(filename)
136
137
def _ensure_dir(root: pathlib.Path) -> None:
    """Create the index directory under *root*, including missing parents.

    Does nothing if the directory already exists.
    """
    target = _indices_dir(root)
    target.mkdir(parents=True, exist_ok=True)
140
141
def _now_iso() -> str:
    """Return the current UTC time as a timezone-aware ISO-8601 string."""
    utc_now = datetime.datetime.now(tz=datetime.timezone.utc)
    return utc_now.isoformat()
144
145
146 # ---------------------------------------------------------------------------
147 # Symbol history index
148 # ---------------------------------------------------------------------------
149
150
# In-memory shape of the symbol history index: symbol address (for example
# ``src/billing.py::compute_total``) -> list of that symbol's history entries.
SymbolHistoryIndex = dict[str, list[SymbolHistoryEntry]]
152
153
def load_symbol_history(root: pathlib.Path) -> SymbolHistoryIndex:
    """Load the symbol history index, returning an empty dict if absent.

    The index is an optional accelerator, so this function never raises for
    a bad file: if the file cannot be read, is not valid JSON, or does not
    have the expected shape, a warning is logged and an empty index is
    returned.

    Args:
        root: Repository root containing the ``.muse`` directory.

    Returns:
        Mapping of symbol address to its list of history entries; ``{}``
        when the index file is missing, unreadable, or malformed.
    """
    path = _index_path(root, "symbol_history")
    if not path.exists():
        return {}
    try:
        raw = json.loads(path.read_text(encoding="utf-8"))
        return {
            address: [SymbolHistoryEntry.from_dict(e) for e in entries]
            for address, entries in raw.get("entries", {}).items()
        }
    except (OSError, ValueError, KeyError, TypeError, AttributeError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
        # TypeError/AttributeError cover valid JSON of the wrong shape
        # (e.g. a top-level list, or an entry that is not an object).
        logger.warning("⚠️ Corrupt symbol_history index: %s — returning empty", exc)
        return {}
168
169
def save_symbol_history(root: pathlib.Path, index: SymbolHistoryIndex) -> None:
    """Persist the symbol history index.

    Addresses are written in sorted order. The JSON is first written to a
    sibling ``.tmp`` file and then moved over the real index file, so an
    interrupted write does not leave a truncated index behind.

    Args:
        root: Repository root containing (or to contain) ``.muse/indices``.
        index: Mapping of symbol address to its list of history entries.
    """
    _ensure_dir(root)
    path = _index_path(root, "symbol_history")
    entries: dict[str, list[dict[str, str]]] = {
        addr: [e.to_dict() for e in evts]
        for addr, evts in sorted(index.items())
    }
    data = {
        "schema_version": _SCHEMA_VERSION,
        "index": "symbol_history",
        "updated_at": _now_iso(),
        "entries": entries,
    }
    payload = json.dumps(data, indent=2) + "\n"
    # Write-then-replace: readers see either the old file or the complete
    # new one, never a partially written file.
    tmp_path = path.with_name(path.name + ".tmp")
    tmp_path.write_text(payload, encoding="utf-8")
    tmp_path.replace(path)
    logger.debug("✅ Saved symbol_history index (%d addresses)", len(index))
186
187
188 # ---------------------------------------------------------------------------
189 # Hash occurrence index
190 # ---------------------------------------------------------------------------
191
192
# In-memory shape of the hash occurrence index: ``body_hash`` value -> list of
# symbol addresses recorded under that hash.
HashOccurrenceIndex = dict[str, list[str]]
194
195
def load_hash_occurrence(root: pathlib.Path) -> HashOccurrenceIndex:
    """Load the hash occurrence index, returning an empty dict if absent.

    The index is an optional accelerator, so this function never raises for
    a bad file: if the file cannot be read, is not valid JSON, or does not
    have the expected shape, a warning is logged and an empty index is
    returned.

    Args:
        root: Repository root containing the ``.muse`` directory.

    Returns:
        Mapping of body hash to a fresh list of symbol addresses; ``{}``
        when the index file is missing, unreadable, or malformed.
    """
    path = _index_path(root, "hash_occurrence")
    if not path.exists():
        return {}
    try:
        raw = json.loads(path.read_text(encoding="utf-8"))
        return {
            body_hash: list(addresses)
            for body_hash, addresses in raw.get("entries", {}).items()
        }
    except (OSError, ValueError, KeyError, TypeError, AttributeError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
        # TypeError/AttributeError cover valid JSON of the wrong shape
        # (e.g. a top-level list, or a non-iterable address value).
        logger.warning("⚠️ Corrupt hash_occurrence index: %s — returning empty", exc)
        return {}
210
211
def save_hash_occurrence(root: pathlib.Path, index: HashOccurrenceIndex) -> None:
    """Persist the hash occurrence index.

    Hashes, and the addresses under each hash, are written in sorted order.
    The JSON is first written to a sibling ``.tmp`` file and then moved over
    the real index file, so an interrupted write does not leave a truncated
    index behind.

    Args:
        root: Repository root containing (or to contain) ``.muse/indices``.
        index: Mapping of body hash to the symbol addresses sharing it.
    """
    _ensure_dir(root)
    path = _index_path(root, "hash_occurrence")
    data = {
        "schema_version": _SCHEMA_VERSION,
        "index": "hash_occurrence",
        "updated_at": _now_iso(),
        "entries": {h: sorted(addrs) for h, addrs in sorted(index.items())},
    }
    payload = json.dumps(data, indent=2) + "\n"
    # Write-then-replace: readers see either the old file or the complete
    # new one, never a partially written file.
    tmp_path = path.with_name(path.name + ".tmp")
    tmp_path.write_text(payload, encoding="utf-8")
    tmp_path.replace(path)
    logger.debug("✅ Saved hash_occurrence index (%d hashes)", len(index))
224
225
226 # ---------------------------------------------------------------------------
227 # Index metadata
228 # ---------------------------------------------------------------------------
229
230
def index_info(root: pathlib.Path) -> list[dict[str, str]]:
    """Return status information about all known indexes.

    Args:
        root: Repository root containing the ``.muse`` directory.

    Returns:
        One dict per known index, in a fixed order, with the keys ``name``,
        ``status``, ``updated_at`` and ``entries``. ``status`` is one of:

        * ``"present"`` — the file was read and parsed; ``updated_at`` is
          taken from the file (``"unknown"`` if missing) and ``entries`` is
          the stringified number of top-level entries.
        * ``"corrupt"`` — the file exists but could not be read, is not
          valid JSON, or does not have the expected shape.
        * ``"absent"`` — the file does not exist.
    """
    names = ["symbol_history", "hash_occurrence"]
    result: list[dict[str, str]] = []
    for name in names:
        path = _index_path(root, name)
        if not path.exists():
            result.append({"name": name, "status": "absent", "updated_at": "", "entries": "0"})
            continue
        try:
            raw = json.loads(path.read_text(encoding="utf-8"))
            info = {
                "name": name,
                "status": "present",
                "updated_at": raw.get("updated_at", "unknown"),
                "entries": str(len(raw.get("entries", {}))),
            }
        except (OSError, ValueError, KeyError, TypeError, AttributeError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
            # TypeError/AttributeError cover valid JSON of the wrong shape
            # (e.g. a top-level list, or ``"entries": null``).
            info = {"name": name, "status": "corrupt", "updated_at": "", "entries": "0"}
        result.append(info)
    return result