indices.py
python
| 1 | """Optional local index layer for Muse repositories. |
| 2 | |
| 3 | Indexes live under ``.muse/indices/`` and are derived, versioned, and fully |
| 4 | rebuildable from the commit history. **No index is required for repository |
| 5 | correctness** — all commands work without them; indexes only accelerate |
| 6 | repeated queries. |
| 7 | |
| 8 | Available indexes |
| 9 | ----------------- |
| 10 | |
| 11 | ``symbol_history`` |
| 12 | Maps symbol addresses to their event timeline across all commits. |
| 13 | Enables O(1) ``muse symbol-log``, ``muse lineage``, and ``muse query-history`` |
| 14 | instead of O(commits × files) scans. |
| 15 | |
| 16 | Schema v1:: |
| 17 | |
| 18 | { |
| 19 | "schema_version": 1, |
| 20 | "index": "symbol_history", |
| 21 | "updated_at": "2026-03-18T12:00:00+00:00", |
| 22 | "entries": { |
| 23 | "src/billing.py::compute_total": [ |
| 24 | { |
| 25 | "commit_id": "<sha256>", |
| 26 | "committed_at": "2026-01-01T00:00:00+00:00", |
| 27 | "op": "insert", |
| 28 | "content_id": "<sha256>", |
| 29 | "body_hash": "<sha256>", |
| 30 | "signature_id": "<sha256>" |
| 31 | }, |
| 32 | ... |
| 33 | ], |
| 34 | ... |
| 35 | } |
| 36 | } |
| 37 | |
| 38 | ``hash_occurrence`` |
| 39 | Maps ``body_hash`` values to the list of symbol addresses that share them. |
| 40 | Enables O(1) ``muse clones`` and ``muse find-symbol hash=``. |
| 41 | |
| 42 | Schema v1:: |
| 43 | |
| 44 | { |
| 45 | "schema_version": 1, |
| 46 | "index": "hash_occurrence", |
| 47 | "updated_at": "2026-03-18T12:00:00+00:00", |
| 48 | "entries": { |
| 49 | "<body_hash>": ["src/billing.py::compute_total", ...] |
| 50 | } |
| 51 | } |
| 52 | |
| 53 | Rebuild |
| 54 | ------- |
| 55 | |
| 56 | Indexes are rebuilt by ``muse index rebuild``. They can also be built |
| 57 | incrementally: the ``update_*_index`` functions accept an existing index |
| 58 | dict and patch it rather than rebuilding from scratch. |
| 59 | """ |
| 60 | from __future__ import annotations |
| 61 | |
| 62 | import datetime |
| 63 | import json |
| 64 | import logging |
| 65 | import pathlib |
| 66 | |
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Location of the index directory, expressed relative to a repository root.
# NOTE(review): nothing in the visible part of this file reads this constant;
# _indices_dir() spells the same ".muse/indices" path out by hand. Confirm
# whether other modules import it before removing or reusing it.
_INDICES_DIR = pathlib.PurePosixPath(".muse") / "indices"

# Value written to the "schema_version" field of every index file saved here.
_SCHEMA_VERSION = 1
| 72 | |
| 73 | |
| 74 | # --------------------------------------------------------------------------- |
| 75 | # Typed index entry shapes (plain ``__slots__`` classes) |
| 76 | # --------------------------------------------------------------------------- |
| 77 | |
| 78 | |
class SymbolHistoryEntry:
    """A single recorded event on one symbol's timeline.

    Plain value holder whose six string attributes correspond one-to-one to
    the keys of an event object in the ``symbol_history`` index file.
    ``__slots__`` is declared, so instances carry no per-instance ``__dict__``.
    """

    __slots__ = (
        "commit_id",
        "committed_at",
        "op",
        "content_id",
        "body_hash",
        "signature_id",
    )

    def __init__(
        self,
        commit_id: str,
        committed_at: str,
        op: str,
        content_id: str,
        body_hash: str,
        signature_id: str,
    ) -> None:
        """Store every argument unchanged on the attribute of the same name."""
        self.commit_id, self.committed_at, self.op = commit_id, committed_at, op
        self.content_id, self.body_hash, self.signature_id = (
            content_id,
            body_hash,
            signature_id,
        )

    def to_dict(self) -> dict[str, str]:
        """Return a new dict mapping each field name to its current value.

        Keys appear in ``__slots__`` declaration order.
        """
        return {
            field: getattr(self, field)
            for field in SymbolHistoryEntry.__slots__
        }

    @classmethod
    def from_dict(cls, d: dict[str, str]) -> "SymbolHistoryEntry":
        """Build an entry from a mapping that holds every field name as a key.

        Keys other than the six field names are ignored.

        Raises:
            KeyError: if any of the six field names is missing from *d*.
        """
        # Fields are looked up in declaration order, so the first missing
        # field is the one reported by the KeyError.
        kwargs = {field: d[field] for field in SymbolHistoryEntry.__slots__}
        return cls(**kwargs)
| 123 | |
| 124 | |
| 125 | # --------------------------------------------------------------------------- |
| 126 | # Index I/O helpers |
| 127 | # --------------------------------------------------------------------------- |
| 128 | |
| 129 | |
def _indices_dir(root: pathlib.Path) -> pathlib.Path:
    """Return the ``.muse/indices`` directory path beneath *root*."""
    return root.joinpath(".muse", "indices")
| 132 | |
| 133 | |
def _index_path(root: pathlib.Path, name: str) -> pathlib.Path:
    """Return the path of the ``<name>.json`` index file beneath *root*."""
    filename = f"{name}.json"
    return _indices_dir(root).joinpath(filename)
| 136 | |
| 137 | |
def _ensure_dir(root: pathlib.Path) -> None:
    """Create the index directory beneath *root*, along with missing parents.

    An already-existing directory is not treated as an error.
    """
    directory = _indices_dir(root)
    directory.mkdir(parents=True, exist_ok=True)
| 140 | |
| 141 | |
def _now_iso() -> str:
    """Return the current time in UTC as a timezone-aware ISO 8601 string."""
    utc_now = datetime.datetime.now(tz=datetime.timezone.utc)
    return utc_now.isoformat()
| 144 | |
| 145 | |
| 146 | # --------------------------------------------------------------------------- |
| 147 | # Symbol history index |
| 148 | # --------------------------------------------------------------------------- |
| 149 | |
| 150 | |
# In-memory shape of the symbol_history index: symbol address
# (e.g. "src/billing.py::compute_total") -> list of that symbol's events.
SymbolHistoryIndex = dict[str, list[SymbolHistoryEntry]]
| 152 | |
| 153 | |
def load_symbol_history(root: pathlib.Path) -> SymbolHistoryIndex:
    """Load the symbol history index from disk.

    The index is an optional accelerator, so any problem with the file
    degrades to an empty index instead of an exception.

    Args:
        root: Directory that contains the ``.muse`` folder.

    Returns:
        Mapping of symbol address to its list of ``SymbolHistoryEntry``
        objects, in the order stored in the file.  Empty when the index file
        is absent, unreadable, not valid JSON, or not laid out as expected;
        every case except "absent" is logged as a warning.
    """
    path = _index_path(root, "symbol_history")
    if not path.exists():
        return {}
    try:
        # Encoding is pinned so the result does not depend on the locale.
        raw = json.loads(path.read_text(encoding="utf-8"))
        return {
            address: [SymbolHistoryEntry.from_dict(e) for e in entries]
            for address, entries in raw.get("entries", {}).items()
        }
    except (OSError, ValueError, KeyError, TypeError, AttributeError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        # TypeError/AttributeError cover JSON that parses but has the wrong
        # shape (top-level list, "entries": null, non-object entries, ...).
        # OSError covers a file that exists but cannot be read.
        logger.warning("⚠️ Corrupt symbol_history index: %s — returning empty", exc)
        return {}
| 168 | |
| 169 | |
def save_symbol_history(root: pathlib.Path, index: SymbolHistoryIndex) -> None:
    """Write *index* to the ``symbol_history`` index file beneath *root*.

    The index directory is created if needed.  Addresses are written in
    sorted order; each address keeps its events in their existing order.
    The file is pretty-printed JSON followed by a trailing newline.
    """
    _ensure_dir(root)

    serialised: dict[str, list[dict[str, str]]] = {}
    for address in sorted(index):
        serialised[address] = [event.to_dict() for event in index[address]]

    payload = {
        "schema_version": _SCHEMA_VERSION,
        "index": "symbol_history",
        "updated_at": _now_iso(),
        "entries": serialised,
    }
    text = json.dumps(payload, indent=2) + "\n"
    _index_path(root, "symbol_history").write_text(text)
    logger.debug("✅ Saved symbol_history index (%d addresses)", len(index))
| 186 | |
| 187 | |
| 188 | # --------------------------------------------------------------------------- |
| 189 | # Hash occurrence index |
| 190 | # --------------------------------------------------------------------------- |
| 191 | |
| 192 | |
# In-memory shape of the hash_occurrence index: body_hash -> list of the
# symbol addresses that share that hash.
HashOccurrenceIndex = dict[str, list[str]]
| 194 | |
| 195 | |
def load_hash_occurrence(root: pathlib.Path) -> HashOccurrenceIndex:
    """Load the hash occurrence index from disk.

    The index is an optional accelerator, so any problem with the file
    degrades to an empty index instead of an exception.

    Args:
        root: Directory that contains the ``.muse`` folder.

    Returns:
        Mapping of ``body_hash`` to a fresh list of symbol addresses.  Empty
        when the index file is absent, unreadable, not valid JSON, or not
        laid out as expected; every case except "absent" is logged as a
        warning.
    """
    path = _index_path(root, "hash_occurrence")
    if not path.exists():
        return {}
    try:
        # Encoding is pinned so the result does not depend on the locale.
        raw = json.loads(path.read_text(encoding="utf-8"))
        result: HashOccurrenceIndex = {}
        for body_hash, addresses in raw.get("entries", {}).items():
            if not isinstance(addresses, list):
                # list() on a string would silently split it into single
                # characters; treat any non-list value as corruption.
                raise TypeError(
                    f"addresses for {body_hash!r} must be a list, "
                    f"got {type(addresses).__name__}"
                )
            result[body_hash] = list(addresses)
        return result
    except (OSError, ValueError, KeyError, TypeError, AttributeError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        # TypeError/AttributeError cover JSON that parses but has the wrong
        # shape.  OSError covers a file that exists but cannot be read.
        logger.warning("⚠️ Corrupt hash_occurrence index: %s — returning empty", exc)
        return {}
| 210 | |
| 211 | |
def save_hash_occurrence(root: pathlib.Path, index: HashOccurrenceIndex) -> None:
    """Write *index* to the ``hash_occurrence`` index file beneath *root*.

    The index directory is created if needed.  Hashes are written in sorted
    order and each hash's address list is sorted as well.  The file is
    pretty-printed JSON followed by a trailing newline.
    """
    _ensure_dir(root)

    ordered: dict[str, list[str]] = {}
    for body_hash in sorted(index):
        ordered[body_hash] = sorted(index[body_hash])

    payload = {
        "schema_version": _SCHEMA_VERSION,
        "index": "hash_occurrence",
        "updated_at": _now_iso(),
        "entries": ordered,
    }
    text = json.dumps(payload, indent=2) + "\n"
    _index_path(root, "hash_occurrence").write_text(text)
    logger.debug("✅ Saved hash_occurrence index (%d hashes)", len(index))
| 224 | |
| 225 | |
| 226 | # --------------------------------------------------------------------------- |
| 227 | # Index metadata |
| 228 | # --------------------------------------------------------------------------- |
| 229 | |
| 230 | |
def index_info(root: pathlib.Path) -> list[dict[str, str]]:
    """Return status information about all known indexes.

    Args:
        root: Directory that contains the ``.muse`` folder.

    Returns:
        One dict per known index (``symbol_history``, then
        ``hash_occurrence``) with string values under these keys:

        * ``name`` - the index name.
        * ``status`` - ``"present"``, ``"absent"``, or ``"corrupt"``.
        * ``updated_at`` - the file's ``updated_at`` field (``"unknown"``
          when the field is missing); ``""`` for absent or corrupt indexes.
        * ``entries`` - number of top-level entries, as a decimal string;
          ``"0"`` for absent or corrupt indexes.

        A file that exists but cannot be read, is not valid JSON, or is not
        laid out as expected is reported as ``"corrupt"`` rather than raising.
    """
    result: list[dict[str, str]] = []
    for name in ("symbol_history", "hash_occurrence"):
        path = _index_path(root, name)
        if not path.exists():
            result.append({"name": name, "status": "absent", "updated_at": "", "entries": "0"})
            continue
        try:
            # Encoding is pinned so the result does not depend on the locale.
            raw = json.loads(path.read_text(encoding="utf-8"))
            updated_at = raw.get("updated_at", "unknown")
            entry_count = len(raw.get("entries", {}))
        except (OSError, ValueError, TypeError, AttributeError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            # TypeError/AttributeError cover JSON that parses but has the
            # wrong shape (e.g. a top-level list or "entries": null).
            result.append({"name": name, "status": "corrupt", "updated_at": "", "entries": "0"})
            continue
        result.append({
            "name": name,
            "status": "present",
            # str() keeps the declared dict[str, str] shape even if the file
            # stored a non-string value here.
            "updated_at": str(updated_at),
            "entries": str(entry_count),
        })
    return result