gabriel / muse public
test_toml_adapter.py python
509 lines 19.9 KB
9d49af7a feat: semantic TOML and Markdown adapters for the code domain plugin Gabriel Cardona <gabriel@tellurstori.com> 1d ago
1 """Tests for the TOML language adapter (TomlAdapter) in ast_parser.py.
2
3 Coverage
4 --------
5 - Extension routing via :func:`adapter_for_path`.
6 - Symbol extraction: scalars, tables, nested tables, array-of-tables.
7 - Edge cases: empty file, comments-only, invalid TOML, mixed lists.
8 - Semantic content IDs: comment-insensitive, key-order-insensitive,
9 whitespace-insensitive, date-stable.
10 - Rename detection via ``body_hash``.
11 - ``canonical_key`` uniqueness within a snapshot.
12 - Depth limit: symbols beyond ``_MAX_DEPTH`` are not emitted.
13 - Real-world ``pyproject.toml``-shaped fixture.
14 """
15
16 from __future__ import annotations
17
18 import pytest
19
20 from muse.plugins.code.ast_parser import (
21 FallbackAdapter,
22 TomlAdapter,
23 adapter_for_path,
24 )
25
26
27 # ---------------------------------------------------------------------------
28 # Fixtures
29 # ---------------------------------------------------------------------------
30
31
@pytest.fixture()
def adapter() -> TomlAdapter:
    """Provide the requesting test with its own newly built ``TomlAdapter``."""
    toml_adapter = TomlAdapter()
    return toml_adapter
36
37
38 # ---------------------------------------------------------------------------
39 # Extension routing
40 # ---------------------------------------------------------------------------
41
42
class TestExtensionRouting:
    """``.toml`` paths resolve to ``TomlAdapter``; other extensions do not."""

    def test_supported_extensions_contains_toml(self, adapter: TomlAdapter) -> None:
        """The adapter advertises the ``.toml`` extension."""
        extensions = adapter.supported_extensions()
        assert ".toml" in extensions

    def test_supported_extensions_is_toml_only(self, adapter: TomlAdapter) -> None:
        """``.toml`` is the only extension the adapter advertises."""
        toml_only = frozenset({".toml"})
        assert adapter.supported_extensions() == toml_only

    def test_adapter_for_path_flat(self) -> None:
        """A bare file name ending in ``.toml`` is routed to ``TomlAdapter``."""
        resolved = adapter_for_path("pyproject.toml")
        assert isinstance(resolved, TomlAdapter)

    def test_adapter_for_path_nested(self) -> None:
        """A ``.toml`` file inside a directory is routed to ``TomlAdapter``."""
        resolved = adapter_for_path("config/settings.toml")
        assert isinstance(resolved, TomlAdapter)

    def test_adapter_for_path_does_not_match_py(self) -> None:
        """A ``.py`` path must not be routed to ``TomlAdapter``."""
        resolved = adapter_for_path("main.py")
        assert not isinstance(resolved, TomlAdapter)

    def test_adapter_for_path_does_not_match_yaml(self) -> None:
        """A ``.yaml`` path is expected to land on ``FallbackAdapter``."""
        # This test expects .yaml to have no dedicated adapter of its own.
        resolved = adapter_for_path("config.yaml")
        assert isinstance(resolved, FallbackAdapter)
64
65
66 # ---------------------------------------------------------------------------
67 # Scalar key-value pairs → variable symbols
68 # ---------------------------------------------------------------------------
69
70
class TestScalarSymbols:
    """Non-table TOML values are surfaced as symbols of kind ``variable``."""

    def test_string_value(self, adapter: TomlAdapter) -> None:
        """A string value yields a variable whose names equal the key."""
        symbols = adapter.parse_symbols(b'name = "muse"\n', "p.toml")
        assert "p.toml::name" in symbols
        entry = symbols["p.toml::name"]
        assert entry["kind"] == "variable"
        assert entry["name"] == "name"
        assert entry["qualified_name"] == "name"

    def test_integer_value(self, adapter: TomlAdapter) -> None:
        """An integer value yields a variable symbol."""
        symbols = adapter.parse_symbols(b"port = 8080\n", "cfg.toml")
        assert "cfg.toml::port" in symbols
        assert symbols["cfg.toml::port"]["kind"] == "variable"

    def test_boolean_value(self, adapter: TomlAdapter) -> None:
        """A boolean value yields a variable symbol."""
        symbols = adapter.parse_symbols(b"strict = true\n", "mypy.toml")
        assert "mypy.toml::strict" in symbols
        assert symbols["mypy.toml::strict"]["kind"] == "variable"

    def test_float_value(self, adapter: TomlAdapter) -> None:
        """A float value yields a symbol under its key."""
        symbols = adapter.parse_symbols(b"threshold = 0.95\n", "c.toml")
        assert "c.toml::threshold" in symbols

    def test_list_of_strings_is_variable(self, adapter: TomlAdapter) -> None:
        """A list whose elements are not all dicts is treated as a variable."""
        symbols = adapter.parse_symbols(b'deps = ["typer", "mido"]\n', "p.toml")
        assert "p.toml::deps" in symbols
        assert symbols["p.toml::deps"]["kind"] == "variable"

    def test_multiple_top_level_scalars(self, adapter: TomlAdapter) -> None:
        """Every top-level scalar key gets its own symbol entry."""
        toml_bytes = b'name = "foo"\nversion = "1.0"\nbuild = 42\n'
        symbols = adapter.parse_symbols(toml_bytes, "p.toml")
        for address in ("p.toml::name", "p.toml::version", "p.toml::build"):
            assert address in symbols
113
114
115 # ---------------------------------------------------------------------------
116 # Tables → section symbols
117 # ---------------------------------------------------------------------------
118
119
class TestTableSymbols:
    """Tables become ``section`` symbols; scalars inside them become ``variable``."""

    def test_simple_table_section(self, adapter: TomlAdapter) -> None:
        """A ``[project]`` header yields a section symbol."""
        symbols = adapter.parse_symbols(b'[project]\nname = "muse"\n', "p.toml")
        assert "p.toml::project" in symbols
        assert symbols["p.toml::project"]["kind"] == "section"

    def test_simple_table_child_variable(self, adapter: TomlAdapter) -> None:
        """A key inside a table is addressed with a dotted qualified name."""
        symbols = adapter.parse_symbols(b'[project]\nname = "muse"\n', "p.toml")
        child_address = "p.toml::project.name"
        assert child_address in symbols
        assert symbols[child_address]["kind"] == "variable"
        assert symbols[child_address]["qualified_name"] == "project.name"

    def test_nested_table_emits_all_levels(self, adapter: TomlAdapter) -> None:
        """Each level of a dotted table header gets a symbol."""
        symbols = adapter.parse_symbols(b"[tool.mypy]\nstrict = true\n", "p.toml")
        # [tool] never appears as a header of its own, yet it must be emitted.
        assert "p.toml::tool" in symbols
        assert "p.toml::tool.mypy" in symbols
        assert "p.toml::tool.mypy.strict" in symbols

    def test_table_name_preserves_hyphens(self, adapter: TomlAdapter) -> None:
        """Hyphens in a table name are kept verbatim in the symbol address."""
        symbols = adapter.parse_symbols(b"[build-system]\nrequires = []\n", "p.toml")
        assert "p.toml::build-system" in symbols
        assert symbols["p.toml::build-system"]["kind"] == "section"

    def test_multiple_sibling_tables(self, adapter: TomlAdapter) -> None:
        """Sibling tables and their children are all emitted."""
        toml_bytes = b"[a]\nx = 1\n\n[b]\ny = 2\n"
        symbols = adapter.parse_symbols(toml_bytes, "c.toml")
        for address in ("c.toml::a", "c.toml::b", "c.toml::a.x", "c.toml::b.y"):
            assert address in symbols

    def test_file_path_used_as_prefix(self, adapter: TomlAdapter) -> None:
        """The path handed to ``parse_symbols`` prefixes every symbol address."""
        toml_bytes = b'[project]\nname = "x"\n'
        symbols = adapter.parse_symbols(toml_bytes, "sub/dir/p.toml")
        assert "sub/dir/p.toml::project" in symbols
        assert "sub/dir/p.toml::project.name" in symbols
163
164
165 # ---------------------------------------------------------------------------
166 # Array of tables → indexed section symbols
167 # ---------------------------------------------------------------------------
168
169
class TestArrayOfTableSymbols:
    """Each ``[[array.of.tables]]`` entry becomes an index-suffixed ``section``."""

    def test_single_entry(self, adapter: TomlAdapter) -> None:
        """A lone entry is addressed with index ``[0]``."""
        symbols = adapter.parse_symbols(b"[[servers]]\nname = 'alpha'\n", "c.toml")
        assert "c.toml::servers[0]" in symbols
        assert symbols["c.toml::servers[0]"]["kind"] == "section"

    def test_multiple_entries_indexed(self, adapter: TomlAdapter) -> None:
        """Two entries of one array are addressed as ``[0]`` and ``[1]``."""
        toml_bytes = (
            b"[[tool.mypy.overrides]]\n"
            b"module = ['mido']\n"
            b"ignore_missing_imports = true\n\n"
            b"[[tool.mypy.overrides]]\n"
            b"module = ['tree_sitter']\n"
            b"ignore_missing_imports = true\n"
        )
        symbols = adapter.parse_symbols(toml_bytes, "p.toml")
        for index in (0, 1):
            assert f"p.toml::tool.mypy.overrides[{index}]" in symbols

    def test_array_entry_children_emitted(self, adapter: TomlAdapter) -> None:
        """Keys inside an array entry hang off the indexed address."""
        toml_bytes = b"[[servers]]\nname = 'alpha'\nport = 8080\n"
        symbols = adapter.parse_symbols(toml_bytes, "c.toml")
        assert "c.toml::servers[0].name" in symbols
        assert "c.toml::servers[0].port" in symbols

    def test_different_entries_different_content_ids(
        self, adapter: TomlAdapter
    ) -> None:
        """Entries holding different values must not share a ``content_id``."""
        toml_bytes = (
            b"[[deps]]\nname = 'typer'\n\n"
            b"[[deps]]\nname = 'mido'\n"
        )
        symbols = adapter.parse_symbols(toml_bytes, "p.toml")
        first_id = symbols["p.toml::deps[0]"]["content_id"]
        second_id = symbols["p.toml::deps[1]"]["content_id"]
        assert first_id != second_id
210
211
212 # ---------------------------------------------------------------------------
213 # Edge cases
214 # ---------------------------------------------------------------------------
215
216
class TestEdgeCases:
    """Degenerate or malformed input must yield a result rather than raise."""

    def test_empty_file(self, adapter: TomlAdapter) -> None:
        """Zero bytes of input produce no symbols."""
        symbols = adapter.parse_symbols(b"", "e.toml")
        assert symbols == {}

    def test_comments_only(self, adapter: TomlAdapter) -> None:
        """A file holding nothing but comments produces no symbols."""
        toml_bytes = b"# This is a comment\n# Another\n"
        symbols = adapter.parse_symbols(toml_bytes, "c.toml")
        assert symbols == {}

    def test_invalid_toml_returns_empty(self, adapter: TomlAdapter) -> None:
        """Unparseable input produces an empty mapping."""
        symbols = adapter.parse_symbols(b"invalid toml [[[[\n", "bad.toml")
        assert symbols == {}

    def test_duplicate_key_invalid_toml(self, adapter: TomlAdapter) -> None:
        """TOML forbids duplicate keys, so the adapter is expected to return {}."""
        toml_bytes = b'name = "a"\nname = "b"\n'
        symbols = adapter.parse_symbols(toml_bytes, "dup.toml")
        assert symbols == {}

    def test_empty_table(self, adapter: TomlAdapter) -> None:
        """A table header with no keys under it still yields its section."""
        symbols = adapter.parse_symbols(b"[project]\n", "p.toml")
        # No children exist here; only the section itself is checked.
        assert "p.toml::project" in symbols

    def test_mixed_list_is_variable(self, adapter: TomlAdapter) -> None:
        """A list mixing dicts and scalars is treated as a variable, not a section."""
        toml_bytes = b"mixed = [1, {key = 'val'}]\n"
        symbols = adapter.parse_symbols(toml_bytes, "m.toml")
        assert "m.toml::mixed" in symbols
        assert symbols["m.toml::mixed"]["kind"] == "variable"
248
249
250 # ---------------------------------------------------------------------------
251 # Semantic content ID (file_content_id)
252 # ---------------------------------------------------------------------------
253
254
class TestFileContentID:
    """``file_content_id`` is repeatable and tracks meaning, not formatting."""

    def test_same_content_same_id(self, adapter: TomlAdapter) -> None:
        """Hashing the same bytes twice gives the same ID."""
        toml_bytes = b'[project]\nname = "muse"\n'
        first = adapter.file_content_id(toml_bytes)
        second = adapter.file_content_id(toml_bytes)
        assert first == second

    def test_different_value_different_id(self, adapter: TomlAdapter) -> None:
        """Changing a value changes the ID."""
        before = adapter.file_content_id(b'name = "muse"\n')
        after = adapter.file_content_id(b'name = "musehub"\n')
        assert before != after

    def test_comment_insensitive(self, adapter: TomlAdapter) -> None:
        """A leading comment leaves the ID unchanged."""
        plain = adapter.file_content_id(b'name = "muse"\n')
        commented = adapter.file_content_id(b'# A leading comment\nname = "muse"\n')
        assert plain == commented

    def test_key_order_insensitive(self, adapter: TomlAdapter) -> None:
        """Swapping two top-level keys leaves the ID unchanged."""
        name_first = adapter.file_content_id(b'name = "muse"\nversion = "1.0"\n')
        version_first = adapter.file_content_id(b'version = "1.0"\nname = "muse"\n')
        assert name_first == version_first

    def test_whitespace_insensitive(self, adapter: TomlAdapter) -> None:
        """Spaces around ``=`` leave the ID unchanged."""
        compact = adapter.file_content_id(b'name="muse"\n')
        spaced = adapter.file_content_id(b'name = "muse"\n')
        assert compact == spaced

    def test_invalid_toml_falls_back_to_raw_hash(self, adapter: TomlAdapter) -> None:
        """Malformed TOML must not raise; a 64-character string still comes back."""
        digest = adapter.file_content_id(b"invalid [[[[\n")
        assert isinstance(digest, str)
        # 64 characters is the length of a SHA-256 hex digest.
        assert len(digest) == 64

    def test_returns_hex_string(self, adapter: TomlAdapter) -> None:
        """The ID consists of exactly 64 lowercase hexadecimal characters."""
        digest = adapter.file_content_id(b'[project]\nname = "muse"\n')
        hex_alphabet = "0123456789abcdef"
        assert all(char in hex_alphabet for char in digest)
        assert len(digest) == 64
294
295
296 # ---------------------------------------------------------------------------
297 # Per-symbol content IDs and rename detection
298 # ---------------------------------------------------------------------------
299
300
class TestSymbolContentIDs:
    """Per-symbol ``content_id`` and ``body_hash`` behave as rename detection needs."""

    def test_content_id_changes_on_value_change(self, adapter: TomlAdapter) -> None:
        """Editing a scalar changes that scalar's ``content_id``."""
        before = adapter.parse_symbols(b'[project]\nname = "muse"\n', "p.toml")
        after = adapter.parse_symbols(b'[project]\nname = "musehub"\n', "p.toml")
        address = "p.toml::project.name"
        assert before[address]["content_id"] != after[address]["content_id"]

    def test_body_hash_same_for_same_value_different_key(
        self, adapter: TomlAdapter
    ) -> None:
        """Rename detection: same scalar value under different keys → same body_hash."""
        toml_bytes = b'[a]\nfoo = "bar"\n\n[b]\nbaz = "bar"\n'
        symbols = adapter.parse_symbols(toml_bytes, "c.toml")
        foo_hash = symbols["c.toml::a.foo"]["body_hash"]
        baz_hash = symbols["c.toml::b.baz"]["body_hash"]
        assert foo_hash == baz_hash

    def test_body_hash_differs_for_different_values(
        self, adapter: TomlAdapter
    ) -> None:
        """Two keys holding different values get different ``body_hash`` values."""
        symbols = adapter.parse_symbols(b'x = "hello"\ny = "world"\n', "c.toml")
        x_hash = symbols["c.toml::x"]["body_hash"]
        y_hash = symbols["c.toml::y"]["body_hash"]
        assert x_hash != y_hash

    def test_table_content_id_stable_across_key_order(
        self, adapter: TomlAdapter
    ) -> None:
        """Table content_id is stable regardless of internal key order."""
        name_first = adapter.parse_symbols(
            b'[project]\nname = "muse"\nversion = "1.0"\n', "p.toml"
        )
        version_first = adapter.parse_symbols(
            b'[project]\nversion = "1.0"\nname = "muse"\n', "p.toml"
        )
        address = "p.toml::project"
        assert (
            name_first[address]["content_id"]
            == version_first[address]["content_id"]
        )

    def test_section_content_id_changes_when_child_changes(
        self, adapter: TomlAdapter
    ) -> None:
        """Editing a child value changes the enclosing section's ``content_id``."""
        before = adapter.parse_symbols(b'[project]\nname = "muse"\n', "p.toml")
        after = adapter.parse_symbols(b'[project]\nname = "musehub"\n', "p.toml")
        address = "p.toml::project"
        assert before[address]["content_id"] != after[address]["content_id"]
353
354
355 # ---------------------------------------------------------------------------
356 # canonical_key uniqueness
357 # ---------------------------------------------------------------------------
358
359
class TestCanonicalKeyUniqueness:
    """No two symbols parsed from one source may share a ``canonical_key``."""

    def test_flat_keys_unique(self, adapter: TomlAdapter) -> None:
        """Top-level scalars all carry distinct canonical keys."""
        symbols = adapter.parse_symbols(b'a = 1\nb = 2\nc = 3\n', "c.toml")
        canonical = [entry["canonical_key"] for entry in symbols.values()]
        # Collapsing into a set drops duplicates, so equal sizes mean none exist.
        assert len(canonical) == len(set(canonical))

    def test_mixed_tables_and_scalars_unique(self, adapter: TomlAdapter) -> None:
        """Scalars, tables and nested tables all carry distinct canonical keys."""
        toml_bytes = (
            b'name = "muse"\n'
            b"[project]\n"
            b'version = "1.0"\n'
            b"[tool.mypy]\n"
            b"strict = true\n"
        )
        symbols = adapter.parse_symbols(toml_bytes, "p.toml")
        canonical = [entry["canonical_key"] for entry in symbols.values()]
        assert len(canonical) == len(set(canonical))

    def test_array_of_tables_entries_unique(self, adapter: TomlAdapter) -> None:
        """Entries of one array of tables all carry distinct canonical keys."""
        toml_bytes = (
            b"[[overrides]]\nmodule = 'a'\n\n"
            b"[[overrides]]\nmodule = 'b'\n\n"
            b"[[overrides]]\nmodule = 'c'\n"
        )
        symbols = adapter.parse_symbols(toml_bytes, "p.toml")
        canonical = [entry["canonical_key"] for entry in symbols.values()]
        assert len(canonical) == len(set(canonical))
390
391
392 # ---------------------------------------------------------------------------
393 # Depth limit
394 # ---------------------------------------------------------------------------
395
396
class TestDepthLimit:
    """Symbols nested deeper than ``_MAX_DEPTH`` (stated here as 6) are dropped."""

    def test_max_depth_not_exceeded(self, adapter: TomlAdapter) -> None:
        """A key under a seven-level table header must not be emitted."""
        # The dotted header below names seven nested tables, a through g.
        toml_bytes = b"[a.b.c.d.e.f.g]\nkey = 'val'\n"
        symbols = adapter.parse_symbols(toml_bytes, "d.toml")
        # Shallow ancestors must be present; only the first three are checked.
        for address in ("d.toml::a", "d.toml::a.b", "d.toml::a.b.c"):
            assert address in symbols
        # The key inside the seventh-level table is expected to be cut off.
        assert "d.toml::a.b.c.d.e.f.g.key" not in symbols

    def test_within_depth_limit_emitted(self, adapter: TomlAdapter) -> None:
        """A key under a three-level table header is emitted."""
        symbols = adapter.parse_symbols(b"[a.b.c]\nkey = 'val'\n", "d.toml")
        assert "d.toml::a.b.c.key" in symbols
416
417
418 # ---------------------------------------------------------------------------
419 # Real-world pyproject.toml shape
420 # ---------------------------------------------------------------------------
421
422
class TestRealWorldShape:
    """Validate symbol extraction against a realistic pyproject.toml structure."""

    # Fixture shaped like a real pyproject.toml: plain tables, a nested table,
    # a hyphenated table name, and a two-entry array of tables.
    _PYPROJECT = b"""
[project]
name = "muse"
version = "0.1.5"
description = "Domain-agnostic version control"

[project.scripts]
muse = "muse.cli.app:main"

[build-system]
requires = ["hatchling>=1.29.0"]
build-backend = "hatchling.build"

[tool.mypy]
python_version = "3.14"
strict = true

[[tool.mypy.overrides]]
module = ["mido"]
ignore_missing_imports = true

[[tool.mypy.overrides]]
module = ["tree_sitter"]
ignore_missing_imports = true
"""

    def test_top_level_sections_present(self, adapter: TomlAdapter) -> None:
        """Every table in the fixture, including implicit ``tool``, is emitted."""
        syms = adapter.parse_symbols(self._PYPROJECT, "pyproject.toml")
        assert "pyproject.toml::project" in syms
        assert "pyproject.toml::build-system" in syms
        assert "pyproject.toml::tool" in syms
        assert "pyproject.toml::tool.mypy" in syms

    def test_scalar_children_present(self, adapter: TomlAdapter) -> None:
        """Scalar keys under ``[project]`` and ``[tool.mypy]`` are emitted."""
        syms = adapter.parse_symbols(self._PYPROJECT, "pyproject.toml")
        assert "pyproject.toml::project.name" in syms
        assert "pyproject.toml::project.version" in syms
        assert "pyproject.toml::project.description" in syms
        assert "pyproject.toml::tool.mypy.strict" in syms

    def test_nested_table_present(self, adapter: TomlAdapter) -> None:
        """The nested ``[project.scripts]`` table is emitted."""
        syms = adapter.parse_symbols(self._PYPROJECT, "pyproject.toml")
        assert "pyproject.toml::project.scripts" in syms

    def test_array_of_tables_indexed(self, adapter: TomlAdapter) -> None:
        """Both ``[[tool.mypy.overrides]]`` entries are emitted with indices."""
        syms = adapter.parse_symbols(self._PYPROJECT, "pyproject.toml")
        assert "pyproject.toml::tool.mypy.overrides[0]" in syms
        assert "pyproject.toml::tool.mypy.overrides[1]" in syms

    def test_all_canonical_keys_unique(self, adapter: TomlAdapter) -> None:
        """No two symbols of the fixture share a ``canonical_key``."""
        syms = adapter.parse_symbols(self._PYPROJECT, "pyproject.toml")
        keys = [s["canonical_key"] for s in syms.values()]
        assert len(keys) == len(set(keys)), "Duplicate canonical_keys detected"

    def test_comment_and_reorder_stable_file_id(self, adapter: TomlAdapter) -> None:
        """Adding a comment or reordering keys must not change file_content_id."""
        baseline_id = adapter.file_content_id(self._PYPROJECT)

        src_with_comment = b'# Top comment\n' + self._PYPROJECT
        assert baseline_id == adapter.file_content_id(src_with_comment)

        # Swap two adjacent keys inside [project] so the reorder half of the
        # contract is exercised as well as the comment half.
        src_reordered = self._PYPROJECT.replace(
            b'name = "muse"\nversion = "0.1.5"\n',
            b'version = "0.1.5"\nname = "muse"\n',
        )
        # Guard against a silent no-op should the fixture text ever change.
        assert src_reordered != self._PYPROJECT
        assert baseline_id == adapter.file_content_id(src_reordered)

    def test_version_change_detected(self, adapter: TomlAdapter) -> None:
        """A version bump changes the file ID and the version symbol's ID."""
        v1 = self._PYPROJECT
        v2 = v1.replace(b'version = "0.1.5"', b'version = "0.2.0"')
        assert adapter.file_content_id(v1) != adapter.file_content_id(v2)
        syms1 = adapter.parse_symbols(v1, "pyproject.toml")
        syms2 = adapter.parse_symbols(v2, "pyproject.toml")
        assert (
            syms1["pyproject.toml::project.version"]["content_id"]
            != syms2["pyproject.toml::project.version"]["content_id"]
        )

    def test_unrelated_section_change_does_not_affect_other_content_id(
        self, adapter: TomlAdapter
    ) -> None:
        """Changing [tool.mypy] must not change project.name content_id."""
        v1 = self._PYPROJECT
        v2 = v1.replace(b"strict = true", b"strict = false")
        syms1 = adapter.parse_symbols(v1, "pyproject.toml")
        syms2 = adapter.parse_symbols(v2, "pyproject.toml")
        assert (
            syms1["pyproject.toml::project.name"]["content_id"]
            == syms2["pyproject.toml::project.name"]["content_id"]
        )