"""Tests for muse.core.xml_safe — defusedxml typed adapter. These tests verify that: 1. SafeET.parse() correctly parses well-formed XML / MusicXML files. 2. SafeET.parse() blocks XML entity expansion attacks (Billion Laughs). 3. SafeET.parse() blocks external entity injection (XXE). 4. The returned ElementTree is a standard stdlib ElementTree instance. 5. The ParseError, Element, and ElementTree types are correctly re-exported. """ from __future__ import annotations import pathlib import xml.etree.ElementTree as StdET import pytest from muse.core.xml_safe import SafeET # --------------------------------------------------------------------------- # Helpers — test XML file factories # --------------------------------------------------------------------------- def _write(path: pathlib.Path, content: str) -> pathlib.Path: path.write_text(content, encoding="utf-8") return path def _minimal_musicxml(tmp_path: pathlib.Path) -> pathlib.Path: xml = """\ C44 """ return _write(tmp_path / "score.xml", xml) def _billion_laughs_xml(tmp_path: pathlib.Path) -> pathlib.Path: """Classic entity expansion DoS payload (Billion Laughs).""" xml = """\ ]> &lol4; """ return _write(tmp_path / "billion_laughs.xml", xml) def _xxe_file_xml(tmp_path: pathlib.Path) -> pathlib.Path: """External entity reference attempting to read /etc/passwd.""" xml = """\ ]> &xxe; """ return _write(tmp_path / "xxe.xml", xml) def _external_dtd_xml(tmp_path: pathlib.Path) -> pathlib.Path: """DTD pulled from an external URL — should be forbidden.""" xml = """\ data """ return _write(tmp_path / "ext_dtd.xml", xml) # --------------------------------------------------------------------------- # Happy path # --------------------------------------------------------------------------- class TestSafeETParse: def test_parses_minimal_musicxml(self, tmp_path: pathlib.Path) -> None: path = _minimal_musicxml(tmp_path) tree = SafeET.parse(path) assert tree is not None def test_returns_element_tree_instance(self, tmp_path: pathlib.Path) -> None: path = _minimal_musicxml(tmp_path) tree = SafeET.parse(path) assert isinstance(tree, StdET.ElementTree) def test_getroot_returns_element(self, tmp_path: pathlib.Path) -> None: path = _minimal_musicxml(tmp_path) tree = SafeET.parse(path) root = tree.getroot() assert root is not None assert root.tag == "score-partwise" def test_find_works_on_result(self, tmp_path: pathlib.Path) -> None: path = _minimal_musicxml(tmp_path) tree = SafeET.parse(path) root = tree.getroot() assert root is not None note = root.find(".//note") assert note is not None def test_accepts_str_path(self, tmp_path: pathlib.Path) -> None: path = _minimal_musicxml(tmp_path) tree = SafeET.parse(str(path)) assert tree.getroot() is not None def test_accepts_pathlib_path(self, tmp_path: pathlib.Path) -> None: path = _minimal_musicxml(tmp_path) tree = SafeET.parse(path) assert tree.getroot() is not None def test_nonexistent_file_raises(self, tmp_path: pathlib.Path) -> None: with pytest.raises((FileNotFoundError, StdET.ParseError)): SafeET.parse(tmp_path / "nonexistent.xml") # --------------------------------------------------------------------------- # Security — attack XML must be blocked # --------------------------------------------------------------------------- class TestSafeETSecurity: def test_billion_laughs_is_blocked(self, tmp_path: pathlib.Path) -> None: """Entity expansion (Billion Laughs DoS) must be rejected by defusedxml.""" path = _billion_laughs_xml(tmp_path) with pytest.raises(Exception): SafeET.parse(path) def test_xxe_is_blocked(self, tmp_path: pathlib.Path) -> None: """External entity reference (XXE credential theft) must be rejected.""" path = _xxe_file_xml(tmp_path) with pytest.raises(Exception): SafeET.parse(path) def test_external_dtd_reference_does_not_fetch(self, tmp_path: pathlib.Path) -> None: """An external DTD reference in the DOCTYPE must not trigger a network request. defusedxml either raises or parses without fetching; in both cases no network call should occur. We verify there is no connection attempt by relying on the offline test environment — if defusedxml silently ignores the SYSTEM reference the parse can succeed (the DTD is not actually applied), which is also acceptable. """ path = _external_dtd_xml(tmp_path) # defusedxml may raise or succeed — both are safe outcomes. # What is never acceptable: fetching the remote URL. try: SafeET.parse(path) except Exception: pass # Blocking the DTD by raising is the strictest safe outcome. # --------------------------------------------------------------------------- # Type re-exports # --------------------------------------------------------------------------- class TestSafeETReexports: def test_parse_error_is_xml_parse_error(self) -> None: """SafeET.ParseError must be the stdlib ParseError for generic catching.""" assert SafeET.ParseError is StdET.ParseError def test_element_is_xml_element(self) -> None: assert SafeET.Element is StdET.Element def test_element_tree_is_xml_element_tree(self) -> None: assert SafeET.ElementTree is StdET.ElementTree def test_parse_method_exists(self) -> None: assert callable(SafeET.parse)