cgcardona / muse public
test_core_validation.py python
555 lines 18.3 KB
368bcde6 Add security test coverage and reference documentation Gabriel Cardona <gabriel@tellurstori.com> 9h ago
1 """Tests for muse.core.validation — all trust-boundary primitives.
2
3 Every function in the validation module operates on untrusted input and must
4 either return a safe value or raise ValueError / TypeError with a descriptive
5 message. These tests verify correctness of the allow-lists, reject-lists, and
6 edge cases for each guard.
7 """
8
9 from __future__ import annotations
10
11 import math
12 import pathlib
13
14 import pytest
15
16 from muse.core.validation import (
17 MAX_FILE_BYTES,
18 MAX_RESPONSE_BYTES,
19 MAX_SYSEX_BYTES,
20 clamp_int,
21 contain_path,
22 finite_float,
23 sanitize_display,
24 sanitize_glob_prefix,
25 validate_branch_name,
26 validate_domain_name,
27 validate_object_id,
28 validate_ref_id,
29 validate_repo_id,
30 )
31
32
33 # ---------------------------------------------------------------------------
34 # Constants
35 # ---------------------------------------------------------------------------
36
37
class TestConstants:
    """Pin the exported size limits to their expected byte counts."""

    def test_max_file_bytes_is_256mb(self) -> None:
        mebibyte = 1024 * 1024
        assert MAX_FILE_BYTES == 256 * mebibyte

    def test_max_response_bytes_is_64mb(self) -> None:
        mebibyte = 1024 * 1024
        assert MAX_RESPONSE_BYTES == 64 * mebibyte

    def test_max_sysex_bytes_is_64kib(self) -> None:
        kibibyte = 1024
        assert MAX_SYSEX_BYTES == 64 * kibibyte
47
48
49 # ---------------------------------------------------------------------------
50 # validate_object_id
51 # ---------------------------------------------------------------------------
52
53
class TestValidateObjectId:
    """Object IDs: exactly 64 lowercase hex characters pass, anything else
    must raise ValueError."""

    # --- accepted IDs ---

    def test_valid_all_zeros(self) -> None:
        candidate = "0" * 64
        returned = validate_object_id(candidate)
        assert returned == candidate

    def test_valid_all_lowercase_hex(self) -> None:
        candidate = "a" * 64
        returned = validate_object_id(candidate)
        assert returned == candidate

    def test_valid_mixed_hex(self) -> None:
        candidate = "deadbeef" * 8
        returned = validate_object_id(candidate)
        assert returned == candidate

    def test_returns_same_string(self) -> None:
        candidate = "f" * 64
        # Identity, not equality: the validator is expected to hand back the
        # very object it was given rather than a copy.
        assert validate_object_id(candidate) is candidate

    # --- rejected IDs ---

    def test_rejects_uppercase(self) -> None:
        shouting = "A" * 64
        with pytest.raises(ValueError, match="64 lowercase hex"):
            validate_object_id(shouting)

    def test_rejects_63_chars(self) -> None:
        one_short = "a" * 63
        with pytest.raises(ValueError):
            validate_object_id(one_short)

    def test_rejects_65_chars(self) -> None:
        one_over = "a" * 65
        with pytest.raises(ValueError):
            validate_object_id(one_over)

    def test_rejects_empty_string(self) -> None:
        with pytest.raises(ValueError):
            validate_object_id("")

    def test_rejects_non_hex_chars(self) -> None:
        # Correct length, but the leading 'g' is outside the hex alphabet.
        candidate = "g" + "a" * 63
        with pytest.raises(ValueError):
            validate_object_id(candidate)

    def test_rejects_path_traversal_string(self) -> None:
        # 16 traversal characters + 48 hex characters = 64, so the length is
        # right and only the character set can trigger the rejection.
        candidate = "../evil/../path/" + "a" * 48
        with pytest.raises(ValueError):
            validate_object_id(candidate)

    def test_rejects_null_byte_in_id(self) -> None:
        candidate = "\x00" * 64
        with pytest.raises(ValueError):
            validate_object_id(candidate)
102
103
104
105 # ---------------------------------------------------------------------------
106 # validate_ref_id
107 # ---------------------------------------------------------------------------
108
109
class TestValidateRefId:
    """Ref IDs are checked against the same 64-char lowercase-hex cases used
    for object IDs; the error text must mention "ref ID"."""

    def test_valid_commit_id(self) -> None:
        commit = "b" * 64
        returned = validate_ref_id(commit)
        assert returned == commit

    def test_rejects_short_id(self) -> None:
        abbreviated = "abc123"
        with pytest.raises(ValueError):
            validate_ref_id(abbreviated)

    def test_rejects_uppercase(self) -> None:
        shouting = "B" * 64
        with pytest.raises(ValueError):
            validate_ref_id(shouting)

    def test_error_message_mentions_ref_id(self) -> None:
        # The message should name the kind of ID that failed validation.
        with pytest.raises(ValueError, match="ref ID"):
            validate_ref_id("short")
128
129
130 # ---------------------------------------------------------------------------
131 # validate_branch_name
132 # ---------------------------------------------------------------------------
133
134
class TestValidateBranchName:
    """Branch-name guard: Git-style names (forward slashes included) are
    returned unchanged; backslashes, control characters, and misplaced dots
    or slashes raise ValueError."""

    # --- names that must be accepted ---

    def test_simple_name(self) -> None:
        name = "main"
        assert validate_branch_name(name) == name

    def test_dev_branch(self) -> None:
        name = "dev"
        assert validate_branch_name(name) == name

    def test_feature_slash_style(self) -> None:
        name = "feature/my-branch"
        assert validate_branch_name(name) == name

    def test_fix_slash_style(self) -> None:
        name = "fix/auth-token-exposure"
        assert validate_branch_name(name) == name

    def test_nested_path(self) -> None:
        name = "feat/v2/core"
        assert validate_branch_name(name) == name

    def test_max_length_255(self) -> None:
        longest_allowed = "a" * 255
        returned = validate_branch_name(longest_allowed)
        assert returned == longest_allowed

    def test_digits_hyphens_underscores(self) -> None:
        name = "branch-123_test"
        assert validate_branch_name(name) == name

    # --- names that must be refused ---

    def test_rejects_empty(self) -> None:
        with pytest.raises(ValueError, match="must not be empty"):
            validate_branch_name("")

    def test_rejects_too_long(self) -> None:
        # One character past the 255-character name accepted above.
        oversized = "a" * 256
        with pytest.raises(ValueError, match="too long"):
            validate_branch_name(oversized)

    def test_rejects_backslash(self) -> None:
        bad = "evil\\branch"
        with pytest.raises(ValueError, match="forbidden"):
            validate_branch_name(bad)

    def test_rejects_null_byte(self) -> None:
        bad = "branch\x00name"
        with pytest.raises(ValueError):
            validate_branch_name(bad)

    def test_rejects_carriage_return(self) -> None:
        bad = "branch\rname"
        with pytest.raises(ValueError):
            validate_branch_name(bad)

    def test_rejects_linefeed(self) -> None:
        bad = "branch\nname"
        with pytest.raises(ValueError):
            validate_branch_name(bad)

    def test_rejects_tab(self) -> None:
        bad = "branch\tname"
        with pytest.raises(ValueError):
            validate_branch_name(bad)

    def test_rejects_leading_dot(self) -> None:
        bad = ".hidden"
        with pytest.raises(ValueError):
            validate_branch_name(bad)

    def test_rejects_trailing_dot(self) -> None:
        bad = "branch."
        with pytest.raises(ValueError):
            validate_branch_name(bad)

    def test_rejects_consecutive_dots(self) -> None:
        bad = "branch..name"
        with pytest.raises(ValueError):
            validate_branch_name(bad)

    def test_rejects_triple_dot(self) -> None:
        bad = "branch...name"
        with pytest.raises(ValueError):
            validate_branch_name(bad)

    def test_rejects_consecutive_slashes(self) -> None:
        bad = "feat//branch"
        with pytest.raises(ValueError):
            validate_branch_name(bad)

    def test_rejects_leading_slash(self) -> None:
        bad = "/branch"
        with pytest.raises(ValueError):
            validate_branch_name(bad)

    def test_rejects_trailing_slash(self) -> None:
        bad = "branch/"
        with pytest.raises(ValueError):
            validate_branch_name(bad)
220
221
222
223 # ---------------------------------------------------------------------------
224 # validate_repo_id
225 # ---------------------------------------------------------------------------
226
227
class TestValidateRepoId:
    """Repo IDs: plain and hyphenated identifiers are returned unchanged;
    empty, oversized, dot-dot, and null-byte inputs raise ValueError."""

    def test_valid_uuid_style(self) -> None:
        # Hyphen-separated groups; note this sample is not a literal UUID.
        repo = "abc123-def456-ghi789"
        returned = validate_repo_id(repo)
        assert returned == repo

    def test_valid_simple_id(self) -> None:
        repo = "myrepo"
        assert validate_repo_id(repo) == repo

    def test_rejects_empty(self) -> None:
        with pytest.raises(ValueError, match="must not be empty"):
            validate_repo_id("")

    def test_rejects_too_long(self) -> None:
        oversized = "x" * 256
        with pytest.raises(ValueError, match="too long"):
            validate_repo_id(oversized)

    def test_rejects_dotdot_component(self) -> None:
        bad = "repo..evil"
        with pytest.raises(ValueError):
            validate_repo_id(bad)

    def test_rejects_null_byte(self) -> None:
        bad = "repo\x00id"
        with pytest.raises(ValueError):
            validate_repo_id(bad)
251
252
253
254 # ---------------------------------------------------------------------------
255 # validate_domain_name
256 # ---------------------------------------------------------------------------
257
258
class TestValidateDomainName:
    """Domain names: lowercase identifiers with optional digits, hyphens and
    underscores are returned unchanged; every other shape raises ValueError."""

    # --- accepted names ---

    def test_midi(self) -> None:
        domain = "midi"
        assert validate_domain_name(domain) == domain

    def test_code(self) -> None:
        domain = "code"
        assert validate_domain_name(domain) == domain

    def test_scaffold(self) -> None:
        domain = "scaffold"
        assert validate_domain_name(domain) == domain

    def test_with_hyphen(self) -> None:
        domain = "my-domain"
        assert validate_domain_name(domain) == domain

    def test_with_underscore(self) -> None:
        domain = "my_domain"
        assert validate_domain_name(domain) == domain

    def test_with_digits(self) -> None:
        domain = "domain2"
        assert validate_domain_name(domain) == domain

    # --- rejected names ---

    def test_rejects_empty(self) -> None:
        with pytest.raises(ValueError):
            validate_domain_name("")

    def test_rejects_leading_digit(self) -> None:
        bad = "2domain"
        with pytest.raises(ValueError):
            validate_domain_name(bad)

    def test_rejects_uppercase(self) -> None:
        bad = "MIDI"
        with pytest.raises(ValueError):
            validate_domain_name(bad)

    def test_rejects_space(self) -> None:
        bad = "my domain"
        with pytest.raises(ValueError):
            validate_domain_name(bad)

    def test_rejects_slash(self) -> None:
        bad = "midi/ext"
        with pytest.raises(ValueError):
            validate_domain_name(bad)

    def test_rejects_dot(self) -> None:
        bad = "midi.ext"
        with pytest.raises(ValueError):
            validate_domain_name(bad)

    def test_rejects_too_long(self) -> None:
        # 64 characters in total. The limit is presumably 63 (one start
        # letter plus up to 62 more) -- confirm against the validator.
        oversized = "a" + "b" * 63
        with pytest.raises(ValueError):
            validate_domain_name(oversized)
306
307
308 # ---------------------------------------------------------------------------
309 # contain_path
310 # ---------------------------------------------------------------------------
311
312
class TestContainPath:
    """contain_path(base, rel) is expected to return the resolved path of
    ``rel`` under ``base`` and to raise ValueError whenever the result would
    fall outside ``base`` (``..``, absolute paths, escaping symlinks) or when
    ``rel`` is empty."""

    def test_simple_subpath(self, tmp_path: pathlib.Path) -> None:
        result = contain_path(tmp_path, "file.txt")
        assert result == (tmp_path / "file.txt").resolve()

    def test_nested_subpath(self, tmp_path: pathlib.Path) -> None:
        result = contain_path(tmp_path, "sub/dir/file.txt")
        assert result == (tmp_path / "sub" / "dir" / "file.txt").resolve()

    def test_returns_resolved_path(self, tmp_path: pathlib.Path) -> None:
        result = contain_path(tmp_path, "a/./b")
        assert "./" not in str(result)
        # The substring check alone depends on the platform separator and
        # would pass for many wrong answers; also pin the exact normalised
        # path, mirroring the equality checks in the sub-path tests.
        assert result == (tmp_path / "a" / "b").resolve()

    def test_rejects_dotdot_traversal(self, tmp_path: pathlib.Path) -> None:
        with pytest.raises(ValueError, match="traversal"):
            contain_path(tmp_path, "../escape")

    def test_rejects_double_dotdot(self, tmp_path: pathlib.Path) -> None:
        with pytest.raises(ValueError):
            contain_path(tmp_path, "sub/../../etc/passwd")

    def test_rejects_absolute_path(self, tmp_path: pathlib.Path) -> None:
        with pytest.raises(ValueError):
            contain_path(tmp_path, "/etc/passwd")

    def test_rejects_empty_rel(self, tmp_path: pathlib.Path) -> None:
        with pytest.raises(ValueError, match="must not be empty"):
            contain_path(tmp_path, "")

    def test_path_equal_to_child_is_fine(self, tmp_path: pathlib.Path) -> None:
        # A path that resolves exactly to a direct child should pass.
        result = contain_path(tmp_path, "direct_child")
        assert result.parent == tmp_path.resolve()

    def test_rejects_symlink_escaping_base(self, tmp_path: pathlib.Path) -> None:
        # Keep both the base directory and the "outside" target beneath
        # tmp_path. Writing to tmp_path.parent would drop a file into a
        # directory shared with other tests and leave it behind afterwards.
        base = tmp_path / "base"
        base.mkdir()
        outside = tmp_path / "outside.txt"
        outside.write_text("secret")
        link = base / "link.txt"
        try:
            link.symlink_to(outside)
        except (OSError, NotImplementedError) as exc:
            # Some platforms/accounts cannot create symlinks; that is an
            # environment limitation, not a contain_path failure.
            pytest.skip(f"symlinks are not available here: {exc}")
        # The link lives inside base but its target does not, so resolving it
        # must be reported as a traversal.
        with pytest.raises(ValueError, match="traversal"):
            contain_path(base, "link.txt")
357
358
359 # ---------------------------------------------------------------------------
360 # sanitize_glob_prefix
361 # ---------------------------------------------------------------------------
362
363
class TestSanitizeGlobPrefix:
    """sanitize_glob_prefix must drop the glob metacharacters ``* ? [ ] { }``
    and leave every other character in place."""

    def test_clean_prefix_unchanged(self) -> None:
        prefix = "abcdef"
        assert sanitize_glob_prefix(prefix) == prefix

    def test_strips_asterisk(self) -> None:
        cleaned = sanitize_glob_prefix("abc*def")
        assert cleaned == "abcdef"

    def test_strips_question_mark(self) -> None:
        cleaned = sanitize_glob_prefix("abc?def")
        assert cleaned == "abcdef"

    def test_strips_open_bracket(self) -> None:
        cleaned = sanitize_glob_prefix("abc[def")
        assert cleaned == "abcdef"

    def test_strips_close_bracket(self) -> None:
        cleaned = sanitize_glob_prefix("abc]def")
        assert cleaned == "abcdef"

    def test_strips_open_brace(self) -> None:
        cleaned = sanitize_glob_prefix("abc{def")
        assert cleaned == "abcdef"

    def test_strips_close_brace(self) -> None:
        cleaned = sanitize_glob_prefix("abc}def")
        assert cleaned == "abcdef"

    def test_strips_all_metacharacters(self) -> None:
        # The space is not a metacharacter, so it survives with the letters.
        cleaned = sanitize_glob_prefix("*?[]{} abc")
        assert cleaned == " abc"

    def test_empty_string(self) -> None:
        cleaned = sanitize_glob_prefix("")
        assert cleaned == ""

    def test_hex_prefix_unaffected(self) -> None:
        hex_prefix = "deadbeef01"
        cleaned = sanitize_glob_prefix(hex_prefix)
        assert cleaned == hex_prefix
395
396
397 # ---------------------------------------------------------------------------
398 # sanitize_display
399 # ---------------------------------------------------------------------------
400
401
class TestSanitizeDisplay:
    """sanitize_display must keep printable text (including newlines, tabs
    and non-ASCII letters) while removing terminal control characters."""

    def test_clean_ascii_unchanged(self) -> None:
        assert sanitize_display("Hello, World!") == "Hello, World!"

    def test_newline_preserved(self) -> None:
        s = "line1\nline2"
        assert sanitize_display(s) == s

    def test_tab_preserved(self) -> None:
        s = "col1\tcol2"
        assert sanitize_display(s) == s

    def test_strips_ansi_escape_sequence(self) -> None:
        ansi = "\x1b[31mred text\x1b[0m"
        result = sanitize_display(ansi)
        assert "\x1b" not in result
        assert "red text" in result

    def test_strips_bel(self) -> None:
        assert sanitize_display("ring\x07bell") == "ringbell"

    def test_strips_null_byte(self) -> None:
        assert sanitize_display("no\x00null") == "nonull"

    def test_strips_osc_sequence(self) -> None:
        # Despite this test's (kept for stability) name, "\x9b" is the
        # single-character C1 CSI -- the 8-bit form of ESC [ -- not OSC.
        # OSC is ESC ] (8-bit form "\x9d") and is covered by the next test.
        csi = "\x9bmalicious"
        result = sanitize_display(csi)
        assert "\x9b" not in result

    def test_strips_esc_introduced_osc_sequence(self) -> None:
        # A genuine OSC sequence ("set window title"): ESC ] ... BEL.
        # ESC and BEL are each stripped on their own in the tests above, so
        # neither may survive here; the text after the terminator must.
        osc = "\x1b]0;spoofed title\x07visible"
        result = sanitize_display(osc)
        assert "\x1b" not in result
        assert "\x07" not in result
        assert "visible" in result

    def test_strips_cr(self) -> None:
        assert sanitize_display("text\r") == "text"

    def test_strips_vertical_tab(self) -> None:
        assert sanitize_display("text\x0bmore") == "textmore"

    def test_strips_form_feed(self) -> None:
        assert sanitize_display("text\x0cmore") == "textmore"

    def test_strips_del(self) -> None:
        assert sanitize_display("text\x7fmore") == "textmore"

    def test_multiline_message_sanitized(self) -> None:
        msg = "commit: \x1b[1mAdd feature\x1b[0m\nSigned-off-by: Alice"
        result = sanitize_display(msg)
        assert "\x1b" not in result
        assert "Add feature" in result
        assert "Signed-off-by: Alice" in result

    def test_empty_string(self) -> None:
        assert sanitize_display("") == ""

    def test_unicode_letters_preserved(self) -> None:
        s = "Héllo Wörld — 日本語"
        assert sanitize_display(s) == s
457
458
459 # ---------------------------------------------------------------------------
460 # clamp_int
461 # ---------------------------------------------------------------------------
462
463
class TestClampInt:
    """clamp_int returns a value lying inside the inclusive bounds and raises
    ValueError (naming the parameter when given) for anything outside."""

    def test_value_in_range_returned_unchanged(self) -> None:
        lower, upper = 1, 10
        assert clamp_int(5, lower, upper) == 5

    def test_value_at_lower_bound(self) -> None:
        lower, upper = 1, 10
        assert clamp_int(lower, lower, upper) == 1

    def test_value_at_upper_bound(self) -> None:
        lower, upper = 1, 10
        assert clamp_int(upper, lower, upper) == 10

    def test_below_min_raises(self) -> None:
        lower, upper = 1, 10
        with pytest.raises(ValueError, match="between"):
            clamp_int(0, lower, upper)

    def test_above_max_raises(self) -> None:
        lower, upper = 1, 10
        with pytest.raises(ValueError, match="between"):
            clamp_int(11, lower, upper)

    def test_name_in_error_message(self) -> None:
        # The caller-supplied label should appear in the error text.
        with pytest.raises(ValueError, match="depth"):
            clamp_int(-1, 0, 100, name="depth")

    def test_negative_range(self) -> None:
        lower, upper = -10, 0
        assert clamp_int(-5, lower, upper) == -5

    def test_equal_lo_hi(self) -> None:
        # Degenerate range: a single permitted value.
        only = 42
        assert clamp_int(only, only, only) == 42
491
492
493 # ---------------------------------------------------------------------------
494 # finite_float
495 # ---------------------------------------------------------------------------
496
497
class TestFiniteFloat:
    """finite_float(value, fallback) returns ``value`` when it is finite and
    ``fallback`` when it is infinite or NaN."""

    def test_finite_value_returned_unchanged(self) -> None:
        value, fallback = 120.0, 120.0
        assert finite_float(value, fallback) == 120.0

    def test_zero_is_finite(self) -> None:
        value, fallback = 0.0, 1.0
        assert finite_float(value, fallback) == 0.0

    def test_negative_finite_returned(self) -> None:
        value, fallback = -5.5, 0.0
        assert finite_float(value, fallback) == -5.5

    def test_positive_inf_returns_fallback(self) -> None:
        fallback = 120.0
        assert finite_float(math.inf, fallback) == 120.0

    def test_negative_inf_returns_fallback(self) -> None:
        fallback = 120.0
        assert finite_float(-math.inf, fallback) == 120.0

    def test_nan_returns_fallback(self) -> None:
        fallback = 120.0
        assert finite_float(math.nan, fallback) == 120.0

    def test_large_finite_returned(self) -> None:
        # Huge but still finite, so it must come back rather than the fallback.
        huge = 1e300
        returned = finite_float(huge, 0.0)
        assert returned == huge
520
521
522 # ---------------------------------------------------------------------------
523 # Stress: contain_path with many adversarial inputs
524 # ---------------------------------------------------------------------------
525
526
class TestContainPathStress:
    """Fuzz-style checks: every adversarial path string must be rejected by
    contain_path, and a batch of ordinary relative paths must be accepted
    and land inside the base directory."""

    TRAVERSAL_ATTEMPTS: list[str] = [
        "..",
        "../etc/passwd",
        "../../etc/shadow",
        "sub/../../../etc/passwd",
        "/absolute/path",
        "/",
        "//double-slash",
        # Note: URL-encoded dots (%2e%2e) are NOT traversal from a filesystem
        # perspective — contain_path is a filesystem guard, not an HTTP parser.
        # Null bytes cause an OS-level ValueError, which we also accept.
        "\x00null",
        "sub/\x00null",
    ]

    # One test case per input, so a failure names the offending string and the
    # remaining inputs still run. ids=repr keeps the raw NUL bytes out of the
    # generated test IDs.
    @pytest.mark.parametrize("attempt", TRAVERSAL_ATTEMPTS, ids=repr)
    def test_all_traversal_attempts_rejected(
        self, tmp_path: pathlib.Path, attempt: str
    ) -> None:
        with pytest.raises((ValueError, TypeError)):
            contain_path(tmp_path, attempt)

    def test_large_number_of_valid_paths_accepted(self, tmp_path: pathlib.Path) -> None:
        base = tmp_path.resolve()  # loop-invariant, resolve once
        for i in range(200):
            rel = f"subdir/track_{i:04d}.mid"
            result = contain_path(tmp_path, rel)
            # Compare path components, not string prefixes: a sibling such as
            # "<base>-evil/..." would satisfy str.startswith(str(base)).
            assert base in result.parents, f"{rel!r} resolved outside base: {result}"