tests/test_core_validation.py · cgcardona/muse

"""Tests for muse.core.validation — all trust-boundary primitives.

Every function in the validation module operates on untrusted input and must
either return a safe value or raise ValueError / TypeError with a descriptive
message. These tests verify correctness of the allow-lists, reject-lists, and
edge cases for each guard.
"""

from __future__ import annotations

import math

import pathlib

import pytest

from muse.core.validation import (

MAX_FILE_BYTES,

MAX_RESPONSE_BYTES,

MAX_SYSEX_BYTES,

clamp_int,

contain_path,

finite_float,

sanitize_display,

sanitize_glob_prefix,

25

validate_branch_name,

26

validate_domain_name,

validate_object_id,

validate_ref_id,

validate_repo_id,

)

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

class TestConstants:
    """Pin the size-limit constants exported by the validation module."""

    def test_max_file_bytes_is_256mb(self) -> None:
        """MAX_FILE_BYTES equals 256 * 1024 * 1024."""
        mebibyte = 1024 * 1024
        assert MAX_FILE_BYTES == 256 * mebibyte

    def test_max_response_bytes_is_64mb(self) -> None:
        """MAX_RESPONSE_BYTES equals 64 * 1024 * 1024."""
        mebibyte = 1024 * 1024
        assert MAX_RESPONSE_BYTES == 64 * mebibyte

    def test_max_sysex_bytes_is_64kib(self) -> None:
        """MAX_SYSEX_BYTES equals 65 536 (64 * 1024)."""
        expected = 64 * 1024
        assert MAX_SYSEX_BYTES == expected

# ---------------------------------------------------------------------------
# validate_object_id
# ---------------------------------------------------------------------------

class TestValidateObjectId:
    """Accept exactly 64 lowercase hex characters; refuse every other input."""

    def test_valid_all_zeros(self) -> None:
        """Sixty-four '0' characters come back equal to the input."""
        zeros = "0" * 64
        assert validate_object_id(zeros) == zeros

    def test_valid_all_lowercase_hex(self) -> None:
        """Sixty-four 'a' characters come back equal to the input."""
        all_a = "a" * 64
        assert validate_object_id(all_a) == all_a

    def test_valid_mixed_hex(self) -> None:
        """A mix of hex letters is accepted."""
        mixed = "deadbeef" * 8
        assert validate_object_id(mixed) == mixed

    def test_returns_same_string(self) -> None:
        """The very same object is handed back (identity, not a copy)."""
        candidate = "f" * 64
        assert validate_object_id(candidate) is candidate

    def test_rejects_uppercase(self) -> None:
        """Upper-case input raises ValueError mentioning '64 lowercase hex'."""
        shouting = "A" * 64
        with pytest.raises(ValueError, match="64 lowercase hex"):
            validate_object_id(shouting)

    def test_rejects_63_chars(self) -> None:
        """One character short of 64 raises ValueError."""
        too_short = "a" * 63
        with pytest.raises(ValueError):
            validate_object_id(too_short)

    def test_rejects_65_chars(self) -> None:
        """One character over 64 raises ValueError."""
        too_long = "a" * 65
        with pytest.raises(ValueError):
            validate_object_id(too_long)

    def test_rejects_empty_string(self) -> None:
        """The empty string raises ValueError."""
        nothing = ""
        with pytest.raises(ValueError):
            validate_object_id(nothing)

    def test_rejects_non_hex_chars(self) -> None:
        """A 64-char string with a leading 'g' (not a hex digit) raises."""
        not_hex = "g" + "a" * 63
        with pytest.raises(ValueError):
            validate_object_id(not_hex)

    def test_rejects_path_traversal_string(self) -> None:
        """A 64-char string containing '../' segments raises ValueError."""
        sneaky = "../evil/../path/" + "a" * 48
        with pytest.raises(ValueError):
            validate_object_id(sneaky)

    def test_rejects_null_byte_in_id(self) -> None:
        """Sixty-four NUL characters raise ValueError."""
        nul_run = "\x00" * 64
        with pytest.raises(ValueError):
            validate_object_id(nul_run)

# ---------------------------------------------------------------------------
# validate_ref_id
# ---------------------------------------------------------------------------

class TestValidateRefId:
    """validate_ref_id is exercised against the same 64-char hex rule."""

    def test_valid_commit_id(self) -> None:
        """Sixty-four 'b' characters come back equal to the input."""
        commit = "b" * 64
        assert validate_ref_id(commit) == commit

    def test_rejects_short_id(self) -> None:
        """A six-character abbreviation raises ValueError."""
        abbreviated = "abc123"
        with pytest.raises(ValueError):
            validate_ref_id(abbreviated)

    def test_rejects_uppercase(self) -> None:
        """Upper-case input raises ValueError."""
        shouting = "B" * 64
        with pytest.raises(ValueError):
            validate_ref_id(shouting)

    def test_error_message_mentions_ref_id(self) -> None:
        """The error text for a bad value contains 'ref ID'."""
        bad = "short"
        with pytest.raises(ValueError, match="ref ID"):
            validate_ref_id(bad)

# ---------------------------------------------------------------------------
# validate_branch_name
# ---------------------------------------------------------------------------

class TestValidateBranchName:
    """Git-style branch names: '/' separators pass, while backslashes,
    control characters and malformed dot/slash placement are refused."""

    # --- names that must be accepted -------------------------------------

    def test_simple_name(self) -> None:
        """'main' is returned as-is."""
        name = "main"
        assert validate_branch_name(name) == name

    def test_dev_branch(self) -> None:
        """'dev' is returned as-is."""
        name = "dev"
        assert validate_branch_name(name) == name

    def test_feature_slash_style(self) -> None:
        """A single '/' separator is allowed."""
        name = "feature/my-branch"
        assert validate_branch_name(name) == name

    def test_fix_slash_style(self) -> None:
        """Several hyphens after the '/' are allowed."""
        name = "fix/auth-token-exposure"
        assert validate_branch_name(name) == name

    def test_nested_path(self) -> None:
        """Two '/' separators are allowed."""
        name = "feat/v2/core"
        assert validate_branch_name(name) == name

    def test_max_length_255(self) -> None:
        """A 255-character name is accepted."""
        longest = "a" * 255
        assert validate_branch_name(longest) == longest

    def test_digits_hyphens_underscores(self) -> None:
        """Digits, '-' and '_' are all allowed."""
        name = "branch-123_test"
        assert validate_branch_name(name) == name

    # --- names that must be refused --------------------------------------

    def test_rejects_empty(self) -> None:
        """The empty string raises with 'must not be empty'."""
        blank = ""
        with pytest.raises(ValueError, match="must not be empty"):
            validate_branch_name(blank)

    def test_rejects_too_long(self) -> None:
        """A 256-character name raises with 'too long'."""
        oversized = "a" * 256
        with pytest.raises(ValueError, match="too long"):
            validate_branch_name(oversized)

    def test_rejects_backslash(self) -> None:
        """A backslash raises with 'forbidden'."""
        windowsy = "evil\\branch"
        with pytest.raises(ValueError, match="forbidden"):
            validate_branch_name(windowsy)

    def test_rejects_null_byte(self) -> None:
        """An embedded NUL raises ValueError."""
        tainted = "branch\x00name"
        with pytest.raises(ValueError):
            validate_branch_name(tainted)

    def test_rejects_carriage_return(self) -> None:
        """An embedded CR raises ValueError."""
        tainted = "branch\rname"
        with pytest.raises(ValueError):
            validate_branch_name(tainted)

    def test_rejects_linefeed(self) -> None:
        """An embedded LF raises ValueError."""
        tainted = "branch\nname"
        with pytest.raises(ValueError):
            validate_branch_name(tainted)

    def test_rejects_tab(self) -> None:
        """An embedded TAB raises ValueError."""
        tainted = "branch\tname"
        with pytest.raises(ValueError):
            validate_branch_name(tainted)

    def test_rejects_leading_dot(self) -> None:
        """A name starting with '.' raises ValueError."""
        dotted = ".hidden"
        with pytest.raises(ValueError):
            validate_branch_name(dotted)

    def test_rejects_trailing_dot(self) -> None:
        """A name ending with '.' raises ValueError."""
        dotted = "branch."
        with pytest.raises(ValueError):
            validate_branch_name(dotted)

    def test_rejects_consecutive_dots(self) -> None:
        """'..' inside the name raises ValueError."""
        dotted = "branch..name"
        with pytest.raises(ValueError):
            validate_branch_name(dotted)

    def test_rejects_triple_dot(self) -> None:
        """'...' inside the name raises ValueError."""
        dotted = "branch...name"
        with pytest.raises(ValueError):
            validate_branch_name(dotted)

    def test_rejects_consecutive_slashes(self) -> None:
        """'//' inside the name raises ValueError."""
        slashed = "feat//branch"
        with pytest.raises(ValueError):
            validate_branch_name(slashed)

    def test_rejects_leading_slash(self) -> None:
        """A name starting with '/' raises ValueError."""
        slashed = "/branch"
        with pytest.raises(ValueError):
            validate_branch_name(slashed)

    def test_rejects_trailing_slash(self) -> None:
        """A name ending with '/' raises ValueError."""
        slashed = "branch/"
        with pytest.raises(ValueError):
            validate_branch_name(slashed)

# ---------------------------------------------------------------------------
# validate_repo_id
# ---------------------------------------------------------------------------

class TestValidateRepoId:
    """Repo identifiers: accepted values are echoed back, bad ones raise."""

    def test_valid_uuid_style(self) -> None:
        """A hyphen-separated alphanumeric identifier is accepted."""
        candidate = "abc123-def456-ghi789"
        assert validate_repo_id(candidate) == candidate

    def test_valid_simple_id(self) -> None:
        """A plain lowercase word is accepted."""
        candidate = "myrepo"
        assert validate_repo_id(candidate) == candidate

    def test_rejects_empty(self) -> None:
        """The empty string raises with 'must not be empty'."""
        blank = ""
        with pytest.raises(ValueError, match="must not be empty"):
            validate_repo_id(blank)

    def test_rejects_too_long(self) -> None:
        """A 256-character identifier raises with 'too long'."""
        oversized = "x" * 256
        with pytest.raises(ValueError, match="too long"):
            validate_repo_id(oversized)

    def test_rejects_dotdot_component(self) -> None:
        """An identifier containing '..' raises ValueError."""
        dotted = "repo..evil"
        with pytest.raises(ValueError):
            validate_repo_id(dotted)

    def test_rejects_null_byte(self) -> None:
        """An embedded NUL raises ValueError."""
        tainted = "repo\x00id"
        with pytest.raises(ValueError):
            validate_repo_id(tainted)

# ---------------------------------------------------------------------------
# validate_domain_name
# ---------------------------------------------------------------------------

class TestValidateDomainName:
    """Domain names: lowercase identifiers are echoed back, others raise."""

    def test_midi(self) -> None:
        """'midi' is accepted."""
        name = "midi"
        assert validate_domain_name(name) == name

    def test_code(self) -> None:
        """'code' is accepted."""
        name = "code"
        assert validate_domain_name(name) == name

    def test_scaffold(self) -> None:
        """'scaffold' is accepted."""
        name = "scaffold"
        assert validate_domain_name(name) == name

    def test_with_hyphen(self) -> None:
        """A hyphen inside the name is accepted."""
        name = "my-domain"
        assert validate_domain_name(name) == name

    def test_with_underscore(self) -> None:
        """An underscore inside the name is accepted."""
        name = "my_domain"
        assert validate_domain_name(name) == name

    def test_with_digits(self) -> None:
        """A trailing digit is accepted."""
        name = "domain2"
        assert validate_domain_name(name) == name

    def test_rejects_empty(self) -> None:
        """The empty string raises ValueError."""
        blank = ""
        with pytest.raises(ValueError):
            validate_domain_name(blank)

    def test_rejects_leading_digit(self) -> None:
        """A name starting with a digit raises ValueError."""
        bad = "2domain"
        with pytest.raises(ValueError):
            validate_domain_name(bad)

    def test_rejects_uppercase(self) -> None:
        """An all-caps name raises ValueError."""
        bad = "MIDI"
        with pytest.raises(ValueError):
            validate_domain_name(bad)

    def test_rejects_space(self) -> None:
        """A name containing a space raises ValueError."""
        bad = "my domain"
        with pytest.raises(ValueError):
            validate_domain_name(bad)

    def test_rejects_slash(self) -> None:
        """A name containing '/' raises ValueError."""
        bad = "midi/ext"
        with pytest.raises(ValueError):
            validate_domain_name(bad)

    def test_rejects_dot(self) -> None:
        """A name containing '.' raises ValueError."""
        bad = "midi.ext"
        with pytest.raises(ValueError):
            validate_domain_name(bad)

    def test_rejects_too_long(self) -> None:
        """A 64-character name raises ValueError."""
        # 1 + 63 = 64 characters.  Presumably the limit is 63 (a start
        # letter plus up to 62 more) — confirm against the module's regex.
        oversized = "a" + "b" * 63
        with pytest.raises(ValueError):
            validate_domain_name(oversized)

# ---------------------------------------------------------------------------
# contain_path
# ---------------------------------------------------------------------------

class TestContainPath:
    """contain_path joins a relative path onto a base directory and must
    refuse anything that would land outside that base."""

    def test_simple_subpath(self, tmp_path: pathlib.Path) -> None:
        """A bare file name resolves to that file directly under the base."""
        result = contain_path(tmp_path, "file.txt")
        assert result == (tmp_path / "file.txt").resolve()

    def test_nested_subpath(self, tmp_path: pathlib.Path) -> None:
        """A multi-segment relative path resolves beneath the base."""
        result = contain_path(tmp_path, "sub/dir/file.txt")
        assert result == (tmp_path / "sub" / "dir" / "file.txt").resolve()

    def test_returns_resolved_path(self, tmp_path: pathlib.Path) -> None:
        """A '.' segment is collapsed in the returned path."""
        result = contain_path(tmp_path, "a/./b")
        assert "./" not in str(result)
        # The substring check alone is vacuous where the path separator is
        # not '/', so also pin the exact normalised location.
        assert result == (tmp_path / "a" / "b").resolve()

    def test_rejects_dotdot_traversal(self, tmp_path: pathlib.Path) -> None:
        """A leading '..' raises ValueError mentioning 'traversal'."""
        with pytest.raises(ValueError, match="traversal"):
            contain_path(tmp_path, "../escape")

    def test_rejects_double_dotdot(self, tmp_path: pathlib.Path) -> None:
        """Climbing out through a sub-directory raises ValueError."""
        with pytest.raises(ValueError):
            contain_path(tmp_path, "sub/../../etc/passwd")

    def test_rejects_absolute_path(self, tmp_path: pathlib.Path) -> None:
        """An absolute path raises ValueError."""
        with pytest.raises(ValueError):
            contain_path(tmp_path, "/etc/passwd")

    def test_rejects_empty_rel(self, tmp_path: pathlib.Path) -> None:
        """An empty relative path raises with 'must not be empty'."""
        with pytest.raises(ValueError, match="must not be empty"):
            contain_path(tmp_path, "")

    def test_path_equal_to_child_is_fine(self, tmp_path: pathlib.Path) -> None:
        """A path that resolves exactly to a direct child should pass."""
        result = contain_path(tmp_path, "direct_child")
        assert result.parent == tmp_path.resolve()

    def test_rejects_symlink_escaping_base(self, tmp_path: pathlib.Path) -> None:
        """A symlink inside the base that points outside it is refused."""
        # Keep both the base and the "outside" target inside this test's own
        # tmp_path.  Writing into tmp_path.parent would leave a file in a
        # directory shared with other tests.
        base = tmp_path / "base"
        base.mkdir()
        outside = tmp_path / "outside.txt"
        outside.write_text("secret")
        link = base / "link.txt"
        try:
            link.symlink_to(outside)
        except (OSError, NotImplementedError) as exc:
            # Creating symlinks can be unsupported or unprivileged on some
            # platforms; that is an environment limitation, not a failure.
            pytest.skip(f"symlinks unavailable: {exc}")
        # contain_path resolves the path — the symlink target is outside base.
        with pytest.raises(ValueError, match="traversal"):
            contain_path(base, "link.txt")

# ---------------------------------------------------------------------------
# sanitize_glob_prefix
# ---------------------------------------------------------------------------

class TestSanitizeGlobPrefix:
    """sanitize_glob_prefix drops glob metacharacters and keeps the rest."""

    def test_clean_prefix_unchanged(self) -> None:
        """Input without metacharacters is returned unchanged."""
        cleaned = sanitize_glob_prefix("abcdef")
        assert cleaned == "abcdef"

    def test_strips_asterisk(self) -> None:
        """'*' is removed."""
        cleaned = sanitize_glob_prefix("abc*def")
        assert cleaned == "abcdef"

    def test_strips_question_mark(self) -> None:
        """'?' is removed."""
        cleaned = sanitize_glob_prefix("abc?def")
        assert cleaned == "abcdef"

    def test_strips_open_bracket(self) -> None:
        """'[' is removed."""
        cleaned = sanitize_glob_prefix("abc[def")
        assert cleaned == "abcdef"

    def test_strips_close_bracket(self) -> None:
        """']' is removed."""
        cleaned = sanitize_glob_prefix("abc]def")
        assert cleaned == "abcdef"

    def test_strips_open_brace(self) -> None:
        """'{' is removed."""
        cleaned = sanitize_glob_prefix("abc{def")
        assert cleaned == "abcdef"

    def test_strips_close_brace(self) -> None:
        """'}' is removed."""
        cleaned = sanitize_glob_prefix("abc}def")
        assert cleaned == "abcdef"

    def test_strips_all_metacharacters(self) -> None:
        """All six metacharacters go; the space and letters stay."""
        cleaned = sanitize_glob_prefix("*?[]{} abc")
        assert cleaned == " abc"

    def test_empty_string(self) -> None:
        """Empty in, empty out."""
        cleaned = sanitize_glob_prefix("")
        assert cleaned == ""

    def test_hex_prefix_unaffected(self) -> None:
        """A hex-digit prefix is returned unchanged."""
        hex_prefix = "deadbeef01"
        assert sanitize_glob_prefix(hex_prefix) == hex_prefix

# ---------------------------------------------------------------------------
# sanitize_display
# ---------------------------------------------------------------------------

class TestSanitizeDisplay:
    """sanitize_display keeps printable text, newline and tab, and removes
    the other control characters exercised below."""

    def test_clean_ascii_unchanged(self) -> None:
        """Plain printable ASCII is returned unchanged."""
        text = "Hello, World!"
        assert sanitize_display(text) == text

    def test_newline_preserved(self) -> None:
        """A line feed survives."""
        text = "line1\nline2"
        assert sanitize_display(text) == text

    def test_tab_preserved(self) -> None:
        """A horizontal tab survives."""
        text = "col1\tcol2"
        assert sanitize_display(text) == text

    def test_strips_ansi_escape_sequence(self) -> None:
        """ESC bytes are removed while the visible text remains."""
        cleaned = sanitize_display("\x1b[31mred text\x1b[0m")
        assert "\x1b" not in cleaned
        assert "red text" in cleaned

    def test_strips_bel(self) -> None:
        """BEL (0x07) is removed."""
        cleaned = sanitize_display("ring\x07bell")
        assert cleaned == "ringbell"

    def test_strips_null_byte(self) -> None:
        """NUL (0x00) is removed."""
        cleaned = sanitize_display("no\x00null")
        assert cleaned == "nonull"

    def test_strips_osc_sequence(self) -> None:
        """The 8-bit C1 introducer byte 0x9B is removed."""
        # NOTE: despite the test name, \x9b is the single-byte C1 form of
        # CSI (the 8-bit equivalent of ESC [), not an OSC introducer.
        cleaned = sanitize_display("\x9bmalicious")
        assert "\x9b" not in cleaned

    def test_strips_cr(self) -> None:
        """A trailing carriage return is removed."""
        cleaned = sanitize_display("text\r")
        assert cleaned == "text"

    def test_strips_vertical_tab(self) -> None:
        """VT (0x0B) is removed."""
        cleaned = sanitize_display("text\x0bmore")
        assert cleaned == "textmore"

    def test_strips_form_feed(self) -> None:
        """FF (0x0C) is removed."""
        cleaned = sanitize_display("text\x0cmore")
        assert cleaned == "textmore"

    def test_strips_del(self) -> None:
        """DEL (0x7F) is removed."""
        cleaned = sanitize_display("text\x7fmore")
        assert cleaned == "textmore"

    def test_multiline_message_sanitized(self) -> None:
        """A two-line message loses its ESC bytes but keeps both lines' text."""
        cleaned = sanitize_display(
            "commit: \x1b[1mAdd feature\x1b[0m\nSigned-off-by: Alice"
        )
        assert "\x1b" not in cleaned
        assert "Add feature" in cleaned
        assert "Signed-off-by: Alice" in cleaned

    def test_empty_string(self) -> None:
        """Empty in, empty out."""
        cleaned = sanitize_display("")
        assert cleaned == ""

    def test_unicode_letters_preserved(self) -> None:
        """Accented Latin, an em dash and CJK characters are left alone."""
        text = "Héllo Wörld — 日本語"
        assert sanitize_display(text) == text

# ---------------------------------------------------------------------------
# clamp_int
# ---------------------------------------------------------------------------

class TestClampInt:
    """clamp_int returns in-range values and raises for out-of-range ones."""

    def test_value_in_range_returned_unchanged(self) -> None:
        """A value strictly inside the bounds is returned as-is."""
        outcome = clamp_int(5, 1, 10)
        assert outcome == 5

    def test_value_at_lower_bound(self) -> None:
        """The lower bound itself is accepted."""
        outcome = clamp_int(1, 1, 10)
        assert outcome == 1

    def test_value_at_upper_bound(self) -> None:
        """The upper bound itself is accepted."""
        outcome = clamp_int(10, 1, 10)
        assert outcome == 10

    def test_below_min_raises(self) -> None:
        """One below the lower bound raises with 'between' in the message."""
        with pytest.raises(ValueError, match="between"):
            clamp_int(0, 1, 10)

    def test_above_max_raises(self) -> None:
        """One above the upper bound raises with 'between' in the message."""
        with pytest.raises(ValueError, match="between"):
            clamp_int(11, 1, 10)

    def test_name_in_error_message(self) -> None:
        """The ``name`` keyword shows up in the error text."""
        with pytest.raises(ValueError, match="depth"):
            clamp_int(-1, 0, 100, name="depth")

    def test_negative_range(self) -> None:
        """Bounds may be negative."""
        outcome = clamp_int(-5, -10, 0)
        assert outcome == -5

    def test_equal_lo_hi(self) -> None:
        """A single-value range accepts that value."""
        outcome = clamp_int(42, 42, 42)
        assert outcome == 42

# ---------------------------------------------------------------------------
# finite_float
# ---------------------------------------------------------------------------

class TestFiniteFloat:
    """finite_float returns finite inputs and the fallback for inf / NaN."""

    def test_finite_value_returned_unchanged(self) -> None:
        """An ordinary finite value is returned."""
        outcome = finite_float(120.0, 120.0)
        assert outcome == 120.0

    def test_zero_is_finite(self) -> None:
        """Zero is returned rather than the fallback."""
        outcome = finite_float(0.0, 1.0)
        assert outcome == 0.0

    def test_negative_finite_returned(self) -> None:
        """A negative finite value is returned."""
        outcome = finite_float(-5.5, 0.0)
        assert outcome == -5.5

    def test_positive_inf_returns_fallback(self) -> None:
        """+inf yields the fallback."""
        fallback = 120.0
        assert finite_float(math.inf, fallback) == 120.0

    def test_negative_inf_returns_fallback(self) -> None:
        """-inf yields the fallback."""
        fallback = 120.0
        assert finite_float(-math.inf, fallback) == 120.0

    def test_nan_returns_fallback(self) -> None:
        """NaN yields the fallback."""
        fallback = 120.0
        assert finite_float(math.nan, fallback) == 120.0

    def test_large_finite_returned(self) -> None:
        """A very large but finite value is returned."""
        huge = 1e300
        assert finite_float(huge, 0.0) == huge

# ---------------------------------------------------------------------------
# Stress: contain_path with many adversarial inputs
# ---------------------------------------------------------------------------

class TestContainPathStress:
    """Fuzz-style checks: feed contain_path many adversarial path strings
    and verify every traversal attempt is rejected, then confirm a large
    batch of ordinary relative paths is accepted."""

    TRAVERSAL_ATTEMPTS: list[str] = [
        "..",
        "../etc/passwd",
        "../../etc/shadow",
        "sub/../../../etc/passwd",
        "/absolute/path",
        "/",
        "//double-slash",
        # Note: URL-encoded dots (%2e%2e) are NOT traversal from a filesystem
        # perspective — contain_path is a filesystem guard, not an HTTP parser.
        # Null bytes cause an OS-level ValueError, which we also accept.
        "\x00null",
        "sub/\x00null",
    ]

    # One test case per input, so a failure names the offending string
    # instead of a bare "DID NOT RAISE" from somewhere inside a loop.
    # ids=repr keeps the NUL-containing inputs printable in test ids.
    @pytest.mark.parametrize("attempt", TRAVERSAL_ATTEMPTS, ids=repr)
    def test_all_traversal_attempts_rejected(
        self, tmp_path: pathlib.Path, attempt: str
    ) -> None:
        """Each adversarial input raises ValueError or TypeError."""
        with pytest.raises((ValueError, TypeError)):
            contain_path(tmp_path, attempt)

    def test_large_number_of_valid_paths_accepted(self, tmp_path: pathlib.Path) -> None:
        """Two hundred ordinary relative paths all resolve beneath the base."""
        base = tmp_path.resolve()
        for i in range(200):
            rel = f"subdir/track_{i:04d}.mid"
            result = contain_path(tmp_path, rel)
            # Compare path components, not string prefixes: a prefix match
            # would also accept a sibling directory such as "<base>-evil".
            assert base in result.parents