tests/test_predicate.py · cgcardona/muse

1

"""Tests for the predicate DSL parser (muse/plugins/code/_predicate.py).

Coverage
--------
Tokenisation
    - Valid atoms, operators, keywords, parentheses, whitespace skipping.
    - Unexpected character raises PredicateError.

Atom parsing
    - All seven operators: = ~= ^= $= != >= <=
    - All ten predicate keys: kind, language, name, qualified_name, file,
      hash, body_hash, signature_id, lineno_gt, lineno_lt.
    - Double-quoted values.
    - Unknown key raises PredicateError.
    - Non-integer value for lineno_gt / lineno_lt raises PredicateError.

Compound expressions
    - Implicit AND (adjacent atoms).
    - Explicit OR.
    - Explicit NOT.
    - Parenthesised sub-expressions.
    - Mixed OR / NOT / AND / parentheses.
    - Trailing garbage token raises PredicateError.

parse_query
    - Empty string → match-all predicate.
    - Empty list → match-all predicate.
    - List of atoms → implicit AND.
    - Single string → parsed normally.

Predicate evaluation
    - Each key field reads the correct SymbolRecord / file_path field.
    - lineno_gt / lineno_lt boundary conditions (strict inequality).
    - hash / body_hash / signature_id prefix matching.
    - Case-insensitive string matching for =, ~=, ^=, $=, !=.
"""

import pytest

from muse.plugins.code._predicate import PredicateError, parse_query

41

from muse.plugins.code.ast_parser import SymbolRecord

42

43

44

# ---------------------------------------------------------------------------

45

# Helpers

46

# ---------------------------------------------------------------------------

def _rec(
    *,
    kind: str = "function",
    name: str = "my_func",
    qualified_name: str = "my_func",
    lineno: int = 10,
    end_lineno: int = 20,
    content_id: str = "abcdef1234567890" * 2,
    body_hash: str = "deadbeef1234" * 4,
    signature_id: str = "cafebabe5678" * 4,
    metadata_id: str = "",
    canonical_key: str = "",
) -> SymbolRecord:
    """Build a ``SymbolRecord`` fixture, overriding only the fields a test cares about.

    All parameters are keyword-only and forwarded unchanged to the
    ``SymbolRecord`` field of the same name; the defaults describe a
    function called ``my_func`` spanning lines 10-20.
    """
    record = SymbolRecord(
        kind=kind,
        name=name,
        qualified_name=qualified_name,
        lineno=lineno,
        end_lineno=end_lineno,
        content_id=content_id,
        body_hash=body_hash,
        signature_id=signature_id,
        metadata_id=metadata_id,
        canonical_key=canonical_key,
    )
    return record

def _match(
    query: str | list[str],
    file_path: str = "src/billing.py",
    kind: str = "function",
    name: str = "my_func",
    qualified_name: str = "my_func",
    lineno: int = 10,
) -> bool:
    """Parse *query* and evaluate it against one synthetic symbol.

    The symbol is produced by ``_rec`` from ``kind``, ``name``,
    ``qualified_name`` and ``lineno``; every other record field keeps the
    ``_rec`` default. Returns whatever the parsed predicate returns for
    ``(file_path, symbol)``.
    """
    symbol = _rec(
        kind=kind,
        name=name,
        qualified_name=qualified_name,
        lineno=lineno,
    )
    return parse_query(query)(file_path, symbol)

87

88

89

# ---------------------------------------------------------------------------

90

# Empty / match-all

91

# ---------------------------------------------------------------------------

class TestMatchAll:
    """Queries that contain no atoms must accept any symbol."""

    def test_empty_string_matches_everything(self) -> None:
        match_all = parse_query("")
        symbol = _rec()
        assert match_all("src/foo.py", symbol)

    def test_empty_list_matches_everything(self) -> None:
        match_all = parse_query([])
        symbol = _rec()
        assert match_all("src/foo.py", symbol)

    def test_whitespace_only_matches_everything(self) -> None:
        match_all = parse_query(" ")
        symbol = _rec()
        assert match_all("src/foo.py", symbol)

106

107

108

# ---------------------------------------------------------------------------

109

# Single atom — kind key

110

# ---------------------------------------------------------------------------

111

112

113

class TestKindPredicate:
    """Each string operator applied to the ``kind`` key."""

    def test_exact_match(self) -> None:
        hit = _match("kind=function", kind="function")
        assert hit

    def test_exact_match_no_hit(self) -> None:
        hit = _match("kind=class", kind="function")
        assert not hit

    def test_case_insensitive(self) -> None:
        # Query value differs from the stored value only by letter case.
        hit = _match("kind=Function", kind="function")
        assert hit

    def test_not_equal(self) -> None:
        differs = _match("kind!=class", kind="function")
        assert differs
        same = _match("kind!=function", kind="function")
        assert not same

    def test_contains(self) -> None:
        present = _match("kind~=unc", kind="function")
        assert present
        absent = _match("kind~=xyz", kind="function")
        assert not absent

    def test_starts_with(self) -> None:
        present = _match("kind^=func", kind="function")
        assert present
        absent = _match("kind^=class", kind="function")
        assert not absent

    def test_ends_with(self) -> None:
        present = _match("kind$=tion", kind="function")
        assert present
        absent = _match("kind$=ass", kind="function")
        assert not absent

138

139

140

# ---------------------------------------------------------------------------

141

# name key

142

# ---------------------------------------------------------------------------

143

144

145

class TestNamePredicate:
    """String operators applied to the ``name`` key.

    Every operator test pairs a positive assertion with a negative one so
    that a predicate which ignores the operator (or always matches) fails.
    """

    def test_exact(self) -> None:
        assert _match("name=compute_total", name="compute_total")
        assert not _match("name=compute_total", name="compute_invoice")

    def test_contains(self) -> None:
        assert _match("name~=total", name="compute_total")
        assert not _match("name~=invoice", name="compute_total")

    def test_starts_with(self) -> None:
        assert _match("name^=compute", name="compute_total")
        # "total" occurs in the name but not at the start; this tells
        # starts-with apart from contains.
        assert not _match("name^=total", name="compute_total")

    def test_ends_with(self) -> None:
        assert _match("name$=total", name="compute_total")
        # "compute" occurs in the name but not at the end.
        assert not _match("name$=compute", name="compute_total")

159

160

161

# ---------------------------------------------------------------------------

162

# qualified_name key

163

# ---------------------------------------------------------------------------

164

165

166

class TestQualifiedNamePredicate:
    """The ``qualified_name`` key, including values that contain a dot."""

    def test_dotted_name(self) -> None:
        stored = "Invoice.compute"
        assert _match("qualified_name=Invoice.compute", qualified_name=stored)
        assert not _match("qualified_name=Invoice.pay", qualified_name=stored)

    def test_contains(self) -> None:
        stored = "Invoice.compute"
        assert _match("qualified_name~=Invoice", qualified_name=stored)

173

174

175

# ---------------------------------------------------------------------------

176

# file key

177

# ---------------------------------------------------------------------------

178

179

180

class TestFilePredicate:
    """String operators applied to the ``file`` key (the file path argument).

    Every operator test pairs a positive assertion with a negative one so
    that a predicate which ignores the operator (or always matches) fails.
    """

    def test_exact(self) -> None:
        assert _match("file=src/billing.py", file_path="src/billing.py")
        assert not _match("file=src/utils.py", file_path="src/billing.py")

    def test_contains(self) -> None:
        assert _match("file~=billing", file_path="src/billing.py")
        assert not _match("file~=utils", file_path="src/billing.py")

    def test_starts_with(self) -> None:
        assert _match("file^=src/", file_path="src/billing.py")
        # "billing" occurs in the path but not at the start; this tells
        # starts-with apart from contains.
        assert not _match("file^=billing", file_path="src/billing.py")

    def test_ends_with(self) -> None:
        assert _match("file$=.py", file_path="src/billing.py")
        # "src/" occurs in the path but not at the end.
        assert not _match("file$=src/", file_path="src/billing.py")

193

194

195

# ---------------------------------------------------------------------------

196

# hash / body_hash / signature_id keys (prefix matching)

197

# ---------------------------------------------------------------------------

198

199

200

class TestHashPredicates:
    """Prefix matching for the ``hash``, ``body_hash`` and ``signature_id`` keys."""

    def test_content_id_prefix(self) -> None:
        symbol = _rec(content_id="abcdef" + "0" * 58)
        by_hash = parse_query("hash=abcde")
        assert by_hash("f.py", symbol)

    def test_content_id_prefix_no_match(self) -> None:
        symbol = _rec(content_id="abcdef" + "0" * 58)
        by_hash = parse_query("hash=xyz")
        assert not by_hash("f.py", symbol)

    def test_body_hash_prefix(self) -> None:
        symbol = _rec(body_hash="deadbeef" + "0" * 56)
        by_body = parse_query("body_hash=deadbe")
        assert by_body("f.py", symbol)

    def test_signature_id_prefix(self) -> None:
        symbol = _rec(signature_id="cafebabe" + "0" * 56)
        by_signature = parse_query("signature_id=cafeba")
        assert by_signature("f.py", symbol)

    def test_hash_prefix_case_sensitive_match(self) -> None:
        # Same-case stored hash and query prefix: expected to match.
        lower_symbol = _rec(content_id="abcdef" + "0" * 58)
        by_hash = parse_query("hash=abcdef")
        assert by_hash("f.py", lower_symbol)

        # The stored hash begins with "ABCDEF" while the query prefix is
        # "abcdef". Hash keys are compared without case normalization
        # (by design), so this must not match.
        upper_symbol = _rec(content_id="ABCDEF" + "0" * 58)
        by_lower_hash = parse_query("hash=abcdef")
        assert not by_lower_hash("f.py", upper_symbol)

232

233

234

# ---------------------------------------------------------------------------

235

# lineno_gt / lineno_lt

236

# ---------------------------------------------------------------------------

237

238

239

class TestLinenoPredicates:
    """Strict-inequality semantics and value validation for lineno_gt / lineno_lt."""

    def test_lineno_gt_pass(self) -> None:
        above = _match("lineno_gt=5", lineno=10)
        assert above

    def test_lineno_gt_boundary(self) -> None:
        # The comparison is strict: a symbol on line 10 is not "> 10".
        at_bound = _match("lineno_gt=10", lineno=10)
        assert not at_bound
        past_bound = _match("lineno_gt=9", lineno=10)
        assert past_bound

    def test_lineno_lt_pass(self) -> None:
        below = _match("lineno_lt=20", lineno=10)
        assert below

    def test_lineno_lt_boundary(self) -> None:
        # The comparison is strict: a symbol on line 10 is not "< 10".
        at_bound = _match("lineno_lt=10", lineno=10)
        assert not at_bound
        before_bound = _match("lineno_lt=11", lineno=10)
        assert before_bound

    def test_lineno_gt_bad_value(self) -> None:
        bad_query = "lineno_gt=abc"
        with pytest.raises(PredicateError, match="integer"):
            parse_query(bad_query)

    def test_lineno_lt_bad_value(self) -> None:
        bad_query = "lineno_lt=abc"
        with pytest.raises(PredicateError, match="integer"):
            parse_query(bad_query)

262

263

264

# ---------------------------------------------------------------------------

265

# language key

266

# ---------------------------------------------------------------------------

267

268

269

class TestLanguagePredicate:
    """The ``language`` key is evaluated against the file path passed to the predicate."""

    def test_python_by_extension(self) -> None:
        is_python = parse_query("language=Python")
        symbol = _rec()
        assert is_python("src/billing.py", symbol)
        other = _rec()
        assert not is_python("src/billing.go", other)

    def test_go_by_extension(self) -> None:
        is_go = parse_query("language=Go")
        symbol = _rec()
        assert is_go("cmd/main.go", symbol)
        other = _rec()
        assert not is_go("cmd/main.py", other)

    def test_typescript(self) -> None:
        is_typescript = parse_query("language=TypeScript")
        symbol = _rec()
        assert is_typescript("src/index.ts", symbol)

    def test_rust(self) -> None:
        is_rust = parse_query("language=Rust")
        symbol = _rec()
        assert is_rust("src/main.rs", symbol)

287

288

289

# ---------------------------------------------------------------------------

290

# Compound: AND (implicit)

291

# ---------------------------------------------------------------------------

292

293

294

class TestImplicitAnd:
    """Adjacent atoms (and the explicit ``AND`` keyword) must all hold."""

    def test_two_atoms_both_match(self) -> None:
        query = "kind=function name=compute_total"
        assert _match(query, kind="function", name="compute_total")

    def test_two_atoms_first_no_match(self) -> None:
        query = "kind=class name=compute_total"
        assert not _match(query, kind="function", name="compute_total")

    def test_two_atoms_second_no_match(self) -> None:
        query = "kind=function name=invoice"
        assert not _match(query, kind="function", name="compute_total")

    def test_three_atoms(self) -> None:
        query = "kind=function name~=compute file~=billing"
        outcome = _match(
            query,
            kind="function",
            name="compute_total",
            file_path="src/billing.py",
        )
        assert outcome

    def test_explicit_and_keyword(self) -> None:
        query = "kind=function AND name=compute_total"
        assert _match(query, kind="function", name="compute_total")

314

315

316

# ---------------------------------------------------------------------------

317

# Compound: OR

318

# ---------------------------------------------------------------------------

class TestOr:
    """Explicit ``OR`` between atoms."""

    def test_or_first_matches(self) -> None:
        either = "kind=function OR kind=class"
        assert _match(either, kind="function")

    def test_or_second_matches(self) -> None:
        either = "kind=function OR kind=class"
        assert _match(either, kind="class")

    def test_or_neither_matches(self) -> None:
        either = "kind=function OR kind=class"
        assert not _match(either, kind="method")

    def test_or_with_three_alternatives(self) -> None:
        any_of_three = parse_query("kind=function OR kind=class OR kind=method")
        for accepted in ("function", "class", "method"):
            assert any_of_three("f.py", _rec(kind=accepted))
        assert not any_of_three("f.py", _rec(kind="variable"))

    def test_or_in_list_mode(self) -> None:
        # A single list element may itself carry an OR expression.
        either = parse_query(["kind=function OR kind=class"])
        assert either("f.py", _rec(kind="class"))

342

343

344

# ---------------------------------------------------------------------------

345

# Compound: NOT

346

# ---------------------------------------------------------------------------

class TestNot:
    """``NOT`` applied to a single atom, combined with AND, and applied to a group."""

    def test_not_inverts_match(self) -> None:
        negated = "NOT kind=function"
        assert not _match(negated, kind="function")
        assert _match(negated, kind="class")

    def test_not_with_and(self) -> None:
        not_import_and_billing = parse_query("NOT kind=import name~=billing")

        # Not an import, and the name contains "billing": both halves hold.
        accepted = _rec(kind="function", name="billing_util")
        assert not_import_and_billing("f.py", accepted)

        # An import is rejected by the NOT half.
        is_import = _rec(kind="import", name="billing_util")
        assert not not_import_and_billing("f.py", is_import)

        # A name without "billing" is rejected by the AND half.
        wrong_name = _rec(kind="function", name="compute")
        assert not not_import_and_billing("f.py", wrong_name)

    def test_not_with_parenthesised_group(self) -> None:
        # NOT in front of a parenthesised predicate.
        not_import = parse_query("NOT (kind=import)")
        assert not_import("f.py", _rec(kind="function"))
        assert not not_import("f.py", _rec(kind="import"))

368

369

370

# ---------------------------------------------------------------------------

371

# Parentheses / grouping

372

# ---------------------------------------------------------------------------

373

374

375

class TestParentheses:
    """Grouping with parentheses, including nesting and negated groups."""

    def test_parenthesised_or(self) -> None:
        private_callable = parse_query("(kind=function OR kind=method) name^=_")

        # Underscore-prefixed function and method are both accepted.
        assert private_callable("f.py", _rec(kind="function", name="_private"))
        assert private_callable("f.py", _rec(kind="method", name="_helper"))

        # Underscore-prefixed class: rejected because the kind group fails.
        assert not private_callable("f.py", _rec(kind="class", name="_Base"))

        # Function without the underscore prefix: rejected by the name atom.
        assert not private_callable("f.py", _rec(kind="function", name="public_func"))

    def test_nested_parens(self) -> None:
        in_billing = parse_query("((kind=function OR kind=class) AND file~=billing)")
        for accepted in ("function", "class"):
            assert in_billing("src/billing.py", _rec(kind=accepted))
        assert not in_billing("src/utils.py", _rec(kind="function"))

    def test_not_parenthesised_group(self) -> None:
        neither = parse_query("NOT (kind=function OR kind=class)")
        assert neither("f.py", _rec(kind="method"))
        assert not neither("f.py", _rec(kind="function"))

397

398

399

# ---------------------------------------------------------------------------

400

# parse_query list mode

401

# ---------------------------------------------------------------------------

402

403

404

class TestParseQueryListMode:
    """``parse_query`` called with a list of strings instead of a single string."""

    def test_single_atom_list(self) -> None:
        only_functions = parse_query(["kind=function"])
        assert only_functions("f.py", _rec(kind="function"))
        assert not only_functions("f.py", _rec(kind="class"))

    def test_multi_atom_list_implicit_and(self) -> None:
        # Separate list elements are combined with AND.
        compute_functions = parse_query(["kind=function", "name~=compute"])
        matching = _rec(kind="function", name="compute_total")
        assert compute_functions("f.py", matching)
        wrong_kind = _rec(kind="class", name="compute_total")
        assert not compute_functions("f.py", wrong_kind)

    def test_atom_with_or_in_list(self) -> None:
        callable_kinds = parse_query(["kind=function OR kind=method"])
        assert callable_kinds("f.py", _rec(kind="method"))

418

419

420

# ---------------------------------------------------------------------------

421

# Error cases

422

# ---------------------------------------------------------------------------

class TestErrors:
    """Malformed queries raise ``PredicateError``; also covers quoted values."""

    def test_unknown_key(self) -> None:
        query = "colour=red"
        with pytest.raises(PredicateError, match="Unknown predicate key"):
            parse_query(query)

    def test_missing_operator(self) -> None:
        query = "kind function"  # no operator
        with pytest.raises(PredicateError):
            parse_query(query)

    def test_unclosed_paren(self) -> None:
        query = "(kind=function"
        with pytest.raises(PredicateError):
            parse_query(query)

    def test_unexpected_close_paren(self) -> None:
        query = "kind=function)"
        with pytest.raises(PredicateError):
            parse_query(query)

    def test_trailing_garbage(self) -> None:
        # A valid atom followed by a stray token.
        query = "kind=function )"
        with pytest.raises(PredicateError):
            parse_query(query)

    def test_empty_not(self) -> None:
        query = "NOT"
        with pytest.raises(PredicateError):
            parse_query(query)

    def test_double_quoted_value(self) -> None:
        # The surrounding double quotes are not part of the compared value.
        quoted = parse_query('name="compute total"')
        assert quoted("f.py", _rec(name="compute total"))

    def test_or_without_rhs(self) -> None:
        query = "kind=function OR"
        with pytest.raises(PredicateError):
            parse_query(query)