cgcardona / muse public
typing_audit.py python
356 lines 13.4 KB
cf6e72f3 Add tools/typing_audit.py — regex + AST violation scanner Gabriel Cardona <gabriel@tellurstori.com> 3d ago
1 #!/usr/bin/env python3
2 """Typing audit — find and count all banned type patterns in the codebase.
3
4 Checks every rule from .cursorrules and AGENTS.md:
5 - Any as a type (param, return, collection value)
6 - object as a type (same severity as Any)
7 - cast() usage (all usage banned — fix the callee)
8 - # type: ignore (blanket and specific)
9 - Bare collections (list, dict, set, tuple without type parameters)
10 - Optional[X] (use X | None instead)
11 - Legacy typing imports (List, Dict, Set, Tuple — use lowercase builtins)
12
13 Outputs JSON (machine-readable) + a human summary to stdout.
14
15 Usage:
16 python tools/typing_audit.py # muse/ + tests/
17 python tools/typing_audit.py --json artifacts/typing_audit.json
18 python tools/typing_audit.py --dirs muse/ tests/
19 python tools/typing_audit.py --dirs muse/ --max-any 0
20 """
21
22 from __future__ import annotations
23
24 import argparse
25 import ast
26 import json
27 import re
28 import sys
29 from collections import defaultdict
30 from pathlib import Path
31 from typing import Any
32
33
34 # ── Pattern matchers ──────────────────────────────────────────────────────────
35 # Grouped by category. Every key contributes to the total violation count.
36
37 _PATTERNS: dict[str, re.Pattern[str]] = {
38 # ── Any-as-type ──────────────────────────────────────────────────────
39 "dict_str_any": re.compile(
40 r"\bdict\[str,\s*Any\]|\bDict\[str,\s*Any\]", re.IGNORECASE
41 ),
42 "list_any": re.compile(r"\blist\[Any\]|\bList\[Any\]", re.IGNORECASE),
43 "return_any": re.compile(r"->\s*Any\b"),
44 "param_any": re.compile(r":\s*Any\b"),
45 "mapping_any": re.compile(
46 r"\bMapping\[str,\s*Any\]", re.IGNORECASE
47 ),
48 "optional_any": re.compile(r"\bOptional\[Any\]", re.IGNORECASE),
49 "sequence_any": re.compile(
50 r"\bSequence\[Any\]|\bIterable\[Any\]", re.IGNORECASE
51 ),
52 "tuple_any": re.compile(r"\btuple\[.*Any.*\]|\bTuple\[.*Any.*\]"),
53
54 # ── object-as-type (same severity as Any) ────────────────────────────
55 "param_object": re.compile(r":\s*object\b"),
56 "return_object": re.compile(r"->\s*object\b"),
57 "collection_object": re.compile(
58 r"\b(?:dict|list|set|tuple|Sequence|Mapping)\[[^]]*\bobject\b"
59 ),
60
61 # ── cast() — all usage banned ────────────────────────────────────────
62 "cast_usage": re.compile(r"\bcast\("),
63
64 # ── type: ignore — suppresses real errors ────────────────────────────
65 "type_ignore": re.compile(r"#\s*type:\s*ignore"),
66
67 # ── Bare collections (no type parameters) ────────────────────────────
68 # Negative lookaheads: exclude parameterized [, constructor calls (,
69 # and prose patterns (": list of items" in docstrings).
70 "bare_list": re.compile(r"(?::\s*|->\s*)list\b(?!\[|\(|\s+[a-z])"),
71 "bare_dict": re.compile(r"(?::\s*|->\s*)dict\b(?!\[|\(|\s+[a-z])"),
72 "bare_set": re.compile(r"(?::\s*|->\s*)set\b(?!\[|\(|\s+[a-z])"),
73 "bare_tuple": re.compile(r"(?::\s*|->\s*)tuple\b(?!\[|\(|\s+[a-z])"),
74
75 # ── Optional[X] — use X | None instead ───────────────────────────────
76 # Excludes Optional[Any] which is already caught by optional_any.
77 "optional_usage": re.compile(r"\bOptional\[(?!Any\b)"),
78
79 # ── Legacy typing imports (use lowercase builtins) ───────────────────
80 "legacy_List": re.compile(r"\bList\["),
81 "legacy_Dict": re.compile(r"\bDict\["),
82 "legacy_Set": re.compile(r"\bSet\["),
83 "legacy_Tuple": re.compile(r"\bTuple\["),
84 }
85
86
87 def _count_pattern_in_line(line: str, pattern: re.Pattern[str]) -> int:
88 return len(pattern.findall(line))
89
90
91 def _imports_any(source: str) -> bool:
92 """Check if file imports Any from typing."""
93 return bool(re.search(r"from\s+typing\s+import\s+.*\bAny\b", source))
94
95
96 def _classify_type_ignores(line: str) -> str:
97 """Return the ignore variant (blanket vs specific)."""
98 m = re.search(r"#\s*type:\s*ignore\[([^\]]+)\]", line)
99 if m:
100 return f"type_ignore[{m.group(1)}]"
101 return "type_ignore[blanket]"
102
103
104 # ── AST-based detection ──────────────────────────────────────────────────────
105
106
107 def _find_untyped_defs(source: str, filepath: str) -> list[dict[str, Any]]:
108 """Find function defs missing return type or param annotations."""
109 results: list[dict[str, Any]] = []
110 try:
111 tree = ast.parse(source)
112 except SyntaxError:
113 return results
114
115 for node in ast.walk(tree):
116 if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
117 if node.returns is None:
118 results.append({
119 "file": filepath,
120 "line": node.lineno,
121 "name": node.name,
122 "issue": "missing_return_type",
123 })
124 for arg in node.args.args + node.args.kwonlyargs:
125 if arg.annotation is None and arg.arg != "self" and arg.arg != "cls":
126 results.append({
127 "file": filepath,
128 "line": node.lineno,
129 "name": f"{node.name}.{arg.arg}",
130 "issue": "missing_param_type",
131 })
132 return results
133
134
135 # ── File scanner ──────────────────────────────────────────────────────────────
136
137
def scan_file(filepath: Path) -> dict[str, Any]:
    """Scan a single Python file for typing violations."""
    try:
        source = filepath.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        # Unreadable/binary files: empty dict signals "skip" to the caller.
        return {}

    result: dict[str, Any] = {
        "file": str(filepath),
        "imports_any": _imports_any(source),
        "patterns": defaultdict(int),
        "pattern_lines": defaultdict(list),
        "type_ignore_variants": defaultdict(int),
        "untyped_defs": [],
    }

    for lineno, raw in enumerate(source.splitlines(), 1):
        text = raw.strip()
        # Whole-line comments are skipped so prose that merely mentions a
        # banned pattern is not counted as a violation.
        if not text or text.startswith("#"):
            continue

        for name, pattern in _PATTERNS.items():
            hits = _count_pattern_in_line(raw, pattern)
            if not hits:
                continue
            result["patterns"][name] += hits
            result["pattern_lines"][name].append(lineno)
            # Break the ignore count down into blanket vs [code] variants.
            if name == "type_ignore":
                result["type_ignore_variants"][_classify_type_ignores(raw)] += 1

    result["untyped_defs"] = _find_untyped_defs(source, str(filepath))
    return result
172
173
def scan_directory(directory: Path) -> list[dict[str, Any]]:
    """Scan all Python files in a directory tree."""
    # Skip virtualenvs, bytecode caches, and git internals anywhere
    # along the file's path.
    excluded = {"venv", "__pycache__", ".git"}
    results: list[dict[str, Any]] = []
    for py_file in sorted(directory.rglob("*.py")):
        if excluded.intersection(py_file.parts):
            continue
        scanned = scan_file(py_file)
        if scanned:
            results.append(scanned)
    return results
186
187
188 # ── Report generation ─────────────────────────────────────────────────────────
189
190 # Display order: group patterns into logical categories for the report.
191 _CATEGORY_ORDER: list[tuple[str, list[str]]] = [
192 ("Any-as-type", [
193 "dict_str_any", "list_any", "return_any", "param_any",
194 "mapping_any", "optional_any", "sequence_any", "tuple_any",
195 ]),
196 ("object-as-type", [
197 "param_object", "return_object", "collection_object",
198 ]),
199 ("cast() usage", ["cast_usage"]),
200 ("type: ignore", ["type_ignore"]),
201 ("Bare collections", [
202 "bare_list", "bare_dict", "bare_set", "bare_tuple",
203 ]),
204 ("Optional (use X | None)", ["optional_usage"]),
205 ("Legacy typing imports", [
206 "legacy_List", "legacy_Dict", "legacy_Set", "legacy_Tuple",
207 ]),
208 ]
209
210
def generate_report(results: list[dict[str, Any]]) -> dict[str, Any]:
    """Generate aggregate report from scan results."""
    pattern_totals: dict[str, int] = defaultdict(int)
    ignore_variants: dict[str, int] = defaultdict(int)
    per_file: dict[str, dict[str, int]] = {}
    offenders: list[dict[str, Any]] = []
    untyped: list[dict[str, Any]] = []
    any_import_count = 0

    for entry in results:
        path = entry["file"]
        if entry.get("imports_any"):
            any_import_count += 1

        # Per-file pattern counts roll up into the global totals.
        counts: dict[str, int] = dict(entry.get("patterns", {}))
        for name, n in counts.items():
            pattern_totals[name] += n
        violation_count = sum(counts.values())
        if violation_count:
            per_file[path] = counts
            offenders.append(
                {"file": path, "total": violation_count, "patterns": counts}
            )

        for variant, n in entry.get("type_ignore_variants", {}).items():
            ignore_variants[variant] += n
        untyped.extend(entry.get("untyped_defs", []))

    # Worst files first.
    offenders.sort(key=lambda o: o["total"], reverse=True)

    return {
        "summary": {
            "total_files_scanned": len(results),
            "files_importing_any": any_import_count,
            "total_any_patterns": sum(pattern_totals.values()),
            "untyped_defs": len(untyped),
        },
        "pattern_totals": dict(pattern_totals),
        "type_ignore_variants": dict(ignore_variants),
        # Cap the noisier lists so the JSON stays readable.
        "top_offenders": offenders[:30],
        "per_file": per_file,
        "untyped_defs": untyped[:50],
    }
256
257
def print_human_summary(report: dict[str, Any]) -> None:
    """Print a human-readable summary."""
    summary = report["summary"]
    totals = report["pattern_totals"]
    bar = "=" * 70

    print("\n" + bar)
    print(" TYPING AUDIT — Violation Report")
    print(bar)
    print(f" Files scanned: {summary['total_files_scanned']}")
    print(f" Files importing Any: {summary['files_importing_any']}")
    print(f" Total violations: {summary['total_any_patterns']}")
    print(f" Untyped defs: {summary['untyped_defs']}")
    print()

    # One section per category; categories with zero hits are omitted.
    for category, names in _CATEGORY_ORDER:
        section = [(p, totals.get(p, 0)) for p in names]
        if sum(n for _, n in section) == 0:
            continue
        print(f" {category}:")
        for p, count in section:
            if count > 0:
                print(f" {p:30s} {count:5d}")
        print()

    # Completely clean run: say so explicitly instead of printing nothing.
    if not any(totals.get(p, 0) for _, pats in _CATEGORY_ORDER for p in pats):
        print(" Pattern breakdown: (none)")
        print()

    variants = report["type_ignore_variants"]
    if variants:
        print(" # type: ignore variants:")
        for variant, count in sorted(variants.items(), key=lambda kv: -kv[1]):
            print(f" {variant:40s} {count:5d}")
        print()
    print(" Top 15 offenders:")
    for entry in report["top_offenders"][:15]:
        print(f" {entry['total']:4d} {entry['file']}")
    print(bar + "\n")
295
296
297 # ── CLI ───────────────────────────────────────────────────────────────────────
298
299
def main() -> None:
    """CLI entry point: scan, report, optionally write JSON, enforce ratchet."""
    parser = argparse.ArgumentParser(
        description="Audit typing violations: Any, object, cast, bare collections, "
        "Optional, legacy imports, type: ignore, untyped defs",
    )
    parser.add_argument(
        "--dirs",
        nargs="+",
        default=["muse/", "tests/"],
        help="Directories to scan",
    )
    parser.add_argument("--json", type=str, help="Write JSON report to file")
    parser.add_argument(
        "--max-any",
        type=int,
        default=None,
        help="Fail (exit 1) if total violations exceed this threshold",
    )
    args = parser.parse_args()

    # Gather results from every requested directory that exists.
    scan_results: list[dict[str, Any]] = []
    for dirname in args.dirs:
        root = Path(dirname)
        if not root.exists():
            print(f"WARNING: {dirname} does not exist, skipping", file=sys.stderr)
            continue
        scan_results.extend(scan_directory(root))

    report = generate_report(scan_results)
    print_human_summary(report)

    if args.json:
        out_path = Path(args.json)
        out_path.parent.mkdir(parents=True, exist_ok=True)
        # default=str so any non-JSON-native values don't crash the dump.
        out_path.write_text(
            json.dumps(report, indent=2, default=str),
            encoding="utf-8",
        )
        print(f" JSON report written to {args.json}")

    # Ratchet: non-zero exit lets CI fail the build on regressions.
    if args.max_any is not None:
        total = report["summary"]["total_any_patterns"]
        if total > args.max_any:
            print(
                f"\n❌ RATCHET FAILED: {total} violations exceed "
                f"threshold of {args.max_any}",
                file=sys.stderr,
            )
            sys.exit(1)
        else:
            print(
                f"\n✅ RATCHET OK: {total} violations within "
                f"threshold of {args.max_any}",
            )
353
354
# Script entry point: run the audit when invoked directly.
if __name__ == "__main__":
    main()