cgcardona / muse public
typing_audit.py python
354 lines 13.4 KB
e6786943 feat: upgrade to Python 3.14, drop from __future__ import annotations Gabriel Cardona <cgcardona@gmail.com> 1d ago
1 #!/usr/bin/env python3
2 """Typing audit — find and count all banned type patterns in the codebase.
3
4 Checks every rule from .cursorrules and AGENTS.md:
5 - Any as a type (param, return, collection value)
6 - object as a type (same severity as Any)
7 - cast() usage (all usage banned — fix the callee)
8 - # type: ignore (blanket and specific)
9 - Bare collections (list, dict, set, tuple without type parameters)
10 - Optional[X] (use X | None instead)
11 - Legacy typing imports (List, Dict, Set, Tuple — use lowercase builtins)
12
13 Outputs JSON (machine-readable) + a human summary to stdout.
14
15 Usage:
16 python tools/typing_audit.py # muse/ + tests/
17 python tools/typing_audit.py --json artifacts/typing_audit.json
18 python tools/typing_audit.py --dirs muse/ tests/
19 python tools/typing_audit.py --dirs muse/ --max-any 0
20 """
21
22 import argparse
23 import ast
24 import json
25 import re
26 import sys
27 from collections import defaultdict
28 from pathlib import Path
29 from typing import Any
30
31
32 # ── Pattern matchers ──────────────────────────────────────────────────────────
33 # Grouped by category. Every key contributes to the total violation count.
34
35 _PATTERNS: dict[str, re.Pattern[str]] = {
36 # ── Any-as-type ──────────────────────────────────────────────────────
37 "dict_str_any": re.compile(
38 r"\bdict\[str,\s*Any\]|\bDict\[str,\s*Any\]", re.IGNORECASE
39 ),
40 "list_any": re.compile(r"\blist\[Any\]|\bList\[Any\]", re.IGNORECASE),
41 "return_any": re.compile(r"->\s*Any\b"),
42 "param_any": re.compile(r":\s*Any\b"),
43 "mapping_any": re.compile(
44 r"\bMapping\[str,\s*Any\]", re.IGNORECASE
45 ),
46 "optional_any": re.compile(r"\bOptional\[Any\]", re.IGNORECASE),
47 "sequence_any": re.compile(
48 r"\bSequence\[Any\]|\bIterable\[Any\]", re.IGNORECASE
49 ),
50 "tuple_any": re.compile(r"\btuple\[.*Any.*\]|\bTuple\[.*Any.*\]"),
51
52 # ── object-as-type (same severity as Any) ────────────────────────────
53 "param_object": re.compile(r":\s*object\b"),
54 "return_object": re.compile(r"->\s*object\b"),
55 "collection_object": re.compile(
56 r"\b(?:dict|list|set|tuple|Sequence|Mapping)\[[^]]*\bobject\b"
57 ),
58
59 # ── cast() — all usage banned ────────────────────────────────────────
60 "cast_usage": re.compile(r"\bcast\("),
61
62 # ── type: ignore — suppresses real errors ────────────────────────────
63 "type_ignore": re.compile(r"#\s*type:\s*ignore"),
64
65 # ── Bare collections (no type parameters) ────────────────────────────
66 # Negative lookaheads: exclude parameterized [, constructor calls (,
67 # and prose patterns (": list of items" in docstrings).
68 "bare_list": re.compile(r"(?::\s*|->\s*)list\b(?!\[|\(|\s+[a-z])"),
69 "bare_dict": re.compile(r"(?::\s*|->\s*)dict\b(?!\[|\(|\s+[a-z])"),
70 "bare_set": re.compile(r"(?::\s*|->\s*)set\b(?!\[|\(|\s+[a-z])"),
71 "bare_tuple": re.compile(r"(?::\s*|->\s*)tuple\b(?!\[|\(|\s+[a-z])"),
72
73 # ── Optional[X] — use X | None instead ───────────────────────────────
74 # Excludes Optional[Any] which is already caught by optional_any.
75 "optional_usage": re.compile(r"\bOptional\[(?!Any\b)"),
76
77 # ── Legacy typing imports (use lowercase builtins) ───────────────────
78 "legacy_List": re.compile(r"\bList\["),
79 "legacy_Dict": re.compile(r"\bDict\["),
80 "legacy_Set": re.compile(r"\bSet\["),
81 "legacy_Tuple": re.compile(r"\bTuple\["),
82 }
83
84
85 def _count_pattern_in_line(line: str, pattern: re.Pattern[str]) -> int:
86 return len(pattern.findall(line))
87
88
89 def _imports_any(source: str) -> bool:
90 """Check if file imports Any from typing."""
91 return bool(re.search(r"from\s+typing\s+import\s+.*\bAny\b", source))
92
93
94 def _classify_type_ignores(line: str) -> str:
95 """Return the ignore variant (blanket vs specific)."""
96 m = re.search(r"#\s*type:\s*ignore\[([^\]]+)\]", line)
97 if m:
98 return f"type_ignore[{m.group(1)}]"
99 return "type_ignore[blanket]"
100
101
102 # ── AST-based detection ──────────────────────────────────────────────────────
103
104
105 def _find_untyped_defs(source: str, filepath: str) -> list[dict[str, Any]]:
106 """Find function defs missing return type or param annotations."""
107 results: list[dict[str, Any]] = []
108 try:
109 tree = ast.parse(source)
110 except SyntaxError:
111 return results
112
113 for node in ast.walk(tree):
114 if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
115 if node.returns is None:
116 results.append({
117 "file": filepath,
118 "line": node.lineno,
119 "name": node.name,
120 "issue": "missing_return_type",
121 })
122 for arg in node.args.args + node.args.kwonlyargs:
123 if arg.annotation is None and arg.arg != "self" and arg.arg != "cls":
124 results.append({
125 "file": filepath,
126 "line": node.lineno,
127 "name": f"{node.name}.{arg.arg}",
128 "issue": "missing_param_type",
129 })
130 return results
131
132
133 # ── File scanner ──────────────────────────────────────────────────────────────
134
135
def scan_file(filepath: Path) -> dict[str, Any]:
    """Scan a single Python file for typing violations.

    Returns a result dict with per-pattern counts, the line numbers where
    each pattern fired, ``# type: ignore`` variant tallies, and untyped
    defs. Returns ``{}`` if the file cannot be read or decoded.
    """
    try:
        source = filepath.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        return {}

    lines = source.splitlines()
    result: dict[str, Any] = {
        "file": str(filepath),
        "imports_any": _imports_any(source),
        "patterns": defaultdict(int),
        "pattern_lines": defaultdict(list),
        "type_ignore_variants": defaultdict(int),
        "untyped_defs": [],
    }

    for lineno, line in enumerate(lines, 1):
        stripped = line.strip()
        if not stripped:
            continue
        # Comment-only lines are skipped for most patterns (prose mentions of
        # e.g. "dict[str, Any]" are not violations), but a standalone
        # "# type: ignore" comment is still meaningful (at module top it
        # suppresses checking for the whole file), so that pattern is
        # scanned on every non-blank line. Previously comment lines were
        # skipped wholesale and such ignores went uncounted.
        is_comment = stripped.startswith("#")

        for name, pattern in _PATTERNS.items():
            if is_comment and name != "type_ignore":
                continue
            count = _count_pattern_in_line(line, pattern)
            if count > 0:
                result["patterns"][name] += count
                result["pattern_lines"][name].append(lineno)

                if name == "type_ignore":
                    variant = _classify_type_ignores(line)
                    result["type_ignore_variants"][variant] += 1

    result["untyped_defs"] = _find_untyped_defs(source, str(filepath))
    return result
170
171
def scan_directory(directory: Path) -> list[dict[str, Any]]:
    """Scan all Python files in a directory tree.

    Skips anything under venv/, __pycache__/ or .git/; unreadable files
    (for which scan_file returns {}) are dropped from the results.
    """
    skip_parts = {"venv", "__pycache__", ".git"}
    scanned: list[dict[str, Any]] = []
    for path in sorted(directory.rglob("*.py")):
        if skip_parts.intersection(path.parts):
            continue
        report = scan_file(path)
        if report:
            scanned.append(report)
    return scanned
184
185
186 # ── Report generation ─────────────────────────────────────────────────────────
187
# Display order: group patterns into logical categories for the report.
# Each tuple is (human-readable category label, keys into _PATTERNS).
# print_human_summary() sums each category from these keys; a _PATTERNS key
# missing from every category would still count toward the grand total but
# never appear in the per-category breakdown, so keep the two in sync.
_CATEGORY_ORDER: list[tuple[str, list[str]]] = [
    ("Any-as-type", [
        "dict_str_any", "list_any", "return_any", "param_any",
        "mapping_any", "optional_any", "sequence_any", "tuple_any",
    ]),
    ("object-as-type", [
        "param_object", "return_object", "collection_object",
    ]),
    ("cast() usage", ["cast_usage"]),
    ("type: ignore", ["type_ignore"]),
    ("Bare collections", [
        "bare_list", "bare_dict", "bare_set", "bare_tuple",
    ]),
    ("Optional (use X | None)", ["optional_usage"]),
    ("Legacy typing imports", [
        "legacy_List", "legacy_Dict", "legacy_Set", "legacy_Tuple",
    ]),
]
207
208
def generate_report(results: list[dict[str, Any]]) -> dict[str, Any]:
    """Aggregate per-file scan results into a single report dict.

    Produces the summary block, per-pattern totals, type-ignore variant
    tallies, a violation-sorted offender list (top 30), per-file pattern
    counts, and the first 50 untyped defs.
    """
    pattern_totals: dict[str, int] = defaultdict(int)
    ignore_variants: dict[str, int] = defaultdict(int)
    per_file: dict[str, dict[str, int]] = {}
    offenders: list[dict[str, Any]] = []
    untyped: list[dict[str, Any]] = []
    any_importers = 0

    for entry in results:
        path = entry["file"]
        if entry.get("imports_any"):
            any_importers += 1

        counts = dict(entry.get("patterns", {}))
        for name, n in counts.items():
            pattern_totals[name] += n
        subtotal = sum(counts.values())

        if subtotal > 0:
            per_file[path] = counts
            offenders.append({"file": path, "total": subtotal, "patterns": counts})

        for variant, n in entry.get("type_ignore_variants", {}).items():
            ignore_variants[variant] += n

        untyped.extend(entry.get("untyped_defs", []))

    # Worst files first; Python's sort is stable so ties keep scan order.
    offenders.sort(key=lambda o: o["total"], reverse=True)

    return {
        "summary": {
            "total_files_scanned": len(results),
            "files_importing_any": any_importers,
            "total_any_patterns": sum(pattern_totals.values()),
            "untyped_defs": len(untyped),
        },
        "pattern_totals": dict(pattern_totals),
        "type_ignore_variants": dict(ignore_variants),
        "top_offenders": offenders[:30],
        "per_file": per_file,
        "untyped_defs": untyped[:50],
    }
254
255
def print_human_summary(report: dict[str, Any]) -> None:
    """Print a human-readable summary."""
    summary = report["summary"]
    pattern_totals = report["pattern_totals"]
    divider = "=" * 70

    print("\n" + divider)
    print(" TYPING AUDIT — Violation Report")
    print(divider)
    print(f" Files scanned: {summary['total_files_scanned']}")
    print(f" Files importing Any: {summary['files_importing_any']}")
    print(f" Total violations: {summary['total_any_patterns']}")
    print(f" Untyped defs: {summary['untyped_defs']}")
    print()

    # Per-category breakdown; categories with zero hits are omitted.
    for category, pattern_names in _CATEGORY_ORDER:
        if sum(pattern_totals.get(name, 0) for name in pattern_names) == 0:
            continue
        print(f" {category}:")
        for name in pattern_names:
            hits = pattern_totals.get(name, 0)
            if hits > 0:
                print(f" {name:30s} {hits:5d}")
        print()

    grand_total = sum(
        pattern_totals.get(name, 0)
        for _, names in _CATEGORY_ORDER
        for name in names
    )
    if grand_total == 0:
        print(" Pattern breakdown: (none)")
        print()

    variants = report["type_ignore_variants"]
    if variants:
        print(" # type: ignore variants:")
        for variant, hits in sorted(variants.items(), key=lambda kv: kv[1], reverse=True):
            print(f" {variant:40s} {hits:5d}")
        print()
    print(" Top 15 offenders:")
    for offender in report["top_offenders"][:15]:
        print(f" {offender['total']:4d} {offender['file']}")
    print(divider + "\n")
293
294
295 # ── CLI ───────────────────────────────────────────────────────────────────────
296
297
def main() -> None:
    """CLI entry point: scan, report, optionally write JSON and enforce a ratchet.

    Exits with status 1 when --max-any is set and the violation total
    exceeds it.
    """
    parser = argparse.ArgumentParser(
        description="Audit typing violations: Any, object, cast, bare collections, "
        "Optional, legacy imports, type: ignore, untyped defs",
    )
    parser.add_argument(
        "--dirs",
        nargs="+",
        default=["muse/", "tests/"],
        help="Directories to scan",
    )
    parser.add_argument("--json", type=str, help="Write JSON report to file")
    parser.add_argument(
        "--max-any",
        type=int,
        default=None,
        help="Fail (exit 1) if total violations exceed this threshold",
    )
    options = parser.parse_args()

    scan_results: list[dict[str, Any]] = []
    for raw_dir in options.dirs:
        root = Path(raw_dir)
        if not root.exists():
            print(f"WARNING: {raw_dir} does not exist, skipping", file=sys.stderr)
            continue
        scan_results.extend(scan_directory(root))

    report = generate_report(scan_results)
    print_human_summary(report)

    if options.json:
        out_path = Path(options.json)
        out_path.parent.mkdir(parents=True, exist_ok=True)
        out_path.write_text(
            json.dumps(report, indent=2, default=str),
            encoding="utf-8",
        )
        print(f" JSON report written to {options.json}")

    if options.max_any is None:
        return
    total = report["summary"]["total_any_patterns"]
    if total > options.max_any:
        print(
            f"\n❌ RATCHET FAILED: {total} violations exceed "
            f"threshold of {options.max_any}",
            file=sys.stderr,
        )
        sys.exit(1)
    print(
        f"\n✅ RATCHET OK: {total} violations within "
        f"threshold of {options.max_any}",
    )
351
352
# Script entry point — guard keeps the module importable without side effects.
if __name__ == "__main__":
    main()