gabriel / muse public
languages.py python
153 lines 4.9 KB
f7645c07 feat(store): self-describing HEAD format with typed read/write API (#163) Gabriel Cardona <cgcardona@gmail.com> 3d ago
1 """muse languages — language breakdown of the current snapshot.
2
3 Shows the composition of the repository by programming language —
4 how many files, symbols, and which symbol kinds are present for
5 each language.
6
7 Usage::
8
9 muse languages
10 muse languages --commit HEAD~5
11 muse languages --json
12
13 Output::
14
15 Language breakdown — commit cb4afaed
16
17 Python 8 files 43 symbols (fn: 18, class: 5, method: 20)
18 TypeScript 3 files 12 symbols (fn: 4, class: 3, method: 5)
19 Go 2 files 8 symbols (fn: 6, method: 2)
20 Rust 1 file 4 symbols (fn: 2, method: 2)
21 ─────────────────────────────────────────────────────────────────
22 Total 14 files 67 symbols (4 languages)
23 """
24
25 from __future__ import annotations
26
27 import json
28 import logging
29 import pathlib
30 from typing import TypedDict
31
32 import typer
33
34 from muse.core.errors import ExitCode
35 from muse.core.repo import require_repo
36 from muse.core.store import get_commit_snapshot_manifest, read_current_branch, resolve_commit_ref
37 from muse.plugins.code._query import language_of, symbols_for_snapshot
38
39 logger = logging.getLogger(__name__)
40
41 app = typer.Typer()
42
43
44 class _LangEntry(TypedDict):
45 language: str
46 files: int
47 symbols: int
48 kinds: dict[str, int]
49
50
51 def _read_repo_id(root: pathlib.Path) -> str:
52 return str(json.loads((root / ".muse" / "repo.json").read_text())["repo_id"])
53
54
def _read_branch(root: pathlib.Path) -> str:
    """Return what ``read_current_branch`` reports for the repository at *root*.

    Thin wrapper kept so the command body reads symmetrically with
    ``_read_repo_id``.
    """
    current_branch = read_current_branch(root)
    return current_branch
57
58
# Display order and short labels for the symbol kinds listed first in the
# text report.
_KIND_LABELS: tuple[tuple[str, str], ...] = (
    ("function", "fn"),
    ("async_function", "fn~"),
    ("class", "class"),
    ("method", "method"),
    ("async_method", "method~"),
    ("variable", "var"),
)


def _format_kinds(kinds: dict[str, int]) -> str:
    """Render per-kind counts as `` (fn: 3, class: 1)``; ``""`` when empty."""
    labelled = {kind for kind, _ in _KIND_LABELS}
    parts = [
        f"{label}: {kinds[kind]}" for kind, label in _KIND_LABELS if kind in kinds
    ]
    # Kinds without a short label are still listed (alphabetically, under their
    # raw name) so the breakdown always adds up to the row's symbol total.
    parts.extend(
        f"{kind}: {kinds[kind]}" for kind in sorted(kinds) if kind not in labelled
    )
    return f" ({', '.join(parts)})" if parts else ""


@app.callback(invoke_without_command=True)
def languages(
    ctx: typer.Context,
    ref: str | None = typer.Option(
        None, "--commit", "-c", metavar="REF",
        help="Commit to inspect (default: HEAD).",
    ),
    as_json: bool = typer.Option(False, "--json", help="Emit results as JSON."),
) -> None:
    """Show the language composition of the repository.

    Counts files and semantic symbols (functions, classes, methods) by
    programming language. Only languages with AST-level support are shown
    in the symbol breakdown — other file types are counted as files only.

    Use ``--commit`` to inspect any historical snapshot.
    """
    # NOTE: the docstring above doubles as the CLI help text, so developer
    # notes live in comments instead.
    #
    # Exits with ExitCode.USER_ERROR when ``ref`` (or HEAD) cannot be resolved.
    # With ``--json`` a single JSON document is printed and nothing else.
    root = require_repo()
    repo_id = _read_repo_id(root)
    branch = _read_branch(root)

    commit = resolve_commit_ref(root, repo_id, branch, ref)
    if commit is None:
        typer.echo(f"❌ Commit '{ref or 'HEAD'}' not found.", err=True)
        raise typer.Exit(code=ExitCode.USER_ERROR)

    # Flat dict[str, str] of file_path → sha256.
    manifest: dict[str, str] = get_commit_snapshot_manifest(root, commit.commit_id) or {}
    # file_path → mapping of symbol records; each record carries a "kind" key.
    symbol_map = symbols_for_snapshot(root, manifest)

    # Accumulate per-language stats.
    lang_files: dict[str, int] = {}
    lang_symbols: dict[str, int] = {}
    lang_kinds: dict[str, dict[str, int]] = {}

    # Every manifest entry counts as a file, whether or not symbols exist for it.
    for file_path in manifest:
        lang = language_of(file_path)
        lang_files[lang] = lang_files.get(lang, 0) + 1

    for file_path, tree in symbol_map.items():
        lang = language_of(file_path)
        lang_symbols[lang] = lang_symbols.get(lang, 0) + len(tree)
        kinds = lang_kinds.setdefault(lang, {})
        for rec in tree.values():
            kind = rec["kind"]
            kinds[kind] = kinds.get(kind, 0) + 1

    # Rows are driven by the file counts, so languages without symbols still appear.
    all_langs = sorted(lang_files)

    if as_json:
        out: list[_LangEntry] = [
            _LangEntry(
                language=lang,
                files=lang_files[lang],
                symbols=lang_symbols.get(lang, 0),
                kinds=lang_kinds.get(lang, {}),
            )
            for lang in all_langs
        ]
        typer.echo(json.dumps({"commit": commit.commit_id[:8], "languages": out}, indent=2))
        return

    typer.echo(f"\nLanguage breakdown — commit {commit.commit_id[:8]}")
    typer.echo("")

    # The name column must also fit the "Total" label; otherwise the totals
    # row drifts out of alignment when every language name is short ("Go", "C").
    max_lang = max((len(lang) for lang in all_langs), default=8)
    max_lang = max(max_lang, len("Total"))

    for lang in all_langs:
        files = lang_files[lang]
        syms = lang_symbols.get(lang, 0)
        kind_str = _format_kinds(lang_kinds.get(lang, {}))

        # Singular form is padded to the plural's width to keep columns aligned.
        file_label = "file " if files == 1 else "files"
        typer.echo(
            f" {lang:<{max_lang}} {files:>4} {file_label} {syms:>5} symbols{kind_str}"
        )

    total_files = sum(lang_files[lang] for lang in all_langs)
    total_syms = sum(lang_symbols.get(lang, 0) for lang in all_langs)
    total_file_label = "file " if total_files == 1 else "files"
    lang_label = "language" if len(all_langs) == 1 else "languages"

    typer.echo(" " + "─" * 60)
    typer.echo(
        f" {'Total':<{max_lang}} {total_files:>4} {total_file_label} {total_syms:>5} symbols"
        f" ({len(all_langs)} {lang_label})"
    )