languages.py
python
| 1 | """muse languages — language breakdown of the current snapshot. |
| 2 | |
| 3 | Shows the composition of the repository by programming language — |
| 4 | how many files, symbols, and which symbol kinds are present for |
| 5 | each language. |
| 6 | |
| 7 | Usage:: |
| 8 | |
| 9 | muse languages |
| 10 | muse languages --commit HEAD~5 |
| 11 | muse languages --json |
| 12 | |
| 13 | Output:: |
| 14 | |
| 15 | Language breakdown — commit cb4afaed |
| 16 | |
| 17 | Python 8 files 43 symbols (fn: 18, class: 5, method: 20) |
| 18 | TypeScript 3 files 12 symbols (fn: 4, class: 3, method: 5) |
| 19 | Go 2 files 8 symbols (fn: 6, method: 2) |
| 20 | Rust 1 file 4 symbols (fn: 2, method: 2) |
| 21 | ───────────────────────────────────────────────────────────────── |
| 22 | Total 14 files 67 symbols (4 languages) |
| 23 | """ |
| 24 | |
| 25 | from __future__ import annotations |
| 26 | |
| 27 | import json |
| 28 | import logging |
| 29 | import pathlib |
| 30 | from typing import TypedDict |
| 31 | |
| 32 | import typer |
| 33 | |
| 34 | from muse.core.errors import ExitCode |
| 35 | from muse.core.repo import require_repo |
| 36 | from muse.core.store import get_commit_snapshot_manifest, read_current_branch, resolve_commit_ref |
| 37 | from muse.plugins.code._query import language_of, symbols_for_snapshot |
| 38 | |
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)

# Typer application object; the ``languages`` callback below is registered on it.
app = typer.Typer()
| 42 | |
| 43 | |
class _LangEntry(TypedDict):
    """Shape of one per-language record in the ``--json`` output."""

    # Language label, as produced by ``language_of`` for a file path.
    language: str
    # Number of manifest files attributed to this language.
    files: int
    # Total number of symbols counted for this language.
    symbols: int
    # Symbol count per kind, keyed by the record's "kind" value.
    kinds: dict[str, int]
| 49 | |
| 50 | |
def _read_repo_id(root: pathlib.Path) -> str:
    """Return the ``repo_id`` stored in ``<root>/.muse/repo.json``.

    The file is decoded explicitly as UTF-8 rather than with the platform's
    locale encoding, so non-ASCII content anywhere in ``repo.json`` cannot
    break (or silently corrupt) the read on systems whose default encoding
    is not UTF-8.

    Args:
        root: Repository root directory that contains the ``.muse`` folder.

    Returns:
        The ``repo_id`` value, converted to ``str``.

    Raises:
        OSError: If ``repo.json`` is missing or cannot be read.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If the JSON document has no ``"repo_id"`` key.
    """
    repo_json = root / ".muse" / "repo.json"
    metadata = json.loads(repo_json.read_text(encoding="utf-8"))
    return str(metadata["repo_id"])
| 53 | |
| 54 | |
def _read_branch(root: pathlib.Path) -> str:
    """Return the current branch for *root*, as reported by ``read_current_branch``.

    Thin wrapper that simply delegates to the store helper.
    """
    current_branch = read_current_branch(root)
    return current_branch
| 57 | |
| 58 | |
@app.callback(invoke_without_command=True)
def languages(
    ctx: typer.Context,
    ref: str | None = typer.Option(
        None, "--commit", "-c", metavar="REF",
        help="Commit to inspect (default: HEAD).",
    ),
    as_json: bool = typer.Option(False, "--json", help="Emit results as JSON."),
) -> None:
    """Show the language composition of the repository.

    Counts files and semantic symbols (functions, classes, methods) by
    programming language. Only languages with AST-level support are shown
    in the symbol breakdown — other file types are counted as files only.

    Use ``--commit`` to inspect any historical snapshot.
    """
    # NOTE: the docstring above doubles as the command's ``--help`` text, so
    # implementation notes live in comments rather than in the docstring.
    root = require_repo()
    repo_id = _read_repo_id(root)
    branch = _read_branch(root)

    # Resolve the requested ref; a ``None`` result is reported as a user error.
    commit = resolve_commit_ref(root, repo_id, branch, ref)
    if commit is None:
        typer.echo(f"❌ Commit '{ref or 'HEAD'}' not found.", err=True)
        raise typer.Exit(code=ExitCode.USER_ERROR)

    # Flat dict[str, str] of file_path → sha256.
    manifest: dict[str, str] = get_commit_snapshot_manifest(root, commit.commit_id) or {}
    symbol_map = symbols_for_snapshot(root, manifest)

    # Accumulate per-language stats.
    lang_files: dict[str, int] = {}
    lang_symbols: dict[str, int] = {}
    lang_kinds: dict[str, dict[str, int]] = {}

    # Every manifest entry counts as a file for its language.
    for file_path in manifest:
        lang = language_of(file_path)
        lang_files[lang] = lang_files.get(lang, 0) + 1

    # Only files present in ``symbol_map`` contribute symbol and kind counts.
    for file_path, tree in symbol_map.items():
        lang = language_of(file_path)
        lang_symbols[lang] = lang_symbols.get(lang, 0) + len(tree)
        kinds = lang_kinds.setdefault(lang, {})
        for rec in tree.values():
            kinds[rec["kind"]] = kinds.get(rec["kind"], 0) + 1

    # Languages are reported in alphabetical order, driven by the file counts.
    all_langs = sorted(lang_files)

    if as_json:
        out: list[_LangEntry] = [
            _LangEntry(
                language=lang,
                files=lang_files[lang],
                symbols=lang_symbols.get(lang, 0),
                kinds=lang_kinds.get(lang, {}),
            )
            for lang in all_langs
        ]
        typer.echo(json.dumps({"commit": commit.commit_id[:8], "languages": out}, indent=2))
        return

    typer.echo(f"\nLanguage breakdown — commit {commit.commit_id[:8]}")
    typer.echo("")

    # Width of the name column. It must also fit the "Total" label, otherwise
    # the summary row is misaligned when every language name is very short.
    max_lang = max((len(lang) for lang in all_langs), default=8)
    max_lang = max(max_lang, len("Total"))

    # Symbol kinds shown in the text report, in display order, with their
    # short labels. Built once here rather than on every loop iteration.
    kind_labels = (
        ("function", "fn"), ("async_function", "fn~"),
        ("class", "class"), ("method", "method"), ("async_method", "method~"),
        ("variable", "var"),
    )

    total_files = 0
    total_syms = 0

    for lang in all_langs:
        files = lang_files[lang]
        syms = lang_symbols.get(lang, 0)
        total_files += files
        total_syms += syms
        kinds = lang_kinds.get(lang, {})

        kind_parts = [f"{label}: {kinds[k]}" for k, label in kind_labels if k in kinds]
        kind_str = f" ({', '.join(kind_parts)})" if kind_parts else ""

        # The singular label carries a trailing space so columns stay aligned.
        file_label = "file " if files == 1 else "files"
        typer.echo(
            f" {lang:<{max_lang}} {files:>4} {file_label} {syms:>5} symbols{kind_str}"
        )

    # Summary row uses the same singular/plural handling as the rows above.
    total_file_label = "file " if total_files == 1 else "files"
    lang_label = "language" if len(all_langs) == 1 else "languages"

    typer.echo(" " + "─" * 60)
    typer.echo(
        f" {'Total':<{max_lang}} {total_files:>4} {total_file_label} {total_syms:>5} symbols"
        f" ({len(all_langs)} {lang_label})"
    )