gabriel / muse public
test_plumbing_stress.py python
444 lines 15.6 KB
dec4604a feat(mwp): replace JSON+base64 wire protocol with MWP binary msgpack Gabriel Cardona <gabriel@tellurstori.com> 13h ago
1 """Stress and scale tests for the Muse plumbing layer.
2
3 These tests exercise plumbing commands at a scale that would reveal
4 O(n²) performance regressions, memory leaks, and missing edge-case
5 handling. Every test in this module is designed to complete in under
6 10 seconds on a modern laptop when running from an in-memory temp
7 directory — if any test consistently takes longer, it signals a
8 performance regression worth investigating.
9
10 Scenarios:
11 - commit-graph BFS on a 500-commit linear history
12 - merge-base on a 300-deep dag (shared ancestor at the root)
13 - name-rev multi-source BFS on a 200-commit diamond graph
14 - snapshot-diff on manifests with 2000 files each
15 - verify-object on 200 objects
16 - ls-files on a 2000-file snapshot
17 - for-each-ref on 100 branches
18 - show-ref on 100 branches
19 - pack-objects → unpack-objects with 100 commits and 100 objects
20 - read-commit on 200 sequential commits
21 """
22
23 from __future__ import annotations
24
25 import datetime
26 import hashlib
27 import json
28 import pathlib
29
from tests.cli_test_helper import CliRunner
from muse.core.object_store import write_object
from muse.core.store import CommitRecord, SnapshotRecord, write_commit, write_snapshot

# Placeholder first positional argument for runner.invoke(); after the
# argparse migration CliRunner ignores it, so None keeps the historic call
# shape at every call site. (Moved below the imports — module statements
# should not be interleaved inside the import block.)
cli = None

# Shared CLI runner used by every test in this module.
runner = CliRunner()
37
38
39 # ---------------------------------------------------------------------------
40 # Helpers
41 # ---------------------------------------------------------------------------
42
43
44 def _sha(tag: str) -> str:
45 return hashlib.sha256(tag.encode()).hexdigest()
46
47
48 def _sha_bytes(data: bytes) -> str:
49 return hashlib.sha256(data).hexdigest()
50
51
52 def _init_repo(path: pathlib.Path) -> pathlib.Path:
53 muse = path / ".muse"
54 (muse / "commits").mkdir(parents=True)
55 (muse / "snapshots").mkdir(parents=True)
56 (muse / "objects").mkdir(parents=True)
57 (muse / "refs" / "heads").mkdir(parents=True)
58 (muse / "HEAD").write_text("ref: refs/heads/main", encoding="utf-8")
59 (muse / "repo.json").write_text(
60 json.dumps({"repo_id": "stress-repo", "domain": "midi"}), encoding="utf-8"
61 )
62 return path
63
64
65 def _env(repo: pathlib.Path) -> dict[str, str]:
66 return {"MUSE_REPO_ROOT": str(repo)}
67
68
def _snap(repo: pathlib.Path, manifest: dict[str, str] | None = None, tag: str = "s") -> str:
    """Write a snapshot with *manifest* (empty by default) and return its id.

    The id is derived deterministically from *tag* so tests can recompute it.
    """
    snapshot_id = _sha(f"snap-{tag}")
    record = SnapshotRecord(
        snapshot_id=snapshot_id,
        manifest=manifest or {},
        created_at=datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc),
    )
    write_snapshot(repo, record)
    return snapshot_id
81
82
def _commit_raw(
    repo: pathlib.Path,
    cid: str,
    sid: str,
    message: str,
    branch: str = "main",
    parent: str | None = None,
    parent2: str | None = None,
) -> None:
    """Persist a commit record *cid* on *branch* pointing at snapshot *sid*.

    Fixed author/timestamp keep the record deterministic; *parent2* is only
    set for merge commits.
    """
    record = CommitRecord(
        commit_id=cid,
        repo_id="stress-repo",
        branch=branch,
        snapshot_id=sid,
        message=message,
        committed_at=datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc),
        author="stress-tester",
        parent_commit_id=parent,
        parent2_commit_id=parent2,
    )
    write_commit(repo, record)
106
107
108 def _set_branch(repo: pathlib.Path, branch: str, cid: str) -> None:
109 ref = repo / ".muse" / "refs" / "heads" / branch
110 ref.parent.mkdir(parents=True, exist_ok=True)
111 ref.write_text(cid, encoding="utf-8")
112
113
def _linear_chain(repo: pathlib.Path, n: int, sid: str, branch: str = "main") -> list[str]:
    """Build a linear chain of n commits on *branch*. Returns ids root→tip.

    The branch ref is advanced to the final commit; requires n >= 1.
    """
    chain: list[str] = []
    prev: str | None = None
    for idx in range(n):
        commit_id = _sha(f"linear-{branch}-{idx}")
        _commit_raw(repo, commit_id, sid, f"commit {idx}", branch=branch, parent=prev)
        chain.append(commit_id)
        prev = commit_id
    _set_branch(repo, branch, chain[-1])
    return chain
125
126
def _obj(repo: pathlib.Path, tag: str) -> str:
    """Store *tag*'s UTF-8 bytes as a content-addressed object; return its id."""
    payload = tag.encode()
    object_id = _sha_bytes(payload)
    write_object(repo, object_id, payload)
    return object_id
132
133
134 # ---------------------------------------------------------------------------
135 # Stress: commit-graph
136 # ---------------------------------------------------------------------------
137
138
class TestCommitGraphStress:
    """commit-graph BFS exercised on a 500-commit linear history."""

    def test_500_commit_linear_chain_full_traversal(self, tmp_path: pathlib.Path) -> None:
        """A default traversal visits every commit without truncation."""
        repo = _init_repo(tmp_path)
        sid = _snap(repo)
        # Return value intentionally discarded: the traversal starts from the
        # branch ref that _linear_chain sets, not from an explicit tip.
        _linear_chain(repo, 500, sid)
        result = runner.invoke(cli, ["plumbing", "commit-graph"], env=_env(repo))
        assert result.exit_code == 0, result.output
        data = json.loads(result.stdout)
        assert data["count"] == 500
        assert data["truncated"] is False

    def test_500_commit_chain_stop_at_midpoint(self, tmp_path: pathlib.Path) -> None:
        """--stop-at halts the walk: tip..midpoint inclusive is 250 commits."""
        repo = _init_repo(tmp_path)
        sid = _snap(repo)
        cids = _linear_chain(repo, 500, sid)
        result = runner.invoke(
            cli,
            ["plumbing", "commit-graph", "--tip", cids[499], "--stop-at", cids[249]],
            env=_env(repo),
        )
        assert result.exit_code == 0
        data = json.loads(result.stdout)
        assert data["count"] == 250

    def test_count_flag_on_500_commits(self, tmp_path: pathlib.Path) -> None:
        """--count reports the total while omitting the per-node list."""
        repo = _init_repo(tmp_path)
        sid = _snap(repo)
        _linear_chain(repo, 500, sid)
        result = runner.invoke(cli, ["plumbing", "commit-graph", "--count"], env=_env(repo))
        assert result.exit_code == 0
        data = json.loads(result.stdout)
        assert data["count"] == 500
        assert "commits" not in data  # --count suppresses node list
172
173
174 # ---------------------------------------------------------------------------
175 # Stress: merge-base
176 # ---------------------------------------------------------------------------
177
178
class TestMergeBaseStress:
    """merge-base on two 150-deep branches sharing a single root ancestor."""

    def test_merge_base_300_deep_shared_root(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        sid = _snap(repo)

        # One shared ancestor at the very bottom of both branches.
        root_cid = _sha("shared-root")
        _commit_raw(repo, root_cid, sid, "root")

        # Grow two independent 150-commit chains on top of the root.
        tips = {"main": root_cid, "feat": root_cid}
        for i in range(150):
            for branch in ("main", "feat"):
                cid = _sha(f"{branch}-{i}")
                _commit_raw(repo, cid, sid, f"{branch}-{i}", branch=branch, parent=tips[branch])
                tips[branch] = cid

        _set_branch(repo, "main", tips["main"])
        _set_branch(repo, "feat", tips["feat"])
        (repo / ".muse" / "HEAD").write_text("ref: refs/heads/main", encoding="utf-8")

        result = runner.invoke(
            cli, ["plumbing", "merge-base", "main", "feat"], env=_env(repo)
        )
        assert result.exit_code == 0
        data = json.loads(result.stdout)
        assert data["merge_base"] == root_cid
209
210
211 # ---------------------------------------------------------------------------
212 # Stress: name-rev
213 # ---------------------------------------------------------------------------
214
215
class TestNameRevStress:
    """name-rev multi-source BFS on long linear histories."""

    def test_name_rev_200_commit_chain_all_named(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        sid = _snap(repo)
        cids = _linear_chain(repo, 200, sid)

        result = runner.invoke(cli, ["plumbing", "name-rev", *cids], env=_env(repo))
        assert result.exit_code == 0
        entries = json.loads(result.stdout)["results"]
        assert len(entries) == 200
        assert all(not entry["undefined"] for entry in entries)

    def test_name_rev_tip_has_no_tilde_suffix(self, tmp_path: pathlib.Path) -> None:
        """distance=0 means the tip is the branch tip itself; name is bare branch name."""
        repo = _init_repo(tmp_path)
        sid = _snap(repo)
        tip = _linear_chain(repo, 10, sid)[-1]

        result = runner.invoke(cli, ["plumbing", "name-rev", tip], env=_env(repo))
        assert result.exit_code == 0
        entry = json.loads(result.stdout)["results"][0]
        # name-rev emits "<branch>" (no ~0) for the exact branch tip.
        assert entry["name"] == "main"
        assert entry["distance"] == 0
242
243
244 # ---------------------------------------------------------------------------
245 # Stress: snapshot-diff
246 # ---------------------------------------------------------------------------
247
248
class TestSnapshotDiffStress:
    """snapshot-diff over a pair of 2000-entry manifests."""

    def test_diff_2000_file_manifests(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        old_oid = _sha("shared-blob")
        new_oid = _sha("new-blob")

        # Manifest A: 2000 identical entries.
        manifest_a = {f"track_{i:04d}.mid": old_oid for i in range(2000)}
        # Manifest B: same paths, but the first 200 point at a new blob (modified).
        manifest_b = dict(manifest_a)
        for i in range(200):
            manifest_b[f"track_{i:04d}.mid"] = new_oid

        def write_big_snapshot(sid: str, manifest: dict[str, str], day: int) -> None:
            """Persist one snapshot with a distinct creation day."""
            write_snapshot(
                repo,
                SnapshotRecord(
                    snapshot_id=sid,
                    manifest=manifest,
                    created_at=datetime.datetime(2026, 1, day, tzinfo=datetime.timezone.utc),
                ),
            )

        sid_a = _sha("big-snap-a")
        sid_b = _sha("big-snap-b")
        write_big_snapshot(sid_a, manifest_a, 1)
        write_big_snapshot(sid_b, manifest_b, 2)

        result = runner.invoke(cli, ["plumbing", "snapshot-diff", sid_a, sid_b], env=_env(repo))
        assert result.exit_code == 0
        data = json.loads(result.stdout)
        assert data["total_changes"] == 200
        assert len(data["modified"]) == 200
        assert data["added"] == []
        assert data["deleted"] == []
286
287
288 # ---------------------------------------------------------------------------
289 # Stress: verify-object
290 # ---------------------------------------------------------------------------
291
292
class TestVerifyObjectStress:
    """verify-object over many small objects and one 1 MiB blob."""

    def test_200_objects_all_verified(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        oids = [_obj(repo, f"stress-obj-{i}") for i in range(200)]
        result = runner.invoke(cli, ["plumbing", "verify-object", *oids], env=_env(repo))
        assert result.exit_code == 0
        report = json.loads(result.stdout)
        assert report["all_ok"] is True
        assert report["checked"] == 200
        assert report["failed"] == 0

    def test_verify_1mib_object_no_crash(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        payload = b"Z" * (1024 * 1024)
        oid = _sha_bytes(payload)
        write_object(repo, oid, payload)
        result = runner.invoke(cli, ["plumbing", "verify-object", oid], env=_env(repo))
        assert result.exit_code == 0
        assert json.loads(result.stdout)["all_ok"] is True
312
313
314 # ---------------------------------------------------------------------------
315 # Stress: ls-files
316 # ---------------------------------------------------------------------------
317
318
class TestLsFilesStress:
    """ls-files on a snapshot carrying 2000 manifest entries."""

    def test_ls_files_2000_file_snapshot(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        blob = _sha("common-oid")
        sid = _snap(repo, {f"track_{i:04d}.mid": blob for i in range(2000)}, "big")
        cid = _sha("big-commit")
        _commit_raw(repo, cid, sid, "big manifest", branch="main")
        _set_branch(repo, "main", cid)

        result = runner.invoke(cli, ["plumbing", "ls-files"], env=_env(repo))
        assert result.exit_code == 0
        assert json.loads(result.stdout)["file_count"] == 2000
333
334
335 # ---------------------------------------------------------------------------
336 # Stress: for-each-ref and show-ref
337 # ---------------------------------------------------------------------------
338
339
class TestRefCommandsStress:
    """for-each-ref and show-ref against 100 branch refs."""

    def _build_100_branches(self, repo: pathlib.Path) -> None:
        """Create feature-000..feature-099, each with its own tip commit."""
        sid = _snap(repo, tag="multi-branch")
        for i in range(100):
            name = f"feature-{i:03d}"
            tip = _sha(f"branch-tip-{i}")
            _commit_raw(repo, tip, sid, f"tip of {name}", branch=name)
            _set_branch(repo, name, tip)

    def test_for_each_ref_100_branches(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        self._build_100_branches(repo)
        result = runner.invoke(cli, ["plumbing", "for-each-ref"], env=_env(repo))
        assert result.exit_code == 0
        assert len(json.loads(result.stdout)["refs"]) == 100

    def test_show_ref_100_branches(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        self._build_100_branches(repo)
        result = runner.invoke(cli, ["plumbing", "show-ref"], env=_env(repo))
        assert result.exit_code == 0
        assert json.loads(result.stdout)["count"] == 100

    def test_for_each_ref_pattern_filter_on_100(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        self._build_100_branches(repo)
        result = runner.invoke(
            cli,
            ["plumbing", "for-each-ref", "--pattern", "refs/heads/feature-00*"],
            env=_env(repo),
        )
        assert result.exit_code == 0
        # feature-000 through feature-009 = 10 branches
        assert len(json.loads(result.stdout)["refs"]) == 10
377
378
379 # ---------------------------------------------------------------------------
380 # Stress: pack-objects → unpack-objects
381 # ---------------------------------------------------------------------------
382
383
class TestPackUnpackStress:
    """pack-objects → unpack-objects round trip at 100-commit/100-object scale."""

    def test_100_commit_100_object_round_trip(self, tmp_path: pathlib.Path) -> None:
        from muse.core.object_store import has_object
        from muse.core.store import read_commit

        src = _init_repo(tmp_path / "src")
        dst = _init_repo(tmp_path / "dst")

        # 100 blobs, all referenced by one large snapshot.
        oids = [_obj(src, f"blob-{i}") for i in range(100)]
        sid = _snap(src, {f"f{i}.mid": oid for i, oid in enumerate(oids)}, "big-pack")

        # A 100-commit linear chain referencing that snapshot.
        cids: list[str] = []
        prev: str | None = None
        for i in range(100):
            cid = _sha(f"pack-commit-{i}")
            _commit_raw(src, cid, sid, f"pack-{i}", parent=prev)
            cids.append(cid)
            prev = cid
        _set_branch(src, "main", cids[-1])

        # Pack everything reachable from the tip, then unpack into dst.
        pack_result = runner.invoke(
            cli, ["plumbing", "pack-objects", cids[-1]], env=_env(src)
        )
        assert pack_result.exit_code == 0

        unpack_result = runner.invoke(
            cli,
            ["plumbing", "unpack-objects"],
            input=pack_result.stdout_bytes,
            env=_env(dst),
        )
        assert unpack_result.exit_code == 0
        counts = json.loads(unpack_result.stdout)
        assert counts["commits_written"] == 100
        assert counts["objects_written"] == 100

        # Every commit and blob from src must now exist in dst.
        assert all(read_commit(dst, cid) is not None for cid in cids)
        assert all(has_object(dst, oid) for oid in oids)
428
429
430 # ---------------------------------------------------------------------------
431 # Stress: read-commit sequential
432 # ---------------------------------------------------------------------------
433
434
class TestReadCommitStress:
    """read-commit invoked once per commit across a 200-commit chain."""

    def test_200_commits_all_readable(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        sid = _snap(repo)
        for cid in _linear_chain(repo, 200, sid):
            result = runner.invoke(cli, ["plumbing", "read-commit", cid], env=_env(repo))
            assert result.exit_code == 0
            assert json.loads(result.stdout)["commit_id"] == cid