From 5488e7bb2286e5d5770d68c2719acefe92efba41 Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Thu, 7 May 2026 08:56:41 -0300 Subject: [PATCH] fix(miner): harden Windows mine against ONNX bad_alloc + silent partial exits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three small changes that together address the failure modes in #1296: 1. Add pnpm-lock.yaml and yarn.lock to SKIP_FILENAMES, mirroring the existing package-lock.json rule. A 24K-line pnpm-lock.yaml produced ~1124 chunks in one batch and tripped onnxruntime bad_alloc on Windows; pnpm/yarn lockfiles are no more useful to mine than npm's. 2. Skip any file that produces more than MAX_CHUNKS_PER_FILE (500) chunks, with a clear log line. Catches the broader class — generated CSV/JSON, build artifacts, etc. — that the named-file SKIP list will never fully cover. The cap is conservative (500 chunks * 800 chars ≈ 400 KB of source) so legitimate hand-written content still mines. 3. Print a partial-progress summary on any exception in _mine_impl, not just KeyboardInterrupt, then re-raise. Without this, an arbitrary exception (ONNX bad_alloc, chromadb HNSW error, OS fault) propagates silently — the operator sees only the last progress line and assumes the mine succeeded. The new path mirrors the KeyboardInterrupt summary (files_processed, drawers_filed, last_file) plus the exception type and message, then re-raises so the original traceback surfaces and the exit code is non-zero. Tests cover: SKIP_FILENAMES contents, the chunk-cap path returning (0, room) with no upserts, and the new mine-aborted summary surfacing both the partial counters and the exception class. --- mempalace/miner.py | 34 ++++++++++++++++++++ tests/test_miner.py | 77 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+) diff --git a/mempalace/miner.py b/mempalace/miner.py index 88734c9..6aeddd4 100644 --- a/mempalace/miner.py +++ b/mempalace/miner.py @@ -66,6 +66,8 @@ SKIP_FILENAMES = { "mempal.yml", ".gitignore", "package-lock.json", + "pnpm-lock.yaml", + "yarn.lock", } CHUNK_SIZE = 800 # chars per drawer @@ -73,6 +75,13 @@ CHUNK_OVERLAP = 100 # overlap between chunks MIN_CHUNK_SIZE = 50 # skip tiny chunks DRAWER_UPSERT_BATCH_SIZE = 1000 MAX_FILE_SIZE = 500 * 1024 * 1024 # 500 MB — skip files larger than this. +# A single file producing more chunks than this is almost always a generated +# artifact (CSV/JSON dump, lockfile not in SKIP_FILENAMES, etc.). Embedding +# thousands of chunks from one file in one batch has triggered ONNX runtime +# `bad allocation` errors on Windows (#1296). The cap is conservative: a +# 500-chunk file at CHUNK_SIZE=800 is ~400 KB of source, which covers most +# legitimate hand-written content while bounding the worst-case batch. +MAX_CHUNKS_PER_FILE = 500 # Long Claude Code sessions and large transcript exports routinely exceed # 10 MB. The cap exists as a defensive rail against pathological binary # files, not as a limit on legitimate text. Per-drawer size is bounded @@ -825,6 +834,13 @@ def process_file( room = detect_room(filepath, content, rooms, project_path) chunks = chunk_text(content, source_file) + if len(chunks) > MAX_CHUNKS_PER_FILE: + print( + f" ! [skip] {filepath.name[:50]:50} produced {len(chunks)} chunks " + f"(> {MAX_CHUNKS_PER_FILE}); add to SKIP_FILENAMES or .gitignore" + ) + return 0, room + if dry_run: print(f" [DRY RUN] {filepath.name} -> room:{room} ({len(chunks)} drawers)") return len(chunks), room @@ -1167,6 +1183,24 @@ def _mine_impl( "already-filed drawers are\n upserted idempotently and will not duplicate.\n" ) sys.exit(130) + except Exception as exc: + # Without this, an arbitrary exception (ONNX bad_alloc, chromadb HNSW + # error, OS fault) propagates and the process exits with no completion + # banner — the operator sees only the final progress line and assumes + # the mine succeeded (#1296). Print the partial-progress summary the + # way we do for KeyboardInterrupt, then re-raise so the original + # traceback still surfaces and the exit code is non-zero. + print("\n\n Mine aborted by exception.") + print(f" files_processed: {files_processed}/{len(files)}") + print(f" drawers_filed: {total_drawers}") + print(f" last_file: {last_file or ''}") + print(f" error: {type(exc).__name__}: {exc}") + print( + f"\n Re-run `mempalace mine {shlex.quote(project_dir)}` after addressing " + "the cause — already-filed\n drawers are upserted idempotently and will " + "not duplicate.\n" + ) + raise finally: # Clean up the hooks-side PID lock if it points at us. Stale # entries already pass _pid_alive() == False on POSIX, but diff --git a/tests/test_miner.py b/tests/test_miner.py index 10124ee..10dd33d 100644 --- a/tests/test_miner.py +++ b/tests/test_miner.py @@ -699,6 +699,83 @@ def test_mine_keyboard_interrupt_quotes_path_with_spaces_in_resume_hint(tmp_path assert f"mempalace mine {shlex.quote(str(project_root))}" in out +def test_skip_filenames_includes_lockfiles(): + """pnpm-lock.yaml and yarn.lock must be skipped alongside package-lock.json + so a Windows mine over a typical JS monorepo doesn't OOM the ONNX embedder + on a 24K-line lockfile (#1296).""" + from mempalace import miner + + assert "package-lock.json" in miner.SKIP_FILENAMES + assert "pnpm-lock.yaml" in miner.SKIP_FILENAMES + assert "yarn.lock" in miner.SKIP_FILENAMES + + +def test_process_file_skips_when_chunks_exceed_max(tmp_path, monkeypatch): + """A file producing more than MAX_CHUNKS_PER_FILE chunks must be skipped + with a clear message and zero upserts. Generated artifacts (CSVs, lock + files not in SKIP_FILENAMES) hit this — the cap is what prevents ONNX + bad_alloc on Windows when the embedder is asked to swallow thousands of + chunks in one batch (#1296).""" + from unittest.mock import MagicMock + + from mempalace import miner + + monkeypatch.setattr(miner, "MAX_CHUNKS_PER_FILE", 5) + over_cap = [{"content": f"chunk {i}", "chunk_index": i} for i in range(7)] + monkeypatch.setattr(miner, "chunk_text", lambda content, source_file: over_cap) + + source = tmp_path / "huge.csv" + source.write_text("col1,col2\n" + "x,y\n" * 500, encoding="utf-8") + col = MagicMock() + col.get.return_value = {"ids": []} + + drawers, room = miner.process_file( + source, + tmp_path, + col, + "wing", + [{"name": "general", "description": "General"}], + "agent", + False, + ) + + assert drawers == 0 + col.upsert.assert_not_called() + + +def test_mine_arbitrary_exception_prints_summary_and_reraises(tmp_path, capsys): + """A non-KeyboardInterrupt exception mid-mine must surface a summary + banner before propagating, so users don't see a silent exit-0 with no + completion message (#1296 Failure 2). Re-raise preserves the traceback + and yields a non-zero exit code.""" + import pytest + from unittest.mock import patch + + project_root = tmp_path / "proj" + project_root.mkdir() + _make_minable_project(project_root, n_files=4) + palace_path = project_root / "palace" + + call_count = {"n": 0} + + def fake_process_file(*args, **kwargs): + call_count["n"] += 1 + if call_count["n"] == 2: + raise RuntimeError("simulated ONNX bad_alloc") + return (1, "general") + + with patch("mempalace.miner.process_file", side_effect=fake_process_file): + with pytest.raises(RuntimeError, match="simulated ONNX bad_alloc"): + mine(str(project_root), str(palace_path)) + + out = capsys.readouterr().out + assert "Mine aborted by exception." in out + assert "files_processed: 1/" in out + assert "drawers_filed:" in out + assert "RuntimeError: simulated ONNX bad_alloc" in out + assert "upserted idempotently" in out + + def test_mine_cleans_up_pid_file_on_interrupt(tmp_path): """Our own PID entry in mine.pid is removed in the finally clause.""" import pytest