diff --git a/mempalace/cli.py b/mempalace/cli.py index 14db5a8..6a531e7 100644 --- a/mempalace/cli.py +++ b/mempalace/cli.py @@ -662,6 +662,8 @@ def cmd_repair(args): _rebuild_collection_via_temp, check_extraction_safety, maybe_repair_poisoned_max_seq_id_before_rebuild, + print_sqlite_integrity_abort, + sqlite_integrity_errors, ) config = MempalaceConfig() @@ -743,6 +745,18 @@ def cmd_repair(args): print(f"\n No palace database found at {db_path}") return + # Run the SQLite integrity preflight before any chromadb client open. + # ChromaDB's rust binding raises pyo3_runtime.PanicException on a + # malformed page, which is not a regular Exception subclass and + # propagates past the try/except below — the user gets a 30-line + # stack trace instead of the friendly abort message. Run quick_check + # here so we can surface the clear recovery instructions and exit + # cleanly before chromadb's compactor touches the disk. + sqlite_errors = sqlite_integrity_errors(palace_path) + if sqlite_errors: + print_sqlite_integrity_abort(palace_path, sqlite_errors) + sys.exit(1) + preflight = maybe_repair_poisoned_max_seq_id_before_rebuild( palace_path, backup=getattr(args, "backup", True), diff --git a/mempalace/repair.py b/mempalace/repair.py index 6e170ef..dd4c46a 100644 --- a/mempalace/repair.py +++ b/mempalace/repair.py @@ -633,6 +633,17 @@ def rebuild_index( print(f"{'=' * 55}\n") print(f" Palace: {palace_path}") + # Run the SQLite integrity preflight before any chromadb client open. + # ChromaDB's rust binding raises pyo3_runtime.PanicException (which is + # not a regular Exception subclass) on a malformed page, propagating + # past the try/except around get_collection below. Catching the + # corruption here lets us surface the clear recovery instructions and + # exit cleanly before chromadb's compactor touches the disk. + sqlite_errors = sqlite_integrity_errors(palace_path) + if sqlite_errors: + print_sqlite_integrity_abort(palace_path, sqlite_errors) + return + preflight = maybe_repair_poisoned_max_seq_id_before_rebuild( palace_path, assume_yes=True, @@ -676,11 +687,6 @@ def rebuild_index( print(e.message) return - sqlite_errors = sqlite_integrity_errors(palace_path) - if sqlite_errors: - print_sqlite_integrity_abort(palace_path, sqlite_errors) - return - # Back up ONLY the SQLite database, not the bloated HNSW files sqlite_path = os.path.join(palace_path, "chroma.sqlite3") backup_path = sqlite_path + ".backup" diff --git a/tests/test_cli.py b/tests/test_cli.py index 0b61b0c..fa5680d 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -2,6 +2,7 @@ import argparse import shlex +import sqlite3 import sys from pathlib import Path from unittest.mock import MagicMock, call, patch @@ -774,7 +775,7 @@ def test_cmd_repair_requires_palace_database(mock_config_cls, tmp_path, capsys): def test_cmd_repair_error_reading(mock_config_cls, tmp_path, capsys): palace_dir = tmp_path / "palace" palace_dir.mkdir() - (palace_dir / "chroma.sqlite3").write_text("db") + sqlite3.connect(str(palace_dir / "chroma.sqlite3")).close() mock_config_cls.return_value.palace_path = str(palace_dir) mock_config_cls.return_value.collection_name = "mempalace_drawers" args = argparse.Namespace(palace=None) @@ -790,7 +791,7 @@ def test_cmd_repair_error_reading(mock_config_cls, tmp_path, capsys): def test_cmd_repair_zero_drawers(mock_config_cls, tmp_path, capsys): palace_dir = tmp_path / "palace" palace_dir.mkdir() - (palace_dir / "chroma.sqlite3").write_text("db") + sqlite3.connect(str(palace_dir / "chroma.sqlite3")).close() mock_config_cls.return_value.palace_path = str(palace_dir) mock_config_cls.return_value.collection_name = "mempalace_drawers" args = argparse.Namespace(palace=None) @@ -807,7 +808,7 @@ def test_cmd_repair_zero_drawers(mock_config_cls, tmp_path, capsys): def test_cmd_repair_success(mock_config_cls, tmp_path, capsys): palace_dir = tmp_path / "palace" palace_dir.mkdir() - (palace_dir / "chroma.sqlite3").write_text("db") + sqlite3.connect(str(palace_dir / "chroma.sqlite3")).close() mock_config_cls.return_value.palace_path = str(palace_dir) mock_config_cls.return_value.collection_name = "mempalace_drawers" args = argparse.Namespace(palace=None, yes=True) @@ -843,7 +844,7 @@ def test_cmd_repair_success(mock_config_cls, tmp_path, capsys): def test_cmd_repair_uses_configured_collection(mock_config_cls, tmp_path, capsys): palace_dir = tmp_path / "palace" palace_dir.mkdir() - (palace_dir / "chroma.sqlite3").write_text("db") + sqlite3.connect(str(palace_dir / "chroma.sqlite3")).close() mock_config_cls.return_value.palace_path = str(palace_dir) mock_config_cls.return_value.collection_name = "custom_drawers" args = argparse.Namespace(palace=None, yes=True) @@ -882,7 +883,7 @@ def test_cmd_repair_uses_configured_collection(mock_config_cls, tmp_path, capsys def test_cmd_repair_restores_backup_on_live_rebuild_failure(mock_config_cls, tmp_path, capsys): palace_dir = tmp_path / "palace" palace_dir.mkdir() - (palace_dir / "chroma.sqlite3").write_text("db") + sqlite3.connect(str(palace_dir / "chroma.sqlite3")).close() mock_config_cls.return_value.palace_path = str(palace_dir) mock_config_cls.return_value.collection_name = "mempalace_drawers" args = argparse.Namespace(palace=None, yes=True) @@ -916,7 +917,7 @@ def test_cmd_repair_restores_backup_on_live_rebuild_failure(mock_config_cls, tmp def test_cmd_repair_aborts_without_confirmation(mock_config_cls, tmp_path, capsys): palace_dir = tmp_path / "palace" palace_dir.mkdir() - (palace_dir / "chroma.sqlite3").write_text("db") + sqlite3.connect(str(palace_dir / "chroma.sqlite3")).close() mock_config_cls.return_value.palace_path = str(palace_dir) mock_config_cls.return_value.collection_name = "mempalace_drawers" args = argparse.Namespace(palace=None) diff --git a/tests/test_repair.py b/tests/test_repair.py index dda83ec..264561f 100644 --- a/tests/test_repair.py +++ b/tests/test_repair.py @@ -1153,6 +1153,57 @@ def test_rebuild_index_aborts_on_sqlite_integrity_errors_before_delete_collectio mock_shutil.copy2.assert_not_called() +def test_rebuild_index_runs_sqlite_preflight_before_chromadb_open(tmp_path, capsys): + """The SQLite integrity preflight must run BEFORE backend.get_collection. + + chromadb's rust binding raises pyo3_runtime.PanicException (which is not + a regular Exception subclass) on a malformed page, so any get_collection + call against a corrupt SQLite propagates past `except Exception` handlers + and produces a 30-line stack trace instead of the friendly abort message. + Regression test for the ordering bug where the preflight was placed after + the chromadb client open and therefore never reached on the cases it was + designed to catch (#1364 follow-up). + """ + palace = tmp_path / "palace" + palace.mkdir() + + # Build a real chromadb palace with one drawer so chroma.sqlite3 exists + # at full schema size, then mangle several middle pages so PRAGMA + # quick_check fails with "disk image is malformed". This matches the + # production failure mode users hit in #1362 / #1364. + from mempalace.backends.chroma import ChromaBackend + + backend = ChromaBackend() + try: + col = backend.create_collection(str(palace), "mempalace_drawers") + col.upsert( + ids=["d1"], + documents=["doc"], + metadatas=[{"wing": "w", "room": "r"}], + ) + finally: + backend.close() + + sqlite_path = palace / "chroma.sqlite3" + pre_size = sqlite_path.stat().st_size + assert pre_size > 16384, "need a multi-page sqlite db to mangle" + + with open(sqlite_path, "r+b") as f: + f.seek(40960) # page 10 + f.write(b"\xde\xad\xbe\xef" * 4096) # 16 KB of garbage + + # No chromadb mocks: rebuild_index must reach sqlite_integrity_errors + # before any code path that opens a chromadb client. If the preflight + # comes too late, the test fails with pyo3_runtime.PanicException + # instead of returning cleanly. + repair.rebuild_index(palace_path=str(palace)) + + out = capsys.readouterr().out + assert "SQLite-layer corruption detected before repair rebuild" in out + assert "PRAGMA quick_check" in out + assert "disk image is malformed" in out + + def test_max_seq_id_preflight_preserves_embeddings_queue(tmp_path): """#1295: default repair preflight must not drop queued writes."""