Merge pull request #1285 from mjc/hnsw-repair
fix: harden Chroma repair preflight and rollback recovery
This commit is contained in:
+276
-1
@@ -1,4 +1,5 @@
|
||||
import os
|
||||
import pickle
|
||||
import shutil
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
@@ -19,6 +20,7 @@ from mempalace.backends.chroma import (
|
||||
ChromaCollection,
|
||||
_fix_blob_seq_ids,
|
||||
_pin_hnsw_threads,
|
||||
quarantine_invalid_hnsw_metadata,
|
||||
quarantine_stale_hnsw,
|
||||
)
|
||||
|
||||
@@ -755,7 +757,10 @@ def test_make_client_quarantines_only_on_first_call_per_palace(tmp_path, monkeyp
|
||||
"""Quarantine fires on first ``make_client()`` for a palace, then is
|
||||
skipped on subsequent calls — prevents runtime thrash where a daemon's
|
||||
own steady writes bump ``chroma.sqlite3`` faster than HNSW flushes,
|
||||
making the mtime heuristic falsely trigger every reconnect."""
|
||||
making the mtime heuristic falsely trigger every reconnect.
|
||||
|
||||
Invalid metadata quarantine shares the same cold-start gate here; the
|
||||
more aggressive refresh path lives in ``_client()``."""
|
||||
from mempalace.backends.chroma import ChromaBackend
|
||||
|
||||
palace_path = str(tmp_path / "palace")
|
||||
@@ -782,6 +787,34 @@ def test_make_client_quarantines_only_on_first_call_per_palace(tmp_path, monkeyp
|
||||
], "quarantine_stale_hnsw should fire once per palace per process, not on every reconnect"
|
||||
|
||||
|
||||
def test_make_client_gates_invalid_metadata_on_first_call(tmp_path, monkeypatch):
    """Two ``make_client()`` calls on one palace trigger one invalid-metadata check.

    The invalid-metadata quarantine is replaced by a spy that records the path
    it receives; the stale-HNSW quarantine is replaced by a no-op.
    """
    from mempalace.backends.chroma import ChromaBackend

    palace_dir = tmp_path / "palace"
    palace_dir.mkdir(parents=True, exist_ok=True)
    (palace_dir / "chroma.sqlite3").write_text("")
    palace_path = str(palace_dir)

    # Start from an empty gate so earlier tests cannot influence this one.
    monkeypatch.setattr(ChromaBackend, "_quarantined_paths", set())

    seen_paths: list[str] = []

    def _invalid(path, *args, **kwargs):
        seen_paths.append(path)
        return []

    def _stale(path, stale_seconds=300.0):
        return []

    for target, stub in (
        ("mempalace.backends.chroma.quarantine_invalid_hnsw_metadata", _invalid),
        ("mempalace.backends.chroma.quarantine_stale_hnsw", _stale),
    ):
        monkeypatch.setattr(target, stub)

    for _ in range(2):
        ChromaBackend.make_client(palace_path)

    assert seen_paths == [palace_path]
|
||||
|
||||
|
||||
def test_make_client_quarantines_each_palace_independently(tmp_path, monkeypatch):
|
||||
"""Two distinct palaces each get one quarantine attempt — the gate is
|
||||
keyed by palace path, not global."""
|
||||
@@ -919,3 +952,245 @@ def test_get_collection_applies_retrofit_on_existing_palace(tmp_path):
|
||||
)
|
||||
|
||||
assert wrapper._collection.configuration_json["hnsw"]["num_threads"] == 1
|
||||
|
||||
|
||||
def test_quarantine_invalid_hnsw_metadata_renames_missing_dimensionality(tmp_path):
    """Labels present but ``dimensionality`` of ``None``: the segment is moved aside."""
    palace_dir = tmp_path / "palace"
    segment_dir = palace_dir / "abcd-1234-5678"
    segment_dir.mkdir(parents=True)
    metadata = {"dimensionality": None, "id_to_label": {"a": 1}}
    (segment_dir / "index_metadata.pickle").write_bytes(pickle.dumps(metadata))

    quarantined = quarantine_invalid_hnsw_metadata(str(palace_dir))

    assert len(quarantined) == 1
    assert ".corrupt-" in quarantined[0]
    assert not segment_dir.exists()
|
||||
|
||||
|
||||
def test_quarantine_invalid_hnsw_metadata_allows_uninitialized_segment(tmp_path):
    """No dimensionality and an empty ``id_to_label``: the segment is left in place."""
    palace_dir = tmp_path / "palace"
    segment_dir = palace_dir / "abcd-1234-5678"
    segment_dir.mkdir(parents=True)
    metadata = {"dimensionality": None, "id_to_label": {}}
    (segment_dir / "index_metadata.pickle").write_bytes(pickle.dumps(metadata))

    quarantined = quarantine_invalid_hnsw_metadata(str(palace_dir))

    assert quarantined == []
    assert segment_dir.exists()
|
||||
|
||||
|
||||
def test_quarantine_invalid_hnsw_metadata_rejects_non_dict_id_to_label(tmp_path):
    """An ``id_to_label`` stored as a list gets the segment moved aside."""
    palace_dir = tmp_path / "palace"
    segment_dir = palace_dir / "abcd-1234-5678"
    segment_dir.mkdir(parents=True)
    metadata = {"dimensionality": 8, "id_to_label": ["a", "b"]}
    (segment_dir / "index_metadata.pickle").write_bytes(pickle.dumps(metadata))

    quarantined = quarantine_invalid_hnsw_metadata(str(palace_dir))

    assert len(quarantined) == 1
    assert ".corrupt-" in quarantined[0]
    assert not segment_dir.exists()
|
||||
|
||||
|
||||
def test_quarantine_invalid_hnsw_metadata_rejects_non_schema_payload(tmp_path):
    """A pickle holding a plain list instead of metadata gets the segment moved aside."""
    palace_dir = tmp_path / "palace"
    segment_dir = palace_dir / "abcd-1234-5678"
    segment_dir.mkdir(parents=True)
    payload = ["not", "a", "metadata", "object"]
    (segment_dir / "index_metadata.pickle").write_bytes(pickle.dumps(payload))

    quarantined = quarantine_invalid_hnsw_metadata(str(palace_dir))

    assert len(quarantined) == 1
    assert ".corrupt-" in quarantined[0]
    assert not segment_dir.exists()
|
||||
|
||||
|
||||
# Every invocation of the unsafe payload is recorded here. The raised
# AssertionError alone is not a reliable signal: if the code under test
# catches it while loading the pickle, the failure would be swallowed.
_dangerous_pickle_executions: list[str] = []


def _dangerous_pickle_payload_executed():
    """Callable embedded in the hostile pickle; running it means loading was unsafe."""
    _dangerous_pickle_executions.append("executed")
    raise AssertionError("unsafe pickle payload executed")


class _DangerousPickle:
    """Object whose pickled form calls ``_dangerous_pickle_payload_executed`` on load."""

    def __reduce__(self):
        return (_dangerous_pickle_payload_executed, ())


def test_quarantine_invalid_hnsw_metadata_rejects_unsafe_pickle(tmp_path):
    """A pickle carrying a callable payload is quarantined without running it."""
    palace = tmp_path / "palace"
    palace.mkdir()
    seg = palace / "abcd-1234-5678"
    seg.mkdir()
    with open(seg / "index_metadata.pickle", "wb") as f:
        pickle.dump(_DangerousPickle(), f)

    _dangerous_pickle_executions.clear()
    moved = quarantine_invalid_hnsw_metadata(str(palace))

    # Checked first and independently of exception propagation, so a swallowed
    # AssertionError inside the loader cannot hide an executed payload.
    assert _dangerous_pickle_executions == [], "unsafe pickle payload executed"
    assert len(moved) == 1
    assert ".corrupt-" in moved[0]
    assert not seg.exists()
|
||||
|
||||
|
||||
def test_quarantine_invalid_hnsw_metadata_skips_transient_read_errors(tmp_path, monkeypatch):
    """An ``EOFError`` from the metadata loader leaves the segment where it is."""
    palace_dir = tmp_path / "palace"
    segment_dir = palace_dir / "abcd-1234-5678"
    segment_dir.mkdir(parents=True)
    (segment_dir / "index_metadata.pickle").write_bytes(b"partial")

    def _raise_eof(path):
        raise EOFError("flush in progress")

    monkeypatch.setattr(
        "mempalace.backends.chroma._SafePersistentDataUnpickler.load",
        _raise_eof,
    )

    quarantined = quarantine_invalid_hnsw_metadata(str(palace_dir))

    assert quarantined == []
    assert segment_dir.exists()
|
||||
|
||||
|
||||
def test_quarantine_invalid_hnsw_metadata_skips_truncated_pickle(tmp_path, monkeypatch):
    """A "truncated" ``UnpicklingError`` from the loader leaves the segment where it is."""
    palace_dir = tmp_path / "palace"
    segment_dir = palace_dir / "abcd-1234-5678"
    segment_dir.mkdir(parents=True)
    (segment_dir / "index_metadata.pickle").write_bytes(b"partial")

    def _raise_truncated(path):
        raise pickle.UnpicklingError("pickle data was truncated")

    monkeypatch.setattr(
        "mempalace.backends.chroma._SafePersistentDataUnpickler.load",
        _raise_truncated,
    )

    quarantined = quarantine_invalid_hnsw_metadata(str(palace_dir))

    assert quarantined == []
    assert segment_dir.exists()
|
||||
|
||||
|
||||
def test_chroma_backend_preflights_metadata_before_persistent_client(tmp_path, monkeypatch):
    """``_client()`` runs blob fix, invalid and stale quarantine, then builds the client.

    All three preflight helpers and ``chromadb.PersistentClient`` are replaced
    by spies that append to one shared list, so the recorded order shows that
    client construction happens only after the preflight steps.
    """
    palace = tmp_path / "palace"
    palace.mkdir()
    calls = []

    def _record(name):
        def inner(path, *args, **kwargs):
            calls.append((name, path))
            return [] if name != "blob" else None

        return inner

    monkeypatch.setattr("mempalace.backends.chroma._fix_blob_seq_ids", _record("blob"))
    monkeypatch.setattr(
        "mempalace.backends.chroma.quarantine_invalid_hnsw_metadata", _record("invalid")
    )
    monkeypatch.setattr("mempalace.backends.chroma.quarantine_stale_hnsw", _record("stale"))

    class DummyClient:
        pass

    def _make_client(path):
        # Record construction so its position relative to the preflight steps
        # can be asserted; without this the "before" in the test name is unchecked.
        calls.append(("client", path))
        return DummyClient()

    monkeypatch.setattr("mempalace.backends.chroma.chromadb.PersistentClient", _make_client)

    backend = ChromaBackend()
    backend._client(str(palace))

    assert calls[:3] == [
        ("blob", str(palace)),
        ("invalid", str(palace)),
        ("stale", str(palace)),
    ]
    # The client is built exactly once, and only after every preflight step.
    assert [name for name, _ in calls[3:]] == ["client"]
|
||||
|
||||
|
||||
def test_chroma_backend_stale_quarantine_is_cold_start_only_on_refresh(tmp_path, monkeypatch):
    """Second ``_client()`` call with a changed second stat field re-runs only the blob fix.

    ``_db_stat`` is stubbed to yield ``(1, 1.0)`` twice and then ``(1, 2.0)``
    twice, i.e. the first tuple element stays the same while the second changes.
    """
    palace_dir = tmp_path / "palace"
    palace_dir.mkdir()
    (palace_dir / "chroma.sqlite3").write_text("")
    palace_path = str(palace_dir)
    events = []

    def _spy(label):
        def _stub(path, *args, **kwargs):
            events.append((label, path))
            return None if label == "blob" else []

        return _stub

    class DummyClient:
        pass

    monkeypatch.setattr(ChromaBackend, "_quarantined_paths", set())
    for target, replacement in (
        ("mempalace.backends.chroma._fix_blob_seq_ids", _spy("blob")),
        ("mempalace.backends.chroma.quarantine_invalid_hnsw_metadata", _spy("invalid")),
        ("mempalace.backends.chroma.quarantine_stale_hnsw", _spy("stale")),
        ("mempalace.backends.chroma.chromadb.PersistentClient", lambda path: DummyClient()),
    ):
        monkeypatch.setattr(target, replacement)

    backend = ChromaBackend()
    stat_sequence = iter([(1, 1.0), (1, 1.0), (1, 2.0), (1, 2.0)])

    def _next_stat(path):
        return next(stat_sequence)

    monkeypatch.setattr(backend, "_db_stat", _next_stat)

    for _ in range(2):
        backend._client(palace_path)

    assert events == [
        ("blob", palace_path),
        ("invalid", palace_path),
        ("stale", palace_path),
        ("blob", palace_path),
    ]
|
||||
|
||||
|
||||
def test_chroma_backend_requarantines_after_inode_replacement(tmp_path, monkeypatch):
    """Second ``_client()`` call with both stat fields changed re-runs every preflight step.

    ``_db_stat`` is stubbed to yield ``(1, 1.0)`` twice and then ``(2, 2.0)``
    twice, so both tuple elements differ between the first and second call.
    """
    palace_dir = tmp_path / "palace"
    palace_dir.mkdir()
    (palace_dir / "chroma.sqlite3").write_text("")
    palace_path = str(palace_dir)
    events = []

    def _spy(label):
        def _stub(path, *args, **kwargs):
            events.append((label, path))
            return None if label == "blob" else []

        return _stub

    class DummyClient:
        pass

    monkeypatch.setattr(ChromaBackend, "_quarantined_paths", set())
    for target, replacement in (
        ("mempalace.backends.chroma._fix_blob_seq_ids", _spy("blob")),
        ("mempalace.backends.chroma.quarantine_invalid_hnsw_metadata", _spy("invalid")),
        ("mempalace.backends.chroma.quarantine_stale_hnsw", _spy("stale")),
        ("mempalace.backends.chroma.chromadb.PersistentClient", lambda path: DummyClient()),
    ):
        monkeypatch.setattr(target, replacement)

    backend = ChromaBackend()
    stat_sequence = iter([(1, 1.0), (1, 1.0), (2, 2.0), (2, 2.0)])

    def _next_stat(path):
        return next(stat_sequence)

    monkeypatch.setattr(backend, "_db_stat", _next_stat)

    for _ in range(2):
        backend._client(palace_path)

    full_preflight = [
        ("blob", palace_path),
        ("invalid", palace_path),
        ("stale", palace_path),
    ]
    assert events == full_preflight + full_preflight
|
||||
|
||||
+46
-1
@@ -4,7 +4,7 @@ import argparse
|
||||
import shlex
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
from unittest.mock import MagicMock, call, patch
|
||||
|
||||
import pytest
|
||||
|
||||
@@ -815,13 +815,58 @@ def test_cmd_repair_success(mock_config_cls, tmp_path, capsys):
|
||||
"documents": ["doc1", "doc2"],
|
||||
"metadatas": [{"wing": "a"}, {"wing": "b"}],
|
||||
}
|
||||
mock_temp_col = MagicMock()
|
||||
mock_temp_col.count.return_value = 2
|
||||
mock_new_col = MagicMock()
|
||||
mock_new_col.count.return_value = 2
|
||||
mock_backend = _mock_backend_for(col=mock_col, new_col=mock_new_col)
|
||||
mock_backend.create_collection.side_effect = [mock_temp_col, mock_new_col]
|
||||
with patch("mempalace.backends.chroma.ChromaBackend", return_value=mock_backend):
|
||||
cmd_repair(args)
|
||||
out = capsys.readouterr().out
|
||||
assert "Repair complete" in out
|
||||
assert "2 drawers rebuilt" in out
|
||||
assert mock_backend.delete_collection.call_args_list == [
|
||||
call(str(palace_dir), "mempalace_drawers__repair_tmp"),
|
||||
call(str(palace_dir), "mempalace_drawers"),
|
||||
call(str(palace_dir), "mempalace_drawers__repair_tmp"),
|
||||
]
|
||||
mock_temp_col.upsert.assert_called_once()
|
||||
mock_new_col.upsert.assert_called_once()
|
||||
mock_new_col.add.assert_not_called()
|
||||
|
||||
|
||||
@patch("mempalace.cli.MempalaceConfig")
def test_cmd_repair_restores_backup_on_live_rebuild_failure(mock_config_cls, tmp_path, capsys):
    """A failure creating the live collection makes ``cmd_repair`` exit 1 and report a restore.

    The first ``create_collection`` call (temp collection) succeeds; the second
    raises ``RuntimeError``.
    """
    palace_dir = tmp_path / "palace"
    palace_dir.mkdir()
    (palace_dir / "chroma.sqlite3").write_text("db")
    palace = str(palace_dir)
    mock_config_cls.return_value.palace_path = palace

    source_col = MagicMock()
    source_col.count.return_value = 2
    source_col.get.return_value = {
        "ids": ["id1", "id2"],
        "documents": ["doc1", "doc2"],
        "metadatas": [{"wing": "a"}, {"wing": "b"}],
    }
    staged_col = MagicMock()
    staged_col.count.return_value = 2

    backend = _mock_backend_for(col=source_col)
    backend.create_collection.side_effect = [staged_col, RuntimeError("live build failed")]

    backend_patch = patch("mempalace.backends.chroma.ChromaBackend", return_value=backend)
    with backend_patch, pytest.raises(SystemExit) as excinfo:
        cmd_repair(argparse.Namespace(palace=None, yes=True))

    printed = capsys.readouterr().out
    assert excinfo.value.code == 1
    assert "Repair failed" in printed
    assert "restoring from backup" in printed
    backend.close_palace.assert_called_once_with(palace)
    assert backend.delete_collection.call_args_list == [
        call(palace, "mempalace_drawers__repair_tmp"),
        call(palace, "mempalace_drawers"),
        call(palace, "mempalace_drawers__repair_tmp"),
    ]
|
||||
|
||||
|
||||
@patch("mempalace.cli.MempalaceConfig")
|
||||
|
||||
+156
-4
@@ -238,14 +238,39 @@ def test_capacity_status_tolerates_flush_lag(tmp_path):
|
||||
assert info["status"] == "ok"
|
||||
|
||||
|
||||
def test_capacity_status_flags_unflushed_with_large_sqlite(tmp_path):
|
||||
"""No pickle + many sqlite rows is its own divergence signal."""
|
||||
def test_capacity_status_does_not_flag_unflushed_with_large_sqlite(tmp_path):
|
||||
"""No pickle + many sqlite rows is inconclusive, not divergence."""
|
||||
seg = "seg-noflush"
|
||||
_seed_chroma_db(str(tmp_path), sqlite_count=10_000, segment_id=seg)
|
||||
info = hnsw_capacity_status(str(tmp_path), COLLECTION)
|
||||
assert info["diverged"] is True
|
||||
assert info["diverged"] is False
|
||||
assert info["status"] == "unknown"
|
||||
assert info["divergence"] is None
|
||||
assert info["hnsw_count"] is None
|
||||
assert "never flushed" in info["message"]
|
||||
assert "capacity unavailable" in info["message"]
|
||||
assert "leaving vector search enabled" in info["message"]
|
||||
|
||||
|
||||
def test_mcp_probe_does_not_disable_vectors_for_unflushed_metadata(tmp_path, monkeypatch):
    """The MCP preflight must not route all searches to BM25 on this signal."""
    from mempalace import mcp_server

    palace = str(tmp_path)
    _seed_chroma_db(palace, sqlite_count=10_000, segment_id="seg-mcp-noflush")

    class _StubConfig:
        palace_path = palace

    # Begin in the "disabled" state so the refresh has to actively clear it.
    for attribute, value in (
        ("_config", _StubConfig()),
        ("_vector_disabled", True),
        ("_vector_disabled_reason", "old divergence"),
    ):
        monkeypatch.setattr(mcp_server, attribute, value)

    mcp_server._refresh_vector_disabled_flag()

    assert mcp_server._vector_disabled is False
    assert mcp_server._vector_disabled_reason == ""
    capacity = mcp_server._vector_capacity_status
    assert capacity["status"] == "unknown"
    assert "leaving vector search enabled" in capacity["message"]
|
||||
|
||||
|
||||
def test_capacity_status_quiet_for_empty_palace(tmp_path):
|
||||
@@ -372,6 +397,17 @@ def _seed_drawers(palace: str, segment_id: str, drawers: list[tuple[str, dict, s
|
||||
conn.close()
|
||||
|
||||
|
||||
def _set_drawer_created_at(palace: str, timestamps: dict[int, str]) -> None:
    """Overwrite ``embeddings.created_at`` for the given row ids.

    ``palace`` is the directory holding ``chroma.sqlite3``; ``timestamps`` maps
    an ``embeddings.id`` value to the ``created_at`` value to store for it.
    """
    connection = sqlite3.connect(os.path.join(palace, "chroma.sqlite3"))
    try:
        connection.executemany(
            "UPDATE embeddings SET created_at = ? WHERE id = ?",
            [(stamp, row_id) for row_id, stamp in timestamps.items()],
        )
        connection.commit()
    finally:
        connection.close()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def palace_with_drawers(tmp_path):
|
||||
seg = "seg-bm25"
|
||||
@@ -417,6 +453,122 @@ def test_bm25_fallback_filters_by_wing(palace_with_drawers):
|
||||
assert all(r["wing"] == "design" for r in out["results"])
|
||||
|
||||
|
||||
def test_bm25_fallback_applies_wing_before_fts_candidate_limit(tmp_path):
    """With ``max_candidates=1`` the single hit is the drawer in the requested wing.

    Two drawers share the query tokens; only the second is in wing ``project``.
    """
    palace = str(tmp_path)
    segment = "seg-bm25-fts-limit"
    drawers = [
        (
            "shared token outside target wing",
            {"wing": "ops", "room": "incidents", "source_file": "/x/ops.md"},
            "d-1",
        ),
        (
            "shared token inside target wing",
            {"wing": "project", "room": "diary", "source_file": "/x/project.md"},
            "d-2",
        ),
    ]
    _seed_chroma_db(palace, sqlite_count=0, segment_id=segment)
    _seed_drawers(palace, segment, drawers)

    outcome = _bm25_only_via_sqlite("shared token", palace, wing="project", max_candidates=1)

    assert outcome["total_before_filter"] == 1
    hits = outcome["results"]
    assert len(hits) == 1
    assert hits[0]["wing"] == "project"
|
||||
|
||||
|
||||
def test_bm25_fallback_applies_room_before_fts_candidate_limit(tmp_path):
    """With ``max_candidates=1`` the single hit is the drawer in the requested room.

    Both drawers are in wing ``project`` and share the query tokens; only the
    second is in room ``diary``.
    """
    palace = str(tmp_path)
    segment = "seg-bm25-room-limit"
    drawers = [
        (
            "shared token wrong room",
            {"wing": "project", "room": "scratch", "source_file": "/x/scratch.md"},
            "d-1",
        ),
        (
            "shared token right room",
            {"wing": "project", "room": "diary", "source_file": "/x/diary.md"},
            "d-2",
        ),
    ]
    _seed_chroma_db(palace, sqlite_count=0, segment_id=segment)
    _seed_drawers(palace, segment, drawers)

    outcome = _bm25_only_via_sqlite(
        "shared token",
        palace,
        wing="project",
        room="diary",
        max_candidates=1,
    )

    assert outcome["total_before_filter"] == 1
    hits = outcome["results"]
    assert len(hits) == 1
    top_hit = hits[0]
    assert top_hit["wing"] == "project"
    assert top_hit["room"] == "diary"
|
||||
|
||||
|
||||
def test_bm25_fallback_applies_wing_before_recency_candidate_limit(tmp_path):
    """Query ``"a"`` with ``max_candidates=1`` returns the older drawer in the requested wing.

    The drawer outside wing ``project`` is given the later ``created_at``.
    """
    palace = str(tmp_path)
    segment = "seg-bm25-recency-limit"
    drawers = [
        (
            "target drawer for short query",
            {"wing": "project", "room": "diary", "source_file": "/x/project.md"},
            "d-1",
        ),
        (
            "newer drawer outside target wing",
            {"wing": "ops", "room": "incidents", "source_file": "/x/ops.md"},
            "d-2",
        ),
    ]
    created_at_by_id = {
        1: "2026-01-01 00:00:00",
        2: "2026-02-01 00:00:00",
    }
    _seed_chroma_db(palace, sqlite_count=0, segment_id=segment)
    _seed_drawers(palace, segment, drawers)
    _set_drawer_created_at(palace, created_at_by_id)

    outcome = _bm25_only_via_sqlite("a", palace, wing="project", max_candidates=1)

    assert outcome["total_before_filter"] == 1
    hits = outcome["results"]
    assert len(hits) == 1
    assert hits[0]["wing"] == "project"
|
||||
|
||||
|
||||
def test_bm25_fallback_returns_empty_when_filtered_wing_has_no_candidates(tmp_path):
    """Filtering on a wing that holds no drawers yields zero candidates and no results."""
    palace = str(tmp_path)
    segment = "seg-bm25-empty-filter"
    only_drawer = (
        "shared token outside target wing",
        {"wing": "ops", "room": "incidents", "source_file": "/x/ops.md"},
        "d-1",
    )
    _seed_chroma_db(palace, sqlite_count=0, segment_id=segment)
    _seed_drawers(palace, segment, [only_drawer])

    outcome = _bm25_only_via_sqlite("shared token", palace, wing="project", max_candidates=1)

    assert outcome["total_before_filter"] == 0
    assert outcome["results"] == []
|
||||
|
||||
|
||||
def test_bm25_fallback_no_palace(tmp_path):
|
||||
out = _bm25_only_via_sqlite("anything", str(tmp_path))
|
||||
assert "error" in out
|
||||
|
||||
+312
-7
@@ -2,7 +2,7 @@
|
||||
|
||||
import os
|
||||
import sqlite3
|
||||
from unittest.mock import MagicMock, patch
|
||||
from unittest.mock import MagicMock, call, patch
|
||||
|
||||
import pytest
|
||||
|
||||
@@ -229,8 +229,11 @@ def test_rebuild_index_success(mock_backend_cls, mock_shutil, tmp_path):
|
||||
}
|
||||
|
||||
mock_new_col = MagicMock()
|
||||
mock_new_col.count.return_value = 2
|
||||
mock_temp_col = MagicMock()
|
||||
mock_temp_col.count.return_value = 2
|
||||
mock_backend = _install_mock_backend(mock_backend_cls, mock_col)
|
||||
mock_backend.create_collection.return_value = mock_new_col
|
||||
mock_backend.create_collection.side_effect = [mock_temp_col, mock_new_col]
|
||||
|
||||
repair.rebuild_index(palace_path=str(tmp_path))
|
||||
|
||||
@@ -239,14 +242,74 @@ def test_rebuild_index_success(mock_backend_cls, mock_shutil, tmp_path):
|
||||
assert "chroma.sqlite3" in str(mock_shutil.copy2.call_args)
|
||||
|
||||
# Verify: deleted and recreated (cosine is the backend default)
|
||||
mock_backend.delete_collection.assert_called_once_with(str(tmp_path), "mempalace_drawers")
|
||||
mock_backend.create_collection.assert_called_once_with(str(tmp_path), "mempalace_drawers")
|
||||
assert mock_backend.create_collection.call_args_list == [
|
||||
call(str(tmp_path), "mempalace_drawers__repair_tmp"),
|
||||
call(str(tmp_path), "mempalace_drawers"),
|
||||
]
|
||||
assert mock_backend.delete_collection.call_args_list == [
|
||||
call(str(tmp_path), "mempalace_drawers__repair_tmp"),
|
||||
call(str(tmp_path), "mempalace_drawers"),
|
||||
call(str(tmp_path), "mempalace_drawers__repair_tmp"),
|
||||
]
|
||||
|
||||
# Verify: used upsert not add
|
||||
mock_temp_col.upsert.assert_called_once()
|
||||
mock_new_col.upsert.assert_called_once()
|
||||
mock_new_col.add.assert_not_called()
|
||||
|
||||
|
||||
@patch("mempalace.repair.shutil")
@patch("mempalace.repair.ChromaBackend")
def test_rebuild_index_ignores_missing_temp_collection_at_start(
    mock_backend_cls, mock_shutil, tmp_path
):
    """A "does not exist" ``ValueError`` on the first temp delete does not stop the rebuild."""
    (tmp_path / "chroma.sqlite3").write_text("fake")
    palace = str(tmp_path)

    def _write_backup(src, dst):
        with open(dst, "w") as backup_file:
            backup_file.write("backup")

    mock_shutil.copy2.side_effect = _write_backup

    source_col = MagicMock()
    source_col.count.return_value = 2
    source_col.get.return_value = {
        "ids": ["id1", "id2"],
        "documents": ["doc1", "doc2"],
        "metadatas": [{"wing": "a"}, {"wing": "b"}],
    }
    staged_col = MagicMock()
    staged_col.count.return_value = 2
    live_col = MagicMock()
    live_col.count.return_value = 2

    backend = _install_mock_backend(mock_backend_cls, source_col)
    backend.create_collection.side_effect = [staged_col, live_col]
    # Only the first delete (the pre-run temp cleanup) fails.
    backend.delete_collection.side_effect = [
        ValueError("Collection [mempalace_drawers__repair_tmp] does not exist"),
        None,
        None,
    ]

    repair.rebuild_index(palace_path=palace)

    assert mock_shutil.copy2.call_count == 1
    assert backend.delete_collection.call_args_list == [
        call(palace, "mempalace_drawers__repair_tmp"),
        call(palace, "mempalace_drawers"),
        call(palace, "mempalace_drawers__repair_tmp"),
    ]
|
||||
|
||||
|
||||
def test_delete_collection_if_exists_reraises_unexpected_value_error():
    """A ``ValueError`` with the message "invalid collection name" is propagated."""
    backend = MagicMock()
    backend.delete_collection.side_effect = ValueError("invalid collection name")

    expected_failure = pytest.raises(ValueError, match="invalid collection name")
    with expected_failure:
        repair._delete_collection_if_exists(backend, "/palace", "bad/name")
|
||||
|
||||
|
||||
@patch("mempalace.repair.shutil")
|
||||
@patch("mempalace.repair.ChromaBackend")
|
||||
def test_rebuild_index_error_reading(mock_backend_cls, mock_shutil, tmp_path):
|
||||
@@ -365,19 +428,261 @@ def test_rebuild_index_proceeds_with_override(mock_backend_cls, mock_shutil, tmp
|
||||
},
|
||||
{"ids": [], "documents": [], "metadatas": []},
|
||||
]
|
||||
mock_temp_col = MagicMock()
|
||||
mock_temp_col.count.return_value = 10_000
|
||||
mock_new_col = MagicMock()
|
||||
mock_new_col.count.return_value = 10_000
|
||||
mock_backend.get_collection.return_value = mock_col
|
||||
mock_backend.create_collection.return_value = mock_new_col
|
||||
mock_backend.create_collection.side_effect = [mock_temp_col, mock_new_col]
|
||||
mock_backend_cls.return_value = mock_backend
|
||||
|
||||
with patch("mempalace.repair.sqlite_drawer_count", return_value=67_580):
|
||||
repair.rebuild_index(palace_path=str(tmp_path), confirm_truncation_ok=True)
|
||||
|
||||
mock_backend.delete_collection.assert_called_once()
|
||||
mock_backend.create_collection.assert_called_once()
|
||||
assert mock_backend.delete_collection.call_count == 3
|
||||
assert mock_backend.create_collection.call_count == 2
|
||||
mock_temp_col.upsert.assert_called()
|
||||
mock_new_col.upsert.assert_called()
|
||||
|
||||
|
||||
@patch("mempalace.repair.shutil")
@patch("mempalace.repair.ChromaBackend")
def test_rebuild_index_stage_failure_leaves_live_collection_untouched(
    mock_backend_cls, mock_shutil, tmp_path
):
    """A staged count of 1 against a source count of 2 aborts before the live delete.

    Only the temp collection is ever deleted and the raised error reports
    ``live_replaced`` as ``False``.
    """
    (tmp_path / "chroma.sqlite3").write_text("fake")
    palace = str(tmp_path)

    source_col = MagicMock()
    source_col.count.return_value = 2
    source_col.get.return_value = {
        "ids": ["id1", "id2"],
        "documents": ["doc1", "doc2"],
        "metadatas": [{"wing": "a"}, {"wing": "b"}],
    }
    staged_col = MagicMock()
    staged_col.count.return_value = 1

    backend = _install_mock_backend(mock_backend_cls, source_col)
    backend.create_collection.return_value = staged_col

    with pytest.raises(repair.RebuildCollectionError) as excinfo:
        repair.rebuild_index(palace_path=palace)

    assert excinfo.value.live_replaced is False
    assert mock_shutil.copy2.call_count == 1
    temp_delete = call(palace, "mempalace_drawers__repair_tmp")
    assert backend.delete_collection.call_args_list == [temp_delete, temp_delete]
|
||||
|
||||
|
||||
@patch("mempalace.repair.shutil")
@patch("mempalace.repair.ChromaBackend")
def test_rebuild_index_live_failure_restores_backup(mock_backend_cls, mock_shutil, tmp_path):
    """An upsert failure on the live collection triggers a second ``copy2`` call.

    The patched ``ChromaBackend`` class hands out two distinct instances in
    order; only the first one is expected to have ``close_palace`` called.
    """
    (tmp_path / "chroma.sqlite3").write_text("fake")
    palace = str(tmp_path)

    def _write_backup(src, dst):
        with open(dst, "w") as backup_file:
            backup_file.write("backup")

    mock_shutil.copy2.side_effect = _write_backup

    source_col = MagicMock()
    source_col.count.return_value = 2
    source_col.get.return_value = {
        "ids": ["id1", "id2"],
        "documents": ["doc1", "doc2"],
        "metadatas": [{"wing": "a"}, {"wing": "b"}],
    }
    staged_col = MagicMock()
    staged_col.count.return_value = 2
    live_col = MagicMock()
    live_col.upsert.side_effect = RuntimeError("live upsert failed")

    active_backend = MagicMock()
    active_backend.get_collection.return_value = source_col
    active_backend.create_collection.side_effect = [staged_col, live_col]
    helper_backend = MagicMock()
    mock_backend_cls.side_effect = [active_backend, helper_backend]

    with pytest.raises(repair.RebuildCollectionError) as excinfo:
        repair.rebuild_index(palace_path=palace)

    assert excinfo.value.live_replaced is True
    assert mock_shutil.copy2.call_count == 2
    assert active_backend.delete_collection.call_args_list == [
        call(palace, "mempalace_drawers__repair_tmp"),
        call(palace, "mempalace_drawers"),
        call(palace, "mempalace_drawers__repair_tmp"),
        call(palace, "mempalace_drawers"),
    ]
    active_backend.close_palace.assert_called_once_with(palace)
    helper_backend.close_palace.assert_not_called()
|
||||
|
||||
|
||||
@patch("mempalace.repair.shutil")
@patch("mempalace.repair.ChromaBackend")
def test_rebuild_index_live_delete_missing_still_restores_backup(
    mock_backend_cls, mock_shutil, tmp_path
):
    """A ``ChromaNotFoundError`` on the fourth delete does not prevent the second ``copy2``.

    The live ``create_collection`` call raises ``RuntimeError``; the fourth
    ``delete_collection`` call then raises ``ChromaNotFoundError``.
    """
    (tmp_path / "chroma.sqlite3").write_text("fake")
    palace = str(tmp_path)

    def _write_backup(src, dst):
        with open(dst, "w") as backup_file:
            backup_file.write("backup")

    mock_shutil.copy2.side_effect = _write_backup

    source_col = MagicMock()
    source_col.count.return_value = 2
    source_col.get.return_value = {
        "ids": ["id1", "id2"],
        "documents": ["doc1", "doc2"],
        "metadatas": [{"wing": "a"}, {"wing": "b"}],
    }
    staged_col = MagicMock()
    staged_col.count.return_value = 2

    backend = _install_mock_backend(mock_backend_cls, source_col)
    backend.create_collection.side_effect = [staged_col, RuntimeError("create failed")]
    backend.delete_collection.side_effect = [
        None,
        None,
        None,
        repair.ChromaNotFoundError("missing"),
    ]

    with pytest.raises(repair.RebuildCollectionError) as excinfo:
        repair.rebuild_index(palace_path=palace)

    assert excinfo.value.live_replaced is True
    assert mock_shutil.copy2.call_count == 2
    assert backend.delete_collection.call_args_list == [
        call(palace, "mempalace_drawers__repair_tmp"),
        call(palace, "mempalace_drawers"),
        call(palace, "mempalace_drawers__repair_tmp"),
        call(palace, "mempalace_drawers"),
    ]
|
||||
|
||||
|
||||
@patch("mempalace.repair.shutil")
@patch("mempalace.repair.ChromaBackend")
def test_rebuild_index_restore_failure_preserves_original_error(
    mock_backend_cls, mock_shutil, tmp_path, capsys
):
    """When copying from the backup fails, the raised error still names the upsert failure.

    ``copy2`` raises ``PermissionError`` whenever its source ends in
    ``.backup``; otherwise it writes a placeholder file.
    """
    (tmp_path / "chroma.sqlite3").write_text("fake")

    def _copy_unless_restoring(src, dst):
        restoring = str(src).endswith(".backup")
        if restoring:
            raise PermissionError("locked sqlite")
        with open(dst, "w") as backup_file:
            backup_file.write("backup")

    mock_shutil.copy2.side_effect = _copy_unless_restoring

    source_col = MagicMock()
    source_col.count.return_value = 2
    source_col.get.return_value = {
        "ids": ["id1", "id2"],
        "documents": ["doc1", "doc2"],
        "metadatas": [{"wing": "a"}, {"wing": "b"}],
    }
    staged_col = MagicMock()
    staged_col.count.return_value = 2
    live_col = MagicMock()
    live_col.upsert.side_effect = RuntimeError("live upsert failed")

    backend = _install_mock_backend(mock_backend_cls, source_col)
    backend.create_collection.side_effect = [staged_col, live_col]

    with pytest.raises(repair.RebuildCollectionError) as excinfo:
        repair.rebuild_index(palace_path=str(tmp_path))

    printed = capsys.readouterr().out
    assert "locked sqlite" in printed
    assert "Manual restore required" in printed
    assert "live upsert failed" in str(excinfo.value)
|
||||
|
||||
|
||||
@patch("mempalace.repair.ChromaBackend")
def test_rebuild_collection_via_temp_keeps_original_error_when_cleanup_fails(
    mock_backend_cls,
):
    """A failing temp cleanup does not replace the "live build failed" error.

    The second ``create_collection`` call raises ``RuntimeError`` and the third
    ``delete_collection`` call raises a different ``RuntimeError``.
    """
    palace = "/palace"
    source_col = MagicMock()
    source_col.count.return_value = 2
    staged_col = MagicMock()
    staged_col.count.return_value = 2

    backend = _install_mock_backend(mock_backend_cls, source_col)
    backend.create_collection.side_effect = [staged_col, RuntimeError("live build failed")]
    backend.delete_collection.side_effect = [
        None,
        None,
        RuntimeError("cleanup failed"),
    ]

    def _silent_progress(*args, **kwargs):
        return None

    with pytest.raises(repair.RebuildCollectionError) as excinfo:
        repair._rebuild_collection_via_temp(
            backend,
            palace,
            ["id1", "id2"],
            ["doc1", "doc2"],
            [{"wing": "a"}, {"wing": "b"}],
            batch_size=5000,
            progress=_silent_progress,
        )

    raised = excinfo.value
    assert "live build failed" in str(raised)
    assert raised.live_replaced is True
    assert backend.delete_collection.call_args_list == [
        call(palace, "mempalace_drawers__repair_tmp"),
        call(palace, "mempalace_drawers"),
        call(palace, "mempalace_drawers__repair_tmp"),
    ]
|
||||
|
||||
|
||||
@patch("mempalace.repair.shutil")
@patch("mempalace.repair.ChromaBackend")
def test_rebuild_index_ignores_temp_cleanup_failure_after_success(
    mock_backend_cls, mock_shutil, tmp_path
):
    """A ``RuntimeError`` from the final temp delete does not fail a completed rebuild.

    ``rebuild_index`` returns normally and ``copy2`` is called only once.
    """
    (tmp_path / "chroma.sqlite3").write_text("fake")
    palace = str(tmp_path)

    def _write_backup(src, dst):
        with open(dst, "w") as backup_file:
            backup_file.write("backup")

    mock_shutil.copy2.side_effect = _write_backup

    source_col = MagicMock()
    source_col.count.return_value = 2
    source_col.get.return_value = {
        "ids": ["id1", "id2"],
        "documents": ["doc1", "doc2"],
        "metadatas": [{"wing": "a"}, {"wing": "b"}],
    }
    staged_col = MagicMock()
    staged_col.count.return_value = 2
    live_col = MagicMock()
    live_col.count.return_value = 2

    backend = _install_mock_backend(mock_backend_cls, source_col)
    backend.create_collection.side_effect = [staged_col, live_col]
    # Only the last delete (post-success temp cleanup) fails.
    backend.delete_collection.side_effect = [
        None,
        None,
        RuntimeError("cleanup failed"),
    ]

    repair.rebuild_index(palace_path=palace)

    assert mock_shutil.copy2.call_count == 1
    assert backend.delete_collection.call_args_list == [
        call(palace, "mempalace_drawers__repair_tmp"),
        call(palace, "mempalace_drawers"),
        call(palace, "mempalace_drawers__repair_tmp"),
    ]
|
||||
|
||||
|
||||
# ── repair_max_seq_id ─────────────────────────────────────────────────
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user