fix(repair): rebuild collections through temp staging
This commit is contained in:
+129
-30
@@ -37,10 +37,13 @@ import time
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
|
from chromadb.errors import NotFoundError as ChromaNotFoundError
|
||||||
|
|
||||||
from .backends.chroma import ChromaBackend, hnsw_capacity_status
|
from .backends.chroma import ChromaBackend, hnsw_capacity_status
|
||||||
|
|
||||||
|
|
||||||
COLLECTION_NAME = "mempalace_drawers"
|
COLLECTION_NAME = "mempalace_drawers"
|
||||||
|
REPAIR_TEMP_COLLECTION = f"{COLLECTION_NAME}__repair_tmp"
|
||||||
|
|
||||||
|
|
||||||
def _get_palace_path():
|
def _get_palace_path():
|
||||||
@@ -83,6 +86,108 @@ def _paginate_ids(col, where=None):
|
|||||||
return ids
|
return ids
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_drawers(col, total: int, batch_size: int):
|
||||||
|
all_ids = []
|
||||||
|
all_docs = []
|
||||||
|
all_metas = []
|
||||||
|
offset = 0
|
||||||
|
while offset < total:
|
||||||
|
batch = col.get(limit=batch_size, offset=offset, include=["documents", "metadatas"])
|
||||||
|
if not batch["ids"]:
|
||||||
|
break
|
||||||
|
all_ids.extend(batch["ids"])
|
||||||
|
all_docs.extend(batch["documents"])
|
||||||
|
all_metas.extend(batch["metadatas"])
|
||||||
|
offset += len(batch["ids"])
|
||||||
|
return all_ids, all_docs, all_metas
|
||||||
|
|
||||||
|
|
||||||
|
def _verify_collection_count(col, expected: int, label: str) -> None:
|
||||||
|
actual = col.count()
|
||||||
|
if actual != expected:
|
||||||
|
raise RuntimeError(f"{label} count mismatch: expected {expected}, got {actual}")
|
||||||
|
|
||||||
|
|
||||||
|
def _is_missing_collection_value_error(exc: ValueError) -> bool:
|
||||||
|
message = str(exc).lower()
|
||||||
|
return "does not exist" in message or "not found" in message
|
||||||
|
|
||||||
|
|
||||||
|
def _delete_collection_if_exists(backend, palace_path: str, collection_name: str) -> None:
|
||||||
|
try:
|
||||||
|
backend.delete_collection(palace_path, collection_name)
|
||||||
|
except ValueError as exc:
|
||||||
|
if _is_missing_collection_value_error(exc):
|
||||||
|
return
|
||||||
|
raise
|
||||||
|
except (FileNotFoundError, ChromaNotFoundError):
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
class RebuildCollectionError(RuntimeError):
|
||||||
|
"""Raised when temp rebuild fails, carrying whether the live swap happened."""
|
||||||
|
|
||||||
|
def __init__(self, message: str, *, live_replaced: bool):
|
||||||
|
super().__init__(message)
|
||||||
|
self.live_replaced = live_replaced
|
||||||
|
|
||||||
|
|
||||||
|
def _rebuild_collection_via_temp(
|
||||||
|
backend,
|
||||||
|
palace_path: str,
|
||||||
|
all_ids,
|
||||||
|
all_docs,
|
||||||
|
all_metas,
|
||||||
|
batch_size: int,
|
||||||
|
progress=print,
|
||||||
|
) -> int:
|
||||||
|
expected = len(all_ids)
|
||||||
|
temp_name = REPAIR_TEMP_COLLECTION
|
||||||
|
live_replaced = False
|
||||||
|
|
||||||
|
try:
|
||||||
|
_delete_collection_if_exists(backend, palace_path, temp_name)
|
||||||
|
|
||||||
|
progress(f" Building temporary collection: {temp_name}")
|
||||||
|
temp_col = backend.create_collection(palace_path, temp_name)
|
||||||
|
staged = 0
|
||||||
|
for i in range(0, expected, batch_size):
|
||||||
|
batch_ids = all_ids[i : i + batch_size]
|
||||||
|
batch_docs = all_docs[i : i + batch_size]
|
||||||
|
batch_metas = all_metas[i : i + batch_size]
|
||||||
|
temp_col.upsert(documents=batch_docs, ids=batch_ids, metadatas=batch_metas)
|
||||||
|
staged += len(batch_ids)
|
||||||
|
progress(f" Staged {staged}/{expected} drawers...")
|
||||||
|
_verify_collection_count(temp_col, expected, "temporary rebuild")
|
||||||
|
|
||||||
|
progress(" Rebuilding live collection...")
|
||||||
|
backend.delete_collection(palace_path, COLLECTION_NAME)
|
||||||
|
live_replaced = True
|
||||||
|
new_col = backend.create_collection(palace_path, COLLECTION_NAME)
|
||||||
|
|
||||||
|
rebuilt = 0
|
||||||
|
for i in range(0, expected, batch_size):
|
||||||
|
batch_ids = all_ids[i : i + batch_size]
|
||||||
|
batch_docs = all_docs[i : i + batch_size]
|
||||||
|
batch_metas = all_metas[i : i + batch_size]
|
||||||
|
new_col.upsert(documents=batch_docs, ids=batch_ids, metadatas=batch_metas)
|
||||||
|
rebuilt += len(batch_ids)
|
||||||
|
progress(f" Re-filed {rebuilt}/{expected} drawers...")
|
||||||
|
_verify_collection_count(new_col, expected, "rebuilt live collection")
|
||||||
|
|
||||||
|
try:
|
||||||
|
_delete_collection_if_exists(backend, palace_path, temp_name)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return rebuilt
|
||||||
|
except Exception as exc:
|
||||||
|
try:
|
||||||
|
_delete_collection_if_exists(backend, palace_path, temp_name)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
raise RebuildCollectionError(str(exc), live_replaced=live_replaced) from exc
|
||||||
|
|
||||||
|
|
||||||
def scan_palace(palace_path=None, only_wing=None):
|
def scan_palace(palace_path=None, only_wing=None):
|
||||||
"""Scan the palace for corrupt/unfetchable IDs.
|
"""Scan the palace for corrupt/unfetchable IDs.
|
||||||
|
|
||||||
@@ -373,18 +478,7 @@ def rebuild_index(palace_path=None, confirm_truncation_ok: bool = False):
|
|||||||
# Extract all drawers in batches
|
# Extract all drawers in batches
|
||||||
print("\n Extracting drawers...")
|
print("\n Extracting drawers...")
|
||||||
batch_size = 5000
|
batch_size = 5000
|
||||||
all_ids = []
|
all_ids, all_docs, all_metas = _extract_drawers(col, total, batch_size)
|
||||||
all_docs = []
|
|
||||||
all_metas = []
|
|
||||||
offset = 0
|
|
||||||
while offset < total:
|
|
||||||
batch = col.get(limit=batch_size, offset=offset, include=["documents", "metadatas"])
|
|
||||||
if not batch["ids"]:
|
|
||||||
break
|
|
||||||
all_ids.extend(batch["ids"])
|
|
||||||
all_docs.extend(batch["documents"])
|
|
||||||
all_metas.extend(batch["metadatas"])
|
|
||||||
offset += len(batch["ids"])
|
|
||||||
print(f" Extracted {len(all_ids)} drawers")
|
print(f" Extracted {len(all_ids)} drawers")
|
||||||
|
|
||||||
# ── #1208 guard ──────────────────────────────────────────────────
|
# ── #1208 guard ──────────────────────────────────────────────────
|
||||||
@@ -407,28 +501,33 @@ def rebuild_index(palace_path=None, confirm_truncation_ok: bool = False):
|
|||||||
|
|
||||||
# Rebuild with correct HNSW settings
|
# Rebuild with correct HNSW settings
|
||||||
print(" Rebuilding collection with hnsw:space=cosine...")
|
print(" Rebuilding collection with hnsw:space=cosine...")
|
||||||
backend.delete_collection(palace_path, COLLECTION_NAME)
|
|
||||||
new_col = backend.create_collection(palace_path, COLLECTION_NAME)
|
|
||||||
|
|
||||||
filed = 0
|
|
||||||
try:
|
try:
|
||||||
for i in range(0, len(all_ids), batch_size):
|
filed = _rebuild_collection_via_temp(
|
||||||
batch_ids = all_ids[i : i + batch_size]
|
backend,
|
||||||
batch_docs = all_docs[i : i + batch_size]
|
palace_path,
|
||||||
batch_metas = all_metas[i : i + batch_size]
|
all_ids,
|
||||||
new_col.upsert(documents=batch_docs, ids=batch_ids, metadatas=batch_metas)
|
all_docs,
|
||||||
filed += len(batch_ids)
|
all_metas,
|
||||||
print(f" Re-filed {filed}/{len(all_ids)} drawers...")
|
batch_size,
|
||||||
except Exception as e:
|
progress=print,
|
||||||
|
)
|
||||||
|
except RebuildCollectionError as e:
|
||||||
print(f"\n ERROR during rebuild: {e}")
|
print(f"\n ERROR during rebuild: {e}")
|
||||||
print(f" Only {filed}/{len(all_ids)} drawers were re-filed.")
|
print(" Rebuild aborted before completion.")
|
||||||
if os.path.exists(backup_path):
|
if e.live_replaced and os.path.exists(backup_path):
|
||||||
print(f" Restoring from backup: {backup_path}")
|
print(f" Restoring from backup: {backup_path}")
|
||||||
backend.delete_collection(palace_path, COLLECTION_NAME)
|
try:
|
||||||
shutil.copy2(backup_path, sqlite_path)
|
_close_chroma_handles(palace_path)
|
||||||
print(" Backup restored. Palace is back to pre-repair state.")
|
_delete_collection_if_exists(backend, palace_path, COLLECTION_NAME)
|
||||||
else:
|
shutil.copy2(backup_path, sqlite_path)
|
||||||
|
print(" Backup restored. Palace is back to pre-repair state.")
|
||||||
|
except Exception as restore_error:
|
||||||
|
print(f" Backup restore failed: {restore_error}")
|
||||||
|
print(f" Manual restore required from: {backup_path}")
|
||||||
|
elif e.live_replaced:
|
||||||
print(" No backup available. Re-mine from source files to recover.")
|
print(" No backup available. Re-mine from source files to recover.")
|
||||||
|
else:
|
||||||
|
print(" Live collection was not replaced; leaving the original palace untouched.")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
print(f"\n Repair complete. {filed} drawers rebuilt.")
|
print(f"\n Repair complete. {filed} drawers rebuilt.")
|
||||||
|
|||||||
+307
-7
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
import sqlite3
|
import sqlite3
|
||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, call, patch
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
@@ -229,8 +229,11 @@ def test_rebuild_index_success(mock_backend_cls, mock_shutil, tmp_path):
|
|||||||
}
|
}
|
||||||
|
|
||||||
mock_new_col = MagicMock()
|
mock_new_col = MagicMock()
|
||||||
|
mock_new_col.count.return_value = 2
|
||||||
|
mock_temp_col = MagicMock()
|
||||||
|
mock_temp_col.count.return_value = 2
|
||||||
mock_backend = _install_mock_backend(mock_backend_cls, mock_col)
|
mock_backend = _install_mock_backend(mock_backend_cls, mock_col)
|
||||||
mock_backend.create_collection.return_value = mock_new_col
|
mock_backend.create_collection.side_effect = [mock_temp_col, mock_new_col]
|
||||||
|
|
||||||
repair.rebuild_index(palace_path=str(tmp_path))
|
repair.rebuild_index(palace_path=str(tmp_path))
|
||||||
|
|
||||||
@@ -239,14 +242,74 @@ def test_rebuild_index_success(mock_backend_cls, mock_shutil, tmp_path):
|
|||||||
assert "chroma.sqlite3" in str(mock_shutil.copy2.call_args)
|
assert "chroma.sqlite3" in str(mock_shutil.copy2.call_args)
|
||||||
|
|
||||||
# Verify: deleted and recreated (cosine is the backend default)
|
# Verify: deleted and recreated (cosine is the backend default)
|
||||||
mock_backend.delete_collection.assert_called_once_with(str(tmp_path), "mempalace_drawers")
|
assert mock_backend.create_collection.call_args_list == [
|
||||||
mock_backend.create_collection.assert_called_once_with(str(tmp_path), "mempalace_drawers")
|
call(str(tmp_path), "mempalace_drawers__repair_tmp"),
|
||||||
|
call(str(tmp_path), "mempalace_drawers"),
|
||||||
|
]
|
||||||
|
assert mock_backend.delete_collection.call_args_list == [
|
||||||
|
call(str(tmp_path), "mempalace_drawers__repair_tmp"),
|
||||||
|
call(str(tmp_path), "mempalace_drawers"),
|
||||||
|
call(str(tmp_path), "mempalace_drawers__repair_tmp"),
|
||||||
|
]
|
||||||
|
|
||||||
# Verify: used upsert not add
|
# Verify: used upsert not add
|
||||||
|
mock_temp_col.upsert.assert_called_once()
|
||||||
mock_new_col.upsert.assert_called_once()
|
mock_new_col.upsert.assert_called_once()
|
||||||
mock_new_col.add.assert_not_called()
|
mock_new_col.add.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
@patch("mempalace.repair.shutil")
|
||||||
|
@patch("mempalace.repair.ChromaBackend")
|
||||||
|
def test_rebuild_index_ignores_missing_temp_collection_at_start(
|
||||||
|
mock_backend_cls, mock_shutil, tmp_path
|
||||||
|
):
|
||||||
|
sqlite_path = tmp_path / "chroma.sqlite3"
|
||||||
|
sqlite_path.write_text("fake")
|
||||||
|
|
||||||
|
def _fake_copy2(src, dst):
|
||||||
|
with open(dst, "w") as handle:
|
||||||
|
handle.write("backup")
|
||||||
|
|
||||||
|
mock_shutil.copy2.side_effect = _fake_copy2
|
||||||
|
|
||||||
|
mock_col = MagicMock()
|
||||||
|
mock_col.count.return_value = 2
|
||||||
|
mock_col.get.return_value = {
|
||||||
|
"ids": ["id1", "id2"],
|
||||||
|
"documents": ["doc1", "doc2"],
|
||||||
|
"metadatas": [{"wing": "a"}, {"wing": "b"}],
|
||||||
|
}
|
||||||
|
|
||||||
|
mock_new_col = MagicMock()
|
||||||
|
mock_new_col.count.return_value = 2
|
||||||
|
mock_temp_col = MagicMock()
|
||||||
|
mock_temp_col.count.return_value = 2
|
||||||
|
mock_backend = _install_mock_backend(mock_backend_cls, mock_col)
|
||||||
|
mock_backend.create_collection.side_effect = [mock_temp_col, mock_new_col]
|
||||||
|
mock_backend.delete_collection.side_effect = [
|
||||||
|
ValueError("Collection [mempalace_drawers__repair_tmp] does not exist"),
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
]
|
||||||
|
|
||||||
|
repair.rebuild_index(palace_path=str(tmp_path))
|
||||||
|
|
||||||
|
assert mock_shutil.copy2.call_count == 1
|
||||||
|
assert mock_backend.delete_collection.call_args_list == [
|
||||||
|
call(str(tmp_path), "mempalace_drawers__repair_tmp"),
|
||||||
|
call(str(tmp_path), "mempalace_drawers"),
|
||||||
|
call(str(tmp_path), "mempalace_drawers__repair_tmp"),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_delete_collection_if_exists_reraises_unexpected_value_error():
|
||||||
|
mock_backend = MagicMock()
|
||||||
|
mock_backend.delete_collection.side_effect = ValueError("invalid collection name")
|
||||||
|
|
||||||
|
with pytest.raises(ValueError, match="invalid collection name"):
|
||||||
|
repair._delete_collection_if_exists(mock_backend, "/palace", "bad/name")
|
||||||
|
|
||||||
|
|
||||||
@patch("mempalace.repair.shutil")
|
@patch("mempalace.repair.shutil")
|
||||||
@patch("mempalace.repair.ChromaBackend")
|
@patch("mempalace.repair.ChromaBackend")
|
||||||
def test_rebuild_index_error_reading(mock_backend_cls, mock_shutil, tmp_path):
|
def test_rebuild_index_error_reading(mock_backend_cls, mock_shutil, tmp_path):
|
||||||
@@ -365,19 +428,256 @@ def test_rebuild_index_proceeds_with_override(mock_backend_cls, mock_shutil, tmp
|
|||||||
},
|
},
|
||||||
{"ids": [], "documents": [], "metadatas": []},
|
{"ids": [], "documents": [], "metadatas": []},
|
||||||
]
|
]
|
||||||
|
mock_temp_col = MagicMock()
|
||||||
|
mock_temp_col.count.return_value = 10_000
|
||||||
mock_new_col = MagicMock()
|
mock_new_col = MagicMock()
|
||||||
|
mock_new_col.count.return_value = 10_000
|
||||||
mock_backend.get_collection.return_value = mock_col
|
mock_backend.get_collection.return_value = mock_col
|
||||||
mock_backend.create_collection.return_value = mock_new_col
|
mock_backend.create_collection.side_effect = [mock_temp_col, mock_new_col]
|
||||||
mock_backend_cls.return_value = mock_backend
|
mock_backend_cls.return_value = mock_backend
|
||||||
|
|
||||||
with patch("mempalace.repair.sqlite_drawer_count", return_value=67_580):
|
with patch("mempalace.repair.sqlite_drawer_count", return_value=67_580):
|
||||||
repair.rebuild_index(palace_path=str(tmp_path), confirm_truncation_ok=True)
|
repair.rebuild_index(palace_path=str(tmp_path), confirm_truncation_ok=True)
|
||||||
|
|
||||||
mock_backend.delete_collection.assert_called_once()
|
assert mock_backend.delete_collection.call_count == 3
|
||||||
mock_backend.create_collection.assert_called_once()
|
assert mock_backend.create_collection.call_count == 2
|
||||||
|
mock_temp_col.upsert.assert_called()
|
||||||
mock_new_col.upsert.assert_called()
|
mock_new_col.upsert.assert_called()
|
||||||
|
|
||||||
|
|
||||||
|
@patch("mempalace.repair.shutil")
|
||||||
|
@patch("mempalace.repair.ChromaBackend")
|
||||||
|
def test_rebuild_index_stage_failure_leaves_live_collection_untouched(
|
||||||
|
mock_backend_cls, mock_shutil, tmp_path
|
||||||
|
):
|
||||||
|
sqlite_path = tmp_path / "chroma.sqlite3"
|
||||||
|
sqlite_path.write_text("fake")
|
||||||
|
|
||||||
|
mock_col = MagicMock()
|
||||||
|
mock_col.count.return_value = 2
|
||||||
|
mock_col.get.return_value = {
|
||||||
|
"ids": ["id1", "id2"],
|
||||||
|
"documents": ["doc1", "doc2"],
|
||||||
|
"metadatas": [{"wing": "a"}, {"wing": "b"}],
|
||||||
|
}
|
||||||
|
mock_temp_col = MagicMock()
|
||||||
|
mock_temp_col.count.return_value = 1
|
||||||
|
mock_backend = _install_mock_backend(mock_backend_cls, mock_col)
|
||||||
|
mock_backend.create_collection.return_value = mock_temp_col
|
||||||
|
|
||||||
|
with pytest.raises(repair.RebuildCollectionError) as excinfo:
|
||||||
|
repair.rebuild_index(palace_path=str(tmp_path))
|
||||||
|
|
||||||
|
assert excinfo.value.live_replaced is False
|
||||||
|
assert mock_shutil.copy2.call_count == 1
|
||||||
|
assert mock_backend.delete_collection.call_args_list == [
|
||||||
|
call(str(tmp_path), "mempalace_drawers__repair_tmp"),
|
||||||
|
call(str(tmp_path), "mempalace_drawers__repair_tmp"),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@patch("mempalace.repair.shutil")
|
||||||
|
@patch("mempalace.repair.ChromaBackend")
|
||||||
|
def test_rebuild_index_live_failure_restores_backup(mock_backend_cls, mock_shutil, tmp_path):
|
||||||
|
sqlite_path = tmp_path / "chroma.sqlite3"
|
||||||
|
sqlite_path.write_text("fake")
|
||||||
|
|
||||||
|
def _fake_copy2(src, dst):
|
||||||
|
with open(dst, "w") as handle:
|
||||||
|
handle.write("backup")
|
||||||
|
|
||||||
|
mock_shutil.copy2.side_effect = _fake_copy2
|
||||||
|
|
||||||
|
mock_col = MagicMock()
|
||||||
|
mock_col.count.return_value = 2
|
||||||
|
mock_col.get.return_value = {
|
||||||
|
"ids": ["id1", "id2"],
|
||||||
|
"documents": ["doc1", "doc2"],
|
||||||
|
"metadatas": [{"wing": "a"}, {"wing": "b"}],
|
||||||
|
}
|
||||||
|
mock_temp_col = MagicMock()
|
||||||
|
mock_temp_col.count.return_value = 2
|
||||||
|
mock_new_col = MagicMock()
|
||||||
|
mock_new_col.upsert.side_effect = RuntimeError("live upsert failed")
|
||||||
|
mock_backend = _install_mock_backend(mock_backend_cls, mock_col)
|
||||||
|
mock_backend.create_collection.side_effect = [mock_temp_col, mock_new_col]
|
||||||
|
|
||||||
|
with pytest.raises(repair.RebuildCollectionError) as excinfo:
|
||||||
|
repair.rebuild_index(palace_path=str(tmp_path))
|
||||||
|
|
||||||
|
assert excinfo.value.live_replaced is True
|
||||||
|
assert mock_shutil.copy2.call_count == 2
|
||||||
|
assert mock_backend.delete_collection.call_args_list == [
|
||||||
|
call(str(tmp_path), "mempalace_drawers__repair_tmp"),
|
||||||
|
call(str(tmp_path), "mempalace_drawers"),
|
||||||
|
call(str(tmp_path), "mempalace_drawers__repair_tmp"),
|
||||||
|
call(str(tmp_path), "mempalace_drawers"),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@patch("mempalace.repair.shutil")
|
||||||
|
@patch("mempalace.repair.ChromaBackend")
|
||||||
|
def test_rebuild_index_live_delete_missing_still_restores_backup(
|
||||||
|
mock_backend_cls, mock_shutil, tmp_path
|
||||||
|
):
|
||||||
|
sqlite_path = tmp_path / "chroma.sqlite3"
|
||||||
|
sqlite_path.write_text("fake")
|
||||||
|
|
||||||
|
def _fake_copy2(src, dst):
|
||||||
|
with open(dst, "w") as handle:
|
||||||
|
handle.write("backup")
|
||||||
|
|
||||||
|
mock_shutil.copy2.side_effect = _fake_copy2
|
||||||
|
|
||||||
|
mock_col = MagicMock()
|
||||||
|
mock_col.count.return_value = 2
|
||||||
|
mock_col.get.return_value = {
|
||||||
|
"ids": ["id1", "id2"],
|
||||||
|
"documents": ["doc1", "doc2"],
|
||||||
|
"metadatas": [{"wing": "a"}, {"wing": "b"}],
|
||||||
|
}
|
||||||
|
mock_temp_col = MagicMock()
|
||||||
|
mock_temp_col.count.return_value = 2
|
||||||
|
mock_backend = _install_mock_backend(mock_backend_cls, mock_col)
|
||||||
|
mock_backend.create_collection.side_effect = [mock_temp_col, RuntimeError("create failed")]
|
||||||
|
mock_backend.delete_collection.side_effect = [
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
repair.ChromaNotFoundError("missing"),
|
||||||
|
]
|
||||||
|
|
||||||
|
with pytest.raises(repair.RebuildCollectionError) as excinfo:
|
||||||
|
repair.rebuild_index(palace_path=str(tmp_path))
|
||||||
|
|
||||||
|
assert excinfo.value.live_replaced is True
|
||||||
|
assert mock_shutil.copy2.call_count == 2
|
||||||
|
assert mock_backend.delete_collection.call_args_list == [
|
||||||
|
call(str(tmp_path), "mempalace_drawers__repair_tmp"),
|
||||||
|
call(str(tmp_path), "mempalace_drawers"),
|
||||||
|
call(str(tmp_path), "mempalace_drawers__repair_tmp"),
|
||||||
|
call(str(tmp_path), "mempalace_drawers"),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@patch("mempalace.repair.shutil")
|
||||||
|
@patch("mempalace.repair.ChromaBackend")
|
||||||
|
def test_rebuild_index_restore_failure_preserves_original_error(
|
||||||
|
mock_backend_cls, mock_shutil, tmp_path, capsys
|
||||||
|
):
|
||||||
|
sqlite_path = tmp_path / "chroma.sqlite3"
|
||||||
|
sqlite_path.write_text("fake")
|
||||||
|
|
||||||
|
def _copy2_side_effect(src, dst):
|
||||||
|
if str(src).endswith(".backup"):
|
||||||
|
raise PermissionError("locked sqlite")
|
||||||
|
with open(dst, "w") as handle:
|
||||||
|
handle.write("backup")
|
||||||
|
|
||||||
|
mock_shutil.copy2.side_effect = _copy2_side_effect
|
||||||
|
|
||||||
|
mock_col = MagicMock()
|
||||||
|
mock_col.count.return_value = 2
|
||||||
|
mock_col.get.return_value = {
|
||||||
|
"ids": ["id1", "id2"],
|
||||||
|
"documents": ["doc1", "doc2"],
|
||||||
|
"metadatas": [{"wing": "a"}, {"wing": "b"}],
|
||||||
|
}
|
||||||
|
mock_temp_col = MagicMock()
|
||||||
|
mock_temp_col.count.return_value = 2
|
||||||
|
mock_new_col = MagicMock()
|
||||||
|
mock_new_col.upsert.side_effect = RuntimeError("live upsert failed")
|
||||||
|
mock_backend = _install_mock_backend(mock_backend_cls, mock_col)
|
||||||
|
mock_backend.create_collection.side_effect = [mock_temp_col, mock_new_col]
|
||||||
|
|
||||||
|
with pytest.raises(repair.RebuildCollectionError) as excinfo:
|
||||||
|
repair.rebuild_index(palace_path=str(tmp_path))
|
||||||
|
|
||||||
|
out = capsys.readouterr().out
|
||||||
|
assert "locked sqlite" in out
|
||||||
|
assert "Manual restore required" in out
|
||||||
|
assert "live upsert failed" in str(excinfo.value)
|
||||||
|
|
||||||
|
|
||||||
|
@patch("mempalace.repair.ChromaBackend")
|
||||||
|
def test_rebuild_collection_via_temp_keeps_original_error_when_cleanup_fails(
|
||||||
|
mock_backend_cls,
|
||||||
|
):
|
||||||
|
mock_col = MagicMock()
|
||||||
|
mock_col.count.return_value = 2
|
||||||
|
mock_temp_col = MagicMock()
|
||||||
|
mock_temp_col.count.return_value = 2
|
||||||
|
mock_backend = _install_mock_backend(mock_backend_cls, mock_col)
|
||||||
|
mock_backend.create_collection.side_effect = [mock_temp_col, RuntimeError("live build failed")]
|
||||||
|
mock_backend.delete_collection.side_effect = [
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
RuntimeError("cleanup failed"),
|
||||||
|
]
|
||||||
|
|
||||||
|
with pytest.raises(repair.RebuildCollectionError) as excinfo:
|
||||||
|
repair._rebuild_collection_via_temp(
|
||||||
|
mock_backend,
|
||||||
|
"/palace",
|
||||||
|
["id1", "id2"],
|
||||||
|
["doc1", "doc2"],
|
||||||
|
[{"wing": "a"}, {"wing": "b"}],
|
||||||
|
batch_size=5000,
|
||||||
|
progress=lambda *args, **kwargs: None,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert "live build failed" in str(excinfo.value)
|
||||||
|
assert excinfo.value.live_replaced is True
|
||||||
|
assert mock_backend.delete_collection.call_args_list == [
|
||||||
|
call("/palace", "mempalace_drawers__repair_tmp"),
|
||||||
|
call("/palace", "mempalace_drawers"),
|
||||||
|
call("/palace", "mempalace_drawers__repair_tmp"),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@patch("mempalace.repair.shutil")
|
||||||
|
@patch("mempalace.repair.ChromaBackend")
|
||||||
|
def test_rebuild_index_ignores_temp_cleanup_failure_after_success(
|
||||||
|
mock_backend_cls, mock_shutil, tmp_path
|
||||||
|
):
|
||||||
|
sqlite_path = tmp_path / "chroma.sqlite3"
|
||||||
|
sqlite_path.write_text("fake")
|
||||||
|
|
||||||
|
def _fake_copy2(src, dst):
|
||||||
|
with open(dst, "w") as handle:
|
||||||
|
handle.write("backup")
|
||||||
|
|
||||||
|
mock_shutil.copy2.side_effect = _fake_copy2
|
||||||
|
|
||||||
|
mock_col = MagicMock()
|
||||||
|
mock_col.count.return_value = 2
|
||||||
|
mock_col.get.return_value = {
|
||||||
|
"ids": ["id1", "id2"],
|
||||||
|
"documents": ["doc1", "doc2"],
|
||||||
|
"metadatas": [{"wing": "a"}, {"wing": "b"}],
|
||||||
|
}
|
||||||
|
mock_temp_col = MagicMock()
|
||||||
|
mock_temp_col.count.return_value = 2
|
||||||
|
mock_new_col = MagicMock()
|
||||||
|
mock_new_col.count.return_value = 2
|
||||||
|
mock_backend = _install_mock_backend(mock_backend_cls, mock_col)
|
||||||
|
mock_backend.create_collection.side_effect = [mock_temp_col, mock_new_col]
|
||||||
|
mock_backend.delete_collection.side_effect = [
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
RuntimeError("cleanup failed"),
|
||||||
|
]
|
||||||
|
|
||||||
|
repair.rebuild_index(palace_path=str(tmp_path))
|
||||||
|
|
||||||
|
assert mock_shutil.copy2.call_count == 1
|
||||||
|
assert mock_backend.delete_collection.call_args_list == [
|
||||||
|
call(str(tmp_path), "mempalace_drawers__repair_tmp"),
|
||||||
|
call(str(tmp_path), "mempalace_drawers"),
|
||||||
|
call(str(tmp_path), "mempalace_drawers__repair_tmp"),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
# ── repair_max_seq_id ─────────────────────────────────────────────────
|
# ── repair_max_seq_id ─────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user