fix(backends/chroma): wire quarantine_stale_hnsw into _client() to prevent SIGSEGV on stale HNSW (#1121, #1132, #1263)
PR #1173 wired quarantine_stale_hnsw into the static make_client() helper but not into the instance _client() method. As a result every non-MCP entry point (CLI mining, search, repair, status) — which all use get_collection / _get_or_create_collection / _client() — skipped the cold-start quarantine pass and could SIGSEGV on a stale HNSW segment left over from a partial flush, replicated palace, or crashed-mid-write. Refactor: extract the (_fix_blob_seq_ids + gated quarantine_stale_hnsw) pre-open pass into a single private static helper ChromaBackend._prepare_palace_for_open(). Both make_client() and _client() now route through it, so the _quarantined_paths once-per- palace-per-process gate is preserved (no runtime thrash on hot paths) and behaviour stays identical — the fix is purely about extending the existing protection to the path that was missing it. Tests: - test_client_quarantines_corrupt_segment_on_first_open mirrors the existing make_client test and verifies _client() actually renames a corrupt segment on first open. - test_client_quarantines_only_on_first_call_per_palace verifies the cache gate prevents re-running quarantine across repeated _client() calls — important because _client() is hit on every backend op. Closes #1121. Closes #1132. Closes #1263. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -764,6 +764,67 @@ def test_make_client_quarantines_each_palace_independently(tmp_path, monkeypatch
|
||||
assert calls == [palace_a, palace_b]
|
||||
|
||||
|
||||
# ── _client() cold-start gate (#1121, #1132, #1263) ──────────────────────
|
||||
|
||||
|
||||
def test_client_quarantines_corrupt_segment_on_first_open(tmp_path, monkeypatch):
|
||||
"""The instance ``_client()`` path must run ``quarantine_stale_hnsw``
|
||||
on first open, mirroring the ``make_client()`` static helper. Before
|
||||
PR #1173's wiring was extended here, CLI mining / search / repair /
|
||||
status all skipped the quarantine pass and would SIGSEGV on a stale
|
||||
HNSW segment (#1121, #1132, #1263)."""
|
||||
now = 1_700_000_000.0
|
||||
palace, seg = _make_palace_with_segment(
|
||||
tmp_path,
|
||||
hnsw_mtime=now - 7200,
|
||||
sqlite_mtime=now,
|
||||
meta_bytes=_CORRUPT_META,
|
||||
)
|
||||
|
||||
monkeypatch.setattr(ChromaBackend, "_quarantined_paths", set())
|
||||
|
||||
backend = ChromaBackend()
|
||||
try:
|
||||
backend._client(str(palace))
|
||||
finally:
|
||||
backend.close()
|
||||
|
||||
assert not seg.exists(), "_client() should have quarantined the corrupt segment"
|
||||
drift_dirs = [p for p in palace.iterdir() if ".drift-" in p.name]
|
||||
assert len(drift_dirs) == 1
|
||||
|
||||
|
||||
def test_client_quarantines_only_on_first_call_per_palace(tmp_path, monkeypatch):
|
||||
"""Repeated ``_client()`` calls for the same palace re-run quarantine
|
||||
at most once — the ``_quarantined_paths`` gate prevents runtime
|
||||
thrash on hot paths (``_client()`` is hit on every backend op)."""
|
||||
palace_path = str(tmp_path / "palace")
|
||||
os.makedirs(palace_path, exist_ok=True)
|
||||
(Path(palace_path) / "chroma.sqlite3").write_text("")
|
||||
|
||||
monkeypatch.setattr(ChromaBackend, "_quarantined_paths", set())
|
||||
|
||||
calls: list[str] = []
|
||||
|
||||
def _spy(path, stale_seconds=300.0):
|
||||
calls.append(path)
|
||||
return []
|
||||
|
||||
monkeypatch.setattr("mempalace.backends.chroma.quarantine_stale_hnsw", _spy)
|
||||
|
||||
backend = ChromaBackend()
|
||||
try:
|
||||
backend._client(palace_path)
|
||||
backend._client(palace_path)
|
||||
backend._client(palace_path)
|
||||
finally:
|
||||
backend.close()
|
||||
|
||||
assert calls == [palace_path], (
|
||||
"quarantine_stale_hnsw should fire once per palace per process from _client(), not on every call"
|
||||
)
|
||||
|
||||
|
||||
# ── _pin_hnsw_threads (per-process retrofit, separate from this PR's gate) ──
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user