fix(hnsw): address @igorls's #1173 review
Restore three `_pin_hnsw_threads` tests that the previous integrity-gate commit deleted. The function is still live code on develop (defined at chroma.py:207, called from chroma.py:705 + mcp_server.py), so the deletion left the import unused (ruff F401) and dropped coverage on a function unrelated to this PR's scope. Restored verbatim from main. Plus three nits @igorls flagged: - **Thread-safety doc**: `_quarantined_paths` mutation is lock-free; documented that idempotency of `quarantine_stale_hnsw` is the safety property (concurrent same-palace calls produce a benign redundant rename attempt that no-ops, no need for a lock). - **Pickle protocol assumption**: `_segment_appears_healthy` requires PROTO ≥ 2 (`0x80`). Documented; matches what chromadb writes today, and a future protocol-0/1 emission would conservatively quarantine + lazy-rebuild rather than mis-classify as healthy. - Class-level vs module-level scope: keeping class-level — the conftest reset is the controlled case, and module-level wouldn't remove the foot-gun, just relocate it. Conftest reset documented in the existing comment is the right pattern for test isolation. Style nit (`Path(marker).touch()` vs `open(marker, "a").close()`) deferred — that pattern lives in #1177's territory, not #1173's. 37/37 tests pass on the PR branch. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -578,3 +578,52 @@ def test_make_client_quarantines_each_palace_independently(tmp_path, monkeypatch
|
||||
assert calls == [palace_a, palace_b]
|
||||
|
||||
|
||||
# ── _pin_hnsw_threads (per-process retrofit, separate from this PR's gate) ──
|
||||
|
||||
|
||||
def test_pin_hnsw_threads_retrofits_legacy_collection(tmp_path):
|
||||
"""Legacy collections (created without num_threads) get the retrofit applied."""
|
||||
palace_path = tmp_path / "legacy-palace"
|
||||
palace_path.mkdir()
|
||||
|
||||
client = chromadb.PersistentClient(path=str(palace_path))
|
||||
col = client.create_collection(
|
||||
"mempalace_drawers",
|
||||
metadata={"hnsw:space": "cosine"}, # no num_threads — legacy
|
||||
)
|
||||
assert col.configuration_json.get("hnsw", {}).get("num_threads") is None
|
||||
|
||||
_pin_hnsw_threads(col)
|
||||
|
||||
assert col.configuration_json["hnsw"]["num_threads"] == 1
|
||||
|
||||
|
||||
def test_pin_hnsw_threads_swallows_all_errors():
|
||||
"""Retrofit never raises even when collection.modify explodes."""
|
||||
|
||||
class _ExplodingCollection:
|
||||
def modify(self, *args, **kwargs):
|
||||
raise RuntimeError("boom")
|
||||
|
||||
_pin_hnsw_threads(_ExplodingCollection()) # must not raise
|
||||
|
||||
|
||||
def test_get_collection_applies_retrofit_on_existing_palace(tmp_path):
|
||||
"""ChromaBackend.get_collection(create=False) applies the retrofit."""
|
||||
palace_path = tmp_path / "palace"
|
||||
palace_path.mkdir()
|
||||
|
||||
# Simulate a legacy palace: create collection without num_threads
|
||||
bootstrap_client = chromadb.PersistentClient(path=str(palace_path))
|
||||
bootstrap_client.create_collection("mempalace_drawers", metadata={"hnsw:space": "cosine"})
|
||||
del bootstrap_client # drop reference so a fresh client reopens cleanly
|
||||
|
||||
wrapper = ChromaBackend().get_collection(
|
||||
str(palace_path),
|
||||
collection_name="mempalace_drawers",
|
||||
create=False,
|
||||
)
|
||||
|
||||
assert wrapper._collection.configuration_json["hnsw"]["num_threads"] == 1
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user