From d7f4638157a9d8e17a506dd8b1bae96e768fb9db Mon Sep 17 00:00:00 2001 From: Legion345 Date: Tue, 28 Apr 2026 13:08:04 -0700 Subject: [PATCH] fix(storage): stop ChromaDB from crashing when reopening an existing palace --- mempalace/backends/chroma.py | 22 +++++++++++++--------- tests/test_backends.py | 26 ++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 9 deletions(-) diff --git a/mempalace/backends/chroma.py b/mempalace/backends/chroma.py index 048f760..85684ac 100644 --- a/mempalace/backends/chroma.py +++ b/mempalace/backends/chroma.py @@ -8,6 +8,7 @@ from pathlib import Path from typing import Any, Optional import chromadb +from chromadb.errors import NotFoundError as _ChromaNotFoundError from .base import ( BaseBackend, @@ -1037,15 +1038,18 @@ class ChromaBackend(BaseBackend): ef_kwargs = {"embedding_function": ef} if ef is not None else {} if create: - collection = client.get_or_create_collection( - collection_name, - metadata={ - "hnsw:space": hnsw_space, - "hnsw:num_threads": 1, - **_HNSW_BLOAT_GUARD, - }, - **ef_kwargs, - ) + try: + collection = client.get_collection(collection_name, **ef_kwargs) + except _ChromaNotFoundError: + collection = client.create_collection( + collection_name, + metadata={ + "hnsw:space": hnsw_space, + "hnsw:num_threads": 1, + **_HNSW_BLOAT_GUARD, + }, + **ef_kwargs, + ) else: collection = client.get_collection(collection_name, **ef_kwargs) _pin_hnsw_threads(collection) diff --git a/tests/test_backends.py b/tests/test_backends.py index b48b69f..5efa71b 100644 --- a/tests/test_backends.py +++ b/tests/test_backends.py @@ -372,6 +372,32 @@ def test_chroma_backend_create_collection_sets_hnsw_bloat_guard(tmp_path): assert col.metadata.get("hnsw:sync_threshold") == 50_000 +def test_get_collection_create_true_is_idempotent(tmp_path): + """Calling get_collection(create=True) twice on the same name must not crash. + + ChromaDB 1.5.x's Rust bindings SIGSEGV when get_or_create_collection is + called with metadata that differs from the stored collection metadata. The + fix splits the call into get_collection -> fallback create_collection so the + metadata-comparison codepath in chromadb_rust_bindings is never reached for + existing collections. Regression guard for issue #1089. + """ + palace = str(tmp_path / "palace") + backend = ChromaBackend() + backend.get_collection(palace, collection_name="mempalace_drawers", create=True) + col2 = backend.get_collection(palace, collection_name="mempalace_drawers", create=True) + assert isinstance(col2, ChromaCollection) + + +def test_get_collection_create_true_preserves_existing_metadata(tmp_path): + """Existing collection metadata is not overwritten when reopened with create=True.""" + palace = str(tmp_path / "palace") + backend = ChromaBackend() + backend.get_collection(palace, collection_name="mempalace_drawers", create=True) + col = backend.get_collection(palace, collection_name="mempalace_drawers", create=True) + assert col._collection.metadata["hnsw:space"] == "cosine" + assert col._collection.metadata.get("hnsw:batch_size") == 50_000 + + def test_fix_blob_seq_ids_converts_blobs_to_integers(tmp_path): """Simulate a ChromaDB 0.6.x database with BLOB seq_ids and verify repair.""" db_path = tmp_path / "chroma.sqlite3"