diff --git a/mempalace/mcp_server.py b/mempalace/mcp_server.py index 43897c8..cce7a49 100644 --- a/mempalace/mcp_server.py +++ b/mempalace/mcp_server.py @@ -57,6 +57,8 @@ from .config import ( # noqa: E402 sanitize_content, ) from .version import __version__ # noqa: E402 +from chromadb.errors import NotFoundError as _ChromaNotFoundError # noqa: E402 + from .backends.chroma import ( # noqa: E402 ChromaBackend, ChromaCollection, @@ -284,14 +286,22 @@ def _get_collection(create=False): # palaces whose collections were created before this fix (the # runtime config does not persist cross-process in chromadb 1.5.x, # so the retrofit runs every time _get_collection opens a cache). - raw = client.get_or_create_collection( - _config.collection_name, - metadata={ - "hnsw:space": "cosine", - "hnsw:num_threads": 1, - **_HNSW_BLOAT_GUARD, - }, - ) + # + # ChromaDB 1.5.x's Rust binding SIGSEGVs when get_or_create_collection + # is called with metadata that differs from what's stored. The split + # below skips the metadata-comparison codepath for existing + # collections, mirroring the backend-layer fix from #1262. + try: + raw = client.get_collection(_config.collection_name) + except _ChromaNotFoundError: + raw = client.create_collection( + _config.collection_name, + metadata={ + "hnsw:space": "cosine", + "hnsw:num_threads": 1, + **_HNSW_BLOAT_GUARD, + }, + ) _pin_hnsw_threads(raw) _collection_cache = ChromaCollection(raw) _metadata_cache = None diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 480b6bd..46e5f4a 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -874,3 +874,48 @@ class TestCacheInvalidation: assert result["success"] is True assert "Reconnected" in result["message"] assert isinstance(result["drawers"], int) + + def test_get_collection_create_true_avoids_get_or_create_on_reopen( + self, monkeypatch, config, palace_path, kg + ): + """Regression for the MCP-server half of #1262. + + ChromaDB 1.5.x's Rust bindings SIGSEGV when + ``client.get_or_create_collection`` is called with metadata that + differs from the collection's stored metadata. The Stop hook + path (``tool_diary_write`` -> ``_get_collection(create=True)``) + was reaching that codepath on every session-end; #1262 fixed + the equivalent crash class in ``ChromaBackend`` but left this + site untouched. ``_get_collection(create=True)`` must call + ``client.get_collection`` first and only fall back to + ``client.create_collection`` when the collection does not yet + exist on disk. + """ + _patch_mcp_server(monkeypatch, config, kg) + from mempalace import mcp_server + + col1 = mcp_server._get_collection(create=True) + assert col1 is not None + + client = mcp_server._client_cache + assert client is not None + + # Patch at the class level — chromadb's mtime-change detection + # may rebuild the client between calls, so an instance-level + # spy would not survive. + client_cls = type(client) + calls: list[tuple] = [] + + def _spy(self, *args, **kwargs): + calls.append((args, kwargs)) + raise AssertionError( + "get_or_create_collection must not be called on reopen " + "(SIGSEGV path on metadata mismatch)" + ) + + monkeypatch.setattr(client_cls, "get_or_create_collection", _spy) + mcp_server._collection_cache = None + + col2 = mcp_server._get_collection(create=True) + assert col2 is not None + assert calls == [], f"get_or_create_collection was called: {calls}"