fix(mcp_server): split get_or_create_collection on reopen (follow-up to #1262)

#1262 split `get_or_create_collection` into `get_collection` + fallback
`create_collection` inside `ChromaBackend.get_collection`, fixing the
chromadb 1.5.x Rust-binding SIGSEGV that fires when stored collection
metadata differs from the call-site's `_HNSW_BLOAT_GUARD` payload.

The MCP server's `_get_collection(create=True)` carries the same metadata
payload at `mcp_server.py:287` and routes through chromadb's Python
client directly, bypassing the backend layer. Both `tool_add_drawer`
and `tool_diary_write` reach this site on every invocation, and the
Stop hook fires `mempalace_diary_write` at session end — which was
exactly the crash path #1089 named.

Apply the same try/except split here so legacy palaces whose stored
metadata predates the bloat-guard expansion no longer crash on the
MCP-server reopen path. Regression test patches
`get_or_create_collection` at the chromadb client class level (not the
instance — chromadb's mtime-change detection rebuilds the client between
calls, so an instance-level spy doesn't survive) and asserts the second
`_get_collection(create=True)` call never reaches it.
This commit is contained in:
Igor Lins e Silva
2026-04-30 22:35:18 -03:00
parent 73541d1606
commit 9dd56ecb0a
2 changed files with 63 additions and 8 deletions
+18 -8
View File
@@ -57,6 +57,8 @@ from .config import ( # noqa: E402
sanitize_content,
)
from .version import __version__ # noqa: E402
from chromadb.errors import NotFoundError as _ChromaNotFoundError # noqa: E402
from .backends.chroma import ( # noqa: E402
ChromaBackend,
ChromaCollection,
@@ -284,14 +286,22 @@ def _get_collection(create=False):
# palaces whose collections were created before this fix (the
# runtime config does not persist cross-process in chromadb 1.5.x,
# so the retrofit runs every time _get_collection opens a cache).
raw = client.get_or_create_collection(
_config.collection_name,
metadata={
"hnsw:space": "cosine",
"hnsw:num_threads": 1,
**_HNSW_BLOAT_GUARD,
},
)
#
# ChromaDB 1.5.x's Rust binding SIGSEGVs when get_or_create_collection
# is called with metadata that differs from what's stored. The split
# below skips the metadata-comparison codepath for existing
# collections, mirroring the backend-layer fix from #1262.
try:
raw = client.get_collection(_config.collection_name)
except _ChromaNotFoundError:
raw = client.create_collection(
_config.collection_name,
metadata={
"hnsw:space": "cosine",
"hnsw:num_threads": 1,
**_HNSW_BLOAT_GUARD,
},
)
_pin_hnsw_threads(raw)
_collection_cache = ChromaCollection(raw)
_metadata_cache = None
+45
View File
@@ -874,3 +874,48 @@ class TestCacheInvalidation:
assert result["success"] is True
assert "Reconnected" in result["message"]
assert isinstance(result["drawers"], int)
def test_get_collection_create_true_avoids_get_or_create_on_reopen(
self, monkeypatch, config, palace_path, kg
):
"""Regression for the MCP-server half of #1262.
ChromaDB 1.5.x's Rust bindings SIGSEGV when
``client.get_or_create_collection`` is called with metadata that
differs from the collection's stored metadata. The Stop hook
path (``tool_diary_write`` -> ``_get_collection(create=True)``)
was reaching that codepath on every session-end; #1262 fixed
the equivalent crash class in ``ChromaBackend`` but left this
site untouched. ``_get_collection(create=True)`` must call
``client.get_collection`` first and only fall back to
``client.create_collection`` when the collection does not yet
exist on disk.
"""
_patch_mcp_server(monkeypatch, config, kg)
from mempalace import mcp_server
col1 = mcp_server._get_collection(create=True)
assert col1 is not None
client = mcp_server._client_cache
assert client is not None
# Patch at the class level — chromadb's mtime-change detection
# may rebuild the client between calls, so an instance-level
# spy would not survive.
client_cls = type(client)
calls: list[tuple] = []
def _spy(self, *args, **kwargs):
calls.append((args, kwargs))
raise AssertionError(
"get_or_create_collection must not be called on reopen "
"(SIGSEGV path on metadata mismatch)"
)
monkeypatch.setattr(client_cls, "get_or_create_collection", _spy)
mcp_server._collection_cache = None
col2 = mcp_server._get_collection(create=True)
assert col2 is not None
assert calls == [], f"get_or_create_collection was called: {calls}"