fix(mcp_server): pass embedding_function= on collection reopen (#1299)
`mcp_server._get_collection` bypassed `ChromaBackend.get_collection` and called `client.get_collection` / `client.create_collection` without `embedding_function=`. ChromaDB 1.x does not persist the EF identity with the collection, so the MCP server's reopen silently bound chromadb's built-in `DefaultEmbeddingFunction` while the miner / Stop hook ingest path bound `mempalace.embedding.get_embedding_function()`. On bleeding-edge interpreters (python 3.14 + chromadb 1.5.x on Apple Silicon, per #1299) the default EF's lazy ONNX provider selection could SIGSEGV the host process on first `col.add()`, killing the MCP stdio server and leaving every subsequent tool call returning `Connection closed` until Claude Code was relaunched. Reads worked because `col.get(ids=...)` and metadata fetches don't invoke the EF; the auto-ingest path worked because mining routes through the backend abstraction. Diary writes were the consistent failure surface. Resolve the EF up front (matching `ChromaBackend._resolve_embedding_function`) and pass it into both reopen branches. Falls back to the chromadb default only if `mempalace.embedding.get_embedding_function` itself raises. Regression test patches the chromadb client class to capture `embedding_function=` on every `get_collection` / `create_collection` call from `_get_collection(create=True)` and `_get_collection()`, and fails if any call omits it. Follow-up to #1262 / #1289 (which fixed the metadata-mismatch SIGSEGV path); this addresses the EF-mismatch SIGSEGV path on the same surface.
This commit is contained in:
@@ -919,3 +919,59 @@ class TestCacheInvalidation:
|
||||
col2 = mcp_server._get_collection(create=True)
|
||||
assert col2 is not None
|
||||
assert calls == [], f"get_or_create_collection was called: {calls}"
|
||||
|
||||
def test_get_collection_passes_embedding_function(self, monkeypatch, config, palace_path, kg):
|
||||
"""Regression for #1299.
|
||||
|
||||
``mcp_server._get_collection`` must pass ``embedding_function=`` into
|
||||
both ``client.get_collection`` and ``client.create_collection``,
|
||||
mirroring ``ChromaBackend.get_collection``. Without it, ChromaDB 1.x
|
||||
falls back to its built-in ``DefaultEmbeddingFunction`` (whose lazy
|
||||
ONNX provider selection has SIGSEGV'd on python 3.14 + Apple Silicon),
|
||||
and writers/readers can disagree with the miner about which EF is
|
||||
bound to the collection. The miner / Stop hook ingest path routes
|
||||
through ``ChromaBackend.get_collection`` which does this correctly;
|
||||
the MCP server must match.
|
||||
"""
|
||||
_patch_mcp_server(monkeypatch, config, kg)
|
||||
from mempalace import mcp_server
|
||||
|
||||
client = mcp_server._get_client()
|
||||
client_cls = type(client)
|
||||
captured: dict[str, list[dict]] = {"get": [], "create": []}
|
||||
real_get = client_cls.get_collection
|
||||
real_create = client_cls.create_collection
|
||||
|
||||
def _spy_get(self, name, **kwargs):
|
||||
captured["get"].append(dict(kwargs))
|
||||
return real_get(self, name, **kwargs)
|
||||
|
||||
def _spy_create(self, name, **kwargs):
|
||||
captured["create"].append(dict(kwargs))
|
||||
return real_create(self, name, **kwargs)
|
||||
|
||||
monkeypatch.setattr(client_cls, "get_collection", _spy_get)
|
||||
monkeypatch.setattr(client_cls, "create_collection", _spy_create)
|
||||
mcp_server._collection_cache = None
|
||||
|
||||
col = mcp_server._get_collection(create=True)
|
||||
assert col is not None
|
||||
|
||||
all_calls = captured["get"] + captured["create"]
|
||||
assert all_calls, "expected get_collection or create_collection to be called"
|
||||
for kwargs in all_calls:
|
||||
assert (
|
||||
"embedding_function" in kwargs
|
||||
), f"missing embedding_function= in chromadb call: {kwargs}"
|
||||
assert kwargs["embedding_function"] is not None
|
||||
|
||||
# Same expectation on the create=False (cache-miss) reopen path.
|
||||
mcp_server._collection_cache = None
|
||||
captured["get"].clear()
|
||||
captured["create"].clear()
|
||||
col2 = mcp_server._get_collection()
|
||||
assert col2 is not None
|
||||
assert captured["get"], "expected get_collection on cache-miss reopen"
|
||||
for kwargs in captured["get"]:
|
||||
assert "embedding_function" in kwargs
|
||||
assert kwargs["embedding_function"] is not None
|
||||
|
||||
Reference in New Issue
Block a user