diff --git a/mempalace/backends/chroma.py b/mempalace/backends/chroma.py index d43b884..13976a2 100644 --- a/mempalace/backends/chroma.py +++ b/mempalace/backends/chroma.py @@ -4,6 +4,7 @@ import datetime as _dt import logging import os import sqlite3 +from pathlib import Path from typing import Any, Optional import chromadb @@ -239,6 +240,9 @@ def _pin_hnsw_threads(collection) -> None: logger.debug("_pin_hnsw_threads modify failed", exc_info=True) +_BLOB_FIX_MARKER = ".blob_seq_ids_migrated" + + def _fix_blob_seq_ids(palace_path: str) -> None: """Fix ChromaDB 0.6.x -> 1.5.x migration bug: BLOB seq_ids -> INTEGER. @@ -248,10 +252,19 @@ def _fix_blob_seq_ids(palace_path: str) -> None: type INTEGER) is not compatible with SQL type BLOB". Must run BEFORE PersistentClient is created (the compactor fires on init). + + Opening a Python sqlite3 connection against a ChromaDB 1.5.x WAL-mode + database leaves state that segfaults the next PersistentClient call. After + the migration has run once successfully, a marker file is written so + subsequent opens skip the sqlite connection entirely. Already-migrated + palaces can touch the marker manually to opt into the fast path. """ db_path = os.path.join(palace_path, "chroma.sqlite3") if not os.path.isfile(db_path): return + marker = os.path.join(palace_path, _BLOB_FIX_MARKER) + if os.path.isfile(marker): + return try: with sqlite3.connect(db_path) as conn: for table in ("embeddings", "max_seq_id"): @@ -269,6 +282,14 @@ def _fix_blob_seq_ids(palace_path: str) -> None: conn.commit() except Exception: logger.exception("Could not fix BLOB seq_ids in %s", db_path) + return + # Write marker whether or not rows needed migration — the palace is now + # confirmed to be in the INTEGER-seq_id state and future opens can skip the + # sqlite3.connect() entirely. + try: + Path(marker).touch() + except OSError: + logger.exception("Could not write migration marker %s", marker) # --------------------------------------------------------------------------- diff --git a/tests/test_backends.py b/tests/test_backends.py index e632bdc..9fe5ca1 100644 --- a/tests/test_backends.py +++ b/tests/test_backends.py @@ -382,6 +382,72 @@ def test_fix_blob_seq_ids_noop_without_database(tmp_path): _fix_blob_seq_ids(str(tmp_path)) # should not raise +def test_fix_blob_seq_ids_writes_marker_after_blob_path(tmp_path): + """The .blob_seq_ids_migrated marker is written after a successful BLOB → INTEGER conversion.""" + from mempalace.backends.chroma import _BLOB_FIX_MARKER + + db_path = tmp_path / "chroma.sqlite3" + conn = sqlite3.connect(str(db_path)) + conn.execute("CREATE TABLE embeddings (rowid INTEGER PRIMARY KEY, seq_id)") + conn.execute("INSERT INTO embeddings (seq_id) VALUES (?)", ((42).to_bytes(8, "big"),)) + conn.commit() + conn.close() + + marker = tmp_path / _BLOB_FIX_MARKER + assert not marker.exists() + + _fix_blob_seq_ids(str(tmp_path)) + + assert marker.is_file(), "marker must be written after a successful migration" + + +def test_fix_blob_seq_ids_writes_marker_when_already_integer(tmp_path): + """The marker is written even when the migration is a no-op (already INTEGER). + + The point of the marker is to skip the sqlite3 open on subsequent calls, + not to record that a conversion happened. So a clean palace gets the + marker on first run too — next ``_fix_blob_seq_ids`` call short-circuits + before touching the sqlite3 file. + """ + from mempalace.backends.chroma import _BLOB_FIX_MARKER + + db_path = tmp_path / "chroma.sqlite3" + conn = sqlite3.connect(str(db_path)) + conn.execute("CREATE TABLE embeddings (rowid INTEGER PRIMARY KEY, seq_id INTEGER)") + conn.execute("INSERT INTO embeddings (seq_id) VALUES (42)") + conn.commit() + conn.close() + + marker = tmp_path / _BLOB_FIX_MARKER + assert not marker.exists() + + _fix_blob_seq_ids(str(tmp_path)) + + assert marker.is_file(), "marker must be written even when no BLOBs found" + + +def test_fix_blob_seq_ids_skips_sqlite_when_marker_present(tmp_path): + """When the marker exists, ``_fix_blob_seq_ids`` does not open sqlite3. + + This is the load-bearing property of the marker — opening Python's + sqlite3 against a live ChromaDB 1.5.x WAL DB corrupts the next + PersistentClient call (#1090). Once a palace has been migrated, we + never want to open it again, even read-only. + """ + from unittest.mock import patch + from mempalace.backends.chroma import _BLOB_FIX_MARKER + + # Pre-create the marker so the function should short-circuit. + db_path = tmp_path / "chroma.sqlite3" + db_path.write_bytes(b"sentinel") # presence required for the function to proceed + (tmp_path / _BLOB_FIX_MARKER).touch() + + with patch("mempalace.backends.chroma.sqlite3.connect") as mock_connect: + _fix_blob_seq_ids(str(tmp_path)) + + mock_connect.assert_not_called() + + # ── quarantine_stale_hnsw ─────────────────────────────────────────────────