diff --git a/mempalace/layers.py b/mempalace/layers.py index a61542b..ff2ff1c 100644 --- a/mempalace/layers.py +++ b/mempalace/layers.py @@ -23,7 +23,7 @@ from collections import defaultdict from .config import MempalaceConfig from .palace import get_collection as _get_collection -from .searcher import build_where_filter +from .searcher import _first_or_empty, build_where_filter # --------------------------------------------------------------------------- @@ -272,9 +272,9 @@ class Layer3: except Exception as e: return f"Search error: {e}" - docs = results["documents"][0] - metas = results["metadatas"][0] - dists = results["distances"][0] + docs = _first_or_empty(results, "documents") + metas = _first_or_empty(results, "metadatas") + dists = _first_or_empty(results, "distances") if not docs: return "No results found." @@ -323,9 +323,9 @@ class Layer3: hits = [] for doc, meta, dist in zip( - results["documents"][0], - results["metadatas"][0], - results["distances"][0], + _first_or_empty(results, "documents"), + _first_or_empty(results, "metadatas"), + _first_or_empty(results, "distances"), ): hits.append( { diff --git a/mempalace/searcher.py b/mempalace/searcher.py index dea300d..57d851f 100644 --- a/mempalace/searcher.py +++ b/mempalace/searcher.py @@ -30,6 +30,20 @@ class SearchError(Exception): _TOKEN_RE = re.compile(r"\w{2,}", re.UNICODE) +def _first_or_empty(results: dict, key: str) -> list: + """Return the first inner list of a ChromaDB query result, or []. + + ChromaDB returns shapes like ``{"documents": [["a", "b"]], ...}`` for a + successful query, but ``{"documents": [], ...}`` (empty outer list) when + the collection is empty or the filter excludes everything. Indexing + ``[0]`` blindly raises IndexError in that case (issue #195). + """ + outer = results.get(key) + if not outer: + return [] + return outer[0] or [] + + def _tokenize(text: str) -> list: """Lowercase + strip to alphanumeric tokens of length ≥ 2.""" return _TOKEN_RE.findall(text.lower()) @@ -251,9 +265,9 @@ def search(query: str, palace_path: str, wing: str = None, room: str = None, n_r print(f"\n Search error: {e}") raise SearchError(f"Search error: {e}") from e - docs = results["documents"][0] - metas = results["metadatas"][0] - dists = results["distances"][0] + docs = _first_or_empty(results, "documents") + metas = _first_or_empty(results, "metadatas") + dists = _first_or_empty(results, "distances") if not docs: print(f'\n No results found for: "{query}"') @@ -353,9 +367,9 @@ def search_memories( closet_results = closets_col.query(**ckwargs) for rank, (cdoc, cmeta, cdist) in enumerate( zip( - closet_results["documents"][0], - closet_results["metadatas"][0], - closet_results["distances"][0], + _first_or_empty(closet_results, "documents"), + _first_or_empty(closet_results, "metadatas"), + _first_or_empty(closet_results, "distances"), ) ): source = cmeta.get("source_file", "") @@ -372,9 +386,9 @@ def search_memories( scored: list = [] for doc, meta, dist in zip( - drawer_results["documents"][0], - drawer_results["metadatas"][0], - drawer_results["distances"][0], + _first_or_empty(drawer_results, "documents"), + _first_or_empty(drawer_results, "metadatas"), + _first_or_empty(drawer_results, "distances"), ): # Filter on raw distance before rounding to avoid precision loss. if max_distance > 0.0 and dist > max_distance: @@ -482,6 +496,6 @@ def search_memories( return { "query": query, "filters": {"wing": wing, "room": room}, - "total_before_filter": len(drawer_results["documents"][0]), + "total_before_filter": len(_first_or_empty(drawer_results, "documents")), "results": hits, } diff --git a/tests/test_empty_chromadb_results.py b/tests/test_empty_chromadb_results.py new file mode 100644 index 0000000..2fed342 --- /dev/null +++ b/tests/test_empty_chromadb_results.py @@ -0,0 +1,48 @@ +"""Regression tests for issue #195 — IndexError on empty ChromaDB results. + +Before the fix, `searcher.search()`, `searcher.search_memories()`, and +`Layer3.search()` indexed `results["documents"][0]` without checking the +outer list, so a query against an empty collection (or a wing/room +filter that excluded everything) crashed with IndexError instead of +returning a graceful "no results" response. +""" + +import pytest + +from mempalace.searcher import _first_or_empty + + +def test_first_or_empty_handles_empty_outer_list(): + """The shape ChromaDB returns from an empty collection (issue #195).""" + results = {"documents": [], "metadatas": [], "distances": []} + assert _first_or_empty(results, "documents") == [] + assert _first_or_empty(results, "metadatas") == [] + assert _first_or_empty(results, "distances") == [] + + +def test_first_or_empty_handles_outer_with_empty_inner(): + """ChromaDB also returns ``{"documents": [[]]}`` in some versions — + must yield [] either way.""" + assert _first_or_empty({"documents": [[]]}, "documents") == [] + + +def test_first_or_empty_handles_missing_key(): + assert _first_or_empty({}, "documents") == [] + + +def test_first_or_empty_handles_none_inner(): + """``[None]`` (unusual but observed) must not blow up.""" + assert _first_or_empty({"documents": [None]}, "documents") == [] + + +def test_first_or_empty_returns_inner_list_for_normal_result(): + results = {"documents": [["a", "b", "c"]]} + assert _first_or_empty(results, "documents") == ["a", "b", "c"] + + +def test_raw_indexing_still_raises_to_document_the_bug(): + """Document the original failure mode so future readers understand + why _first_or_empty exists.""" + results = {"documents": []} + with pytest.raises(IndexError): + _ = results["documents"][0]