fix(searcher): guard against empty ChromaDB query results (#195) (#865)

Fixes #195.

When ChromaDB returns no documents (empty palace, or wing/room filter
that excludes everything), it returns the shape:

    {"documents": [], "metadatas": [], "distances": []}

Blindly indexing `results["documents"][0]` on that shape raises IndexError
instead of producing the expected 'no results' response. Affected:
searcher.search(), searcher.search_memories() (drawer + closet branches plus
the total_before_filter aggregate), and Layer3.search() / Layer3.search_raw().

Adds a tiny private helper `searcher._first_or_empty(results, key)` that
safely extracts the inner list, returning [] for any of: missing key,
empty outer list, [None], or [[]]. layers.py imports the same helper to
avoid duplicating the guard.

Tests: tests/test_empty_chromadb_results.py covers all observed shapes
plus a documentation-style test that pins the original IndexError so
future readers understand why the helper exists.
This commit is contained in:
Arnold Wender
2026-04-15 09:26:38 +02:00
committed by GitHub
parent 54a386d925
commit 6a73eb2e20
3 changed files with 79 additions and 17 deletions
+48
View File
@@ -0,0 +1,48 @@
"""Regression tests for issue #195 — IndexError on empty ChromaDB results.
Before the fix, `searcher.search()`, `searcher.search_memories()`, and
`Layer3.search()` indexed `results["documents"][0]` without checking the
outer list, so a query against an empty collection (or a wing/room
filter that excluded everything) crashed with IndexError instead of
returning a graceful "no results" response.
"""
import pytest
from mempalace.searcher import _first_or_empty
def test_first_or_empty_handles_empty_outer_list():
    """Empty outer lists yield [] for every result key (issue #195).

    This is the shape ChromaDB returns from an empty collection: each key
    maps to an empty outer list, so there is no inner list to index into.
    """
    results = {"documents": [], "metadatas": [], "distances": []}
    # Check every key the searcher reads, not just "documents".
    for key in ("documents", "metadatas", "distances"):
        assert _first_or_empty(results, key) == [], key
def test_first_or_empty_handles_outer_with_empty_inner():
    """An outer list wrapping an empty inner list yields [].

    ChromaDB also returns ``{"documents": [[]]}`` in some versions, so the
    helper must yield [] either way.
    """
    assert _first_or_empty({"documents": [[]]}, "documents") == []
def test_first_or_empty_handles_missing_key():
    """A result dict lacking the requested key yields [] rather than raising."""
    assert _first_or_empty({}, "documents") == []
def test_first_or_empty_handles_none_inner():
    """``[None]`` (unusual but observed) must not blow up; it yields []."""
    assert _first_or_empty({"documents": [None]}, "documents") == []
def test_first_or_empty_returns_inner_list_for_normal_result():
    """A populated result returns its first inner list unchanged."""
    results = {"documents": [["a", "b", "c"]]}
    assert _first_or_empty(results, "documents") == ["a", "b", "c"]
def test_raw_indexing_still_raises_to_document_the_bug():
    """Pin the original failure mode behind issue #195.

    Documents why _first_or_empty exists: indexing ``[0]`` on the empty
    outer list raises IndexError.
    """
    results = {"documents": []}
    with pytest.raises(IndexError):
        # Assign to a throwaway name so the subscript is not flagged as a
        # useless expression; only the raised exception matters here.
        _ = results["documents"][0]