Merge pull request #999 from jphein/fix/searcher-none-metadata

fix(searcher): guard against None metadata in CLI print path
This commit is contained in:
Ben Sigman
2026-04-18 13:41:52 -07:00
committed by GitHub
6 changed files with 120 additions and 0 deletions
+32
View File
@@ -250,6 +250,38 @@ class TestReadTools:
assert "project" in result["wings"]
assert "notes" in result["wings"]
def test_status_handles_none_metadata_without_partial(
self, monkeypatch, config, palace_path, kg
):
"""tool_status must not crash or go partial when the metadata cache
returns a ``None`` entry — palaces can contain drawers with no
metadata (older mining paths, third-party writes). Before the guard,
``m.get("wing")`` raised AttributeError mid-tally and the result
carried ``"error"`` + ``"partial": True`` even though the data was
perfectly fetchable."""
from unittest.mock import patch as _patch
_patch_mcp_server(monkeypatch, config, kg)
from mempalace.mcp_server import tool_status
# Inject a metadata cache where one entry is None
with _patch("mempalace.mcp_server._get_collection") as mock_get_col:
fake_col = type("C", (), {"count": lambda self: 2})()
mock_get_col.return_value = fake_col
with _patch(
"mempalace.mcp_server._get_cached_metadata",
return_value=[{"wing": "proj", "room": "r"}, None],
):
result = tool_status()
# The None-metadata drawer falls under 'unknown/unknown' — no crash,
# no partial flag.
assert "error" not in result
assert result.get("partial") is not True
assert result["total_drawers"] == 2
assert result["wings"].get("proj") == 1
assert result["wings"].get("unknown") == 1
def test_list_wings(self, monkeypatch, config, palace_path, seeded_collection, kg):
_patch_mcp_server(monkeypatch, config, kg)
from mempalace.mcp_server import tool_list_wings
+30
View File
@@ -343,6 +343,36 @@ def test_status_missing_palace_does_not_create_empty_collection(tmp_path, capsys
assert not palace_path.exists()
def test_status_handles_none_metadata_without_crash(tmp_path, capsys):
"""status must not crash when col.get returns a None entry in metadatas.
Palaces can contain drawers whose metadata was never set (older mining
paths, drawers written by third-party tools). Before the guard, status
crashed mid-tally with ``AttributeError: 'NoneType' object has no
attribute 'get'`` at the wing/room histogram line."""
from unittest.mock import patch
class FakeCol:
def count(self):
return 2
def get(self, *args, **kwargs):
return {
"ids": ["a", "b"],
"documents": ["doc a", "doc b"],
"metadatas": [{"wing": "proj", "room": "r"}, None],
}
with patch("mempalace.miner.get_collection", return_value=FakeCol()):
status(str(tmp_path))
out = capsys.readouterr().out
# No crash; the None-metadata row is counted under the ?/? fallback
# alongside the real wing=proj row.
assert "WING: ?" in out
assert "WING: proj" in out
# ── normalize_version schema gate ───────────────────────────────────────
#
# When the normalization pipeline changes shape (e.g., strip_noise lands),
+50
View File
@@ -89,6 +89,37 @@ class TestSearchMemories:
assert result["filters"]["wing"] == "project"
assert result["filters"]["room"] == "backend"
def test_search_memories_handles_none_metadata(self):
"""API path: `None` entries in the drawer results' metadatas list must
fall back to the sentinel strings (wing/room 'unknown', source '?')
rather than raising `AttributeError: 'NoneType' object has no
attribute 'get'` while the rest of the result set renders."""
mock_col = MagicMock()
mock_col.query.return_value = {
"documents": [["first doc", "second doc"]],
"metadatas": [[{"source_file": "a.md", "wing": "w", "room": "r"}, None]],
"distances": [[0.1, 0.2]],
"ids": [["d1", "d2"]],
}
def mock_get_collection(path, create=False):
# First call: drawers. Second call: closets — raise so hybrid
# degrades to pure drawer search (the catch block covers it).
if not hasattr(mock_get_collection, "_called"):
mock_get_collection._called = True
return mock_col
raise RuntimeError("no closets")
with patch("mempalace.searcher.get_collection", side_effect=mock_get_collection):
result = search_memories("anything", "/fake/path")
assert "results" in result
assert len(result["results"]) == 2
# The None-metadata hit renders with sentinel values, not a crash.
none_hit = result["results"][1]
assert none_hit["text"] == "second doc"
assert none_hit["wing"] == "unknown"
assert none_hit["room"] == "unknown"
# ── search() (CLI print function) ─────────────────────────────────────
@@ -141,3 +172,22 @@ class TestSearchCLI:
captured = capsys.readouterr()
# Should have output with at least one result block
assert "[1]" in captured.out
def test_search_handles_none_metadata_without_crash(self, palace_path, capsys):
"""ChromaDB can return `None` entries in the metadatas list when a
drawer has no metadata. The CLI print path must not crash on them
mid-render — it used to raise `AttributeError: 'NoneType' object has
no attribute 'get'` after printing earlier results."""
mock_col = MagicMock()
mock_col.query.return_value = {
"documents": [["first doc", "second doc"]],
"metadatas": [[{"source_file": "a.md", "wing": "w", "room": "r"}, None]],
"distances": [[0.1, 0.2]],
}
with patch("mempalace.searcher.get_collection", return_value=mock_col):
search("anything", "/fake/path")
captured = capsys.readouterr()
assert "[1]" in captured.out
assert "[2]" in captured.out
# Second result renders with fallback '?' values instead of crashing
assert "second doc" in captured.out