From a3c778210b652221489c49f61a99579749152896 Mon Sep 17 00:00:00 2001 From: jp Date: Sat, 18 Apr 2026 10:00:59 -0700 Subject: [PATCH 1/4] fix(searcher): guard against None metadata in CLI print path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `col.query(...)` can return `None` entries in the inner ``metadatas`` list for drawers whose metadata was never set (older palaces, rows written outside the normal mining path). The CLI `search()` function would render earlier results successfully and then crash mid-loop with: AttributeError: 'NoneType' object has no attribute 'get' at ``searcher.py:286`` — ``meta.get("source_file", "?")``. The user sees partial output followed by a traceback, with no indication of which drawers rendered OK and which were skipped. Guard with ``meta = meta or {}`` inside the loop so entries with missing metadata fall back to the existing ``"?"`` defaults instead of crashing, matching the hit dict assembly in ``search_memories()`` which already uses ``meta.get("wing", "unknown")`` etc. against the same data. Adds a regression test that mocks a ChromaDB result with a ``None`` metadata entry in the middle of the inner list and asserts both result blocks render to stdout. --- mempalace/searcher.py | 1 + tests/test_searcher.py | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/mempalace/searcher.py b/mempalace/searcher.py index db809d9..ef951e2 100644 --- a/mempalace/searcher.py +++ b/mempalace/searcher.py @@ -283,6 +283,7 @@ def search(query: str, palace_path: str, wing: str = None, room: str = None, n_r for i, (doc, meta, dist) in enumerate(zip(docs, metas, dists), 1): similarity = round(max(0.0, 1 - dist), 3) + meta = meta or {} source = Path(meta.get("source_file", "?")).name wing_name = meta.get("wing", "?") room_name = meta.get("room", "?") diff --git a/tests/test_searcher.py b/tests/test_searcher.py index 11e788d..127e95f 100644 --- a/tests/test_searcher.py +++ b/tests/test_searcher.py @@ -141,3 +141,22 @@ class TestSearchCLI: captured = capsys.readouterr() # Should have output with at least one result block assert "[1]" in captured.out + + def test_search_handles_none_metadata_without_crash(self, palace_path, capsys): + """ChromaDB can return `None` entries in the metadatas list when a + drawer has no metadata. The CLI print path must not crash on them + mid-render — it used to raise `AttributeError: 'NoneType' object has + no attribute 'get'` after printing earlier results.""" + mock_col = MagicMock() + mock_col.query.return_value = { + "documents": [["first doc", "second doc"]], + "metadatas": [[{"source_file": "a.md", "wing": "w", "room": "r"}, None]], + "distances": [[0.1, 0.2]], + } + with patch("mempalace.searcher.get_collection", return_value=mock_col): + search("anything", "/fake/path") + captured = capsys.readouterr() + assert "[1]" in captured.out + assert "[2]" in captured.out + # Second result renders with fallback '?' values instead of crashing + assert "second doc" in captured.out From feba7e8043113716d4e35b2753bc160f42c54af7 Mon Sep 17 00:00:00 2001 From: jp Date: Sat, 18 Apr 2026 10:26:11 -0700 Subject: [PATCH 2/4] fix(miner): same None-metadata guard for status() histogram loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `status()` walks `col.get(include=["metadatas"])` and buckets each drawer into a `wing_rooms[wing][room]` histogram. The same ChromaDB return shape fixed in the search print path — `None` entries in the `metadatas` list for drawers with no stored metadata — crashes the status command with: AttributeError: 'NoneType' object has no attribute 'get' Applies the matching ``m = m or {}`` guard so None-metadata drawers roll up under the existing `?/?` fallback bucket instead of killing the command mid-tally. Reproduced on a 135K-drawer palace where two drawers had `metadata=None`; both now show under `WING: ? / ROOM: ?` in the tally while the command prints the full histogram as designed. Adds a regression test that feeds `status()` a fake collection whose `get()` returns a `None` in the middle of the metadatas list and asserts both the fallback bucket and the real wing render. --- mempalace/miner.py | 1 + tests/test_miner.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/mempalace/miner.py b/mempalace/miner.py index 18e748c..ae54017 100644 --- a/mempalace/miner.py +++ b/mempalace/miner.py @@ -854,6 +854,7 @@ def status(palace_path: str): wing_rooms = defaultdict(lambda: defaultdict(int)) for m in metas: + m = m or {} wing_rooms[m.get("wing", "?")][m.get("room", "?")] += 1 print(f"\n{'=' * 55}") diff --git a/tests/test_miner.py b/tests/test_miner.py index 18f4e50..0c81dff 100644 --- a/tests/test_miner.py +++ b/tests/test_miner.py @@ -343,6 +343,36 @@ def test_status_missing_palace_does_not_create_empty_collection(tmp_path, capsys assert not palace_path.exists() +def test_status_handles_none_metadata_without_crash(tmp_path, capsys): + """status must not crash when col.get returns a None entry in metadatas. + + Palaces can contain drawers whose metadata was never set (older mining + paths, drawers written by third-party tools). Before the guard, status + crashed mid-tally with ``AttributeError: 'NoneType' object has no + attribute 'get'`` at the wing/room histogram line.""" + from unittest.mock import patch + + class FakeCol: + def count(self): + return 2 + + def get(self, *args, **kwargs): + return { + "ids": ["a", "b"], + "documents": ["doc a", "doc b"], + "metadatas": [{"wing": "proj", "room": "r"}, None], + } + + with patch("mempalace.miner.get_collection", return_value=FakeCol()): + status(str(tmp_path)) + + out = capsys.readouterr().out + # No crash; the None-metadata row is counted under the ?/? fallback + # alongside the real wing=proj row. + assert "WING: ?" in out + assert "WING: proj" in out + + # ── normalize_version schema gate ─────────────────────────────────────── # # When the normalization pipeline changes shape (e.g., strip_noise lands), From 7690574dde9b804040939a9958fd7ce3a57383de Mon Sep 17 00:00:00 2001 From: jp Date: Sat, 18 Apr 2026 10:37:05 -0700 Subject: [PATCH 3/4] fix(searcher): guard API path + closet loop against None metadata too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per Copilot review on the CLI-only PR (#999): search_memories() has the same vulnerability in two additional spots, since ChromaDB can return None entries in the inner metadatas list for either the drawer query or the closets query. Without guards, the API path crashes with: AttributeError: 'NoneType' object has no attribute 'get' at either \`cmeta.get("source_file", "")\` in the closet boost lookup or \`meta.get("source_file", "") or ""\` in the drawer scoring loop. Applies the matching \`meta = meta or {}\` / \`cmeta = cmeta or {}\` guard at both sites and adds an API-path regression test that mocks a drawer query result with a None metadata entry and asserts both hits render — the None-metadata hit with the existing \`"unknown"\` sentinel values the scoring loop already writes for missing keys. Verified both the new API test and the existing CLI test fail without the guards (AttributeError) and pass with them. --- mempalace/searcher.py | 2 ++ tests/test_searcher.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/mempalace/searcher.py b/mempalace/searcher.py index ef951e2..c9b5ac3 100644 --- a/mempalace/searcher.py +++ b/mempalace/searcher.py @@ -373,6 +373,7 @@ def search_memories( _first_or_empty(closet_results, "distances"), ) ): + cmeta = cmeta or {} source = cmeta.get("source_file", "") if source and source not in closet_boost_by_source: closet_boost_by_source[source] = (rank, cdist, cdoc[:200]) @@ -395,6 +396,7 @@ def search_memories( if max_distance > 0.0 and dist > max_distance: continue + meta = meta or {} source = meta.get("source_file", "") or "" boost = 0.0 matched_via = "drawer" diff --git a/tests/test_searcher.py b/tests/test_searcher.py index 127e95f..3ccfb2d 100644 --- a/tests/test_searcher.py +++ b/tests/test_searcher.py @@ -89,6 +89,37 @@ class TestSearchMemories: assert result["filters"]["wing"] == "project" assert result["filters"]["room"] == "backend" + def test_search_memories_handles_none_metadata(self): + """API path: `None` entries in the drawer results' metadatas list must + fall back to the sentinel strings (wing/room 'unknown', source '?') + rather than raising `AttributeError: 'NoneType' object has no + attribute 'get'` while the rest of the result set renders.""" + mock_col = MagicMock() + mock_col.query.return_value = { + "documents": [["first doc", "second doc"]], + "metadatas": [[{"source_file": "a.md", "wing": "w", "room": "r"}, None]], + "distances": [[0.1, 0.2]], + "ids": [["d1", "d2"]], + } + + def mock_get_collection(path, create=False): + # First call: drawers. Second call: closets — raise so hybrid + # degrades to pure drawer search (the catch block covers it). + if not hasattr(mock_get_collection, "_called"): + mock_get_collection._called = True + return mock_col + raise RuntimeError("no closets") + + with patch("mempalace.searcher.get_collection", side_effect=mock_get_collection): + result = search_memories("anything", "/fake/path") + assert "results" in result + assert len(result["results"]) == 2 + # The None-metadata hit renders with sentinel values, not a crash. + none_hit = result["results"][1] + assert none_hit["text"] == "second doc" + assert none_hit["wing"] == "unknown" + assert none_hit["room"] == "unknown" + # ── search() (CLI print function) ───────────────────────────────────── From 3f0cfd5ed45712ee09430ea7b4bceb9f104b85ec Mon Sep 17 00:00:00 2001 From: jp Date: Sat, 18 Apr 2026 12:28:03 -0700 Subject: [PATCH 4/4] fix(mcp): guard tool_status/list_wings/list_rooms/get_taxonomy against None metadata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Four more MCP handlers iterate a metadata list and call m.get(...) unconditionally. When the cache contains a None entry (drawers with no metadata, common on older mining paths), the try block catches the AttributeError and marks the response "partial: true" with an error message — visible as {"error": "'NoneType' object has no attribute 'get'", "partial": true} returned from mempalace_status even though the palace data is otherwise fetchable. Same m = m or {} guard we applied to searcher.py (d3a2d22, a51c3c2) and miner.status() (66f08a1). None-metadata drawers now roll up under the existing "unknown" fallback bucket instead of poisoning the response with a misleading partial flag. Regression test: mock the metadata cache with a None in the middle, assert tool_status returns clean counts and no error/partial fields. Verified the test fails without the guard. 998 tests pass. --- mempalace/mcp_server.py | 4 ++++ tests/test_mcp_server.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/mempalace/mcp_server.py b/mempalace/mcp_server.py index 3918a19..06355c4 100644 --- a/mempalace/mcp_server.py +++ b/mempalace/mcp_server.py @@ -315,6 +315,7 @@ def tool_status(): try: all_meta = _get_cached_metadata(col) for m in all_meta: + m = m or {} w = m.get("wing", "unknown") r = m.get("room", "unknown") wings[w] = wings.get(w, 0) + 1 @@ -368,6 +369,7 @@ def tool_list_wings(): try: all_meta = _get_cached_metadata(col) for m in all_meta: + m = m or {} w = m.get("wing", "unknown") wings[w] = wings.get(w, 0) + 1 except Exception as e: @@ -391,6 +393,7 @@ def tool_list_rooms(wing: str = None): where = {"wing": wing} if wing else None all_meta = _fetch_all_metadata(col, where=where) for m in all_meta: + m = m or {} r = m.get("room", "unknown") rooms[r] = rooms.get(r, 0) + 1 except Exception as e: @@ -409,6 +412,7 @@ def tool_get_taxonomy(): try: all_meta = _get_cached_metadata(col) for m in all_meta: + m = m or {} w = m.get("wing", "unknown") r = m.get("room", "unknown") if w not in taxonomy: diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 09073f5..899e6a7 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -250,6 +250,38 @@ class TestReadTools: assert "project" in result["wings"] assert "notes" in result["wings"] + def test_status_handles_none_metadata_without_partial( + self, monkeypatch, config, palace_path, kg + ): + """tool_status must not crash or go partial when the metadata cache + returns a ``None`` entry — palaces can contain drawers with no + metadata (older mining paths, third-party writes). Before the guard, + ``m.get("wing")`` raised AttributeError mid-tally and the result + carried ``"error"`` + ``"partial": True`` even though the data was + perfectly fetchable.""" + from unittest.mock import patch as _patch + + _patch_mcp_server(monkeypatch, config, kg) + from mempalace.mcp_server import tool_status + + # Inject a metadata cache where one entry is None + with _patch("mempalace.mcp_server._get_collection") as mock_get_col: + fake_col = type("C", (), {"count": lambda self: 2})() + mock_get_col.return_value = fake_col + with _patch( + "mempalace.mcp_server._get_cached_metadata", + return_value=[{"wing": "proj", "room": "r"}, None], + ): + result = tool_status() + + # The None-metadata drawer falls under 'unknown/unknown' — no crash, + # no partial flag. + assert "error" not in result + assert result.get("partial") is not True + assert result["total_drawers"] == 2 + assert result["wings"].get("proj") == 1 + assert result["wings"].get("unknown") == 1 + def test_list_wings(self, monkeypatch, config, palace_path, seeded_collection, kg): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_list_wings