From feba7e8043113716d4e35b2753bc160f42c54af7 Mon Sep 17 00:00:00 2001 From: jp Date: Sat, 18 Apr 2026 10:26:11 -0700 Subject: [PATCH] fix(miner): same None-metadata guard for status() histogram loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `status()` walks `col.get(include=["metadatas"])` and buckets each drawer into a `wing_rooms[wing][room]` histogram. The same ChromaDB return shape fixed in the search print path — `None` entries in the `metadatas` list for drawers with no stored metadata — crashes the status command with: AttributeError: 'NoneType' object has no attribute 'get' Applies the matching ``m = m or {}`` guard so None-metadata drawers roll up under the existing `?/?` fallback bucket instead of killing the command mid-tally. Reproduced on a 135K-drawer palace where two drawers had `metadata=None`; both now show under `WING: ? / ROOM: ?` in the tally while the command prints the full histogram as designed. Adds a regression test that feeds `status()` a fake collection whose `get()` returns a `None` entry alongside a real one in the `metadatas` list and asserts both the fallback bucket and the real wing render. 
--- mempalace/miner.py | 1 + tests/test_miner.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/mempalace/miner.py b/mempalace/miner.py index 18e748c..ae54017 100644 --- a/mempalace/miner.py +++ b/mempalace/miner.py @@ -854,6 +854,7 @@ def status(palace_path: str): wing_rooms = defaultdict(lambda: defaultdict(int)) for m in metas: + m = m or {} wing_rooms[m.get("wing", "?")][m.get("room", "?")] += 1 print(f"\n{'=' * 55}") diff --git a/tests/test_miner.py b/tests/test_miner.py index 18f4e50..0c81dff 100644 --- a/tests/test_miner.py +++ b/tests/test_miner.py @@ -343,6 +343,36 @@ def test_status_missing_palace_does_not_create_empty_collection(tmp_path, capsys assert not palace_path.exists() +def test_status_handles_none_metadata_without_crash(tmp_path, capsys): + """status must not crash when col.get returns a None entry in metadatas. + + Palaces can contain drawers whose metadata was never set (older mining + paths, drawers written by third-party tools). Before the guard, status + crashed mid-tally with ``AttributeError: 'NoneType' object has no + attribute 'get'`` at the wing/room histogram line.""" + from unittest.mock import patch + + class FakeCol: + def count(self): + return 2 + + def get(self, *args, **kwargs): + return { + "ids": ["a", "b"], + "documents": ["doc a", "doc b"], + "metadatas": [{"wing": "proj", "room": "r"}, None], + } + + with patch("mempalace.miner.get_collection", return_value=FakeCol()): + status(str(tmp_path)) + + out = capsys.readouterr().out + # No crash; the None-metadata row is counted under the ?/? fallback + # alongside the real wing=proj row. + assert "WING: ?" in out + assert "WING: proj" in out + + # ── normalize_version schema gate ─────────────────────────────────────── # # When the normalization pipeline changes shape (e.g., strip_noise lands),