merge: pr/cross-wing-tunnels + rebuild drawer-grep on hardened closet path
Merges the full hardened stack (#788 closets, #789 entity/BM25/diary, #790 tunnels) and reimplements the drawer-grep feature in a way that composes with the chunk-level closet-first search instead of fighting it. ## Background The original PR added "drawer-grep" on top of the pre-hardening closet code that returned whole-file blobs. My #788 hardening changed that path to return *chunk-level* hits by parsing each closet's ``→drawer_id`` pointers and hydrating exactly those drawers. That made the original drawer-grep grep-over-all-drawers logic redundant — the closet already points at the relevant chunk. What remained valuable from the original PR was the *context expansion* idea: a chunk boundary can clip a thought mid-stride (matched chunk says "here's a breakdown:" and the breakdown lives in the next chunk), so callers want ±1 neighbor chunks for free rather than a follow-up get_drawer call. ## Change New ``_expand_with_neighbors(drawers_col, doc, meta, radius=1)`` helper in searcher.py: * Reads ``source_file`` + ``chunk_index`` from the matched drawer's metadata. * Fetches the ±radius sibling chunks in a SINGLE ChromaDB query using ``$and + $in`` — no "fetch all drawers for source" blowup. * Sorts retrieved chunks by chunk_index, joins with ``\n\n``. * Does a cheap metadata-only second query to compute ``total_drawers`` so callers know where in the file they landed. * Graceful fallback to the matched doc alone on any ChromaDB failure or missing metadata — search never breaks because expansion failed. ``_closet_first_hits`` now calls this helper and tags each hit with ``drawer_index`` + ``total_drawers``. Hit shape stays consistent with the direct-search path (both still carry ``matched_via``) so callers can't tell which path produced a given hit except via that field. ## Tests 6 new cases in TestDrawerGrepExpansion: * neighbors returned in chunk_index order (not hash order) * edge case: matched chunk at index 0 — only next neighbor surfaces * edge case: matched chunk at last index — only prev neighbor surfaces * edge case: 1-drawer file — returns just the matched doc * missing/non-int chunk_index metadata — graceful fallback * end-to-end via ``search_memories`` — closet-first hit carries drawer_index, total_drawers, and includes ±1 neighbors 761/761 suite pass; ruff + format clean on CI-pinned 0.4.x. Merge resolutions: miner.py kept develop's purge+NORMALIZE_VERSION; searcher.py dropped the old whole-file-blob block entirely in favor of rebuilding context expansion on top of ``_closet_first_hits``; test_closets.py took develop's 47-test baseline and appended TestDrawerGrepExpansion.
This commit is contained in:
+770
-117
File diff suppressed because it is too large
Load Diff
@@ -75,3 +75,86 @@ def test_mine_convos_does_not_reprocess_empty_chunk_files(capsys):
|
||||
assert "Files skipped (already filed): 1" in out2
|
||||
finally:
|
||||
shutil.rmtree(tmpdir, ignore_errors=True)
|
||||
|
||||
|
||||
def test_mine_convos_rebuilds_stale_drawers_after_schema_bump(capsys):
|
||||
"""When stored drawers have an older normalize_version, the next mine
|
||||
silently purges them and refiles — no manual erase required.
|
||||
|
||||
This is what makes the strip_noise upgrade apply to existing corpora:
|
||||
users just run `mempalace mine` again and old noise-filled drawers get
|
||||
replaced with clean ones."""
|
||||
from mempalace.palace import NORMALIZE_VERSION
|
||||
|
||||
tmpdir = tempfile.mkdtemp()
|
||||
try:
|
||||
convo_path = Path(tmpdir) / "chat.txt"
|
||||
convo_path.write_text(
|
||||
"> What is memory?\nMemory is persistence.\n\n"
|
||||
"> Why does it matter?\nIt enables continuity.\n\n"
|
||||
"> How do we build it?\nWith structured storage.\n"
|
||||
)
|
||||
palace_path = os.path.join(tmpdir, "palace")
|
||||
|
||||
# First mine — stamps drawers with NORMALIZE_VERSION
|
||||
mine_convos(tmpdir, palace_path, wing="test")
|
||||
capsys.readouterr()
|
||||
|
||||
client = chromadb.PersistentClient(path=palace_path)
|
||||
col = client.get_collection("mempalace_drawers")
|
||||
resolved = str(Path(tmpdir).resolve() / "chat.txt")
|
||||
first_pass = col.get(where={"source_file": resolved})
|
||||
first_ids = set(first_pass["ids"])
|
||||
assert first_ids, "first mine should produce drawers"
|
||||
for meta in first_pass["metadatas"]:
|
||||
assert meta.get("normalize_version") == NORMALIZE_VERSION
|
||||
|
||||
# Simulate pre-v2 drawers: rewrite metadata to an older version,
|
||||
# and replace content with "noise" so we can see it get cleaned up.
|
||||
stale_metas = []
|
||||
for meta in first_pass["metadatas"]:
|
||||
stale = dict(meta)
|
||||
stale["normalize_version"] = 1
|
||||
stale_metas.append(stale)
|
||||
col.update(
|
||||
ids=list(first_pass["ids"]),
|
||||
documents=["STALE NOISE"] * len(first_pass["ids"]),
|
||||
metadatas=stale_metas,
|
||||
)
|
||||
# Add an extra orphan drawer that should also be purged.
|
||||
col.add(
|
||||
ids=["orphan_drawer"],
|
||||
documents=["OLD ORPHAN"],
|
||||
metadatas=[
|
||||
{
|
||||
"wing": "test",
|
||||
"room": "default",
|
||||
"source_file": resolved,
|
||||
"chunk_index": 999,
|
||||
"normalize_version": 1,
|
||||
}
|
||||
],
|
||||
)
|
||||
del col, client
|
||||
|
||||
# Second mine — version gate should trigger rebuild
|
||||
mine_convos(tmpdir, palace_path, wing="test")
|
||||
out = capsys.readouterr().out
|
||||
assert (
|
||||
"Files skipped (already filed): 0" in out
|
||||
), "stale drawers should force a rebuild, not a skip"
|
||||
|
||||
client = chromadb.PersistentClient(path=palace_path)
|
||||
col = client.get_collection("mempalace_drawers")
|
||||
rebuilt = col.get(where={"source_file": resolved})
|
||||
# Orphan is gone
|
||||
assert "orphan_drawer" not in rebuilt["ids"]
|
||||
# No stale content survived
|
||||
assert all("STALE NOISE" not in d for d in rebuilt["documents"])
|
||||
assert all("OLD ORPHAN" not in d for d in rebuilt["documents"])
|
||||
# All rebuilt drawers carry the current version
|
||||
for meta in rebuilt["metadatas"]:
|
||||
assert meta.get("normalize_version") == NORMALIZE_VERSION
|
||||
del col, client
|
||||
finally:
|
||||
shutil.rmtree(tmpdir, ignore_errors=True)
|
||||
|
||||
@@ -6,6 +6,7 @@ dispatch layer (integration-level). Uses isolated palace + KG fixtures
|
||||
via monkeypatch to avoid touching real data.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
import json
|
||||
import sys
|
||||
|
||||
@@ -643,6 +644,48 @@ class TestDiaryTools:
|
||||
r = tool_diary_read(agent_name="Nobody")
|
||||
assert r["entries"] == []
|
||||
|
||||
def test_diary_write_same_second_shared_prefix_no_collision(
|
||||
self, monkeypatch, config, palace_path, kg
|
||||
):
|
||||
_patch_mcp_server(monkeypatch, config, kg)
|
||||
_client, _col = _get_collection(palace_path, create=True)
|
||||
del _client
|
||||
|
||||
from mempalace import mcp_server
|
||||
|
||||
class FrozenDateTime:
|
||||
calls = [
|
||||
datetime(2026, 4, 13, 22, 15, 30, 123456),
|
||||
datetime(2026, 4, 13, 22, 15, 30, 123457),
|
||||
]
|
||||
fallback = datetime(2026, 4, 13, 22, 15, 30, 123457)
|
||||
|
||||
@classmethod
|
||||
def now(cls):
|
||||
if cls.calls:
|
||||
return cls.calls.pop(0)
|
||||
return cls.fallback
|
||||
|
||||
monkeypatch.setattr(mcp_server, "datetime", FrozenDateTime)
|
||||
|
||||
from mempalace.mcp_server import tool_diary_read, tool_diary_write
|
||||
|
||||
entry1 = "A" * 50 + " entry one"
|
||||
entry2 = "A" * 50 + " entry two"
|
||||
|
||||
result1 = tool_diary_write(agent_name="TestAgent", entry=entry1, topic="status")
|
||||
result2 = tool_diary_write(agent_name="TestAgent", entry=entry2, topic="status")
|
||||
|
||||
assert result1["success"] is True
|
||||
assert result2["success"] is True
|
||||
assert result1["entry_id"] != result2["entry_id"]
|
||||
|
||||
read_result = tool_diary_read(agent_name="TestAgent")
|
||||
contents = [entry["content"] for entry in read_result["entries"]]
|
||||
assert read_result["total"] == 2
|
||||
assert entry1 in contents
|
||||
assert entry2 in contents
|
||||
|
||||
|
||||
# ── Cache Invalidation (inode/mtime) ──────────────────────────────────
|
||||
|
||||
|
||||
+90
-4
@@ -7,7 +7,7 @@ import chromadb
|
||||
import yaml
|
||||
|
||||
from mempalace.miner import mine, scan_project, status
|
||||
from mempalace.palace import file_already_mined
|
||||
from mempalace.palace import NORMALIZE_VERSION, file_already_mined
|
||||
|
||||
|
||||
def write_file(path: Path, content: str):
|
||||
@@ -227,11 +227,17 @@ def test_file_already_mined_check_mtime():
|
||||
assert file_already_mined(col, test_file) is False
|
||||
assert file_already_mined(col, test_file, check_mtime=True) is False
|
||||
|
||||
# Add it with mtime
|
||||
# Add it with mtime + current normalize_version
|
||||
col.add(
|
||||
ids=["d1"],
|
||||
documents=["hello world"],
|
||||
metadatas=[{"source_file": test_file, "source_mtime": str(mtime)}],
|
||||
metadatas=[
|
||||
{
|
||||
"source_file": test_file,
|
||||
"source_mtime": str(mtime),
|
||||
"normalize_version": NORMALIZE_VERSION,
|
||||
}
|
||||
],
|
||||
)
|
||||
|
||||
# Already mined (no mtime check)
|
||||
@@ -253,7 +259,12 @@ def test_file_already_mined_check_mtime():
|
||||
col.add(
|
||||
ids=["d2"],
|
||||
documents=["other"],
|
||||
metadatas=[{"source_file": "/fake/no_mtime.txt"}],
|
||||
metadatas=[
|
||||
{
|
||||
"source_file": "/fake/no_mtime.txt",
|
||||
"normalize_version": NORMALIZE_VERSION,
|
||||
}
|
||||
],
|
||||
)
|
||||
assert file_already_mined(col, "/fake/no_mtime.txt", check_mtime=True) is False
|
||||
finally:
|
||||
@@ -296,3 +307,78 @@ def test_status_missing_palace_does_not_create_empty_collection(tmp_path, capsys
|
||||
out = capsys.readouterr().out
|
||||
assert "No palace found" in out
|
||||
assert not palace_path.exists()
|
||||
|
||||
|
||||
# ── normalize_version schema gate ───────────────────────────────────────
|
||||
#
|
||||
# When the normalization pipeline changes shape (e.g., strip_noise lands),
|
||||
# `NORMALIZE_VERSION` is bumped so pre-existing drawers can be silently
|
||||
# rebuilt on the next mine. These tests pin that contract.
|
||||
|
||||
|
||||
def test_file_already_mined_returns_false_for_stale_normalize_version():
|
||||
"""Pre-v2 drawers (no field, or older integer) must not short-circuit."""
|
||||
tmpdir = tempfile.mkdtemp()
|
||||
try:
|
||||
palace_path = os.path.join(tmpdir, "palace")
|
||||
os.makedirs(palace_path)
|
||||
client = chromadb.PersistentClient(path=palace_path)
|
||||
col = client.get_or_create_collection("mempalace_drawers")
|
||||
|
||||
# Pre-v2 drawer: no normalize_version field at all
|
||||
col.add(
|
||||
ids=["d_old"],
|
||||
documents=["old"],
|
||||
metadatas=[{"source_file": "/fake/old.jsonl"}],
|
||||
)
|
||||
assert file_already_mined(col, "/fake/old.jsonl") is False
|
||||
|
||||
# Explicitly older version
|
||||
col.add(
|
||||
ids=["d_v1"],
|
||||
documents=["v1"],
|
||||
metadatas=[{"source_file": "/fake/v1.jsonl", "normalize_version": 1}],
|
||||
)
|
||||
assert file_already_mined(col, "/fake/v1.jsonl") is False
|
||||
|
||||
# Current version — short-circuits
|
||||
col.add(
|
||||
ids=["d_current"],
|
||||
documents=["cur"],
|
||||
metadatas=[
|
||||
{
|
||||
"source_file": "/fake/current.jsonl",
|
||||
"normalize_version": NORMALIZE_VERSION,
|
||||
}
|
||||
],
|
||||
)
|
||||
assert file_already_mined(col, "/fake/current.jsonl") is True
|
||||
finally:
|
||||
del col, client
|
||||
shutil.rmtree(tmpdir, ignore_errors=True)
|
||||
|
||||
|
||||
def test_add_drawer_stamps_normalize_version(tmp_path):
|
||||
"""Fresh drawers carry the current schema version so future upgrades work."""
|
||||
from mempalace.miner import add_drawer
|
||||
|
||||
palace_path = tmp_path / "palace"
|
||||
palace_path.mkdir()
|
||||
client = chromadb.PersistentClient(path=str(palace_path))
|
||||
col = client.get_or_create_collection("mempalace_drawers")
|
||||
try:
|
||||
added = add_drawer(
|
||||
collection=col,
|
||||
wing="test",
|
||||
room="notes",
|
||||
content="hello",
|
||||
source_file=str(tmp_path / "src.md"),
|
||||
chunk_index=0,
|
||||
agent="unit",
|
||||
)
|
||||
assert added is True
|
||||
stored = col.get(limit=1)
|
||||
meta = stored["metadatas"][0]
|
||||
assert meta["normalize_version"] == NORMALIZE_VERSION
|
||||
finally:
|
||||
del col, client
|
||||
|
||||
@@ -13,6 +13,7 @@ from mempalace.normalize import (
|
||||
_try_normalize_json,
|
||||
_try_slack_json,
|
||||
normalize,
|
||||
strip_noise,
|
||||
)
|
||||
|
||||
|
||||
@@ -1048,3 +1049,148 @@ def test_normalize_rejects_large_file():
|
||||
assert False, "Should have raised IOError"
|
||||
except IOError as e:
|
||||
assert "too large" in str(e).lower()
|
||||
|
||||
|
||||
# ── strip_noise() — verbatim-safety boundary tests ─────────────────────
|
||||
#
|
||||
# The "Verbatim always" design principle requires that we never delete
|
||||
# user-authored text. These tests pin down the boundary between system
|
||||
# noise (which we strip) and user prose that happens to mention the same
|
||||
# strings (which must survive untouched).
|
||||
|
||||
|
||||
class TestStripNoisePreservesUserContent:
|
||||
"""User prose that mentions noise strings inline must be preserved."""
|
||||
|
||||
def test_user_discusses_stop_hook_in_prose(self):
|
||||
# Regression: original regex with IGNORECASE + `.*\n?` ate the second
|
||||
# sentence from real user commentary.
|
||||
text = (
|
||||
"> User:\n"
|
||||
"> Our CI has a stop hook that rejects merges after 5pm. "
|
||||
"Ran 2 stop hooks last week.\n"
|
||||
"> Assistant:\n"
|
||||
"> Got it."
|
||||
)
|
||||
assert strip_noise(text) == text.strip()
|
||||
|
||||
def test_user_mentions_system_reminder_inline(self):
|
||||
# Inline <system-reminder> tags inside user prose (e.g. documenting
|
||||
# Claude Code behavior) must not be stripped.
|
||||
text = (
|
||||
"> User:\n"
|
||||
"> Here is what Claude Code emits: "
|
||||
"<system-reminder>Auto-save reminder...</system-reminder>"
|
||||
" — I want to ignore it."
|
||||
)
|
||||
assert strip_noise(text) == text.strip()
|
||||
|
||||
def test_ctrl_o_hint_in_prose_preserved(self):
|
||||
# Regression: original `.*\(ctrl\+o to expand\).*\n?` nuked the whole
|
||||
# line whenever a user documented the TUI shortcut.
|
||||
text = (
|
||||
"> User:\n"
|
||||
"> In the TUI you hit (ctrl+o to expand) to see more. "
|
||||
"That is the shortcut I want to document."
|
||||
)
|
||||
assert strip_noise(text) == text.strip()
|
||||
|
||||
def test_current_time_inline_in_prose(self):
|
||||
text = "> User:\n> At CURRENT TIME: the meeting starts, not before."
|
||||
assert strip_noise(text) == text.strip()
|
||||
|
||||
def test_plus_n_lines_marker_inline(self):
|
||||
text = "> User:\n> The log showed … +50 lines of stack trace, useful."
|
||||
assert strip_noise(text) == text.strip()
|
||||
|
||||
def test_dangling_open_tag_does_not_span_messages(self):
|
||||
# THE span-eating bug: a stray unclosed <system-reminder> in one
|
||||
# message must NOT merge with a closing tag in another message and
|
||||
# silently delete everything in between.
|
||||
text = (
|
||||
"> User 1: normal content <system-reminder>A\n"
|
||||
"> Assistant: reply\n"
|
||||
"> User 2: more content</system-reminder> tail"
|
||||
)
|
||||
out = strip_noise(text)
|
||||
assert "Assistant: reply" in out
|
||||
assert "User 2: more content" in out
|
||||
assert "User 1: normal content" in out
|
||||
|
||||
|
||||
class TestStripNoiseRemovesSystemChrome:
|
||||
"""System-injected noise with standalone/line-anchored shape must be stripped."""
|
||||
|
||||
def test_strips_line_anchored_system_reminder_block(self):
|
||||
text = (
|
||||
"> User:\n"
|
||||
"<system-reminder>\n"
|
||||
"Auto-save reminder...\n"
|
||||
"</system-reminder>\n"
|
||||
"> Real message."
|
||||
)
|
||||
out = strip_noise(text)
|
||||
assert "system-reminder" not in out
|
||||
assert "Auto-save reminder" not in out
|
||||
assert "Real message." in out
|
||||
|
||||
def test_strips_system_reminder_with_blockquote_prefix(self):
|
||||
# _messages_to_transcript prefixes lines with "> ", so the line
|
||||
# anchor must also accept that shape.
|
||||
text = "> User:\n" "> <system-reminder>Injected noise</system-reminder>\n" "> Real message."
|
||||
out = strip_noise(text)
|
||||
assert "Injected noise" not in out
|
||||
assert "Real message." in out
|
||||
|
||||
def test_strips_standalone_ran_hook_line(self):
|
||||
text = "Ran 2 Stop hook\n> User: real content"
|
||||
out = strip_noise(text)
|
||||
assert "Ran 2 Stop hook" not in out
|
||||
assert "real content" in out
|
||||
|
||||
def test_strips_known_hook_names(self):
|
||||
for hook in ("Stop", "PreCompact", "PreToolUse", "PostToolUse", "UserPromptSubmit"):
|
||||
text = f"Ran 1 {hook} hook\n> User: content"
|
||||
assert hook not in strip_noise(text)
|
||||
|
||||
def test_strips_current_time_standalone(self):
|
||||
text = "CURRENT TIME: 2026-04-13 10:00 UTC\n> User: Hello"
|
||||
out = strip_noise(text)
|
||||
assert "CURRENT TIME" not in out
|
||||
assert "Hello" in out
|
||||
|
||||
def test_strips_collapsed_lines_marker(self):
|
||||
text = "… +42 lines\n> User: Hello"
|
||||
out = strip_noise(text)
|
||||
assert "+42 lines" not in out
|
||||
assert "Hello" in out
|
||||
|
||||
def test_strips_token_count_ctrl_o_chrome(self):
|
||||
# Claude Code's actual collapsed-output chrome: "[N tokens] (ctrl+o to expand)"
|
||||
text = "> Assistant: some output [5 tokens] (ctrl+o to expand)\n> User: ok"
|
||||
out = strip_noise(text)
|
||||
assert "(ctrl+o to expand)" not in out
|
||||
assert "[5 tokens]" not in out
|
||||
assert "some output" in out
|
||||
|
||||
def test_strips_each_known_noise_tag(self):
|
||||
for tag in (
|
||||
"system-reminder",
|
||||
"command-message",
|
||||
"command-name",
|
||||
"task-notification",
|
||||
"user-prompt-submit-hook",
|
||||
"hook_output",
|
||||
):
|
||||
text = f"> User:\n<{tag}>junk</{tag}>\n> Real."
|
||||
out = strip_noise(text)
|
||||
assert tag not in out, f"{tag} leaked into output"
|
||||
assert "Real." in out
|
||||
|
||||
def test_collapses_excessive_blank_lines(self):
|
||||
text = "line one\n\n\n\n\n\nline two"
|
||||
out = strip_noise(text)
|
||||
assert "line one" in out
|
||||
assert "line two" in out
|
||||
# Should collapse to no more than 3 newlines
|
||||
assert "\n\n\n\n" not in out
|
||||
|
||||
Reference in New Issue
Block a user