From 7fc260f75236551707fa3932adb994d4cb3f6332 Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Sun, 3 May 2026 05:48:41 -0300 Subject: [PATCH] fix(mcp): basename source_file in tool_get_drawer responses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MCP `mempalace_get_drawer` tool returned the entire raw drawer metadata blob to any connected client, and the `source_file` field in that blob is the absolute filesystem path written by the miners (`miner.py`, `convo_miner.py` — `source_file = str(filepath)`). On a single-user local deployment this is self-disclosure, but in nested-agent or multi-server MCP topologies the client is a separate trust domain and the host's directory layout has no documented client-side use. Mirror the mitigation that `searcher.search_memories()` already applies on its own return path: reduce `source_file` to its basename via `Path(source_file).name` before handing the metadata to the client. Citations still work — the directory layout does not leak. Companion to #1 (omit palace_path from tool_status). Same threat class, different surface: - mempalace_status — palace dir path → fixed in #1 - mempalace_get_drawer — per-drawer source_file path → this PR Other read tools were audited and do not leak host paths: - mempalace_search — already basenames source_file - mempalace_list_drawers — returns wing/room/preview only - mempalace_diary_read — date/timestamp/topic/content only - mempalace_reconnect — success/message/drawers only - mempalace_kg_* — entity/predicate strings, counts - mempalace_check_duplicate — wing/room/preview only Changes: - mempalace/mcp_server.py: tool_get_drawer() now basenames metadata.source_file - tests/test_mcp_server.py: regression test asserting the absolute path and its parent directory do not appear anywhere in the response - website/reference/mcp-tools.md: clarify the documented return shape --- mempalace/mcp_server.py | 15 ++++++++++--- tests/test_mcp_server.py | 39 ++++++++++++++++++++++++++++++++++ website/reference/mcp-tools.md | 2 +- 3 files changed, 52 insertions(+), 4 deletions(-) diff --git a/mempalace/mcp_server.py b/mempalace/mcp_server.py index 4aab316..b010ab9 100644 --- a/mempalace/mcp_server.py +++ b/mempalace/mcp_server.py @@ -912,12 +912,21 @@ def tool_get_drawer(drawer_id: str): return {"error": f"Drawer not found: {drawer_id}"} meta = result["metadatas"][0] doc = result["documents"][0] + # source_file is the absolute filesystem path written by the + # miners. Reduce to its basename before handing it to the MCP + # client — same threat model as the palace_path leak fix: + # nested-agent / multi-server topologies treat the client as a + # separate trust domain. Basename preserves citation utility. + # Mirrors the searcher.search_memories() return shape. + safe_meta = dict(meta) if meta else {} + if safe_meta.get("source_file"): + safe_meta["source_file"] = Path(safe_meta["source_file"]).name return { "drawer_id": drawer_id, "content": doc, - "wing": meta.get("wing", ""), - "room": meta.get("room", ""), - "metadata": meta, + "wing": safe_meta.get("wing", ""), + "room": safe_meta.get("room", ""), + "metadata": safe_meta, } except Exception as e: return {"error": str(e)} diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index f8148af..0e37e35 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -531,6 +531,45 @@ class TestWriteTools: result = tool_get_drawer("nonexistent_drawer") assert "error" in result + def test_get_drawer_does_not_leak_absolute_source_file_path( + self, monkeypatch, config, palace_path, collection, kg + ): + """tool_get_drawer must not expose the absolute filesystem path + that the miners write into ``source_file``. Same threat class as + the palace_path leak in mempalace_status: in nested-agent or + multi-server MCP topologies the client is a separate trust + domain, and the directory layout of the host has no documented + client-side use. Basename is enough for citation.""" + _patch_mcp_server(monkeypatch, config, kg) + + secret_dir = "/private/home/alice/secret-research/2026" + absolute_source = f"{secret_dir}/notes.md" + collection.add( + ids=["drawer_leak_probe"], + documents=["verbatim drawer body for leak probe"], + metadatas=[ + { + "wing": "research", + "room": "notes", + "source_file": absolute_source, + "chunk_index": 0, + "added_by": "miner", + "filed_at": "2026-05-03T00:00:00", + } + ], + ) + + from mempalace.mcp_server import tool_get_drawer + + result = tool_get_drawer("drawer_leak_probe") + assert result["drawer_id"] == "drawer_leak_probe" + assert result["metadata"]["source_file"] == "notes.md" + # Defense-in-depth: no field anywhere in the response should + # contain the absolute path or its parent directory. + serialized = json.dumps(result) + assert absolute_source not in serialized + assert secret_dir not in serialized + def test_list_drawers(self, monkeypatch, config, palace_path, seeded_collection, kg): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_list_drawers diff --git a/website/reference/mcp-tools.md b/website/reference/mcp-tools.md index 671225a..6866aa6 100644 --- a/website/reference/mcp-tools.md +++ b/website/reference/mcp-tools.md @@ -122,7 +122,7 @@ Fetch a single drawer by ID — returns full content and metadata. |-----------|------|----------|-------------| | `drawer_id` | string | **Yes** | ID of the drawer to fetch | -**Returns:** `{ drawer: { id, wing, room, content, ... } }` +**Returns:** `{ drawer_id, content, wing, room, metadata }` where `metadata.source_file`, when present, is the basename only — the absolute path written by the miners is reduced before the dict is returned to MCP clients. ---