feat: new MCP tools — get/list/update drawer, hook settings, export (resolves #635) (#667)

* feat: MCP reliability — inode detection, WAL rotation, metadata cache, search limits Infrastructure hardening for the MCP server: - Detect palace DB replacement via inode tracking (repair command support) - WAL rotation to prevent unbounded WAL growth - _fetch_all_metadata() + _get_cached_metadata() with 60s TTL for taxonomy/status - _MAX_RESULTS cap (100) with limit clamping [1, _MAX_RESULTS] - max_distance parameter for similarity threshold in search - Handle all notifications/* methods, null arguments, method=None - Remove duplicate _client_cache = None declarations - searcher.py max_distance parameter passthrough Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * feat: new MCP tools (get/list/update drawer, hook settings, memories filed), export, normalize New MCP tools: - mempalace_get_drawer: fetch single drawer by ID with full content - mempalace_list_drawers: paginated listing with wing/room filter - mempalace_update_drawer: update content/wing/room on existing drawers - mempalace_hook_settings: get/set hook behavior (silent_save, desktop_toast) - mempalace_memories_filed_away: check latest checkpoint status Also includes: - exporter.py: export palace as browsable markdown files - normalize.py: tool_use/tool_result capture for richer transcript mining - layers.py: updated for new tool integration - config.py: hook settings properties (hook_silent_save, hook_desktop_toast) Depends on PR 3 (reliability) for _MAX_RESULTS, _metadata_cache, WAL logging. 
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: normalize.py handles string messages and Read offset type mismatch Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: params null guard, L2→cosine docs, empty tool_use_map key guard - Handle explicit null in MCP params (request.get("params") or {}) - Fix search tool description: L2 → cosine distance (collection uses hnsw:space=cosine) - Guard against empty string key in tool_use_map from malformed JSONL entries Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: rename ambiguous var 'l' to 'line' (E741 lint) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: address code review findings (5 issues) 1. min_similarity backwards-compat: convert similarity to distance scale (1.0 - similarity) instead of passing raw value as max_distance 2. Restore structured error reporting (error + partial fields) in tool_status, tool_list_wings, tool_list_rooms, tool_get_taxonomy — reverts silent except:pass that dropped #647 security hardening 3. inode cache: remove falsy-zero short-circuit so missing DB file triggers reconnect instead of reusing stale client 4. _fetch_all_metadata: check for empty batch before extending/advancing offset to prevent infinite loop on concurrent deletion 5. KG initialization: only override path when --palace is explicit; default runs use KnowledgeGraph's built-in default path Co-authored-by: jphein <jphein@users.noreply.github.com> --------- Co-authored-by: jp <jp@jphein.com> Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com> Co-authored-by: jphein <jphein@users.noreply.github.com>
2026-04-11 21:25:04 -07:00
parent 58eca5075a
commit 20c8f8e57b
9 changed files with 1429 additions and 164 deletions
@@ -0,0 +1,153 @@
+"""
+exporter.py — Export the palace as a browsable folder of markdown files.
+
+Produces:
+  output_dir/
+    index.md              — table of contents
+    wing_name/
+      room_name.md        — one file per room, drawers as sections
+
+Streams drawers in paginated batches so memory usage stays bounded
+regardless of palace size.
+"""
+
+import os
+import re
+from collections import defaultdict
+from datetime import datetime
+
+from .palace import get_collection
+
+
+def _safe_path_component(name: str) -> str:
+    """Sanitize a string for use as a single directory/file name component.
+
+    Path separators, the characters ``: * ? " < > |`` and ASCII control
+    characters (including NUL, which ``open()`` rejects outright) are each
+    replaced with ``_``.  Leading and trailing dots and spaces are then
+    stripped, so ``"."`` and ``".."`` can never be returned.
+
+    Args:
+        name: Raw name to sanitize.  Non-string values are tolerated:
+            ``None`` is treated as empty and anything else is converted
+            with ``str()``.
+
+    Returns:
+        The sanitized component, or ``"unknown"`` if nothing usable remains.
+    """
+    if not isinstance(name, str):
+        name = "" if name is None else str(name)
+    # \x00-\x1f covers NUL, tabs and newlines, none of which belong in a
+    # file name and some of which make open() raise.
+    name = re.sub(r'[/\\:*?"<>|\x00-\x1f]', "_", name)
+    name = name.strip(". ")
+    return name or "unknown"
+
+
+def _table_cell(value: object) -> str:
+    """Render *value* as a single Markdown table cell.
+
+    Pipes are backslash-escaped and line breaks are flattened to spaces so
+    the value cannot terminate the cell or the row early.
+    """
+    return str(value).replace("|", "\\|").replace("\r", " ").replace("\n", " ")
+
+
+def export_palace(palace_path: str, output_dir: str, format: str = "markdown") -> dict:
+    """Export all palace drawers as markdown files organized by wing/room.
+
+    Streams drawers in batches of 1000 and writes each wing/room file
+    incrementally, keeping memory usage proportional to batch size rather
+    than total palace size.  A room file is truncated the first time it is
+    touched during a run and appended to afterwards, so re-exporting into
+    an existing directory replaces the room files it writes.
+
+    Args:
+        palace_path: Path to the ChromaDB palace directory.
+        output_dir: Where to write the exported markdown tree.
+        format: Output format (currently only "markdown").  The value is
+            not inspected; markdown is always produced.
+
+    Returns:
+        Stats dict: {"wings": N, "rooms": N, "drawers": N}
+    """
+    # Local import: only the index links need it.
+    from urllib.parse import quote
+
+    col = get_collection(palace_path)
+    total = col.count()
+
+    if total == 0:
+        print("  Palace is empty — nothing to export.")
+        return {"wings": 0, "rooms": 0, "drawers": 0}
+
+    os.makedirs(output_dir, exist_ok=True)
+
+    # Room files already started during this run, keyed by their path on
+    # disk.  Keying on the sanitized path (not the raw names) matters: two
+    # distinct raw names can sanitize to the same file, and the second one
+    # must append rather than truncate what the first wrote.
+    started_files: set[str] = set()
+    # Track stats per wing: {wing: {room: count}}
+    wing_stats: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))
+    # Raw wing name -> directory name actually created, for the index links.
+    wing_dirs: dict[str, str] = {}
+    total_drawers = 0
+
+    print(f"  Streaming {total} drawers...")
+    offset = 0
+    while offset < total:
+        batch = col.get(limit=1000, offset=offset, include=["documents", "metadatas"])
+        ids = batch["ids"]
+        if not ids:
+            # Fewer rows than count() reported (e.g. concurrent deletion).
+            break
+
+        # Group this batch by wing/room so we do one file write per room per batch
+        batch_grouped: dict[str, dict[str, list]] = defaultdict(lambda: defaultdict(list))
+        for doc_id, doc, meta in zip(ids, batch["documents"], batch["metadatas"]):
+            # Drawers stored without metadata or without a document come
+            # back as None; treat both as empty instead of crashing.
+            meta = meta or {}
+            wing = str(meta.get("wing") or "unknown")
+            room = str(meta.get("room") or "general")
+            batch_grouped[wing][room].append({
+                "id": doc_id,
+                "content": doc or "",
+                "source": meta.get("source_file", ""),
+                "filed_at": meta.get("filed_at", ""),
+                "added_by": meta.get("added_by", ""),
+            })
+
+        # Write/append each room file
+        for wing, rooms in batch_grouped.items():
+            safe_wing = _safe_path_component(wing)
+            wing_dirs[wing] = safe_wing
+            wing_dir = os.path.join(output_dir, safe_wing)
+            os.makedirs(wing_dir, exist_ok=True)
+
+            for room, drawers in rooms.items():
+                safe_room = _safe_path_component(room)
+                room_path = os.path.join(wing_dir, f"{safe_room}.md")
+                is_new = room_path not in started_files
+
+                with open(room_path, "w" if is_new else "a", encoding="utf-8") as f:
+                    if is_new:
+                        f.write(f"# {wing} / {room}\n\n")
+                        started_files.add(room_path)
+
+                    for drawer in drawers:
+                        source = drawer["source"] or "unknown"
+                        filed = drawer["filed_at"] or "unknown"
+                        added_by = drawer["added_by"] or "unknown"
+
+                        f.write(
+                            f"## {drawer['id']}\n"
+                            f"\n"
+                            f"> {_quote_content(drawer['content'])}\n"
+                            f"\n"
+                            f"| Field | Value |\n"
+                            f"|-------|-------|\n"
+                            f"| Source | {_table_cell(source)} |\n"
+                            f"| Filed | {_table_cell(filed)} |\n"
+                            f"| Added by | {_table_cell(added_by)} |\n"
+                            f"\n"
+                            f"---\n\n"
+                        )
+
+                wing_stats[wing][room] += len(drawers)
+                total_drawers += len(drawers)
+
+        offset += len(ids)
+
+    # Build and print stats
+    index_rows = []
+    for wing in sorted(wing_stats):
+        rooms = wing_stats[wing]
+        wing_drawer_count = sum(rooms.values())
+        index_rows.append((wing, len(rooms), wing_drawer_count))
+        print(f"  {wing}: {len(rooms)} rooms, {wing_drawer_count} drawers")
+
+    # Write index.md
+    today = datetime.now().strftime("%Y-%m-%d")
+    index_lines = [
+        f"# Palace Export — {today}\n",
+        "",
+        "| Wing | Rooms | Drawers |",
+        "|------|-------|---------|",
+    ]
+    for wing, room_count, drawer_count in index_rows:
+        # Link to the directory that was actually created (the sanitized
+        # name), percent-encoded so spaces and parentheses do not break the
+        # Markdown link destination.
+        link = quote(wing_dirs[wing]) + "/"
+        index_lines.append(
+            f"| [{_table_cell(wing)}]({link}) | {room_count} | {drawer_count} |"
+        )
+    index_lines.append("")
+
+    index_path = os.path.join(output_dir, "index.md")
+    with open(index_path, "w", encoding="utf-8") as f:
+        f.write("\n".join(index_lines))
+
+    stats = {
+        "wings": len(wing_stats),
+        "rooms": sum(room_count for _, room_count, _ in index_rows),
+        "drawers": total_drawers,
+    }
+    print(f"\n  Exported {stats['drawers']} drawers across {stats['wings']} wings, {stats['rooms']} rooms")
+    print(f"  Output: {output_dir}")
+    return stats
+
+
+def _quote_content(text: str) -> str:
+    """Format content for a markdown blockquote, handling multiline.
+
+    The caller supplies the leading ``"> "`` for the first line; every
+    following line is prefixed with ``"> "`` here.
+
+    Args:
+        text: Drawer content.  ``None`` is treated as empty content.
+
+    Returns:
+        The content with trailing newlines removed and ``"\\n> "`` between
+        lines.  ``\\r\\n`` and bare ``\\r`` are treated as line breaks too,
+        so every line of CRLF content gets its quote prefix.
+    """
+    if text is None:
+        return ""
+    # Normalize line endings first so that a trailing "\r\n" is fully
+    # stripped and a bare "\r" cannot start an unquoted line.
+    normalized = text.replace("\r\n", "\n").replace("\r", "\n")
+    return "\n> ".join(normalized.rstrip("\n").split("\n"))