From 4d581cbb730b26d78e29e4e85115b948e0c0603e Mon Sep 17 00:00:00 2001
From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com>
Date: Mon, 13 Apr 2026 07:51:46 -0300
Subject: [PATCH 1/2] =?UTF-8?q?feat:=20optional=20LLM-based=20closet=20reg?=
 =?UTF-8?q?eneration=20=E2=80=94=20bring-your-own=20endpoint?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds mempalace/closet_llm.py as an OPTIONAL path for richer closet
generation. Regex closets remain the default and cover the local-first
promise; users who want LLM-quality topics can bring their own endpoint.

Configuration (env or CLI flag):
  LLM_ENDPOINT — OpenAI-compatible base URL (required)
  LLM_KEY      — bearer token (optional; local inference skips this)
  LLM_MODEL    — model name (required)

Works with Ollama, vLLM, llama.cpp servers, OpenAI, OpenRouter, and any
other provider that speaks OpenAI-compatible /chat/completions. Zero new
dependencies — uses stdlib urllib.

Replaces the original Anthropic-SDK-hardcoded version of this module
from Milla's branch (commit 935f657). Same prompt, same parsing, same
regenerate_closets flow; only the transport was generalised so the
feature doesn't lock users into a specific vendor or require API keys
for core memory operations (CLAUDE.md, "Local-first, zero API").

Includes 13 unit tests covering config resolution, request shape,
auth-header omission when no key is set, code-fence stripping, and
missing-config error path. All mocked — zero network calls in tests.

Co-Authored-By: MSL <232237854+milla-jovovich@users.noreply.github.com>
---
 mempalace/closet_llm.py  | 345 +++++++++++++++++++++++++++++++++++++++
 tests/test_closet_llm.py | 222 +++++++++++++++++++++++++
 2 files changed, 567 insertions(+)
 create mode 100644 mempalace/closet_llm.py
 create mode 100644 tests/test_closet_llm.py

diff --git a/mempalace/closet_llm.py b/mempalace/closet_llm.py
new file mode 100644
index 0000000..35ec6d6
--- /dev/null
+++ b/mempalace/closet_llm.py
@@ -0,0 +1,345 @@
+"""
+closet_llm.py — Generate closets via a user-configured LLM for richer indexing.
+
+The regex-based closet extraction catches action verbs, headers, and proper
+nouns — but misses implicit topics, foreign-language content, and contextual
+references. An LLM reads everything and produces better closets.
+
+This module is **OPTIONAL and opt-in**. Regex closets are always created by
+the miner; this path regenerates them afterward using whatever LLM the user
+chooses. Core memory operations remain API-free by design (see CLAUDE.md,
+"Local-first, zero API").
+
+## Bring-your-own-LLM configuration
+
+The endpoint is any OpenAI-compatible Chat Completions URL:
+
+    LLM_ENDPOINT=http://localhost:11434/v1   # Ollama
+    LLM_ENDPOINT=http://localhost:8000/v1    # vLLM, llama.cpp
+    LLM_ENDPOINT=https://api.openai.com/v1
+    LLM_ENDPOINT=https://openrouter.ai/api/v1
+    LLM_ENDPOINT=https://api.anthropic.com/v1  # when proxied through a compat layer
+
+Set:
+    LLM_ENDPOINT — base URL (required)
+    LLM_KEY      — bearer token (optional; local inference usually doesn't need it)
+    LLM_MODEL    — model name (required), e.g. "gpt-4o-mini", "llama3:8b", "qwen2.5:7b"
+
+Or pass flags on the CLI (flags win over env):
+
+    python -m mempalace.closet_llm \\
+        --palace ~/.mempalace/palace \\
+        --endpoint http://localhost:11434/v1 \\
+        --model llama3:8b
+
+No vendor lock-in. No hidden dependency on any specific provider. Zero deps
+added to pyproject — uses stdlib urllib.
+"""
+
+import json
+import os
+import re
+import time
+import urllib.request
+import urllib.error
+from datetime import datetime
+from typing import Optional
+
+from .palace import get_collection, get_closets_collection, upsert_closet_lines
+
+MAX_CONTENT_CHARS = 30000
+MAX_OUTPUT_TOKENS = 1500
+HTTP_TIMEOUT_S = 60
+
+PROMPT_TEMPLATE = """You are reading content filed in a memory palace. Generate a
+topic-dense index that will be used to find this content later when someone searches.
+
+Source: {source_file}
+Wing: {wing} | Room: {room}
+
+CONTENT:
+{content}
+
+---
+
+Output a JSON object with EXACTLY these fields:
+
+{{
+  "topics": ["distinctive_word_or_phrase_1", "topic_2", ...],
+  "quotes": ["[Speaker] verbatim quote", ...],
+  "summary": "2-3 sentences describing what this content is about."
+}}
+
+RULES:
+- Topics: 8-15 entries. Include proper nouns (names, places, projects),
+  distinctive technical terms, and key concepts. NOT generic words like
+  "conversation" or "discussion".
+- Quotes: 2-5 entries. EXACT verbatim from the content, not paraphrased.
+  Attribute with [Speaker] prefix if speaker is identifiable.
+- Summary: mention WHO, WHAT, and WHY. No filler.
+- Write in the same language as the content.
+- Output valid JSON only. No code fences. No commentary.
+"""
+
+
+class LLMConfig:
+    """Resolved LLM connection config. CLI flags > env vars."""
+
+    def __init__(
+        self,
+        endpoint: Optional[str] = None,
+        key: Optional[str] = None,
+        model: Optional[str] = None,
+    ):
+        self.endpoint = (endpoint or os.environ.get("LLM_ENDPOINT", "")).rstrip("/")
+        self.key = key or os.environ.get("LLM_KEY", "")
+        self.model = model or os.environ.get("LLM_MODEL", "")
+
+    def missing(self) -> list:
+        missing = []
+        if not self.endpoint:
+            missing.append("LLM_ENDPOINT (or --endpoint)")
+        if not self.model:
+            missing.append("LLM_MODEL (or --model)")
+        # key is optional — local inference servers (Ollama, vLLM) often don't require one
+        return missing
+
+
+def _call_llm(cfg: LLMConfig, source_file: str, wing: str, room: str, content: str):
+    """POST one chat-completions request and parse the reply as a JSON object.
+
+    Retries HTTP 429/503 up to twice (1s, 2s backoff). Returns (dict, usage), or
+    (None, None) on any failure, including JSON replies that are not an object.
+    """
+    try:
+        from mempalace.i18n import t
+
+        lang_instruction = t("aaak.instruction")
+    except Exception:
+        lang_instruction = ""
+
+    prompt = PROMPT_TEMPLATE.format(
+        source_file=source_file[:100],
+        wing=wing,
+        room=room,
+        content=content[:MAX_CONTENT_CHARS],
+    )
+    if lang_instruction and "english" not in lang_instruction.lower():
+        prompt += f"\n\nLanguage instruction: {lang_instruction}"
+
+    body = json.dumps(
+        {
+            "model": cfg.model,
+            "max_tokens": MAX_OUTPUT_TOKENS,
+            "messages": [{"role": "user", "content": prompt}],
+        }
+    ).encode("utf-8")
+
+    headers = {"Content-Type": "application/json"}
+    if cfg.key:
+        headers["Authorization"] = f"Bearer {cfg.key}"
+    url = f"{cfg.endpoint}/chat/completions"
+
+    for attempt in range(3):
+        try:
+            req = urllib.request.Request(url, data=body, headers=headers, method="POST")
+            with urllib.request.urlopen(req, timeout=HTTP_TIMEOUT_S) as resp:
+                raw = resp.read().decode("utf-8")
+            payload = json.loads(raw)
+            text = payload["choices"][0]["message"]["content"].strip()
+            text = re.sub(r"^```(?:json)?\s*", "", text)
+            text = re.sub(r"\s*```$", "", text)
+            parsed = json.loads(text)
+            # Callers use dict.get(); a bare JSON list/string/number counts as a failure.
+            return (parsed, payload.get("usage")) if isinstance(parsed, dict) else (None, None)
+        except json.JSONDecodeError:
+            return None, None
+        except urllib.error.HTTPError as e:
+            # 429 / 503 = retry with backoff
+            if e.code in (429, 503) and attempt < 2:
+                time.sleep(2 ** attempt)
+                continue
+            return None, None
+        except Exception as e:
+            if "rate" in str(e).lower() and attempt < 2:
+                time.sleep(2 ** attempt)
+                continue
+            return None, None
+    return None, None
+
+
+def _parsed_to_closet_lines(parsed, drawer_ids, entities_str):
+    """Build "text|entities|→drawer_ids" pointer lines from the LLM's parsed JSON.
+
+    Emits up to 15 topics, 5 quotes and one 200-char summary, each pointing at
+    the first three drawer ids. Missing or null fields contribute no lines.
+    """
+    drawer_ref = ",".join(drawer_ids[:3])
+    # `or []` / `or ""`: models often emit null for an empty field, which
+    # dict.get()'s default does not cover and which would crash the slice.
+    texts = list((parsed.get("topics") or [])[:15])
+    texts += (parsed.get("quotes") or [])[:5]
+    summary = str(parsed.get("summary") or "")[:200]
+    texts += [summary] if summary else []
+    return [f"{text}|{entities_str}|→{drawer_ref}" for text in texts]
+
+
+def regenerate_closets(
+    palace_path,
+    wing=None,
+    sample=0,
+    dry_run=False,
+    cfg: Optional[LLMConfig] = None,
+):
+    """Regenerate closets using a configured LLM for richer topic extraction.
+
+    Groups existing drawers by source file, sends each source's combined
+    content to the configured endpoint, and replaces that source's closets
+    with LLM-generated ones. A source whose call fails keeps its old closets.
+    """
+    if cfg is None:
+        cfg = LLMConfig()
+    missing = cfg.missing()
+    if missing:
+        print("Error: missing configuration: " + ", ".join(missing))
+        print("Set env vars LLM_ENDPOINT / LLM_MODEL (and optionally LLM_KEY),")
+        print("or pass --endpoint / --model / --key on the CLI.")
+        return {"error": "missing-config", "missing": missing}
+
+    drawers_col = get_collection(palace_path, create=False)
+    closets_col = get_closets_collection(palace_path)
+
+    total = drawers_col.count()
+    if total == 0:
+        print("No drawers in palace.")
+        return {"processed": 0}
+
+    all_data = drawers_col.get(limit=total, include=["documents", "metadatas"])
+    by_source = {}
+    for doc_id, doc, meta in zip(all_data["ids"], all_data["documents"], all_data["metadatas"]):
+        source = meta.get("source_file", "unknown")
+        w = meta.get("wing", "")
+        if wing and w != wing:
+            continue
+        if source not in by_source:
+            by_source[source] = {"drawer_ids": [], "content": [], "meta": meta}
+        by_source[source]["drawer_ids"].append(doc_id)
+        by_source[source]["content"].append(doc)
+
+    sources = list(by_source.keys())
+    if sample > 0:
+        sources = sources[:sample]
+
+    print(f"Regenerating closets for {len(sources)} source files via {cfg.endpoint} ({cfg.model})...")
+    if dry_run:
+        print("DRY RUN — no changes will be written")
+
+    processed = 0
+    failed = 0
+    total_input = 0
+    total_output = 0
+
+    for i, source in enumerate(sources, 1):
+        data = by_source[source]
+        content = "\n\n".join(data["content"])
+        meta = data["meta"]
+        w = meta.get("wing", "")
+        r = meta.get("room", "")
+        entities = meta.get("entities", "")
+
+        if dry_run:
+            print(f"  [{i}/{len(sources)}] {os.path.basename(source)} ({len(content)} chars)")
+            continue
+
+        parsed, usage = _call_llm(cfg, source, w, r, content)
+        if not parsed:
+            failed += 1
+            print(f"  [{i}/{len(sources)}] ✗ {os.path.basename(source)} — LLM failed")
+            continue
+
+        if usage:
+            total_input += usage.get("prompt_tokens", 0)
+            total_output += usage.get("completion_tokens", 0)
+
+        lines = _parsed_to_closet_lines(parsed, data["drawer_ids"], entities)
+        closet_id_base = f"closet_{w}_{r}_{source.split('/')[-1][:30]}"
+
+        # Delete old regex closets for this source before writing LLM ones
+        try:
+            old_ids = closets_col.get(
+                where={"source_file": source}, include=[]
+            ).get("ids", [])
+            if old_ids:
+                closets_col.delete(ids=old_ids)
+        except Exception:
+            pass
+
+        upsert_closet_lines(
+            closets_col,
+            closet_id_base,
+            lines,
+            {
+                "wing": w,
+                "room": r,
+                "source_file": source,
+                "generated_by": f"llm:{cfg.model}",
+                "filed_at": datetime.now().isoformat(),
+                "entities": entities,
+            },
+        )
+
+        processed += 1
+        n_topics = len(parsed.get("topics", []))
+        print(f"  [{i}/{len(sources)}] ✓ {os.path.basename(source)} — {n_topics} topics")
+
+    print(f"\nDone. {processed} regenerated, {failed} failed.")
+    if total_input or total_output:
+        print(f"Tokens: {total_input:,} in + {total_output:,} out (cost depends on provider)")
+
+    return {
+        "processed": processed,
+        "failed": failed,
+        "input_tokens": total_input,
+        "output_tokens": total_output,
+    }
+
+
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        description="Regenerate closets via a user-configured LLM (OpenAI-compatible API)"
+    )
+    parser.add_argument(
+        "--palace",
+        default=os.path.expanduser("~/.mempalace/palace"),
+        help="Path to the palace",
+    )
+    parser.add_argument("--wing", default=None, help="Limit to one wing")
+    parser.add_argument(
+        "--sample", type=int, default=0, help="Only process first N source files"
+    )
+    parser.add_argument(
+        "--dry-run", action="store_true", help="List work without calling the LLM"
+    )
+    parser.add_argument(
+        "--endpoint",
+        default=None,
+        help="LLM base URL (overrides $LLM_ENDPOINT), e.g. http://localhost:11434/v1",
+    )
+    parser.add_argument(
+        "--key",
+        default=None,
+        help="LLM bearer token (overrides $LLM_KEY). Optional for local inference.",
+    )
+    parser.add_argument(
+        "--model",
+        default=None,
+        help='LLM model name (overrides $LLM_MODEL), e.g. "gpt-4o-mini" or "llama3:8b"',
+    )
+    args = parser.parse_args()
+
+    cfg = LLMConfig(endpoint=args.endpoint, key=args.key, model=args.model)
+    regenerate_closets(
+        args.palace, wing=args.wing, sample=args.sample, dry_run=args.dry_run, cfg=cfg
+    )
diff --git a/tests/test_closet_llm.py b/tests/test_closet_llm.py
new file mode 100644
index 0000000..762e16d
--- /dev/null
+++ b/tests/test_closet_llm.py
@@ -0,0 +1,222 @@
+"""Unit tests for the optional LLM-based closet regeneration.
+
+These tests don't hit the network. They mock urllib to verify:
+- LLMConfig correctly reads env vars and CLI overrides
+- missing config is reported cleanly
+- the OpenAI-compatible request shape is correct
+- response parsing handles the standard chat-completions payload
+"""
+
+import io
+import json
+import os
+import tempfile
+from unittest.mock import patch
+
+import pytest
+
+from mempalace.closet_llm import (
+    LLMConfig,
+    _call_llm,
+    _parsed_to_closet_lines,
+    regenerate_closets,
+)
+
+
+# ── LLMConfig ─────────────────────────────────────────────────────────────
+
+
+class TestLLMConfig:
+    def test_reads_env_vars(self, monkeypatch):
+        monkeypatch.setenv("LLM_ENDPOINT", "http://localhost:11434/v1")
+        monkeypatch.setenv("LLM_KEY", "sk-abc")
+        monkeypatch.setenv("LLM_MODEL", "llama3:8b")
+        c = LLMConfig()
+        assert c.endpoint == "http://localhost:11434/v1"
+        assert c.key == "sk-abc"
+        assert c.model == "llama3:8b"
+
+    def test_cli_flags_override_env(self, monkeypatch):
+        monkeypatch.setenv("LLM_ENDPOINT", "http://env-endpoint/v1")
+        monkeypatch.setenv("LLM_MODEL", "env-model")
+        c = LLMConfig(endpoint="http://flag-endpoint/v1", model="flag-model")
+        assert c.endpoint == "http://flag-endpoint/v1"
+        assert c.model == "flag-model"
+
+    def test_trailing_slash_stripped(self):
+        c = LLMConfig(endpoint="http://foo/v1/", model="m")
+        assert c.endpoint == "http://foo/v1"
+
+    def test_missing_reports_required(self, monkeypatch):
+        monkeypatch.delenv("LLM_ENDPOINT", raising=False)
+        monkeypatch.delenv("LLM_KEY", raising=False)
+        monkeypatch.delenv("LLM_MODEL", raising=False)
+        c = LLMConfig()
+        missing = c.missing()
+        assert any("ENDPOINT" in m for m in missing)
+        assert any("MODEL" in m for m in missing)
+        # key is optional
+        assert not any("KEY" in m for m in missing)
+
+    def test_key_is_optional(self, monkeypatch):
+        monkeypatch.delenv("LLM_KEY", raising=False)
+        c = LLMConfig(endpoint="http://local/v1", model="m")
+        assert c.missing() == []
+
+
+# ── _parsed_to_closet_lines ──────────────────────────────────────────────
+
+
+class TestParsedToLines:
+    def test_topics_become_pointers(self):
+        parsed = {"topics": ["authentication", "jwt tokens"], "quotes": [], "summary": ""}
+        lines = _parsed_to_closet_lines(parsed, ["d1", "d2"], "Alice;Bob")
+        assert len(lines) == 2
+        assert "authentication|Alice;Bob|→d1,d2" in lines
+        assert "jwt tokens|Alice;Bob|→d1,d2" in lines
+
+    def test_quotes_and_summary_included(self):
+        parsed = {
+            "topics": ["t1"],
+            "quotes": ["[Igor] we ship Friday"],
+            "summary": "Release planning discussion",
+        }
+        lines = _parsed_to_closet_lines(parsed, ["d1"], "")
+        joined = "\n".join(lines)
+        assert "we ship Friday" in joined
+        assert "Release planning discussion" in joined
+
+    def test_caps_topics_at_15(self):
+        parsed = {"topics": [f"t{i}" for i in range(20)], "quotes": [], "summary": ""}
+        lines = _parsed_to_closet_lines(parsed, ["d1"], "")
+        assert len(lines) == 15
+
+
+# ── _call_llm (HTTP mocked) ──────────────────────────────────────────────
+
+
+class _FakeResp:
+    """Mimics urlopen's context-manager response."""
+
+    def __init__(self, payload: dict, status: int = 200):
+        self._body = json.dumps(payload).encode("utf-8")
+        self.status = status
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *a):
+        return False
+
+    def read(self):
+        return self._body
+
+
+class TestCallLLM:
+    def _make_cfg(self):
+        return LLMConfig(
+            endpoint="http://localhost:11434/v1", key="sk-test", model="llama3:8b"
+        )
+
+    def test_request_shape_and_parsing(self):
+        cfg = self._make_cfg()
+        captured = {}
+
+        def fake_urlopen(req, timeout=None):
+            captured["url"] = req.full_url
+            captured["headers"] = dict(req.header_items())
+            captured["body"] = json.loads(req.data.decode("utf-8"))
+            return _FakeResp(
+                {
+                    "choices": [
+                        {
+                            "message": {
+                                "content": json.dumps(
+                                    {
+                                        "topics": ["postgres"],
+                                        "quotes": ["[Igor] migrate now"],
+                                        "summary": "db migration",
+                                    }
+                                )
+                            }
+                        }
+                    ],
+                    "usage": {"prompt_tokens": 42, "completion_tokens": 17},
+                }
+            )
+
+        with patch("urllib.request.urlopen", side_effect=fake_urlopen):
+            parsed, usage = _call_llm(cfg, "/tmp/test.md", "w", "r", "content body")
+
+        assert parsed["topics"] == ["postgres"]
+        assert usage["prompt_tokens"] == 42
+        assert captured["url"] == "http://localhost:11434/v1/chat/completions"
+        # urllib stores header names via str.capitalize(), so match the key case-insensitively
+        auth_vals = {v for k, v in captured["headers"].items() if k.lower() == "authorization"}
+        assert "Bearer sk-test" in auth_vals
+        assert captured["body"]["model"] == "llama3:8b"
+        assert captured["body"]["messages"][0]["role"] == "user"
+
+    def test_omits_auth_header_when_no_key(self):
+        cfg = LLMConfig(endpoint="http://localhost:11434/v1", model="llama3:8b")
+        captured_headers = {}
+
+        def fake_urlopen(req, timeout=None):
+            captured_headers.update({k.lower(): v for k, v in req.header_items()})
+            return _FakeResp(
+                {
+                    "choices": [
+                        {"message": {"content": '{"topics":[],"quotes":[],"summary":""}'}}
+                    ],
+                    "usage": {"prompt_tokens": 0, "completion_tokens": 0},
+                }
+            )
+
+        with patch("urllib.request.urlopen", side_effect=fake_urlopen):
+            _call_llm(cfg, "/tmp/x", "w", "r", "c")
+
+        assert "authorization" not in captured_headers
+
+    def test_strips_code_fences(self):
+        cfg = self._make_cfg()
+        fenced = '```json\n{"topics":["t1"],"quotes":[],"summary":""}\n```'
+
+        def fake_urlopen(req, timeout=None):
+            return _FakeResp(
+                {
+                    "choices": [{"message": {"content": fenced}}],
+                    "usage": {"prompt_tokens": 1, "completion_tokens": 1},
+                }
+            )
+
+        with patch("urllib.request.urlopen", side_effect=fake_urlopen):
+            parsed, _ = _call_llm(cfg, "/tmp/x", "w", "r", "c")
+        assert parsed == {"topics": ["t1"], "quotes": [], "summary": ""}
+
+    def test_returns_none_on_invalid_json(self):
+        cfg = self._make_cfg()
+
+        def fake_urlopen(req, timeout=None):
+            return _FakeResp(
+                {
+                    "choices": [{"message": {"content": "not json at all"}}],
+                    "usage": {"prompt_tokens": 1, "completion_tokens": 1},
+                }
+            )
+
+        with patch("urllib.request.urlopen", side_effect=fake_urlopen):
+            parsed, usage = _call_llm(cfg, "/tmp/x", "w", "r", "c")
+        assert parsed is None
+
+
+# ── regenerate_closets error paths ───────────────────────────────────────
+
+
+class TestRegenerateClosets:
+    def test_missing_config_returns_error(self, monkeypatch):
+        monkeypatch.delenv("LLM_ENDPOINT", raising=False)
+        monkeypatch.delenv("LLM_MODEL", raising=False)
+        with tempfile.TemporaryDirectory() as palace:
+            result = regenerate_closets(palace)
+            assert result["error"] == "missing-config"
+            assert any("ENDPOINT" in m for m in result["missing"])

From 8e446f904ce00f58347fa5469ae1dadfa1278637 Mon Sep 17 00:00:00 2001
From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com>
Date: Mon, 13 Apr 2026 08:43:54 -0300
Subject: [PATCH 2/2] =?UTF-8?q?fix(search):=20hybrid=20closet+drawer=20ret?=
 =?UTF-8?q?rieval=20=E2=80=94=20closets=20boost,=20never=20gate=20(#795)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 mempalace/searcher.py       | 240 +++++++++++++++++++-----------------
 tests/test_hybrid_search.py | 141 +++++++++++++++++++++
 2 files changed, 270 insertions(+), 111 deletions(-)
 create mode 100644 tests/test_hybrid_search.py

diff --git a/mempalace/searcher.py b/mempalace/searcher.py
index 19b07f4..06806aa 100644
--- a/mempalace/searcher.py
+++ b/mempalace/searcher.py
@@ -183,138 +183,156 @@ def search_memories(
 
     where = build_where_filter(wing, room)
 
-    # Try closet-first search: search the compact index, then hydrate drawers
-    closet_hits = []
+    # Hybrid retrieval: always query drawers directly (the floor), then use
+    # closet hits to boost rankings. Closets are a ranking SIGNAL, never a
+    # GATE — direct drawer search is always the baseline.
+    #
+    # This avoids the "weak-closets regression" where narrative content
+    # produces low-signal closets (regex extraction matches few topics)
+    # and closet-first routing hides drawers that direct search would find.
+    try:
+        dkwargs = {
+            "query_texts": [query],
+            "n_results": n_results * 3,  # over-fetch for re-ranking
+            "include": ["documents", "metadatas", "distances"],
+        }
+        if where:
+            dkwargs["where"] = where
+        drawer_results = drawers_col.query(**dkwargs)
+    except Exception as e:
+        return {"error": f"Search error: {e}"}
+
+    # Gather closet hits (best-per-source) to build a boost lookup.
+    closet_boost_by_source = {}  # source_file -> (rank, closet_dist, preview)
     try:
         closets_col = get_closets_collection(palace_path, create=False)
         ckwargs = {
             "query_texts": [query],
-            "n_results": n_results * 2,  # over-fetch closets to find best drawers
+            "n_results": n_results * 2,
             "include": ["documents", "metadatas", "distances"],
         }
         if where:
             ckwargs["where"] = where
         closet_results = closets_col.query(**ckwargs)
-        if closet_results["documents"][0]:
-            closet_hits = list(zip(
+        for rank, (doc, meta, dist) in enumerate(
+            zip(
                 closet_results["documents"][0],
                 closet_results["metadatas"][0],
                 closet_results["distances"][0],
-            ))
+            )
+        ):
+            source = meta.get("source_file", "")
+            if source and source not in closet_boost_by_source:
+                closet_boost_by_source[source] = (rank, dist, doc[:200])
     except Exception:
-        pass  # no closets yet — fall through to direct drawer search
+        pass  # no closets yet — hybrid degrades to pure drawer search
 
-    # If closets found results, hydrate the referenced drawers
-    MAX_HYDRATION_CHARS = 10000  # cap to prevent blowup on large source files
+    # Rank-based boost. Ordinal signal (which closet matched best) is more
+    # reliable than absolute distance on narrative content.
+    CLOSET_RANK_BOOSTS = [0.40, 0.25, 0.15, 0.08, 0.04]
+    CLOSET_DISTANCE_CAP = 1.5  # cosine dist > 1.5 = too weak to use as signal
 
-    if closet_hits:
-        import re
-        seen_sources = set()
-        hits = []
-        for closet_doc, closet_meta, closet_dist in closet_hits:
-            source = closet_meta.get("source_file", "")
-            if source in seen_sources:
-                continue
-            seen_sources.add(source)
-
-            # Find drawers for this source file, grep for most relevant chunk
-            try:
-                drawer_results = drawers_col.get(
-                    where={"source_file": source},
-                    include=["documents", "metadatas"],
-                )
-                if drawer_results.get("ids"):
-                    # Drawer-grep: score each chunk against the query,
-                    # return the best-matching chunk first + surrounding context
-                    query_terms = set(re.findall(r'\w{2,}', query.lower()))
-                    best_idx = 0
-                    best_score = -1
-                    for idx, doc in enumerate(drawer_results["documents"]):
-                        doc_lower = doc.lower()
-                        score = sum(1 for t in query_terms if t in doc_lower)
-                        if score > best_score:
-                            best_score = score
-                            best_idx = idx
-
-                    # Build result: best chunk first, then neighbors
-                    docs = drawer_results["documents"]
-                    n_docs = len(docs)
-                    # Include best chunk + 1 before + 1 after for context
-                    start = max(0, best_idx - 1)
-                    end = min(n_docs, best_idx + 2)
-                    relevant_text = "\n\n".join(docs[start:end])
-
-                    if len(relevant_text) > MAX_HYDRATION_CHARS:
-                        relevant_text = relevant_text[:MAX_HYDRATION_CHARS] + f"\n\n[...truncated. {n_docs} total drawers. Use mempalace_get_drawer for full content.]"
-
-                    meta = drawer_results["metadatas"][best_idx]
-                    hits.append({
-                        "text": relevant_text,
-                        "wing": meta.get("wing", "unknown"),
-                        "room": meta.get("room", "unknown"),
-                        "source_file": Path(source).name,
-                        "similarity": round(max(0.0, 1 - closet_dist), 3),
-                        "distance": round(closet_dist, 4),
-                        "matched_via": "closet",
-                        "closet_preview": closet_doc[:200],
-                        "drawer_index": best_idx,
-                        "total_drawers": n_docs,
-                    })
-            except Exception:
-                pass
-
-            if len(hits) >= n_results:
-                break
-
-        if hits:
-            # Re-rank with BM25 hybrid scoring
-            hits = _hybrid_rank(hits, query)
-            return {
-                "query": query,
-                "filters": {"wing": wing, "room": room},
-                "total_before_filter": len(closet_hits),
-                "results": hits,
-            }
-
-    # Fallback: direct drawer search (no closets yet, or closets empty)
-    try:
-        kwargs = {
-            "query_texts": [query],
-            "n_results": n_results,
-            "include": ["documents", "metadatas", "distances"],
-        }
-        if where:
-            kwargs["where"] = where
-
-        results = drawers_col.query(**kwargs)
-    except Exception as e:
-        return {"error": f"Search error: {e}"}
-
-    docs = results["documents"][0]
-    metas = results["metadatas"][0]
-    dists = results["distances"][0]
-
-    hits = []
-    for doc, meta, dist in zip(docs, metas, dists):
-        # Filter on raw distance before rounding to avoid precision loss
+    scored = []
+    for doc, meta, dist in zip(
+        drawer_results["documents"][0],
+        drawer_results["metadatas"][0],
+        drawer_results["distances"][0],
+    ):
         if max_distance > 0.0 and dist > max_distance:
             continue
-        hits.append(
-            {
-                "text": doc,
-                "wing": meta.get("wing", "unknown"),
-                "room": meta.get("room", "unknown"),
-                "source_file": Path(meta.get("source_file", "?")).name,
-                "similarity": round(max(0.0, 1 - dist), 3),
-                "distance": round(dist, 4),
-            }
-        )
 
-    # Re-rank with BM25 hybrid scoring
+        source = meta.get("source_file", "")
+        boost = 0.0
+        matched_via = "drawer"
+        closet_preview = None
+        if source in closet_boost_by_source:
+            c_rank, c_dist, c_preview = closet_boost_by_source[source]
+            if c_dist <= CLOSET_DISTANCE_CAP and c_rank < len(CLOSET_RANK_BOOSTS):
+                boost = CLOSET_RANK_BOOSTS[c_rank]
+                matched_via = "drawer+closet"
+                closet_preview = c_preview
+
+        effective_dist = dist - boost
+        entry = {
+            "text": doc,
+            "wing": meta.get("wing", "unknown"),
+            "room": meta.get("room", "unknown"),
+            "source_file": Path(source).name if source else "?",
+            "similarity": round(max(0.0, 1 - effective_dist), 3),
+            "distance": round(dist, 4),
+            "effective_distance": round(effective_dist, 4),
+            "closet_boost": round(boost, 3),
+            "matched_via": matched_via,
+            "_sort_key": effective_dist,
+        }
+        if closet_preview:
+            entry["closet_preview"] = closet_preview
+        scored.append(entry)
+
+    scored.sort(key=lambda h: h["_sort_key"])
+    hits = scored[:n_results]
+
+    # Drawer-grep enrichment: for top hits whose source file has multiple
+    # drawers, return the best-matching chunk + its immediate neighbors
+    # instead of just the single drawer. Preserves the chunk-expansion
+    # behavior users relied on in the closet-first path.
+    MAX_HYDRATION_CHARS = 10000
+    import re as _re
+
+    for h in hits:
+        if h["matched_via"] == "drawer":
+            continue
+        # Only enrich closet-matched hits (cheap: we already know source matters)
+        source_name = h["source_file"]
+        # Look up full source_file by matching suffix in candidate pool
+        full_source = next(
+            (
+                m.get("source_file", "")
+                for m in drawer_results["metadatas"][0]
+                if m.get("source_file", "").endswith(source_name)
+            ),
+            "",
+        )
+        if not full_source:
+            continue
+        try:
+            source_drawers = drawers_col.get(
+                where={"source_file": full_source}, include=["documents"]
+            )
+        except Exception:
+            continue
+        docs = source_drawers.get("documents") or []
+        if len(docs) <= 1:
+            continue
+
+        query_terms = set(_re.findall(r"\w{2,}", query.lower()))
+        best_idx, best_score = 0, -1
+        for idx, d in enumerate(docs):
+            d_lower = d.lower()
+            s = sum(1 for t in query_terms if t in d_lower)
+            if s > best_score:
+                best_score, best_idx = s, idx
+
+        start = max(0, best_idx - 1)
+        end = min(len(docs), best_idx + 2)
+        expanded = "\n\n".join(docs[start:end])
+        if len(expanded) > MAX_HYDRATION_CHARS:
+            expanded = (
+                expanded[:MAX_HYDRATION_CHARS]
+                + f"\n\n[...truncated. {len(docs)} total drawers. Use mempalace_get_drawer for full content.]"
+            )
+        h["text"] = expanded
+        h["drawer_index"] = best_idx
+        h["total_drawers"] = len(docs)
+
+    # BM25 hybrid re-rank within the final candidate set
     hits = _hybrid_rank(hits, query)
+    for h in hits:
+        h.pop("_sort_key", None)
+
     return {
         "query": query,
         "filters": {"wing": wing, "room": room},
-        "total_before_filter": len(docs),
+        "total_before_filter": len(drawer_results["documents"][0]),
         "results": hits,
     }
diff --git a/tests/test_hybrid_search.py b/tests/test_hybrid_search.py
new file mode 100644
index 0000000..02d3f5f
--- /dev/null
+++ b/tests/test_hybrid_search.py
@@ -0,0 +1,141 @@
+"""Tests for the hybrid closet+drawer retrieval in search_memories.
+
+The hybrid path queries drawers directly (the floor) AND closets, applying a
+rank-based boost to drawers whose source_file appears in top closet hits.
+This avoids the "weak-closets regression" where low-signal closets (from
+regex extraction on narrative content) could hide drawers that direct
+search would have found.
+"""
+
+import os
+import tempfile
+
+import chromadb
+import pytest
+
+from mempalace.palace import (
+    get_collection,
+    get_closets_collection,
+    upsert_closet_lines,
+)
+from mempalace.searcher import search_memories
+
+
+def _seed_drawers(palace_path):
+    """Upsert four fixed drawers D1-D4; rows below are (wing, room, document text)."""
+    rows = [
+        ("backend", "auth", "We switched the auth service to use JWT tokens with a 24h expiry."),
+        ("backend", "db", "Database migration to PostgreSQL 15 completed last Tuesday."),
+        ("frontend", "state", "The frontend team is debating whether to adopt TanStack Query."),
+        ("backend", "queue", "Kafka consumer rebalance timeout set to 45 seconds after incident."),
+    ]
+    drawer_ids = ["D1", "D2", "D3", "D4"]
+    drawers = get_collection(palace_path, create=True)
+    drawers.upsert(
+        ids=drawer_ids,
+        documents=[text for _, _, text in rows],
+        metadatas=[
+            {"wing": wing, "room": room, "source_file": f"fixture_{drawer_id}.md"}
+            for drawer_id, (wing, room, _) in zip(drawer_ids, rows)
+        ],
+    )
+
+
+def _seed_strong_closet_for(palace_path, drawer_id, source_file, topics):
+    """Upsert one closet line per topic, each one pointing at ``drawer_id``."""
+    # Wing and room are fixed to backend/auth whichever drawer is targeted.
+    closet_meta = {
+        "wing": "backend",
+        "room": "auth",
+        "source_file": source_file,
+        "generated_by": "test",
+    }
+    upsert_closet_lines(
+        get_closets_collection(palace_path),
+        closet_id_base=f"closet_{drawer_id}",
+        lines=[f"{topic}||→{drawer_id}" for topic in topics],
+        metadata=closet_meta,
+    )
+
+
+# ── core invariant: closets can only HELP, never HIDE ─────────────────────
+
+
+class TestHybridInvariant:
+    """Closets may re-rank drawer hits; they must never remove one from the results."""
+
+    def test_no_closets_degrades_to_direct_drawer_search(self, tmp_path):
+        """With no closets seeded, the Kafka drawer (fixture_D4.md) is still returned."""
+        palace_dir = str(tmp_path / "palace")
+        _seed_drawers(palace_dir)
+        outcome = search_memories("Kafka rebalance timeout", palace_dir, n_results=3)
+        sources = [hit["source_file"] for hit in outcome["results"]]
+        assert sources, "should return results"
+        assert "fixture_D4.md" in sources, (
+            "direct drawer search alone should surface the Kafka drawer"
+        )
+
+    def test_weak_closets_do_not_hide_direct_drawer_hits(self, tmp_path):
+        """A closet attached to an unrelated drawer must leave the drawer
+        that matches the query text in the results."""
+        palace_dir = str(tmp_path / "palace")
+        _seed_drawers(palace_dir)
+        # Misleading closet: its topics echo the query, yet it is attached to D3.
+        decoy_topics = ["Kafka queue tuning", "consumer rebalance config"]
+        _seed_strong_closet_for(
+            palace_dir, drawer_id="D3", source_file="fixture_D3.md", topics=decoy_topics
+        )
+        outcome = search_memories(
+            "Kafka consumer rebalance timeout", palace_dir, n_results=5
+        )
+        sources = [hit["source_file"] for hit in outcome["results"]]
+        assert "fixture_D4.md" in sources, (
+            "D4 must appear — direct drawer search alone would rank it first. "
+            "Closet pointing to D3 should only boost D3, never hide D4."
+        )
+
+    def test_closet_boost_lifts_matching_drawer(self, tmp_path):
+        """A closet that agrees with the query puts its drawer at rank 1, reported
+        with matched_via "drawer+closet" and a positive closet_boost."""
+        palace_dir = str(tmp_path / "palace")
+        _seed_drawers(palace_dir)
+        agreeing_topics = ["JWT auth tokens", "session expiry", "authentication service"]
+        _seed_strong_closet_for(
+            palace_dir, drawer_id="D1", source_file="fixture_D1.md", topics=agreeing_topics
+        )
+        outcome = search_memories("JWT auth tokens expiry", palace_dir, n_results=3)
+        sources = [hit["source_file"] for hit in outcome["results"]]
+        assert sources[0] == "fixture_D1.md"
+        best = outcome["results"][0]
+        assert best["matched_via"] == "drawer+closet"
+        assert best["closet_boost"] > 0
+
+
+# ── closet_boost metadata ────────────────────────────────────────────────
+
+
+class TestClosetMetadata:
+    """Per-hit closet fields: ``closet_preview``, ``matched_via`` and ``closet_boost``."""
+
+    def test_closet_preview_exposed_when_boosted(self, tmp_path):
+        """The top hit for a closet-backed drawer carries a ``closet_preview`` key."""
+        palace_dir = str(tmp_path / "palace")
+        _seed_drawers(palace_dir)
+        jwt_topics = ["JWT auth tokens", "24h expiry", "authentication"]
+        _seed_strong_closet_for(palace_dir, "D1", "fixture_D1.md", jwt_topics)
+        outcome = search_memories("JWT authentication", palace_dir, n_results=2)
+        best = outcome["results"][0]
+        assert best["source_file"] == "fixture_D1.md"
+        assert "closet_preview" in best
+
+    def test_drawer_only_hits_have_no_closet_preview(self, tmp_path):
+        """With no closets seeded, every hit is drawer-only and has zero boost."""
+        palace_dir = str(tmp_path / "palace")
+        _seed_drawers(palace_dir)
+        outcome = search_memories("TanStack Query", palace_dir, n_results=2)
+        hits = outcome["results"]
+        assert hits
+        for hit in hits:
+            assert hit["matched_via"] == "drawer"
+            assert "closet_preview" not in hit
+            assert hit["closet_boost"] == 0.0