feat(search): drawer-grep returns best-matching chunk + neighbors
When a closet hit leads to a source file with many drawers, grep each chunk for the query terms and return the best-matching chunk plus one neighbor on each side, instead of dumping the whole file truncated at MAX_HYDRATION_CHARS.

The result now includes drawer_index and total_drawers so callers can request adjacent drawers explicitly.

Extracted from Milla's commit 935f657, which bundled drawer-grep with closet_llm (deferred pending the LLM_ENDPOINT refactor) and fact_checker (separate PR). Only the searcher.py change is ported here.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
+30
-5
@@ -205,6 +205,8 @@ def search_memories(
|
||||
pass # no closets yet — fall through to direct drawer search
|
||||
|
||||
# If closets found results, hydrate the referenced drawers
|
||||
MAX_HYDRATION_CHARS = 10000 # cap to prevent blowup on large source files
|
||||
|
||||
if closet_hits:
|
||||
import re
|
||||
seen_sources = set()
|
||||
@@ -215,18 +217,39 @@ def search_memories(
|
||||
continue
|
||||
seen_sources.add(source)
|
||||
|
||||
# Find drawers for this source file
|
||||
# Find drawers for this source file, grep for most relevant chunk
|
||||
try:
|
||||
drawer_results = drawers_col.get(
|
||||
where={"source_file": source},
|
||||
include=["documents", "metadatas"],
|
||||
)
|
||||
if drawer_results.get("ids"):
|
||||
# Combine all drawer content for this file
|
||||
full_text = "\n\n".join(drawer_results["documents"])
|
||||
meta = drawer_results["metadatas"][0]
|
||||
# Drawer-grep: score each chunk against the query,
|
||||
# return the best-matching chunk first + surrounding context
|
||||
query_terms = set(re.findall(r'\w{2,}', query.lower()))
|
||||
best_idx = 0
|
||||
best_score = -1
|
||||
for idx, doc in enumerate(drawer_results["documents"]):
|
||||
doc_lower = doc.lower()
|
||||
score = sum(1 for t in query_terms if t in doc_lower)
|
||||
if score > best_score:
|
||||
best_score = score
|
||||
best_idx = idx
|
||||
|
||||
# Build result: best chunk first, then neighbors
|
||||
docs = drawer_results["documents"]
|
||||
n_docs = len(docs)
|
||||
# Include best chunk + 1 before + 1 after for context
|
||||
start = max(0, best_idx - 1)
|
||||
end = min(n_docs, best_idx + 2)
|
||||
relevant_text = "\n\n".join(docs[start:end])
|
||||
|
||||
if len(relevant_text) > MAX_HYDRATION_CHARS:
|
||||
relevant_text = relevant_text[:MAX_HYDRATION_CHARS] + f"\n\n[...truncated. {n_docs} total drawers. Use mempalace_get_drawer for full content.]"
|
||||
|
||||
meta = drawer_results["metadatas"][best_idx]
|
||||
hits.append({
|
||||
"text": full_text,
|
||||
"text": relevant_text,
|
||||
"wing": meta.get("wing", "unknown"),
|
||||
"room": meta.get("room", "unknown"),
|
||||
"source_file": Path(source).name,
|
||||
@@ -234,6 +257,8 @@ def search_memories(
|
||||
"distance": round(closet_dist, 4),
|
||||
"matched_via": "closet",
|
||||
"closet_preview": closet_doc[:200],
|
||||
"drawer_index": best_idx,
|
||||
"total_drawers": n_docs,
|
||||
})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
Reference in New Issue
Block a user