From 30a431924bfcfa17ffdda209eba28192a78db9ca Mon Sep 17 00:00:00 2001 From: MSL <232237854+milla-jovovich@users.noreply.github.com> Date: Mon, 13 Apr 2026 01:16:51 -0700 Subject: [PATCH 01/42] fix: add file-level locking to prevent multi-agent duplicate drawers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause: when multiple agents mine simultaneously, both pass file_already_mined() check, both delete+insert the same file's drawers, creating duplicates or losing data. Fix: mine_lock() in palace.py — cross-platform file lock (fcntl on Unix, msvcrt on Windows). Both miner.py and convo_miner.py now lock per-file during the delete+insert cycle and re-check after acquiring the lock. Tested: - Lock acquires and releases correctly - Second agent blocks until first releases (0.25s wait) - 33/33 existing tests pass - Cross-platform: fcntl (macOS/Linux), msvcrt (Windows) Based on v3.2.0 tag. Co-Authored-By: Claude Opus 4.6 (1M context) --- mempalace/convo_miner.py | 62 ++++++++++++++++++++++------------------ mempalace/miner.py | 54 +++++++++++++++++++--------------- mempalace/palace.py | 37 ++++++++++++++++++++++++ 3 files changed, 102 insertions(+), 51 deletions(-) diff --git a/mempalace/convo_miner.py b/mempalace/convo_miner.py index d406073..f24fa69 100644 --- a/mempalace/convo_miner.py +++ b/mempalace/convo_miner.py @@ -16,7 +16,7 @@ from datetime import datetime from collections import defaultdict from .normalize import normalize -from .palace import SKIP_DIRS, get_collection, file_already_mined +from .palace import SKIP_DIRS, get_collection, file_already_mined, mine_lock # File types that might contain conversations @@ -375,34 +375,40 @@ def mine_convos( if extract_mode != "general": room_counts[room] += 1 - # File each chunk + # File each chunk — lock to prevent concurrent agents duplicating drawers_added = 0 - for chunk in chunks: - chunk_room = chunk.get("memory_type", room) if extract_mode == "general" else room - if extract_mode == "general": - room_counts[chunk_room] += 1 - drawer_id = f"drawer_{wing}_{chunk_room}_{hashlib.sha256((source_file + str(chunk['chunk_index'])).encode()).hexdigest()[:24]}" - try: - collection.upsert( - documents=[chunk["content"]], - ids=[drawer_id], - metadatas=[ - { - "wing": wing, - "room": chunk_room, - "source_file": source_file, - "chunk_index": chunk["chunk_index"], - "added_by": agent, - "filed_at": datetime.now().isoformat(), - "ingest_mode": "convos", - "extract_mode": extract_mode, - } - ], - ) - drawers_added += 1 - except Exception as e: - if "already exists" not in str(e).lower(): - raise + with mine_lock(source_file): + # Re-check after lock — another agent may have just finished this file + if file_already_mined(collection, source_file): + files_skipped += 1 + continue + + for chunk in chunks: + chunk_room = chunk.get("memory_type", room) if extract_mode == "general" else room + if extract_mode == "general": + room_counts[chunk_room] += 1 + drawer_id = f"drawer_{wing}_{chunk_room}_{hashlib.sha256((source_file + str(chunk['chunk_index'])).encode()).hexdigest()[:24]}" + try: + collection.upsert( + documents=[chunk["content"]], + ids=[drawer_id], + metadatas=[ + { + "wing": wing, + "room": chunk_room, + "source_file": source_file, + "chunk_index": chunk["chunk_index"], + "added_by": agent, + "filed_at": datetime.now().isoformat(), + "ingest_mode": "convos", + "extract_mode": extract_mode, + } + ], + ) + drawers_added += 1 + except Exception as e: + if "already exists" not in str(e).lower(): + raise total_drawers += drawers_added print(f" ✓ [{i:4}/{len(files)}] {filepath.name[:50]:50} +{drawers_added}") diff --git a/mempalace/miner.py b/mempalace/miner.py index 22c8af3..801ed7e 100644 --- a/mempalace/miner.py +++ b/mempalace/miner.py @@ -15,7 +15,7 @@ from pathlib import Path from datetime import datetime from collections import defaultdict -from .palace import SKIP_DIRS, get_collection, file_already_mined +from .palace import SKIP_DIRS, get_collection, file_already_mined, mine_lock READABLE_EXTENSIONS = { ".txt", @@ -434,29 +434,37 @@ def process_file( print(f" [DRY RUN] {filepath.name} → room:{room} ({len(chunks)} drawers)") return len(chunks), room - # Purge stale drawers for this file before re-inserting the fresh chunks. - # Converts modified-file re-mines from upsert-over-existing-IDs (which hits - # hnswlib's thread-unsafe updatePoint path and can segfault on macOS ARM - # with chromadb 0.6.3) into a clean delete+insert, bypassing the update - # path entirely. - try: - collection.delete(where={"source_file": source_file}) - except Exception: - pass + # Lock this file so concurrent agents don't interleave delete+insert. + # Without the lock, two agents can both pass file_already_mined(), + # both delete, and both insert — creating duplicates or losing data. + with mine_lock(source_file): + # Re-check after acquiring lock — another agent may have just finished + if file_already_mined(collection, source_file, check_mtime=True): + return 0, room - drawers_added = 0 - for chunk in chunks: - added = add_drawer( - collection=collection, - wing=wing, - room=room, - content=chunk["content"], - source_file=source_file, - chunk_index=chunk["chunk_index"], - agent=agent, - ) - if added: - drawers_added += 1 + # Purge stale drawers for this file before re-inserting the fresh chunks. + # Converts modified-file re-mines from upsert-over-existing-IDs (which hits + # hnswlib's thread-unsafe updatePoint path and can segfault on macOS ARM + # with chromadb 0.6.3) into a clean delete+insert, bypassing the update + # path entirely. + try: + collection.delete(where={"source_file": source_file}) + except Exception: + pass + + drawers_added = 0 + for chunk in chunks: + added = add_drawer( + collection=collection, + wing=wing, + room=room, + content=chunk["content"], + source_file=source_file, + chunk_index=chunk["chunk_index"], + agent=agent, + ) + if added: + drawers_added += 1 return drawers_added, room diff --git a/mempalace/palace.py b/mempalace/palace.py index 948fecc..ed5382a 100644 --- a/mempalace/palace.py +++ b/mempalace/palace.py @@ -4,6 +4,8 @@ palace.py — Shared palace operations. Consolidates collection access patterns used by both miners and the MCP server. """ +import contextlib +import hashlib import os from .backends.chroma import ChromaBackend @@ -50,6 +52,41 @@ def get_collection( ) +@contextlib.contextmanager +def mine_lock(source_file: str): + """Cross-platform file lock for mine operations. + + Prevents multiple agents from mining the same file simultaneously, + which causes duplicate drawers when the delete+insert cycle interleaves. + """ + lock_dir = os.path.join(os.path.expanduser("~"), ".mempalace", "locks") + os.makedirs(lock_dir, exist_ok=True) + lock_path = os.path.join( + lock_dir, hashlib.sha256(source_file.encode()).hexdigest()[:16] + ".lock" + ) + + lf = open(lock_path, "w") + try: + if os.name == "nt": + import msvcrt + msvcrt.locking(lf.fileno(), msvcrt.LK_LOCK, 1) + else: + import fcntl + fcntl.flock(lf, fcntl.LOCK_EX) + yield + finally: + try: + if os.name == "nt": + import msvcrt + msvcrt.locking(lf.fileno(), msvcrt.LK_UNLCK, 1) + else: + import fcntl + fcntl.flock(lf, fcntl.LOCK_UN) + except Exception: + pass + lf.close() + + def file_already_mined(collection, source_file: str, check_mtime: bool = False) -> bool: """Check if a file has already been filed in the palace. From 9b99c136ee13c1dc97f3c20bb2f59f4464d7284d Mon Sep 17 00:00:00 2001 From: MSL <232237854+milla-jovovich@users.noreply.github.com> Date: Mon, 13 Apr 2026 01:55:25 -0700 Subject: [PATCH 02/42] fix: strip system tags, hook output, and Claude UI chrome from drawers normalize.py now strips before filing: - , , tags - , , tags - Hook status messages (CURRENT TIME, Checking verified facts, etc.) - Claude Code UI chrome (ctrl+o to expand, progress bars, etc.) - Collapsed runs of blank lines This noise was going straight into drawers, wasting storage space and polluting search results. strip_noise() runs on all normalized output regardless of input format (JSONL, JSON, plain text). 689/689 tests pass. Co-Authored-By: Claude Opus 4.6 (1M context) --- mempalace/normalize.py | 56 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/mempalace/normalize.py b/mempalace/normalize.py index e599df9..256a5e9 100644 --- a/mempalace/normalize.py +++ b/mempalace/normalize.py @@ -16,10 +16,54 @@ No API key. No internet. Everything local. import json import os +import re from pathlib import Path from typing import Optional +# ─── Noise stripping ───────────────────────────────────────────────────── +# Claude Code and other tools inject system tags, hook output, UI chrome, +# and tool-call JSON into transcripts. These waste drawer space and pollute +# search results. Strip them before filing. + +_NOISE_TAG_PATTERNS = [ + re.compile(r"]*>.*?", re.DOTALL), + re.compile(r"]*>.*?", re.DOTALL), + re.compile(r"]*>.*?", re.DOTALL), + re.compile(r"]*>.*?", re.DOTALL), + re.compile(r"]*>.*?", re.DOTALL), + re.compile(r"]*>.*?", re.DOTALL), +] + +_NOISE_STRINGS = [ + "CURRENT TIME:", + "VERIFIED FACTS (do not contradict)", + "AGENT SPECIALIZATION:", + "Checking verified facts...", + "Injecting timestamp...", + "Starting background pipeline...", + "Checking emotional weights...", + "Auto-save reminder...", + "Checking pipeline...", + "MemPalace auto-save checkpoint.", +] + + +def strip_noise(text: str) -> str: + """Remove system tags, hook output, and Claude Code UI chrome from text.""" + for pat in _NOISE_TAG_PATTERNS: + text = pat.sub("", text) + for noise in _NOISE_STRINGS: + text = text.replace(noise, "") + # Strip Claude Code UI chrome + text = re.sub(r".*\(ctrl\+o to expand\).*\n?", "", text) + text = re.sub(r"Ran \d+ (?:stop|pre|post)\s*hook.*\n?", "", text, flags=re.IGNORECASE) + text = re.sub(r"…\s*\+\d+ lines.*\n?", "", text) + # Collapse runs of blank lines + text = re.sub(r"\n{4,}", "\n\n\n", text) + return text.strip() + + def normalize(filepath: str) -> str: """ Load a file and normalize to transcript format if it's a chat export. @@ -40,19 +84,23 @@ def normalize(filepath: str) -> str: if not content.strip(): return content - # Already has > markers — pass through + # Already has > markers — pass through (strip noise but preserve trailing newline) lines = content.split("\n") if sum(1 for line in lines if line.strip().startswith(">")) >= 3: - return content + cleaned = strip_noise(content) + # Preserve trailing newline if original had one + if content.endswith("\n") and not cleaned.endswith("\n"): + cleaned += "\n" + return cleaned # Try JSON normalization ext = Path(filepath).suffix.lower() if ext in (".json", ".jsonl") or content.strip()[:1] in ("{", "["): normalized = _try_normalize_json(content) if normalized: - return normalized + return strip_noise(normalized) - return content + return strip_noise(content) def _try_normalize_json(content: str) -> Optional[str]: From d3d7184f4e885f64520d80971cbc41285d947e5f Mon Sep 17 00:00:00 2001 From: MSL <232237854+milla-jovovich@users.noreply.github.com> Date: Mon, 13 Apr 2026 01:33:48 -0700 Subject: [PATCH 03/42] =?UTF-8?q?feat:=20add=20closet=20layer=20=E2=80=94?= =?UTF-8?q?=20searchable=20index=20pointing=20to=20drawers?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The closet architecture was always part of MemPalace's design but never shipped in the public codebase. This adds it. Palace now has TWO collections: - mempalace_drawers — full verbatim content (unchanged) - mempalace_closets — compact AAAK-style index entries How it works: - When mining, each file gets a closet alongside its drawers - Closet contains extracted topics, entities, quotes as pointers - Closets pack up to 1500 chars, topics never split mid-entry - Search hits closets first (fast, small), then hydrates the full drawer content for matching files - Falls back to direct drawer search if no closets exist yet Files changed: - palace.py: get_closets_collection(), build_closet_text(), upsert_closet(), CLOSET_CHAR_LIMIT - miner.py: process_file() now creates closets after drawers - searcher.py: search_memories() tries closet-first search, hydrates drawers, falls back to direct search Backwards compatible — existing palaces without closets continue to work via the fallback path. Closets are created on next mine. 689/689 tests pass. Co-Authored-By: Claude Opus 4.6 (1M context) --- mempalace/miner.py | 25 ++++++++++++++- mempalace/palace.py | 62 ++++++++++++++++++++++++++++++++++++ mempalace/searcher.py | 73 +++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 156 insertions(+), 4 deletions(-) diff --git a/mempalace/miner.py b/mempalace/miner.py index 801ed7e..8170362 100644 --- a/mempalace/miner.py +++ b/mempalace/miner.py @@ -15,7 +15,10 @@ from pathlib import Path from datetime import datetime from collections import defaultdict -from .palace import SKIP_DIRS, get_collection, file_already_mined, mine_lock +from .palace import ( + SKIP_DIRS, get_collection, get_closets_collection, + file_already_mined, mine_lock, build_closet_text, upsert_closet, +) READABLE_EXTENSIONS = { ".txt", @@ -410,6 +413,7 @@ def process_file( rooms: list, agent: str, dry_run: bool, + closets_col=None, ) -> tuple: """Read, chunk, route, and file one file. Returns (drawer_count, room_name).""" @@ -466,6 +470,22 @@ def process_file( if added: drawers_added += 1 + # Build closet — the searchable index pointing to these drawers + if closets_col and drawers_added > 0: + drawer_ids = [ + f"drawer_{wing}_{room}_{hashlib.sha256((source_file + str(c['chunk_index'])).encode()).hexdigest()[:24]}" + for c in chunks + ] + closet_text = build_closet_text(source_file, drawer_ids, content, wing, room) + closet_id = f"closet_{wing}_{room}_{hashlib.sha256(source_file.encode()).hexdigest()[:24]}" + upsert_closet(closets_col, closet_id, closet_text, { + "wing": wing, + "room": room, + "source_file": source_file, + "drawer_count": drawers_added, + "filed_at": datetime.now().isoformat(), + }) + return drawers_added, room @@ -586,8 +606,10 @@ def mine( if not dry_run: collection = get_collection(palace_path) + closets_col = get_closets_collection(palace_path) else: collection = None + closets_col = None total_drawers = 0 files_skipped = 0 @@ -602,6 +624,7 @@ def mine( rooms=rooms, agent=agent, dry_run=dry_run, + closets_col=closets_col, ) if drawers == 0 and not dry_run: files_skipped += 1 diff --git a/mempalace/palace.py b/mempalace/palace.py index ed5382a..ef58a06 100644 --- a/mempalace/palace.py +++ b/mempalace/palace.py @@ -52,6 +52,68 @@ def get_collection( ) +def get_closets_collection(palace_path: str, create: bool = True): + """Get the closets collection — the searchable index layer.""" + return get_collection(palace_path, collection_name="mempalace_closets", create=create) + + +CLOSET_CHAR_LIMIT = 1500 # fill closet until ~1500 chars, then start a new one + + +def build_closet_text(source_file, drawer_ids, content, wing, room): + """Build a compact closet entry from drawer content. + + Extracts topics, names, and key quotes into an AAAK-style pointer + that tells the searcher which drawers to open. + """ + import re + # Extract proper nouns (capitalized words, 2+ occurrences) + words = re.findall(r"\b[A-Z][a-z]{2,}\b", content[:5000]) + word_freq = {} + for w in words: + word_freq[w] = word_freq.get(w, 0) + 1 + entities = sorted([w for w, c in word_freq.items() if c >= 2], key=lambda w: -word_freq[w])[:5] + + # Extract key phrases + topics = [] + for pattern in [ + r"(?:built|fixed|wrote|added|pushed|tested|created|decided|migrated)\s+[\w\s]{3,30}", + ]: + topics.extend(re.findall(pattern, content[:5000], re.IGNORECASE)) + topics = list(dict.fromkeys(t.strip().lower() for t in topics))[:8] + + # Extract first quote + quotes = re.findall(r'"([^"]{15,100})"', content[:5000]) + quote = quotes[0] if quotes else "" + + # Build pointer lines + entity_str = ";".join(entities[:5]) if entities else "" + lines = [] + for topic in topics: + pointer = f"{topic}|{entity_str}|→{','.join(drawer_ids[:3])}" + lines.append(pointer) + if quote: + lines.append(f'"{quote}"|{entity_str}|→{",".join(drawer_ids[:3])}') + if not lines: + lines.append(f"{wing}/{room}|{entity_str}|→{','.join(drawer_ids[:3])}") + + return "\n".join(lines) + + +def upsert_closet(closets_col, closet_id, closet_text, metadata): + """Add or update a closet. Respects CLOSET_CHAR_LIMIT.""" + try: + existing = closets_col.get(ids=[closet_id]) + if existing.get("ids"): + old_text = existing["documents"][0] + if len(old_text) + len(closet_text) + 1 <= CLOSET_CHAR_LIMIT: + closet_text = old_text + "\n" + closet_text + # else: start fresh — old closet was full + except Exception: + pass + closets_col.upsert(documents=[closet_text], ids=[closet_id], metadatas=[metadata]) + + @contextlib.contextmanager def mine_lock(source_file: str): """Cross-platform file lock for mine operations. diff --git a/mempalace/searcher.py b/mempalace/searcher.py index bc70c1d..70fd615 100644 --- a/mempalace/searcher.py +++ b/mempalace/searcher.py @@ -9,7 +9,7 @@ Returns verbatim text — the actual words, never summaries. import logging from pathlib import Path -from .palace import get_collection +from .palace import get_collection, get_closets_collection logger = logging.getLogger("mempalace_mcp") @@ -117,7 +117,7 @@ def search_memories( 0.0 disables filtering. Typical useful range: 0.3–1.0. """ try: - col = get_collection(palace_path, create=False) + drawers_col = get_collection(palace_path, create=False) except Exception as e: logger.error("No palace found at %s: %s", palace_path, e) return { @@ -127,6 +127,73 @@ def search_memories( where = build_where_filter(wing, room) + # Try closet-first search: search the compact index, then hydrate drawers + closet_hits = [] + try: + closets_col = get_closets_collection(palace_path, create=False) + ckwargs = { + "query_texts": [query], + "n_results": n_results * 2, # over-fetch closets to find best drawers + "include": ["documents", "metadatas", "distances"], + } + if where: + ckwargs["where"] = where + closet_results = closets_col.query(**ckwargs) + if closet_results["documents"][0]: + closet_hits = list(zip( + closet_results["documents"][0], + closet_results["metadatas"][0], + closet_results["distances"][0], + )) + except Exception: + pass # no closets yet — fall through to direct drawer search + + # If closets found results, hydrate the referenced drawers + if closet_hits: + import re + seen_sources = set() + hits = [] + for closet_doc, closet_meta, closet_dist in closet_hits: + source = closet_meta.get("source_file", "") + if source in seen_sources: + continue + seen_sources.add(source) + + # Find drawers for this source file + try: + drawer_results = drawers_col.get( + where={"source_file": source}, + include=["documents", "metadatas"], + ) + if drawer_results.get("ids"): + # Combine all drawer content for this file + full_text = "\n\n".join(drawer_results["documents"]) + meta = drawer_results["metadatas"][0] + hits.append({ + "text": full_text, + "wing": meta.get("wing", "unknown"), + "room": meta.get("room", "unknown"), + "source_file": Path(source).name, + "similarity": round(max(0.0, 1 - closet_dist), 3), + "distance": round(closet_dist, 4), + "matched_via": "closet", + "closet_preview": closet_doc[:200], + }) + except Exception: + pass + + if len(hits) >= n_results: + break + + if hits: + return { + "query": query, + "filters": {"wing": wing, "room": room}, + "total_before_filter": len(closet_hits), + "results": hits, + } + + # Fallback: direct drawer search (no closets yet, or closets empty) try: kwargs = { "query_texts": [query], @@ -136,7 +203,7 @@ def search_memories( if where: kwargs["where"] = where - results = col.query(**kwargs) + results = drawers_col.query(**kwargs) except Exception as e: return {"error": f"Search error: {e}"} From 124f5bf7ba5eec986c3d26fbbc66d6f0584ef62a Mon Sep 17 00:00:00 2001 From: MSL <232237854+milla-jovovich@users.noreply.github.com> Date: Mon, 13 Apr 2026 01:40:58 -0700 Subject: [PATCH 04/42] fix: enforce atomic topics in closets, extract richer pointers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - upsert_closet replaced by upsert_closet_lines: checks each topic line individually against CLOSET_CHAR_LIMIT. If adding one line WHOLE would exceed the limit, starts a new closet. Never splits mid-topic. - build_closet_lines returns a list of atomic lines (not joined text) - Richer extraction: section headers, more action verbs, up to 3 quotes, up to 12 topics per file - Each line is complete: topic|entities|→drawer_refs Co-Authored-By: Claude Opus 4.6 (1M context) --- mempalace/miner.py | 9 ++-- mempalace/palace.py | 113 ++++++++++++++++++++++++++++++++------------ 2 files changed, 87 insertions(+), 35 deletions(-) diff --git a/mempalace/miner.py b/mempalace/miner.py index 8170362..37e507a 100644 --- a/mempalace/miner.py +++ b/mempalace/miner.py @@ -17,7 +17,7 @@ from collections import defaultdict from .palace import ( SKIP_DIRS, get_collection, get_closets_collection, - file_already_mined, mine_lock, build_closet_text, upsert_closet, + file_already_mined, mine_lock, build_closet_lines, upsert_closet_lines, ) READABLE_EXTENSIONS = { @@ -471,14 +471,15 @@ def process_file( drawers_added += 1 # Build closet — the searchable index pointing to these drawers + # Each topic line is atomic — never split across closets if closets_col and drawers_added > 0: drawer_ids = [ f"drawer_{wing}_{room}_{hashlib.sha256((source_file + str(c['chunk_index'])).encode()).hexdigest()[:24]}" for c in chunks ] - closet_text = build_closet_text(source_file, drawer_ids, content, wing, room) - closet_id = f"closet_{wing}_{room}_{hashlib.sha256(source_file.encode()).hexdigest()[:24]}" - upsert_closet(closets_col, closet_id, closet_text, { + closet_lines = build_closet_lines(source_file, drawer_ids, content, wing, room) + closet_id_base = f"closet_{wing}_{room}_{hashlib.sha256(source_file.encode()).hexdigest()[:24]}" + upsert_closet_lines(closets_col, closet_id_base, closet_lines, { "wing": wing, "room": room, "source_file": source_file, diff --git a/mempalace/palace.py b/mempalace/palace.py index ef58a06..9bb08a5 100644 --- a/mempalace/palace.py +++ b/mempalace/palace.py @@ -60,58 +60,109 @@ def get_closets_collection(palace_path: str, create: bool = True): CLOSET_CHAR_LIMIT = 1500 # fill closet until ~1500 chars, then start a new one -def build_closet_text(source_file, drawer_ids, content, wing, room): - """Build a compact closet entry from drawer content. +def build_closet_lines(source_file, drawer_ids, content, wing, room): + """Build compact closet pointer lines from drawer content. - Extracts topics, names, and key quotes into an AAAK-style pointer - that tells the searcher which drawers to open. + Returns a LIST of lines (not joined). Each line is one complete topic + pointer — never split across closets. + + Format: topic|entities|→drawer_ids """ import re + from pathlib import Path + + drawer_ref = ",".join(drawer_ids[:3]) + # Extract proper nouns (capitalized words, 2+ occurrences) words = re.findall(r"\b[A-Z][a-z]{2,}\b", content[:5000]) word_freq = {} for w in words: word_freq[w] = word_freq.get(w, 0) + 1 - entities = sorted([w for w, c in word_freq.items() if c >= 2], key=lambda w: -word_freq[w])[:5] + entities = sorted( + [w for w, c in word_freq.items() if c >= 2], + key=lambda w: -word_freq[w], + )[:5] + entity_str = ";".join(entities) if entities else "" - # Extract key phrases + # Extract key phrases — action verbs + context topics = [] for pattern in [ - r"(?:built|fixed|wrote|added|pushed|tested|created|decided|migrated)\s+[\w\s]{3,30}", + r"(?:built|fixed|wrote|added|pushed|tested|created|decided|migrated|reviewed|deployed|configured|removed|updated)\s+[\w\s]{3,40}", ]: topics.extend(re.findall(pattern, content[:5000], re.IGNORECASE)) - topics = list(dict.fromkeys(t.strip().lower() for t in topics))[:8] + # Also grab section headers if present + for header in re.findall(r"^#{1,3}\s+(.{5,60})$", content[:5000], re.MULTILINE): + topics.append(header.strip()) + # Dedupe preserving order + topics = list(dict.fromkeys(t.strip().lower() for t in topics))[:12] - # Extract first quote - quotes = re.findall(r'"([^"]{15,100})"', content[:5000]) - quote = quotes[0] if quotes else "" + # Extract quotes + quotes = re.findall(r'"([^"]{15,150})"', content[:5000]) - # Build pointer lines - entity_str = ";".join(entities[:5]) if entities else "" + # Build pointer lines — each one is atomic, never split lines = [] for topic in topics: - pointer = f"{topic}|{entity_str}|→{','.join(drawer_ids[:3])}" - lines.append(pointer) - if quote: - lines.append(f'"{quote}"|{entity_str}|→{",".join(drawer_ids[:3])}') + lines.append(f"{topic}|{entity_str}|→{drawer_ref}") + for quote in quotes[:3]: + lines.append(f'"{quote}"|{entity_str}|→{drawer_ref}') + + # Always have at least one line if not lines: - lines.append(f"{wing}/{room}|{entity_str}|→{','.join(drawer_ids[:3])}") + name = Path(source_file).stem[:40] + lines.append(f"{wing}/{room}/{name}|{entity_str}|→{drawer_ref}") - return "\n".join(lines) + return lines -def upsert_closet(closets_col, closet_id, closet_text, metadata): - """Add or update a closet. Respects CLOSET_CHAR_LIMIT.""" - try: - existing = closets_col.get(ids=[closet_id]) - if existing.get("ids"): - old_text = existing["documents"][0] - if len(old_text) + len(closet_text) + 1 <= CLOSET_CHAR_LIMIT: - closet_text = old_text + "\n" + closet_text - # else: start fresh — old closet was full - except Exception: - pass - closets_col.upsert(documents=[closet_text], ids=[closet_id], metadatas=[metadata]) +def upsert_closet_lines(closets_col, closet_id_base, lines, metadata): + """Add topic lines to closets. Never splits a topic mid-line. + + If adding a line WHOLE would exceed CLOSET_CHAR_LIMIT, a new closet + is created. Some closets may have less than 1500 chars — that's fine. + Every topic is complete and readable. + + Returns the number of closets written. + """ + closet_num = 1 + current_lines = [] + current_chars = 0 + closets_written = 0 + + def _flush(): + nonlocal closets_written + if not current_lines: + return + closet_id = f"{closet_id_base}_{closet_num:02d}" + text = "\n".join(current_lines) + + # Check if closet already has content — append if room + try: + existing = closets_col.get(ids=[closet_id]) + if existing.get("ids") and existing["documents"][0]: + old = existing["documents"][0] + if len(old) + len(text) + 1 <= CLOSET_CHAR_LIMIT: + text = old + "\n" + text + except Exception: + pass + + closets_col.upsert(documents=[text], ids=[closet_id], metadatas=[metadata]) + closets_written += 1 + + for line in lines: + line_len = len(line) + # Would this line fit whole in the current closet? + if current_chars > 0 and current_chars + line_len + 1 > CLOSET_CHAR_LIMIT: + # Doesn't fit — flush current closet, start new one + _flush() + closet_num += 1 + current_lines = [] + current_chars = 0 + + current_lines.append(line) + current_chars += line_len + 1 # +1 for newline + + _flush() + return closets_written @contextlib.contextmanager From ee60cad652d89b2302d65f797f8cf2f997bccc87 Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Mon, 13 Apr 2026 07:38:43 -0300 Subject: [PATCH 05/42] =?UTF-8?q?docs:=20add=20CLOSETS.md=20=E2=80=94=20cl?= =?UTF-8?q?oset=20layer=20overview?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cherry-picked the docs portion of 67e4ac6 to accompany the closet feature. Test coverage for closets is omnibus with tests for entity metadata and BM25 (see PR targeting those features) and will land together in a follow-up. Co-Authored-By: MSL <232237854+milla-jovovich@users.noreply.github.com> --- docs/CLOSETS.md | 79 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 docs/CLOSETS.md diff --git a/docs/CLOSETS.md b/docs/CLOSETS.md new file mode 100644 index 0000000..c4e9615 --- /dev/null +++ b/docs/CLOSETS.md @@ -0,0 +1,79 @@ +# Closets — The Searchable Index Layer + +## What closets are + +Drawers hold your verbatim content. Closets are the index — compact pointers that tell the searcher which drawers to open. + +``` +CLOSET: "built auth system|Ben;Igor|→drawer_api_auth_a1b2c3" + ↑ topic ↑ entities ↑ points to this drawer +``` + +An agent searching "who built the auth?" hits the closet first (fast scan of short text), then opens the referenced drawer to get the full verbatim content. + +## Lifecycle + +### When are closets created? + +Closets are created during `mempalace mine`. For each file mined: +1. Content is chunked into drawers (verbatim, ~800 chars each) +2. Topics, entities, and quotes are extracted from the content +3. A closet is created with pointer lines to those drawers + +### What's inside a closet? + +Each line is one atomic topic pointer: +``` +topic description|entity1;entity2|→drawer_id_1,drawer_id_2 +"verbatim quote from the content"|entity1|→drawer_id_3 +``` + +Topics are never split across closets. If adding a topic would exceed 1,500 characters, a new closet is created. + +### When do closets update? + +When a file is re-mined (content changed), its drawers are replaced and new closets are built from the fresh content. The old closet content is replaced via upsert. + +### What about stale topics? + +If a file's content changes and a topic no longer exists, the closet is rebuilt entirely from the new content — stale topics are gone. Closets are tied to source files, not to individual topics. + +If you add content to an existing file (e.g., a daily diary growing throughout the day), new topics are appended to the existing closet until the 1,500-char limit, then a new closet is created. + +### Do closets survive palace rebuilds? + +Closets are stored in the `mempalace_closets` ChromaDB collection alongside `mempalace_drawers`. If you delete and rebuild the palace, closets are recreated during the next `mempalace mine`. + +## How search uses closets + +``` +Query → search mempalace_closets (fast, small documents) + ↓ + top closet hits → extract drawer IDs from pointer lines + ↓ + fetch drawers from mempalace_drawers (full verbatim content) + ↓ + BM25 hybrid re-rank (keyword match + vector similarity) + ↓ + return results to user +``` + +If no closets exist (palace created before this feature), search falls back to direct drawer search. Closets are created on next mine. + +## Limits + +| Setting | Value | Reason | +|---------|-------|--------| +| Max closet size | 1,500 chars | Leaves buffer under ChromaDB's working limit | +| Max topics per file | 12 | Keeps closets focused | +| Max quotes per file | 3 | Most relevant only | +| Max entities per pointer | 5 | Top names by frequency | +| Max response chars | 10,000 | Prevents hydration blowup on large files | + +## For developers + +Closet functions live in `mempalace/palace.py`: +- `get_closets_collection()` — get the closets ChromaDB collection +- `build_closet_lines()` — extract topics/entities/quotes into pointer lines +- `upsert_closet_lines()` — write lines to closets respecting the char limit +- `CLOSET_CHAR_LIMIT` — the 1,500 char limit constant From f935e85ead2c647a28f33b84d1287164b1469a52 Mon Sep 17 00:00:00 2001 From: MSL <232237854+milla-jovovich@users.noreply.github.com> Date: Mon, 13 Apr 2026 01:47:19 -0700 Subject: [PATCH 06/42] feat: entity metadata + diary ingest + BM25 hybrid search Three features that close the gap between the architecture docs and the actual codebase: 1. Entity metadata on drawers and closets - _extract_entities_for_metadata() pulls names from known_entities.json + proper nouns appearing 2+ times - Stamped as "entities" field in ChromaDB metadata - Enables filterable search by person/project name 2. Day-based diary ingest (diary_ingest.py) - ONE drawer per day, upserted as the day grows - Closets pack topics atomically, never split mid-topic - Tracks entry count in state file, only processes new entries - Usage: python -m mempalace.diary_ingest --dir ~/summaries 3. BM25 hybrid search in searcher.py - _bm25_score() keyword matching complements vector similarity - _hybrid_rank() combines both signals (60% vector, 40% BM25) - Catches exact name/term matches that embeddings miss - Applied to both closet-first and direct drawer search paths 689/689 tests pass. Co-Authored-By: Claude Opus 4.6 (1M context) --- mempalace/diary_ingest.py | 173 ++++++++++++++++++++++++++++++++++++++ mempalace/miner.py | 49 ++++++++++- mempalace/searcher.py | 64 +++++++++++++- 3 files changed, 282 insertions(+), 4 deletions(-) create mode 100644 mempalace/diary_ingest.py diff --git a/mempalace/diary_ingest.py b/mempalace/diary_ingest.py new file mode 100644 index 0000000..e64e139 --- /dev/null +++ b/mempalace/diary_ingest.py @@ -0,0 +1,173 @@ +""" +diary_ingest.py — Ingest daily summary files into the palace. + +Architecture: +- ONE drawer per day — full verbatim content, upserted as the day grows +- Closets pack topics up to 1500 chars, never split mid-topic +- Only new entries are processed (tracks entry count in state file) +- Entities extracted and stamped on metadata for filterable search + +Usage: + python -m mempalace.diary_ingest --dir ~/daily_summaries --palace ~/.mempalace/palace + python -m mempalace.diary_ingest --dir ~/daily_summaries --palace ~/.mempalace/palace --force +""" + +import hashlib +import json +import os +import re +from datetime import datetime, timezone +from pathlib import Path + +from .palace import ( + get_collection, + get_closets_collection, + build_closet_lines, + upsert_closet_lines, + CLOSET_CHAR_LIMIT, +) +from .miner import _extract_entities_for_metadata + + +DIARY_ENTRY_RE = re.compile(r"^## .+", re.MULTILINE) + + +def _split_entries(text): + """Split diary text into (header, body) pairs per ## entry.""" + parts = DIARY_ENTRY_RE.split(text) + headers = DIARY_ENTRY_RE.findall(text) + entries = [] + for i, header in enumerate(headers): + body = parts[i + 1] if i + 1 < len(parts) else "" + entries.append((header.strip(), body.strip())) + return entries + + +def ingest_diaries( + diary_dir, + palace_path, + wing="diary", + force=False, +): + """Ingest daily summary files into the palace. + + Each date file gets ONE drawer (upserted as day grows) and + closets that pack topics atomically up to 1500 chars. + """ + diary_dir = Path(diary_dir).expanduser().resolve() + if not diary_dir.exists(): + print(f"Diary directory not found: {diary_dir}") + return + + diary_files = sorted(diary_dir.glob("*.md")) + if not diary_files: + print(f"No .md files in {diary_dir}") + return + + # State tracks which entries have been closeted per file + state_file = diary_dir / ".diary_ingest_state.json" + state = {} if force else ( + json.loads(state_file.read_text()) if state_file.exists() else {} + ) + + drawers_col = get_collection(palace_path) + closets_col = get_closets_collection(palace_path) + + days_updated = 0 + closets_created = 0 + + for diary_path in diary_files: + text = diary_path.read_text(encoding="utf-8", errors="replace") + if len(text.strip()) < 50: + continue + + date_match = re.match(r"(\d{4}-\d{2}-\d{2})", diary_path.stem) + if not date_match: + continue + date_str = date_match.group(1) + + # Skip if content hasn't changed + prev_size = state.get(diary_path.name, {}).get("size", 0) + curr_size = len(text) + if curr_size == prev_size and not force: + continue + + now_iso = datetime.now(timezone.utc).isoformat() + drawer_id = f"drawer_diary_{date_str}" + + # Extract entities from full day text + entities = _extract_entities_for_metadata(text) + + # UPSERT the day's drawer (full verbatim, replaces as day grows) + drawer_meta = { + "date": date_str, + "wing": wing, + "room": "daily", + "source_file": str(diary_path), + "source_session": "daily_diary", + "filed_at": now_iso, + } + if entities: + drawer_meta["entities"] = entities + drawers_col.upsert( + documents=[text], + ids=[drawer_id], + metadatas=[drawer_meta], + ) + + # Split into entries and find new ones + entries = _split_entries(text) + prev_entry_count = state.get(diary_path.name, {}).get("entry_count", 0) + new_entries = entries[prev_entry_count:] if not force else entries + + if new_entries: + # Build closet lines from new entries + all_lines = [] + for header, body in new_entries: + entry_text = f"{header}\n{body}" + entry_lines = build_closet_lines( + str(diary_path), [drawer_id], entry_text, wing, "daily" + ) + all_lines.extend(entry_lines) + + if all_lines: + closet_id_base = f"closet_diary_{date_str}" + closet_meta = { + "date": date_str, + "wing": wing, + "room": "daily", + "source_file": str(diary_path), + "filed_at": now_iso, + } + if entities: + closet_meta["entities"] = entities + n = upsert_closet_lines( + closets_col, closet_id_base, all_lines, closet_meta + ) + closets_created += n + + state[diary_path.name] = { + "size": curr_size, + "entry_count": len(entries), + "ingested_at": now_iso, + } + days_updated += 1 + + state_file.write_text(json.dumps(state, indent=2)) + if days_updated: + print(f"Diary: {days_updated} days updated, {closets_created} new closets") + + return {"days_updated": days_updated, "closets_created": closets_created} + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description="Ingest daily summaries into the palace") + parser.add_argument("--dir", required=True, help="Path to daily_summaries directory") + parser.add_argument("--palace", default=os.path.expanduser("~/.mempalace/palace")) + parser.add_argument("--wing", default="diary") + parser.add_argument("--force", action="store_true") + args = parser.parse_args() + + ingest_diaries(args.dir, args.palace, wing=args.wing, force=args.force) diff --git a/mempalace/miner.py b/mempalace/miner.py index 37e507a..e2f6528 100644 --- a/mempalace/miner.py +++ b/mempalace/miner.py @@ -371,6 +371,43 @@ def chunk_text(content: str, source_file: str) -> list: # ============================================================================= +def _extract_entities_for_metadata(content: str) -> str: + """Extract entity names from content for metadata tagging. + + Returns semicolon-separated string of entity names found in the text, + suitable for ChromaDB metadata filtering. + """ + import re + # Load known entities from registry if available + known_names = set() + registry_path = os.path.join(os.path.expanduser("~"), ".mempalace", "known_entities.json") + if os.path.exists(registry_path): + try: + import json + kd = json.loads(open(registry_path).read()) + for cat in kd.values(): + if isinstance(cat, list): + known_names.update(cat) + except Exception: + pass + + matched = set() + # Match known entities + for name in known_names: + if re.search(r'(?= 2 and len(w) > 2: + matched.add(w) + + return ";".join(sorted(matched))[:500] if matched else "" + + def add_drawer( collection, wing: str, room: str, content: str, source_file: str, chunk_index: int, agent: str ): @@ -390,6 +427,10 @@ def add_drawer( metadata["source_mtime"] = os.path.getmtime(source_file) except OSError: pass + # Tag with entity names for filterable search + entities = _extract_entities_for_metadata(content) + if entities: + metadata["entities"] = entities collection.upsert( documents=[content], ids=[drawer_id], @@ -479,13 +520,17 @@ def process_file( ] closet_lines = build_closet_lines(source_file, drawer_ids, content, wing, room) closet_id_base = f"closet_{wing}_{room}_{hashlib.sha256(source_file.encode()).hexdigest()[:24]}" - upsert_closet_lines(closets_col, closet_id_base, closet_lines, { + entities = _extract_entities_for_metadata(content) + closet_meta = { "wing": wing, "room": room, "source_file": source_file, "drawer_count": drawers_added, "filed_at": datetime.now().isoformat(), - }) + } + if entities: + closet_meta["entities"] = entities + upsert_closet_lines(closets_col, closet_id_base, closet_lines, closet_meta) return drawers_added, room diff --git a/mempalace/searcher.py b/mempalace/searcher.py index 70fd615..37795fc 100644 --- a/mempalace/searcher.py +++ b/mempalace/searcher.py @@ -2,11 +2,14 @@ """ searcher.py — Find anything. Exact words. -Semantic search against the palace. -Returns verbatim text — the actual words, never summaries. +Hybrid search: BM25 keyword matching + vector semantic similarity. +Searches closets first (fast index), then hydrates full drawer content. +Falls back to direct drawer search for palaces without closets. """ import logging +import math +import re from pathlib import Path from .palace import get_collection, get_closets_collection @@ -18,6 +21,59 @@ class SearchError(Exception): """Raised when search cannot proceed (e.g. no palace found).""" +def _bm25_score(query: str, document: str, k1: float = 1.5, b: float = 0.75, avg_dl: float = 500) -> float: + """Simple BM25 score for a single document against a query. + + This is a lightweight keyword-matching signal that complements vector + similarity. It catches exact matches that embeddings might miss + (e.g., specific names, project codes, error messages). + """ + query_terms = set(re.findall(r'\w{2,}', query.lower())) + doc_terms = re.findall(r'\w{2,}', document.lower()) + if not query_terms or not doc_terms: + return 0.0 + doc_len = len(doc_terms) + term_freq = {} + for t in doc_terms: + term_freq[t] = term_freq.get(t, 0) + 1 + + score = 0.0 + for term in query_terms: + tf = term_freq.get(term, 0) + if tf > 0: + # Simplified IDF — treat each query term as moderately rare + idf = math.log(2.0) + numerator = tf * (k1 + 1) + denominator = tf + k1 * (1 - b + b * doc_len / avg_dl) + score += idf * numerator / denominator + return score + + +def _hybrid_rank(vector_results, query: str, vector_weight: float = 0.6, bm25_weight: float = 0.4): + """Re-rank results using both vector distance and BM25 keyword score. + + Returns results sorted by combined score (higher = better). + """ + if not vector_results: + return vector_results + + # Normalize vector distances to 0-1 similarity + max_dist = max(r.get("distance", 1.0) for r in vector_results) or 1.0 + for r in vector_results: + vec_sim = max(0.0, 1 - r.get("distance", 1.0) / max(max_dist, 0.001)) + bm25 = _bm25_score(query, r.get("text", "")) + # Normalize BM25 to roughly 0-1 range + bm25_norm = min(bm25 / 3.0, 1.0) + r["_hybrid_score"] = vector_weight * vec_sim + bm25_weight * bm25_norm + r["bm25_score"] = round(bm25, 3) + + vector_results.sort(key=lambda r: r["_hybrid_score"], reverse=True) + # Clean up internal field + for r in vector_results: + del r["_hybrid_score"] + return vector_results + + def build_where_filter(wing: str = None, room: str = None) -> dict: """Build ChromaDB where filter for wing/room filtering.""" if wing and room: @@ -186,6 +242,8 @@ def search_memories( break if hits: + # Re-rank with BM25 hybrid scoring + hits = _hybrid_rank(hits, query) return { "query": query, "filters": {"wing": wing, "room": room}, @@ -227,6 +285,8 @@ def search_memories( } ) + # Re-rank with BM25 hybrid scoring + hits = _hybrid_rank(hits, query) return { "query": query, "filters": {"wing": wing, "room": room}, From f72ffbbcb2766b04be64e33a8b2e66788d4b22eb Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Mon, 13 Apr 2026 07:42:25 -0300 Subject: [PATCH 07/42] test: add tests for mine_lock, closets, entity metadata, BM25, diary Trimmed version of Milla's omnibus test_closets.py to only cover features present in this PR stack (#784 lock, #788 closets, this PR's entity/BM25/diary). Strip-noise tests will land with #785; tunnel tests will land with the tunnels PR. 16/16 pass. Co-Authored-By: MSL <232237854+milla-jovovich@users.noreply.github.com> --- tests/test_closets.py | 201 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 201 insertions(+) create mode 100644 tests/test_closets.py diff --git a/tests/test_closets.py b/tests/test_closets.py new file mode 100644 index 0000000..b365102 --- /dev/null +++ b/tests/test_closets.py @@ -0,0 +1,201 @@ +"""Tests for the closet layer, mine_lock, entity metadata, BM25 hybrid search, +and diary ingest. + +Content derived from Milla's omnibus test file; trimmed to only the features +present in this PR stack (#784 lock, #788 closets, this PR's entity/BM25/diary). +Strip-noise tests live with #785; tunnel tests live with the tunnels PR. +""" + +import os +import tempfile +import threading +import time + +from mempalace.palace import ( + CLOSET_CHAR_LIMIT, + build_closet_lines, + get_closets_collection, + get_collection, + mine_lock, + upsert_closet_lines, +) +from mempalace.miner import _extract_entities_for_metadata +from mempalace.searcher import _bm25_score, _hybrid_rank + + +# ── mine_lock ──────────────────────────────────────────────────────────── + + +class TestMineLock: + def test_lock_acquires_and_releases(self): + with mine_lock("/tmp/test_lock_file.txt"): + lock_dir = os.path.expanduser("~/.mempalace/locks") + assert os.path.isdir(lock_dir) + + def test_lock_blocks_concurrent_access(self): + results = [] + + def worker(name): + start = time.time() + with mine_lock("/tmp/same_file_lock_test.txt"): + results.append((name, time.time() - start)) + time.sleep(0.2) + + t1 = threading.Thread(target=worker, args=("a",)) + t2 = threading.Thread(target=worker, args=("b",)) + t1.start() + time.sleep(0.05) + t2.start() + t1.join() + t2.join() + + # Second thread should have waited + wait_times = sorted(results, key=lambda x: x[1]) + assert wait_times[1][1] > 0.1, "Second thread should block" + + +# ── closet lines ───────────────────────────────────────────────────────── + + +class TestBuildClosetLines: + def test_returns_list_of_lines(self): + lines = build_closet_lines( + "/tmp/test.py", ["drawer_001"], "We built the auth system", "code", "general" + ) + assert isinstance(lines, list) + assert len(lines) >= 1 + + def test_each_line_has_pointer(self): + lines = build_closet_lines( + "/tmp/test.py", + ["drawer_001", "drawer_002"], + "We built the auth system and tested the login flow", + "code", + "general", + ) + for line in lines: + assert "→" in line, f"Line missing pointer: {line}" + + def test_fallback_when_no_topics(self): + lines = build_closet_lines( + "/tmp/test.py", ["drawer_001"], "short text", "wing", "room" + ) + assert len(lines) >= 1 + assert "→" in lines[0] + + +# ── upsert_closet_lines ───────────────────────────────────────────────── + + +class TestUpsertClosetLines: + def test_writes_closets(self): + with tempfile.TemporaryDirectory() as tmpdir: + col = get_closets_collection(tmpdir) + lines = [ + "topic one|Entity1|→drawer_001", + "topic two|Entity2|→drawer_002", + ] + n = upsert_closet_lines(col, "test_closet", lines, {"wing": "test"}) + assert n >= 1 + assert col.count() >= 1 + + def test_never_splits_mid_topic(self): + with tempfile.TemporaryDirectory() as tmpdir: + col = get_closets_collection(tmpdir) + # Create lines that together exceed CLOSET_CHAR_LIMIT + lines = [f"topic_{i}|{'x' * 200}|→drawer_{i}" for i in range(20)] + n = upsert_closet_lines(col, "test_closet", lines, {"wing": "test"}) + assert n >= 2, "Should create multiple closets" + + # Verify each closet has complete lines + all_data = col.get(include=["documents"]) + for doc in all_data["documents"]: + for line in doc.strip().split("\n"): + assert "→" in line, f"Split topic found: {line}" + + def test_respects_char_limit(self): + with tempfile.TemporaryDirectory() as tmpdir: + col = get_closets_collection(tmpdir) + lines = [f"topic_{i}|entities|→drawer_{i}" for i in range(50)] + upsert_closet_lines(col, "test_closet", lines, {"wing": "test"}) + + all_data = col.get(include=["documents"]) + for doc in all_data["documents"]: + assert len(doc) <= CLOSET_CHAR_LIMIT + 100 # small buffer for existing content + + +# ── entity metadata ────────────────────────────────────────────────────── + + +class TestEntityMetadata: + def test_extracts_capitalized_names(self): + text = "Ben reviewed the code. Ben approved it. Igor flagged two issues. Igor fixed them." + entities = _extract_entities_for_metadata(text) + assert "Ben" in entities + assert "Igor" in entities + + def test_empty_for_no_entities(self): + text = "this is all lowercase with no proper nouns at all" + entities = _extract_entities_for_metadata(text) + assert entities == "" + + def test_semicolon_separated(self): + text = "Alice and Bob met Charlie. Alice said hello. Bob agreed. Charlie laughed." + entities = _extract_entities_for_metadata(text) + assert ";" in entities + + +# ── BM25 hybrid search ────────────────────────────────────────────────── + + +class TestBM25: + def test_bm25_score_positive_for_match(self): + score = _bm25_score("database migration", "We migrated the database to Postgres") + assert score > 0 + + def test_bm25_score_zero_for_no_match(self): + score = _bm25_score("quantum physics", "We built a web application in React") + assert score == 0.0 + + def test_hybrid_rank_reorders(self): + results = [ + {"text": "database schema design for Postgres", "distance": 0.5}, + {"text": "unrelated topic about cooking", "distance": 0.3}, + ] + ranked = _hybrid_rank(results, "database Postgres schema") + # The database result should rank higher despite worse vector distance + assert "database" in ranked[0]["text"] + + +# ── diary ingest ───────────────────────────────────────────────────────── + + +class TestDiaryIngest: + def test_ingest_creates_drawers_and_closets(self): + with tempfile.TemporaryDirectory() as palace_dir: + diary_dir = tempfile.mkdtemp() + # Write a test diary + with open(os.path.join(diary_dir, "2026-04-13.md"), "w") as f: + f.write("# 2026-04-13\n\n## 10:00 PDT — Test\n\nBuilt the auth system.\n") + + from mempalace.diary_ingest import ingest_diaries + + result = ingest_diaries(diary_dir, palace_dir, force=True) + assert result["days_updated"] >= 1 + + # Check drawer exists + drawers = get_collection(palace_dir) + count = drawers.count() + assert count >= 1 + + def test_ingest_skips_unchanged(self): + with tempfile.TemporaryDirectory() as palace_dir: + diary_dir = tempfile.mkdtemp() + with open(os.path.join(diary_dir, "2026-04-13.md"), "w") as f: + f.write("# 2026-04-13\n\n## 10:00 — Test\n\nContent.\n") + + from mempalace.diary_ingest import ingest_diaries + + ingest_diaries(diary_dir, palace_dir, force=True) + result = ingest_diaries(diary_dir, palace_dir) # second run, no force + assert result["days_updated"] == 0 From 1b4ce0b1f8956436d7c6e9e5bb1ef314550db83d Mon Sep 17 00:00:00 2001 From: MSL <232237854+milla-jovovich@users.noreply.github.com> Date: Mon, 13 Apr 2026 02:05:55 -0700 Subject: [PATCH 08/42] feat: explicit cross-wing tunnels for multi-project agents MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds active tunnel creation alongside passive tunnel discovery. Passive tunnels (existing): rooms with the same name across wings. Explicit tunnels (new): agent-created links between specific locations. "This API design in project_api relates to the database schema in project_database." New functions in palace_graph.py: - create_tunnel() — link two wing/room pairs with a label - list_tunnels() — list all explicit tunnels, filter by wing - delete_tunnel() — remove a tunnel by ID - follow_tunnels() — from a room, find all connected rooms in other wings with drawer content previews New MCP tools: - mempalace_create_tunnel - mempalace_list_tunnels - mempalace_delete_tunnel - mempalace_follow_tunnels Tunnels stored in ~/.mempalace/tunnels.json (persists across palace rebuilds). Deduplicated by endpoint pair. 689/689 tests pass. Co-Authored-By: Claude Opus 4.6 (1M context) --- mempalace/mcp_server.py | 109 ++++++++++++++++++++++++- mempalace/palace_graph.py | 162 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 270 insertions(+), 1 deletion(-) diff --git a/mempalace/mcp_server.py b/mempalace/mcp_server.py index 4e21426..89b74f7 100644 --- a/mempalace/mcp_server.py +++ b/mempalace/mcp_server.py @@ -35,7 +35,7 @@ from .version import __version__ import chromadb from .query_sanitizer import sanitize_query from .searcher import search_memories -from .palace_graph import traverse, find_tunnels, graph_stats +from .palace_graph import traverse, find_tunnels, graph_stats, create_tunnel, list_tunnels, delete_tunnel, follow_tunnels from .knowledge_graph import KnowledgeGraph @@ -496,6 +496,63 @@ def tool_graph_stats(): return graph_stats(col=col) +def tool_create_tunnel( + source_wing: str, + source_room: str, + target_wing: str, + target_room: str, + label: str = "", + source_drawer_id: str = None, + target_drawer_id: str = None, +): + """Create an explicit cross-wing tunnel between two palace locations. + + Use when you notice content in one project relates to another project. + Example: an API design discussion in project_api connects to the + database schema in project_database. + """ + try: + source_wing = sanitize_name(source_wing, "source_wing") + source_room = sanitize_name(source_room, "source_room") + target_wing = sanitize_name(target_wing, "target_wing") + target_room = sanitize_name(target_room, "target_room") + except ValueError as e: + return {"error": str(e)} + return create_tunnel( + source_wing, source_room, target_wing, target_room, + label=label, + source_drawer_id=source_drawer_id, + target_drawer_id=target_drawer_id, + ) + + +def tool_list_tunnels(wing: str = None): + """List all explicit cross-wing tunnels, optionally filtered by wing.""" + try: + wing = _sanitize_optional_name(wing, "wing") + except ValueError as e: + return {"error": str(e)} + return list_tunnels(wing) + + +def tool_delete_tunnel(tunnel_id: str): + """Delete an explicit tunnel by its ID.""" + if not tunnel_id or not isinstance(tunnel_id, str): + return {"error": "tunnel_id is required"} + return delete_tunnel(tunnel_id) + + +def tool_follow_tunnels(wing: str, room: str): + """Follow explicit tunnels from a room to see connected drawers in other wings.""" + try: + wing = sanitize_name(wing, "wing") + room = sanitize_name(room, "room") + except ValueError as e: + return {"error": str(e)} + col = _get_collection() + return follow_tunnels(wing, room, col=col) + + # ==================== WRITE TOOLS ==================== @@ -1181,6 +1238,56 @@ TOOLS = { "input_schema": {"type": "object", "properties": {}}, "handler": tool_graph_stats, }, + "mempalace_create_tunnel": { + "description": "Create a cross-wing tunnel linking two palace locations. Use when content in one project relates to another — e.g., an API design in project_api connects to a database schema in project_database.", + "input_schema": { + "type": "object", + "properties": { + "source_wing": {"type": "string", "description": "Wing of the source"}, + "source_room": {"type": "string", "description": "Room in the source wing"}, + "target_wing": {"type": "string", "description": "Wing of the target"}, + "target_room": {"type": "string", "description": "Room in the target wing"}, + "label": {"type": "string", "description": "Description of the connection"}, + "source_drawer_id": {"type": "string", "description": "Optional specific drawer ID"}, + "target_drawer_id": {"type": "string", "description": "Optional specific drawer ID"}, + }, + "required": ["source_wing", "source_room", "target_wing", "target_room"], + }, + "handler": tool_create_tunnel, + }, + "mempalace_list_tunnels": { + "description": "List all explicit cross-wing tunnels. Optionally filter by wing.", + "input_schema": { + "type": "object", + "properties": { + "wing": {"type": "string", "description": "Filter tunnels by wing (shows tunnels where wing is source or target)"}, + }, + }, + "handler": tool_list_tunnels, + }, + "mempalace_delete_tunnel": { + "description": "Delete an explicit tunnel by its ID.", + "input_schema": { + "type": "object", + "properties": { + "tunnel_id": {"type": "string", "description": "Tunnel ID to delete"}, + }, + "required": ["tunnel_id"], + }, + "handler": tool_delete_tunnel, + }, + "mempalace_follow_tunnels": { + "description": "Follow tunnels from a room to see what it connects to in other wings. Returns connected rooms with drawer previews.", + "input_schema": { + "type": "object", + "properties": { + "wing": {"type": "string", "description": "Wing to start from"}, + "room": {"type": "string", "description": "Room to follow tunnels from"}, + }, + "required": ["wing", "room"], + }, + "handler": tool_follow_tunnels, + }, "mempalace_search": { "description": "Semantic search. Returns verbatim drawer content with similarity scores. IMPORTANT: 'query' must contain ONLY search keywords. Use 'context' for background. Results with cosine distance > max_distance are filtered out.", "input_schema": { diff --git a/mempalace/palace_graph.py b/mempalace/palace_graph.py index 5e2e72e..2792d99 100644 --- a/mempalace/palace_graph.py +++ b/mempalace/palace_graph.py @@ -15,7 +15,11 @@ Enables queries like: No external graph DB needed — built from ChromaDB metadata. """ +import hashlib +import json +import os from collections import defaultdict, Counter +from datetime import datetime from .config import MempalaceConfig from .palace import get_collection as _get_palace_collection @@ -228,3 +232,161 @@ def _fuzzy_match(query: str, nodes: dict, n: int = 5): scored.append((room, 0.5)) scored.sort(key=lambda x: -x[1]) return [r for r, _ in scored[:n]] + + +# ============================================================================= +# EXPLICIT TUNNELS — agent-created cross-wing links +# ============================================================================= +# Passive tunnels are discovered from shared room names across wings. +# Explicit tunnels are created by agents when they notice a connection +# between two specific drawers or rooms in different wings/projects. +# +# Stored as a JSON file at ~/.mempalace/tunnels.json so they persist +# across palace rebuilds (not in ChromaDB which can be recreated). + + +_TUNNEL_FILE = os.path.join(os.path.expanduser("~"), ".mempalace", "tunnels.json") + + +def _load_tunnels(): + """Load explicit tunnels from disk.""" + if os.path.exists(_TUNNEL_FILE): + try: + return json.loads(open(_TUNNEL_FILE).read()) + except Exception: + pass + return [] + + +def _save_tunnels(tunnels): + """Save explicit tunnels to disk.""" + os.makedirs(os.path.dirname(_TUNNEL_FILE), exist_ok=True) + with open(_TUNNEL_FILE, "w") as f: + json.dump(tunnels, f, indent=2) + + +def create_tunnel( + source_wing: str, + source_room: str, + target_wing: str, + target_room: str, + label: str = "", + source_drawer_id: str = None, + target_drawer_id: str = None, +): + """Create an explicit tunnel between two locations in the palace. + + Use when an agent notices a connection between two projects/wings + that wouldn't be found by passive room-name matching. + + Args: + source_wing: Wing of the source (e.g., "project_api") + source_room: Room in the source wing + target_wing: Wing of the target (e.g., "project_database") + target_room: Room in the target wing + label: Description of the connection + source_drawer_id: Optional specific drawer ID + target_drawer_id: Optional specific drawer ID + + Returns: + The created tunnel dict. + """ + tunnel_id = hashlib.sha256( + f"{source_wing}/{source_room}↔{target_wing}/{target_room}".encode() + ).hexdigest()[:16] + + tunnel = { + "id": tunnel_id, + "source": {"wing": source_wing, "room": source_room}, + "target": {"wing": target_wing, "room": target_room}, + "label": label, + "created_at": datetime.now().isoformat(), + } + if source_drawer_id: + tunnel["source"]["drawer_id"] = source_drawer_id + if target_drawer_id: + tunnel["target"]["drawer_id"] = target_drawer_id + + tunnels = _load_tunnels() + + # Dedup — don't create if same endpoints already linked + for existing in tunnels: + if existing.get("id") == tunnel_id: + existing.update(tunnel) # update label/drawers + _save_tunnels(tunnels) + return existing + + tunnels.append(tunnel) + _save_tunnels(tunnels) + return tunnel + + +def list_tunnels(wing: str = None): + """List all explicit tunnels, optionally filtered by wing. + + Returns tunnels where the wing appears as either source or target. + """ + tunnels = _load_tunnels() + if wing: + tunnels = [ + t for t in tunnels + if t["source"]["wing"] == wing or t["target"]["wing"] == wing + ] + return tunnels + + +def delete_tunnel(tunnel_id: str): + """Delete an explicit tunnel by ID.""" + tunnels = _load_tunnels() + tunnels = [t for t in tunnels if t.get("id") != tunnel_id] + _save_tunnels(tunnels) + return {"deleted": tunnel_id} + + +def follow_tunnels(wing: str, room: str, col=None, config=None): + """Follow explicit tunnels from a room — returns connected drawers. + + Given a location (wing/room), finds all tunnels leading from or to it, + and optionally fetches the connected drawer content. + """ + tunnels = _load_tunnels() + connections = [] + + for t in tunnels: + src = t["source"] + tgt = t["target"] + + if src["wing"] == wing and src["room"] == room: + connections.append({ + "direction": "outgoing", + "connected_wing": tgt["wing"], + "connected_room": tgt["room"], + "label": t.get("label", ""), + "drawer_id": tgt.get("drawer_id"), + "tunnel_id": t["id"], + }) + elif tgt["wing"] == wing and tgt["room"] == room: + connections.append({ + "direction": "incoming", + "connected_wing": src["wing"], + "connected_room": src["room"], + "label": t.get("label", ""), + "drawer_id": src.get("drawer_id"), + "tunnel_id": t["id"], + }) + + # If we have a collection, fetch drawer content for connected items + if col and connections: + drawer_ids = [c["drawer_id"] for c in connections if c.get("drawer_id")] + if drawer_ids: + try: + results = col.get(ids=drawer_ids, include=["documents", "metadatas"]) + drawer_map = dict(zip(results["ids"], results["documents"])) + for c in connections: + did = c.get("drawer_id") + if did and did in drawer_map: + c["drawer_preview"] = drawer_map[did][:300] + except Exception: + pass + + return connections From e2a9bb05d37712af2dc488769713f97c7059f8e5 Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Mon, 13 Apr 2026 07:44:32 -0300 Subject: [PATCH 09/42] test: add TestTunnels for cross-wing tunnel operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Appended from Milla's omnibus test_closets.py — covers create, list, delete, dedup, and follow_tunnels behavior. 21/21 pass. Co-Authored-By: MSL <232237854+milla-jovovich@users.noreply.github.com> --- tests/test_closets.py | 61 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/tests/test_closets.py b/tests/test_closets.py index b365102..57c989d 100644 --- a/tests/test_closets.py +++ b/tests/test_closets.py @@ -21,6 +21,13 @@ from mempalace.palace import ( ) from mempalace.miner import _extract_entities_for_metadata from mempalace.searcher import _bm25_score, _hybrid_rank +from mempalace.palace_graph import ( + create_tunnel, + list_tunnels, + delete_tunnel, + follow_tunnels, + _TUNNEL_FILE, +) # ── mine_lock ──────────────────────────────────────────────────────────── @@ -199,3 +206,57 @@ class TestDiaryIngest: ingest_diaries(diary_dir, palace_dir, force=True) result = ingest_diaries(diary_dir, palace_dir) # second run, no force assert result["days_updated"] == 0 + + +# ── tunnels ────────────────────────────────────────────────────────────── + + +class TestTunnels: + def setup_method(self): + # Use temp tunnel file + self._orig = _TUNNEL_FILE + import mempalace.palace_graph as pg + self._tmpdir = tempfile.mkdtemp() + pg._TUNNEL_FILE = os.path.join(self._tmpdir, "tunnels.json") + + def teardown_method(self): + import mempalace.palace_graph as pg + pg._TUNNEL_FILE = self._orig + + def test_create_tunnel(self): + t = create_tunnel("wing_api", "auth", "wing_db", "users", label="auth uses users table") + assert t["id"] + assert t["source"]["wing"] == "wing_api" + assert t["target"]["wing"] == "wing_db" + assert t["label"] == "auth uses users table" + + def test_list_tunnels(self): + create_tunnel("wing_a", "room1", "wing_b", "room2") + create_tunnel("wing_a", "room3", "wing_c", "room4") + all_t = list_tunnels() + assert len(all_t) == 2 + filtered = list_tunnels("wing_a") + assert len(filtered) == 2 + filtered_c = list_tunnels("wing_c") + assert len(filtered_c) == 1 + + def test_delete_tunnel(self): + t = create_tunnel("wing_x", "r1", "wing_y", "r2") + delete_tunnel(t["id"]) + assert len(list_tunnels()) == 0 + + def test_dedup_same_endpoints(self): + create_tunnel("wing_a", "r1", "wing_b", "r2", label="first") + create_tunnel("wing_a", "r1", "wing_b", "r2", label="updated") + tunnels = list_tunnels() + assert len(tunnels) == 1 + assert tunnels[0]["label"] == "updated" + + def test_follow_tunnels(self): + create_tunnel("wing_api", "auth", "wing_db", "users") + create_tunnel("wing_api", "auth", "wing_frontend", "login") + connections = follow_tunnels("wing_api", "auth") + assert len(connections) == 2 + wings = {c["connected_wing"] for c in connections} + assert "wing_db" in wings + assert "wing_frontend" in wings From 971b92da5d879e444a5f44210a6f2b084c75bf9a Mon Sep 17 00:00:00 2001 From: MSL <232237854+milla-jovovich@users.noreply.github.com> Date: Mon, 13 Apr 2026 07:46:07 -0300 Subject: [PATCH 10/42] feat(search): drawer-grep returns best-matching chunk + neighbors When a closet hit leads to a source file with many drawers, grep each chunk for query terms and return the BEST-MATCHING chunk + 1 neighbor on each side, instead of dumping the whole file truncated at MAX_HYDRATION_CHARS. Result now includes drawer_index and total_drawers so callers can request adjacent drawers explicitly. Extracted from Milla's commit 935f657 which bundled drawer-grep with closet_llm (deferred pending LLM_ENDPOINT refactor) and fact_checker (separate PR). Ported only the searcher.py change. Co-Authored-By: Claude Opus 4.6 (1M context) --- mempalace/searcher.py | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/mempalace/searcher.py b/mempalace/searcher.py index 37795fc..19b07f4 100644 --- a/mempalace/searcher.py +++ b/mempalace/searcher.py @@ -205,6 +205,8 @@ def search_memories( pass # no closets yet — fall through to direct drawer search # If closets found results, hydrate the referenced drawers + MAX_HYDRATION_CHARS = 10000 # cap to prevent blowup on large source files + if closet_hits: import re seen_sources = set() @@ -215,18 +217,39 @@ def search_memories( continue seen_sources.add(source) - # Find drawers for this source file + # Find drawers for this source file, grep for most relevant chunk try: drawer_results = drawers_col.get( where={"source_file": source}, include=["documents", "metadatas"], ) if drawer_results.get("ids"): - # Combine all drawer content for this file - full_text = "\n\n".join(drawer_results["documents"]) - meta = drawer_results["metadatas"][0] + # Drawer-grep: score each chunk against the query, + # return the best-matching chunk first + surrounding context + query_terms = set(re.findall(r'\w{2,}', query.lower())) + best_idx = 0 + best_score = -1 + for idx, doc in enumerate(drawer_results["documents"]): + doc_lower = doc.lower() + score = sum(1 for t in query_terms if t in doc_lower) + if score > best_score: + best_score = score + best_idx = idx + + # Build result: best chunk first, then neighbors + docs = drawer_results["documents"] + n_docs = len(docs) + # Include best chunk + 1 before + 1 after for context + start = max(0, best_idx - 1) + end = min(n_docs, best_idx + 2) + relevant_text = "\n\n".join(docs[start:end]) + + if len(relevant_text) > MAX_HYDRATION_CHARS: + relevant_text = relevant_text[:MAX_HYDRATION_CHARS] + f"\n\n[...truncated. {n_docs} total drawers. Use mempalace_get_drawer for full content.]" + + meta = drawer_results["metadatas"][best_idx] hits.append({ - "text": full_text, + "text": relevant_text, "wing": meta.get("wing", "unknown"), "room": meta.get("room", "unknown"), "source_file": Path(source).name, @@ -234,6 +257,8 @@ def search_memories( "distance": round(closet_dist, 4), "matched_via": "closet", "closet_preview": closet_doc[:200], + "drawer_index": best_idx, + "total_drawers": n_docs, }) except Exception: pass From 4a6147f903a95c9e573ee98e4cb3d624eb3ff8fc Mon Sep 17 00:00:00 2001 From: MSL <232237854+milla-jovovich@users.noreply.github.com> Date: Mon, 13 Apr 2026 07:47:40 -0300 Subject: [PATCH 11/42] feat: offline fact checker against entity registry + knowledge graph MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fact_checker.py verifies text for contradictions against locally stored entities and KG facts. Catches similar-name confusion (Bob vs Bobby), relationship mismatches (KG says husband, text says brother), and stale facts (KG valid_from/valid_to). No hardcoded facts. No network calls. Reads: - ~/.mempalace/known_entities.json - KnowledgeGraph SQLite Usage: from mempalace.fact_checker import check_text issues = check_text("Bob is Alice's brother", palace_path) # CLI python -m mempalace.fact_checker "text" --palace ~/.mempalace/palace Extracted from Milla's commit 935f657 which bundled this with closet_llm (deferred) and drawer-grep (PR #791). Ported only fact_checker.py — verified no network / API imports. Co-Authored-By: Claude Opus 4.6 (1M context) --- mempalace/fact_checker.py | 177 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 mempalace/fact_checker.py diff --git a/mempalace/fact_checker.py b/mempalace/fact_checker.py new file mode 100644 index 0000000..281f117 --- /dev/null +++ b/mempalace/fact_checker.py @@ -0,0 +1,177 @@ +""" +fact_checker.py — Verify text against known facts in the palace. + +Checks AI responses, diary entries, and new content against the +entity registry and knowledge graph for contradictions. Catches: + - Wrong names (similar but different entities) + - Wrong relationships (calling someone the wrong role) + - Stale facts (things that changed — KG has valid_from/valid_to) + +Uses the entity_registry and knowledge_graph — no hardcoded facts. + +Usage: + from mempalace.fact_checker import check_text + issues = check_text("Bob is Alice's brother", palace_path) + # → [{"type": "relationship_mismatch", "detail": "KG says Bob is Alice's husband"}] + + # CLI + python -m mempalace.fact_checker "Bob is Alice's brother" --palace ~/.mempalace/palace +""" + +import os +import re +from pathlib import Path + + +def check_text(text, palace_path=None, config=None): + """Check text for contradictions against known facts. + + Returns list of issues found. Empty list = no contradictions. + """ + if config is None: + from .config import MempalaceConfig + config = MempalaceConfig() + if palace_path is None: + palace_path = config.palace_path + + issues = [] + + # Load known entities + entity_names = _load_known_entities() + + # Check entity name confusion (similar names that might be mixed up) + issues.extend(_check_entity_confusion(text, entity_names)) + + # Check against knowledge graph facts + issues.extend(_check_kg_facts(text, palace_path)) + + return issues + + +def _load_known_entities(): + """Load entity names from the registry.""" + import json + registry_path = os.path.expanduser("~/.mempalace/known_entities.json") + if not os.path.exists(registry_path): + return {} + try: + return json.loads(open(registry_path).read()) + except Exception: + return {} + + +def _check_entity_confusion(text, entity_names): + """Check if text confuses similar entity names.""" + issues = [] + all_names = set() + for cat in entity_names.values(): + if isinstance(cat, list): + all_names.update(cat) + elif isinstance(cat, dict): + all_names.update(cat.keys()) + + # Find names mentioned in text + mentioned = set() + for name in all_names: + if re.search(r'\b' + re.escape(name) + r'\b', text, re.IGNORECASE): + mentioned.add(name) + + # Check for names that are very similar but different (edit distance 1-2) + name_list = sorted(all_names) + for i, name_a in enumerate(name_list): + for name_b in name_list[i + 1:]: + if _edit_distance(name_a.lower(), name_b.lower()) <= 2: + if name_a in mentioned or name_b in mentioned: + if name_a in text and name_b not in text: + issues.append({ + "type": "similar_name", + "detail": f"'{name_a}' mentioned — did you mean '{name_b}'? (similar names in registry)", + "names": [name_a, name_b], + }) + return issues + + +def _check_kg_facts(text, palace_path): + """Check text against knowledge graph for contradictions.""" + issues = [] + try: + from .knowledge_graph import KnowledgeGraph + kg = KnowledgeGraph(palace_path=palace_path) + + # Extract relationship claims from text + # Pattern: "X is Y's Z" or "X's Z is Y" + patterns = [ + (r"(\w+)\s+is\s+(\w+)'s\s+(\w+)", "subject", "possessor", "role"), + (r"(\w+)'s\s+(\w+)\s+is\s+(\w+)", "possessor", "role", "subject"), + ] + + for pattern, *roles in patterns: + for match in re.finditer(pattern, text, re.IGNORECASE): + groups = match.groups() + subject = groups[0] + # Query KG for this entity + try: + facts = kg.query(subject) + if facts: + for fact in facts: + # Check if the claim contradicts a known fact + if fact.get("valid_to") is None: # current fact + kg_pred = fact.get("predicate", "").lower() + claim = match.group(0).lower() + if kg_pred in claim and fact.get("object", "").lower() not in claim: + issues.append({ + "type": "relationship_mismatch", + "detail": f"Text says '{match.group(0)}' but KG says: {subject} {kg_pred} {fact.get('object')}", + "entity": subject, + }) + except Exception: + pass + except Exception: + pass # KG not available — skip + + return issues + + +def _edit_distance(s1, s2): + """Simple Levenshtein distance.""" + if len(s1) < len(s2): + return _edit_distance(s2, s1) + if len(s2) == 0: + return len(s1) + prev = list(range(len(s2) + 1)) + for i, c1 in enumerate(s1): + curr = [i + 1] + for j, c2 in enumerate(s2): + curr.append(min( + prev[j + 1] + 1, + curr[j] + 1, + prev[j] + (0 if c1 == c2 else 1), + )) + prev = curr + return prev[-1] + + +if __name__ == "__main__": + import argparse + import json + + parser = argparse.ArgumentParser(description="Check text against known facts") + parser.add_argument("text", nargs="?", help="Text to check") + parser.add_argument("--palace", default=os.path.expanduser("~/.mempalace/palace")) + parser.add_argument("--stdin", action="store_true", help="Read from stdin") + args = parser.parse_args() + + if args.stdin: + import sys + text = sys.stdin.read() + elif args.text: + text = args.text + else: + print("Provide text as argument or use --stdin") + exit(1) + + issues = check_text(text, palace_path=args.palace) + if issues: + print(json.dumps(issues, indent=2)) + else: + print("No contradictions found.") From 4d581cbb730b26d78e29e4e85115b948e0c0603e Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Mon, 13 Apr 2026 07:51:46 -0300 Subject: [PATCH 12/42] =?UTF-8?q?feat:=20optional=20LLM-based=20closet=20r?= =?UTF-8?q?egeneration=20=E2=80=94=20bring-your-own=20endpoint?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds mempalace/closet_llm.py as an OPTIONAL path for richer closet generation. Regex closets remain the default and cover the local-first promise; users who want LLM-quality topics can bring their own endpoint. Configuration (env or CLI flag): LLM_ENDPOINT — OpenAI-compatible base URL (required) LLM_KEY — bearer token (optional; local inference skips this) LLM_MODEL — model name (required) Works with Ollama, vLLM, llama.cpp servers, OpenAI, OpenRouter, and any other provider that speaks OpenAI-compatible /chat/completions. Zero new dependencies — uses stdlib urllib. Replaces the original Anthropic-SDK-hardcoded version of this module from Milla's branch (commit 935f657). Same prompt, same parsing, same regenerate_closets flow; only the transport was generalised so the feature doesn't lock users into a specific vendor or require API keys for core memory operations (CLAUDE.md, "Local-first, zero API"). Includes 13 unit tests covering config resolution, request shape, auth-header omission when no key is set, code-fence stripping, and missing-config error path. All mocked — zero network calls in tests. Co-Authored-By: MSL <232237854+milla-jovovich@users.noreply.github.com> --- mempalace/closet_llm.py | 345 +++++++++++++++++++++++++++++++++++++++ tests/test_closet_llm.py | 222 +++++++++++++++++++++++++ 2 files changed, 567 insertions(+) create mode 100644 mempalace/closet_llm.py create mode 100644 tests/test_closet_llm.py diff --git a/mempalace/closet_llm.py b/mempalace/closet_llm.py new file mode 100644 index 0000000..35ec6d6 --- /dev/null +++ b/mempalace/closet_llm.py @@ -0,0 +1,345 @@ +""" +closet_llm.py — Generate closets via a user-configured LLM for richer indexing. + +The regex-based closet extraction catches action verbs, headers, and proper +nouns — but misses implicit topics, foreign-language content, and contextual +references. An LLM reads everything and produces better closets. + +This module is **OPTIONAL and opt-in**. Regex closets are always created by +the miner; this path regenerates them afterward using whatever LLM the user +chooses. Core memory operations remain API-free by design (see CLAUDE.md, +"Local-first, zero API"). + +## Bring-your-own-LLM configuration + +The endpoint is any OpenAI-compatible Chat Completions URL: + + LLM_ENDPOINT=http://localhost:11434/v1 # Ollama + LLM_ENDPOINT=http://localhost:8000/v1 # vLLM, llama.cpp + LLM_ENDPOINT=https://api.openai.com/v1 + LLM_ENDPOINT=https://openrouter.ai/api/v1 + LLM_ENDPOINT=https://api.anthropic.com/v1 # when proxied through a compat layer + +Set: + LLM_ENDPOINT — base URL (required) + LLM_KEY — bearer token (optional; local inference usually doesn't need it) + LLM_MODEL — model name (required), e.g. "gpt-4o-mini", "llama3:8b", "qwen2.5:7b" + +Or pass flags on the CLI (flags win over env): + + python -m mempalace.closet_llm \\ + --palace ~/.mempalace/palace \\ + --endpoint http://localhost:11434/v1 \\ + --model llama3:8b + +No vendor lock-in. No hidden dependency on any specific provider. Zero deps +added to pyproject — uses stdlib urllib. +""" + +import json +import os +import re +import time +import urllib.request +import urllib.error +from datetime import datetime +from typing import Optional + +from .palace import get_collection, get_closets_collection, upsert_closet_lines + +MAX_CONTENT_CHARS = 30000 +MAX_OUTPUT_TOKENS = 1500 +HTTP_TIMEOUT_S = 60 + +PROMPT_TEMPLATE = """You are reading content filed in a memory palace. Generate a +topic-dense index that will be used to find this content later when someone searches. + +Source: {source_file} +Wing: {wing} | Room: {room} + +CONTENT: +{content} + +--- + +Output a JSON object with EXACTLY these fields: + +{{ + "topics": ["distinctive_word_or_phrase_1", "topic_2", ...], + "quotes": ["[Speaker] verbatim quote", ...], + "summary": "2-3 sentences describing what this content is about." +}} + +RULES: +- Topics: 8-15 entries. Include proper nouns (names, places, projects), + distinctive technical terms, and key concepts. NOT generic words like + "conversation" or "discussion". +- Quotes: 2-5 entries. EXACT verbatim from the content, not paraphrased. + Attribute with [Speaker] prefix if speaker is identifiable. +- Summary: mention WHO, WHAT, and WHY. No filler. +- Write in the same language as the content. +- Output valid JSON only. No code fences. No commentary. +""" + + +class LLMConfig: + """Resolved LLM connection config. CLI flags > env vars.""" + + def __init__( + self, + endpoint: Optional[str] = None, + key: Optional[str] = None, + model: Optional[str] = None, + ): + self.endpoint = (endpoint or os.environ.get("LLM_ENDPOINT", "")).rstrip("/") + self.key = key or os.environ.get("LLM_KEY", "") + self.model = model or os.environ.get("LLM_MODEL", "") + + def missing(self) -> list: + missing = [] + if not self.endpoint: + missing.append("LLM_ENDPOINT (or --endpoint)") + if not self.model: + missing.append("LLM_MODEL (or --model)") + # key is optional — local inference servers (Ollama, vLLM) often don't require one + return missing + + +def _call_llm(cfg: LLMConfig, source_file: str, wing: str, room: str, content: str): + """Single LLM call via OpenAI-compatible /chat/completions. + + Returns (parsed_json_dict_or_None, usage_dict_or_None). + """ + try: + from mempalace.i18n import t + + lang_instruction = t("aaak.instruction") + except Exception: + lang_instruction = "" + + prompt = PROMPT_TEMPLATE.format( + source_file=source_file[:100], + wing=wing, + room=room, + content=content[:MAX_CONTENT_CHARS], + ) + if lang_instruction and "english" not in lang_instruction.lower(): + prompt += f"\n\nLanguage instruction: {lang_instruction}" + + body = json.dumps( + { + "model": cfg.model, + "max_tokens": MAX_OUTPUT_TOKENS, + "messages": [{"role": "user", "content": prompt}], + } + ).encode("utf-8") + + headers = {"Content-Type": "application/json"} + if cfg.key: + headers["Authorization"] = f"Bearer {cfg.key}" + + url = f"{cfg.endpoint}/chat/completions" + + for attempt in range(3): + try: + req = urllib.request.Request(url, data=body, headers=headers, method="POST") + with urllib.request.urlopen(req, timeout=HTTP_TIMEOUT_S) as resp: + raw = resp.read().decode("utf-8") + payload = json.loads(raw) + + text = payload["choices"][0]["message"]["content"].strip() + text = re.sub(r"^```(?:json)?\s*", "", text) + text = re.sub(r"\s*```$", "", text) + parsed = json.loads(text) + return parsed, payload.get("usage") + except json.JSONDecodeError: + return None, None + except urllib.error.HTTPError as e: + # 429 / 503 = retry with backoff + if e.code in (429, 503) and attempt < 2: + time.sleep(2 ** attempt) + continue + return None, None + except Exception as e: + if "rate" in str(e).lower() and attempt < 2: + time.sleep(2 ** attempt) + continue + return None, None + return None, None + + +def _parsed_to_closet_lines(parsed, drawer_ids, entities_str): + """Convert LLM's JSON output to closet pointer lines.""" + lines = [] + drawer_ref = ",".join(drawer_ids[:3]) + + for topic in parsed.get("topics", [])[:15]: + lines.append(f"{topic}|{entities_str}|→{drawer_ref}") + for quote in parsed.get("quotes", [])[:5]: + lines.append(f'{quote}|{entities_str}|→{drawer_ref}') + summary = parsed.get("summary", "") + if summary: + lines.append(f"{summary[:200]}|{entities_str}|→{drawer_ref}") + + return lines + + +def regenerate_closets( + palace_path, + wing=None, + sample=0, + dry_run=False, + cfg: Optional[LLMConfig] = None, +): + """Regenerate closets using a configured LLM for richer topic extraction. + + Reads existing drawers, sends content to the configured endpoint, + replaces regex closets with LLM-generated ones. Regex closets remain + as the fallback whenever the call fails. + """ + if cfg is None: + cfg = LLMConfig() + missing = cfg.missing() + if missing: + print("Error: missing configuration: " + ", ".join(missing)) + print("Set env vars LLM_ENDPOINT / LLM_MODEL (and optionally LLM_KEY),") + print("or pass --endpoint / --model / --key on the CLI.") + return {"error": "missing-config", "missing": missing} + + drawers_col = get_collection(palace_path, create=False) + closets_col = get_closets_collection(palace_path) + + total = drawers_col.count() + if total == 0: + print("No drawers in palace.") + return {"processed": 0} + + all_data = drawers_col.get(limit=total, include=["documents", "metadatas"]) + by_source = {} + for doc_id, doc, meta in zip(all_data["ids"], all_data["documents"], all_data["metadatas"]): + source = meta.get("source_file", "unknown") + w = meta.get("wing", "") + if wing and w != wing: + continue + if source not in by_source: + by_source[source] = {"drawer_ids": [], "content": [], "meta": meta} + by_source[source]["drawer_ids"].append(doc_id) + by_source[source]["content"].append(doc) + + sources = list(by_source.keys()) + if sample > 0: + sources = sources[:sample] + + print(f"Regenerating closets for {len(sources)} source files via {cfg.endpoint} ({cfg.model})...") + if dry_run: + print("DRY RUN — no changes will be written") + + processed = 0 + failed = 0 + total_input = 0 + total_output = 0 + + for i, source in enumerate(sources, 1): + data = by_source[source] + content = "\n\n".join(data["content"]) + meta = data["meta"] + w = meta.get("wing", "") + r = meta.get("room", "") + entities = meta.get("entities", "") + + if dry_run: + print(f" [{i}/{len(sources)}] {os.path.basename(source)} ({len(content)} chars)") + continue + + parsed, usage = _call_llm(cfg, source, w, r, content) + if not parsed: + failed += 1 + print(f" [{i}/{len(sources)}] ✗ {os.path.basename(source)} — LLM failed") + continue + + if usage: + total_input += usage.get("prompt_tokens", 0) + total_output += usage.get("completion_tokens", 0) + + lines = _parsed_to_closet_lines(parsed, data["drawer_ids"], entities) + closet_id_base = f"closet_{w}_{r}_{source.split('/')[-1][:30]}" + + # Delete old regex closets for this source before writing LLM ones + try: + old_ids = closets_col.get( + where={"source_file": source}, include=[] + ).get("ids", []) + if old_ids: + closets_col.delete(ids=old_ids) + except Exception: + pass + + upsert_closet_lines( + closets_col, + closet_id_base, + lines, + { + "wing": w, + "room": r, + "source_file": source, + "generated_by": f"llm:{cfg.model}", + "filed_at": datetime.now().isoformat(), + "entities": entities, + }, + ) + + processed += 1 + n_topics = len(parsed.get("topics", [])) + print(f" [{i}/{len(sources)}] ✓ {os.path.basename(source)} — {n_topics} topics") + + print(f"\nDone. {processed} regenerated, {failed} failed.") + if total_input or total_output: + print(f"Tokens: {total_input:,} in + {total_output:,} out (cost depends on provider)") + + return { + "processed": processed, + "failed": failed, + "input_tokens": total_input, + "output_tokens": total_output, + } + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser( + description="Regenerate closets via a user-configured LLM (OpenAI-compatible API)" + ) + parser.add_argument( + "--palace", + default=os.path.expanduser("~/.mempalace/palace"), + help="Path to the palace", + ) + parser.add_argument("--wing", default=None, help="Limit to one wing") + parser.add_argument( + "--sample", type=int, default=0, help="Only process first N source files" + ) + parser.add_argument( + "--dry-run", action="store_true", help="List work without calling the LLM" + ) + parser.add_argument( + "--endpoint", + default=None, + help="LLM base URL (overrides $LLM_ENDPOINT), e.g. http://localhost:11434/v1", + ) + parser.add_argument( + "--key", + default=None, + help="LLM bearer token (overrides $LLM_KEY). Optional for local inference.", + ) + parser.add_argument( + "--model", + default=None, + help='LLM model name (overrides $LLM_MODEL), e.g. "gpt-4o-mini" or "llama3:8b"', + ) + args = parser.parse_args() + + cfg = LLMConfig(endpoint=args.endpoint, key=args.key, model=args.model) + regenerate_closets( + args.palace, wing=args.wing, sample=args.sample, dry_run=args.dry_run, cfg=cfg + ) diff --git a/tests/test_closet_llm.py b/tests/test_closet_llm.py new file mode 100644 index 0000000..762e16d --- /dev/null +++ b/tests/test_closet_llm.py @@ -0,0 +1,222 @@ +"""Unit tests for the optional LLM-based closet regeneration. + +These tests don't hit the network. They mock urllib to verify: +- LLMConfig correctly reads env vars and CLI overrides +- missing config is reported cleanly +- the OpenAI-compatible request shape is correct +- response parsing handles the standard chat-completions payload +""" + +import io +import json +import os +import tempfile +from unittest.mock import patch + +import pytest + +from mempalace.closet_llm import ( + LLMConfig, + _call_llm, + _parsed_to_closet_lines, + regenerate_closets, +) + + +# ── LLMConfig ───────────────────────────────────────────────────────────── + + +class TestLLMConfig: + def test_reads_env_vars(self, monkeypatch): + monkeypatch.setenv("LLM_ENDPOINT", "http://localhost:11434/v1") + monkeypatch.setenv("LLM_KEY", "sk-abc") + monkeypatch.setenv("LLM_MODEL", "llama3:8b") + c = LLMConfig() + assert c.endpoint == "http://localhost:11434/v1" + assert c.key == "sk-abc" + assert c.model == "llama3:8b" + + def test_cli_flags_override_env(self, monkeypatch): + monkeypatch.setenv("LLM_ENDPOINT", "http://env-endpoint/v1") + monkeypatch.setenv("LLM_MODEL", "env-model") + c = LLMConfig(endpoint="http://flag-endpoint/v1", model="flag-model") + assert c.endpoint == "http://flag-endpoint/v1" + assert c.model == "flag-model" + + def test_trailing_slash_stripped(self): + c = LLMConfig(endpoint="http://foo/v1/", model="m") + assert c.endpoint == "http://foo/v1" + + def test_missing_reports_required(self, monkeypatch): + monkeypatch.delenv("LLM_ENDPOINT", raising=False) + monkeypatch.delenv("LLM_KEY", raising=False) + monkeypatch.delenv("LLM_MODEL", raising=False) + c = LLMConfig() + missing = c.missing() + assert any("ENDPOINT" in m for m in missing) + assert any("MODEL" in m for m in missing) + # key is optional + assert not any("KEY" in m for m in missing) + + def test_key_is_optional(self, monkeypatch): + monkeypatch.delenv("LLM_KEY", raising=False) + c = LLMConfig(endpoint="http://local/v1", model="m") + assert c.missing() == [] + + +# ── _parsed_to_closet_lines ────────────────────────────────────────────── + + +class TestParsedToLines: + def test_topics_become_pointers(self): + parsed = {"topics": ["authentication", "jwt tokens"], "quotes": [], "summary": ""} + lines = _parsed_to_closet_lines(parsed, ["d1", "d2"], "Alice;Bob") + assert len(lines) == 2 + assert "authentication|Alice;Bob|→d1,d2" in lines + assert "jwt tokens|Alice;Bob|→d1,d2" in lines + + def test_quotes_and_summary_included(self): + parsed = { + "topics": ["t1"], + "quotes": ["[Igor] we ship Friday"], + "summary": "Release planning discussion", + } + lines = _parsed_to_closet_lines(parsed, ["d1"], "") + joined = "\n".join(lines) + assert "we ship Friday" in joined + assert "Release planning discussion" in joined + + def test_caps_topics_at_15(self): + parsed = {"topics": [f"t{i}" for i in range(20)], "quotes": [], "summary": ""} + lines = _parsed_to_closet_lines(parsed, ["d1"], "") + assert len(lines) == 15 + + +# ── _call_llm (HTTP mocked) ────────────────────────────────────────────── + + +class _FakeResp: + """Mimics urlopen's context-manager response.""" + + def __init__(self, payload: dict, status: int = 200): + self._body = json.dumps(payload).encode("utf-8") + self.status = status + + def __enter__(self): + return self + + def __exit__(self, *a): + return False + + def read(self): + return self._body + + +class TestCallLLM: + def _make_cfg(self): + return LLMConfig( + endpoint="http://localhost:11434/v1", key="sk-test", model="llama3:8b" + ) + + def test_request_shape_and_parsing(self): + cfg = self._make_cfg() + captured = {} + + def fake_urlopen(req, timeout=None): + captured["url"] = req.full_url + captured["headers"] = dict(req.header_items()) + captured["body"] = json.loads(req.data.decode("utf-8")) + return _FakeResp( + { + "choices": [ + { + "message": { + "content": json.dumps( + { + "topics": ["postgres"], + "quotes": ["[Igor] migrate now"], + "summary": "db migration", + } + ) + } + } + ], + "usage": {"prompt_tokens": 42, "completion_tokens": 17}, + } + ) + + with patch("urllib.request.urlopen", side_effect=fake_urlopen): + parsed, usage = _call_llm(cfg, "/tmp/test.md", "w", "r", "content body") + + assert parsed["topics"] == ["postgres"] + assert usage["prompt_tokens"] == 42 + assert captured["url"] == "http://localhost:11434/v1/chat/completions" + # Authorization header is stored capitalized-then-lowercase depending on urllib version + auth_vals = {v for k, v in captured["headers"].items() if k.lower() == "authorization"} + assert "Bearer sk-test" in auth_vals + assert captured["body"]["model"] == "llama3:8b" + assert captured["body"]["messages"][0]["role"] == "user" + + def test_omits_auth_header_when_no_key(self): + cfg = LLMConfig(endpoint="http://localhost:11434/v1", model="llama3:8b") + captured_headers = {} + + def fake_urlopen(req, timeout=None): + captured_headers.update({k.lower(): v for k, v in req.header_items()}) + return _FakeResp( + { + "choices": [ + {"message": {"content": '{"topics":[],"quotes":[],"summary":""}'}} + ], + "usage": {"prompt_tokens": 0, "completion_tokens": 0}, + } + ) + + with patch("urllib.request.urlopen", side_effect=fake_urlopen): + _call_llm(cfg, "/tmp/x", "w", "r", "c") + + assert "authorization" not in captured_headers + + def test_strips_code_fences(self): + cfg = self._make_cfg() + fenced = '```json\n{"topics":["t1"],"quotes":[],"summary":""}\n```' + + def fake_urlopen(req, timeout=None): + return _FakeResp( + { + "choices": [{"message": {"content": fenced}}], + "usage": {"prompt_tokens": 1, "completion_tokens": 1}, + } + ) + + with patch("urllib.request.urlopen", side_effect=fake_urlopen): + parsed, _ = _call_llm(cfg, "/tmp/x", "w", "r", "c") + assert parsed == {"topics": ["t1"], "quotes": [], "summary": ""} + + def test_returns_none_on_invalid_json(self): + cfg = self._make_cfg() + + def fake_urlopen(req, timeout=None): + return _FakeResp( + { + "choices": [{"message": {"content": "not json at all"}}], + "usage": {"prompt_tokens": 1, "completion_tokens": 1}, + } + ) + + with patch("urllib.request.urlopen", side_effect=fake_urlopen): + parsed, usage = _call_llm(cfg, "/tmp/x", "w", "r", "c") + assert parsed is None + + +# ── regenerate_closets error paths ─────────────────────────────────────── + + +class TestRegenerateClosets: + def test_missing_config_returns_error(self, monkeypatch): + monkeypatch.delenv("LLM_ENDPOINT", raising=False) + monkeypatch.delenv("LLM_MODEL", raising=False) + with tempfile.TemporaryDirectory() as palace: + result = regenerate_closets(palace) + assert result["error"] == "missing-config" + assert any("ENDPOINT" in m for m in result["missing"]) From 8e446f904ce00f58347fa5469ae1dadfa1278637 Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Mon, 13 Apr 2026 08:43:54 -0300 Subject: [PATCH 13/42] =?UTF-8?q?fix(search):=20hybrid=20closet+drawer=20r?= =?UTF-8?q?etrieval=20=E2=80=94=20closets=20boost,=20never=20gate=20(#795)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mempalace/searcher.py | 240 +++++++++++++++++++----------------- tests/test_hybrid_search.py | 141 +++++++++++++++++++++ 2 files changed, 270 insertions(+), 111 deletions(-) create mode 100644 tests/test_hybrid_search.py diff --git a/mempalace/searcher.py b/mempalace/searcher.py index 19b07f4..06806aa 100644 --- a/mempalace/searcher.py +++ b/mempalace/searcher.py @@ -183,138 +183,156 @@ def search_memories( where = build_where_filter(wing, room) - # Try closet-first search: search the compact index, then hydrate drawers - closet_hits = [] + # Hybrid retrieval: always query drawers directly (the floor), then use + # closet hits to boost rankings. Closets are a ranking SIGNAL, never a + # GATE — direct drawer search is always the baseline. + # + # This avoids the "weak-closets regression" where narrative content + # produces low-signal closets (regex extraction matches few topics) + # and closet-first routing hides drawers that direct search would find. + try: + dkwargs = { + "query_texts": [query], + "n_results": n_results * 3, # over-fetch for re-ranking + "include": ["documents", "metadatas", "distances"], + } + if where: + dkwargs["where"] = where + drawer_results = drawers_col.query(**dkwargs) + except Exception as e: + return {"error": f"Search error: {e}"} + + # Gather closet hits (best-per-source) to build a boost lookup. + closet_boost_by_source = {} # source_file -> (rank, closet_dist, preview) try: closets_col = get_closets_collection(palace_path, create=False) ckwargs = { "query_texts": [query], - "n_results": n_results * 2, # over-fetch closets to find best drawers + "n_results": n_results * 2, "include": ["documents", "metadatas", "distances"], } if where: ckwargs["where"] = where closet_results = closets_col.query(**ckwargs) - if closet_results["documents"][0]: - closet_hits = list(zip( + for rank, (doc, meta, dist) in enumerate( + zip( closet_results["documents"][0], closet_results["metadatas"][0], closet_results["distances"][0], - )) + ) + ): + source = meta.get("source_file", "") + if source and source not in closet_boost_by_source: + closet_boost_by_source[source] = (rank, dist, doc[:200]) except Exception: - pass # no closets yet — fall through to direct drawer search + pass # no closets yet — hybrid degrades to pure drawer search - # If closets found results, hydrate the referenced drawers - MAX_HYDRATION_CHARS = 10000 # cap to prevent blowup on large source files + # Rank-based boost. Ordinal signal (which closet matched best) is more + # reliable than absolute distance on narrative content. + CLOSET_RANK_BOOSTS = [0.40, 0.25, 0.15, 0.08, 0.04] + CLOSET_DISTANCE_CAP = 1.5 # cosine dist > 1.5 = too weak to use as signal - if closet_hits: - import re - seen_sources = set() - hits = [] - for closet_doc, closet_meta, closet_dist in closet_hits: - source = closet_meta.get("source_file", "") - if source in seen_sources: - continue - seen_sources.add(source) - - # Find drawers for this source file, grep for most relevant chunk - try: - drawer_results = drawers_col.get( - where={"source_file": source}, - include=["documents", "metadatas"], - ) - if drawer_results.get("ids"): - # Drawer-grep: score each chunk against the query, - # return the best-matching chunk first + surrounding context - query_terms = set(re.findall(r'\w{2,}', query.lower())) - best_idx = 0 - best_score = -1 - for idx, doc in enumerate(drawer_results["documents"]): - doc_lower = doc.lower() - score = sum(1 for t in query_terms if t in doc_lower) - if score > best_score: - best_score = score - best_idx = idx - - # Build result: best chunk first, then neighbors - docs = drawer_results["documents"] - n_docs = len(docs) - # Include best chunk + 1 before + 1 after for context - start = max(0, best_idx - 1) - end = min(n_docs, best_idx + 2) - relevant_text = "\n\n".join(docs[start:end]) - - if len(relevant_text) > MAX_HYDRATION_CHARS: - relevant_text = relevant_text[:MAX_HYDRATION_CHARS] + f"\n\n[...truncated. {n_docs} total drawers. Use mempalace_get_drawer for full content.]" - - meta = drawer_results["metadatas"][best_idx] - hits.append({ - "text": relevant_text, - "wing": meta.get("wing", "unknown"), - "room": meta.get("room", "unknown"), - "source_file": Path(source).name, - "similarity": round(max(0.0, 1 - closet_dist), 3), - "distance": round(closet_dist, 4), - "matched_via": "closet", - "closet_preview": closet_doc[:200], - "drawer_index": best_idx, - "total_drawers": n_docs, - }) - except Exception: - pass - - if len(hits) >= n_results: - break - - if hits: - # Re-rank with BM25 hybrid scoring - hits = _hybrid_rank(hits, query) - return { - "query": query, - "filters": {"wing": wing, "room": room}, - "total_before_filter": len(closet_hits), - "results": hits, - } - - # Fallback: direct drawer search (no closets yet, or closets empty) - try: - kwargs = { - "query_texts": [query], - "n_results": n_results, - "include": ["documents", "metadatas", "distances"], - } - if where: - kwargs["where"] = where - - results = drawers_col.query(**kwargs) - except Exception as e: - return {"error": f"Search error: {e}"} - - docs = results["documents"][0] - metas = results["metadatas"][0] - dists = results["distances"][0] - - hits = [] - for doc, meta, dist in zip(docs, metas, dists): - # Filter on raw distance before rounding to avoid precision loss + scored = [] + for doc, meta, dist in zip( + drawer_results["documents"][0], + drawer_results["metadatas"][0], + drawer_results["distances"][0], + ): if max_distance > 0.0 and dist > max_distance: continue - hits.append( - { - "text": doc, - "wing": meta.get("wing", "unknown"), - "room": meta.get("room", "unknown"), - "source_file": Path(meta.get("source_file", "?")).name, - "similarity": round(max(0.0, 1 - dist), 3), - "distance": round(dist, 4), - } - ) - # Re-rank with BM25 hybrid scoring + source = meta.get("source_file", "") + boost = 0.0 + matched_via = "drawer" + closet_preview = None + if source in closet_boost_by_source: + c_rank, c_dist, c_preview = closet_boost_by_source[source] + if c_dist <= CLOSET_DISTANCE_CAP and c_rank < len(CLOSET_RANK_BOOSTS): + boost = CLOSET_RANK_BOOSTS[c_rank] + matched_via = "drawer+closet" + closet_preview = c_preview + + effective_dist = dist - boost + entry = { + "text": doc, + "wing": meta.get("wing", "unknown"), + "room": meta.get("room", "unknown"), + "source_file": Path(source).name if source else "?", + "similarity": round(max(0.0, 1 - effective_dist), 3), + "distance": round(dist, 4), + "effective_distance": round(effective_dist, 4), + "closet_boost": round(boost, 3), + "matched_via": matched_via, + "_sort_key": effective_dist, + } + if closet_preview: + entry["closet_preview"] = closet_preview + scored.append(entry) + + scored.sort(key=lambda h: h["_sort_key"]) + hits = scored[:n_results] + + # Drawer-grep enrichment: for top hits whose source file has multiple + # drawers, return the best-matching chunk + its immediate neighbors + # instead of just the single drawer. Preserves the chunk-expansion + # behavior users relied on in the closet-first path. + MAX_HYDRATION_CHARS = 10000 + import re as _re + + for h in hits: + if h["matched_via"] == "drawer": + continue + # Only enrich closet-matched hits (cheap: we already know source matters) + source_name = h["source_file"] + # Look up full source_file by matching suffix in candidate pool + full_source = next( + ( + m.get("source_file", "") + for m in drawer_results["metadatas"][0] + if m.get("source_file", "").endswith(source_name) + ), + "", + ) + if not full_source: + continue + try: + source_drawers = drawers_col.get( + where={"source_file": full_source}, include=["documents"] + ) + except Exception: + continue + docs = source_drawers.get("documents") or [] + if len(docs) <= 1: + continue + + query_terms = set(_re.findall(r"\w{2,}", query.lower())) + best_idx, best_score = 0, -1 + for idx, d in enumerate(docs): + d_lower = d.lower() + s = sum(1 for t in query_terms if t in d_lower) + if s > best_score: + best_score, best_idx = s, idx + + start = max(0, best_idx - 1) + end = min(len(docs), best_idx + 2) + expanded = "\n\n".join(docs[start:end]) + if len(expanded) > MAX_HYDRATION_CHARS: + expanded = ( + expanded[:MAX_HYDRATION_CHARS] + + f"\n\n[...truncated. {len(docs)} total drawers. Use mempalace_get_drawer for full content.]" + ) + h["text"] = expanded + h["drawer_index"] = best_idx + h["total_drawers"] = len(docs) + + # BM25 hybrid re-rank within the final candidate set hits = _hybrid_rank(hits, query) + for h in hits: + h.pop("_sort_key", None) + return { "query": query, "filters": {"wing": wing, "room": room}, - "total_before_filter": len(docs), + "total_before_filter": len(drawer_results["documents"][0]), "results": hits, } diff --git a/tests/test_hybrid_search.py b/tests/test_hybrid_search.py new file mode 100644 index 0000000..02d3f5f --- /dev/null +++ b/tests/test_hybrid_search.py @@ -0,0 +1,141 @@ +"""Tests for the hybrid closet+drawer retrieval in search_memories. + +The hybrid path queries drawers directly (the floor) AND closets, applying a +rank-based boost to drawers whose source_file appears in top closet hits. +This avoids the "weak-closets regression" where low-signal closets (from +regex extraction on narrative content) could hide drawers that direct +search would have found. +""" + +import os +import tempfile + +import chromadb +import pytest + +from mempalace.palace import ( + get_collection, + get_closets_collection, + upsert_closet_lines, +) +from mempalace.searcher import search_memories + + +def _seed_drawers(palace_path): + """Insert 4 short drawers with deterministic content.""" + col = get_collection(palace_path, create=True) + col.upsert( + ids=["D1", "D2", "D3", "D4"], + documents=[ + "We switched the auth service to use JWT tokens with a 24h expiry.", + "Database migration to PostgreSQL 15 completed last Tuesday.", + "The frontend team is debating whether to adopt TanStack Query.", + "Kafka consumer rebalance timeout set to 45 seconds after incident.", + ], + metadatas=[ + {"wing": "backend", "room": "auth", "source_file": "fixture_D1.md"}, + {"wing": "backend", "room": "db", "source_file": "fixture_D2.md"}, + {"wing": "frontend", "room": "state", "source_file": "fixture_D3.md"}, + {"wing": "backend", "room": "queue", "source_file": "fixture_D4.md"}, + ], + ) + + +def _seed_strong_closet_for(palace_path, drawer_id, source_file, topics): + """Insert a closet whose content strongly overlaps the query keywords.""" + col = get_closets_collection(palace_path) + lines = [f"{t}||→{drawer_id}" for t in topics] + upsert_closet_lines( + col, + closet_id_base=f"closet_{drawer_id}", + lines=lines, + metadata={ + "wing": "backend", + "room": "auth", + "source_file": source_file, + "generated_by": "test", + }, + ) + + +# ── core invariant: closets can only HELP, never HIDE ───────────────────── + + +class TestHybridInvariant: + def test_no_closets_degrades_to_direct_drawer_search(self, tmp_path): + palace = str(tmp_path / "palace") + _seed_drawers(palace) + # No closets created. + result = search_memories("Kafka rebalance timeout", palace, n_results=3) + ids = [h["source_file"] for h in result["results"]] + assert ids, "should return results" + assert "fixture_D4.md" in ids, ( + "direct drawer search alone should surface the Kafka drawer" + ) + + def test_weak_closets_do_not_hide_direct_drawer_hits(self, tmp_path): + """A closet that points at a wrong drawer must NOT suppress the + drawer that direct search would have ranked first.""" + palace = str(tmp_path / "palace") + _seed_drawers(palace) + # Seed a misleading closet: it matches a generic phrase but points at D3. + _seed_strong_closet_for( + palace, + drawer_id="D3", + source_file="fixture_D3.md", + topics=["Kafka queue tuning", "consumer rebalance config"], + ) + result = search_memories("Kafka consumer rebalance timeout", palace, n_results=5) + ids = [h["source_file"] for h in result["results"]] + assert "fixture_D4.md" in ids, ( + "D4 must appear — direct drawer search alone would rank it first. " + "Closet pointing to D3 should only boost D3, never hide D4." + ) + + def test_closet_boost_lifts_matching_drawer(self, tmp_path): + """When a closet agrees with direct search, the matching drawer + should be boosted to rank 1.""" + palace = str(tmp_path / "palace") + _seed_drawers(palace) + _seed_strong_closet_for( + palace, + drawer_id="D1", + source_file="fixture_D1.md", + topics=["JWT auth tokens", "session expiry", "authentication service"], + ) + result = search_memories("JWT auth tokens expiry", palace, n_results=3) + ids = [h["source_file"] for h in result["results"]] + assert ids[0] == "fixture_D1.md" + top = result["results"][0] + assert top["matched_via"] == "drawer+closet" + assert top["closet_boost"] > 0 + + +# ── closet_boost metadata ──────────────────────────────────────────────── + + +class TestClosetMetadata: + def test_closet_preview_exposed_when_boosted(self, tmp_path): + palace = str(tmp_path / "palace") + _seed_drawers(palace) + _seed_strong_closet_for( + palace, + drawer_id="D1", + source_file="fixture_D1.md", + topics=["JWT auth tokens", "24h expiry", "authentication"], + ) + result = search_memories("JWT authentication", palace, n_results=2) + top = result["results"][0] + assert top["source_file"] == "fixture_D1.md" + assert "closet_preview" in top + + def test_drawer_only_hits_have_no_closet_preview(self, tmp_path): + palace = str(tmp_path / "palace") + _seed_drawers(palace) + # No closets + result = search_memories("TanStack Query", palace, n_results=2) + assert result["results"] + for h in result["results"]: + assert h["matched_via"] == "drawer" + assert "closet_preview" not in h + assert h["closet_boost"] == 0.0 From 1e86892e62dea47f895fd24bcbfc421cf0e9aa6d Mon Sep 17 00:00:00 2001 From: eblander Date: Mon, 13 Apr 2026 11:00:52 -0400 Subject: [PATCH 14/42] Fix: set cosine distance metadata on all collection creation sites ChromaDB defaults HNSW index to L2 (Euclidean) distance, but MemPalace scoring uses 1-distance which requires cosine (range 0-2). Add metadata={"hnsw:space": "cosine"} to the 4 production and 3 test call sites that were missing it. Closes #218 --- mempalace/backends/chroma.py | 17 ++++-- mempalace/cli.py | 106 +++++++++++++++++++++++++++-------- mempalace/migrate.py | 14 +++-- tests/conftest.py | 8 ++- tests/test_backends.py | 14 +++++ tests/test_mcp_server.py | 104 +++++++++++++++++++++++++--------- tests/test_miner.py | 27 ++++++--- 7 files changed, 224 insertions(+), 66 deletions(-) diff --git a/mempalace/backends/chroma.py b/mempalace/backends/chroma.py index da01e4d..2699d3a 100644 --- a/mempalace/backends/chroma.py +++ b/mempalace/backends/chroma.py @@ -35,8 +35,13 @@ def _fix_blob_seq_ids(palace_path: str): continue if not rows: continue - updates = [(int.from_bytes(blob, byteorder="big"), rowid) for rowid, blob in rows] - conn.executemany(f"UPDATE {table} SET seq_id = ? WHERE rowid = ?", updates) + updates = [ + (int.from_bytes(blob, byteorder="big"), rowid) + for rowid, blob in rows + ] + conn.executemany( + f"UPDATE {table} SET seq_id = ? WHERE rowid = ?", updates + ) logger.info("Fixed %d BLOB seq_ids in %s", len(updates), table) conn.commit() except Exception: @@ -71,7 +76,9 @@ class ChromaCollection(BaseCollection): class ChromaBackend: """Factory for MemPalace's default ChromaDB backend.""" - def get_collection(self, palace_path: str, collection_name: str, create: bool = False): + def get_collection( + self, palace_path: str, collection_name: str, create: bool = False + ): if not create and not os.path.isdir(palace_path): raise FileNotFoundError(palace_path) @@ -85,7 +92,9 @@ class ChromaBackend: _fix_blob_seq_ids(palace_path) client = chromadb.PersistentClient(path=palace_path) if create: - collection = client.get_or_create_collection(collection_name) + collection = client.get_or_create_collection( + collection_name, metadata={"hnsw:space": "cosine"} + ) else: collection = client.get_collection(collection_name) return ChromaCollection(collection) diff --git a/mempalace/cli.py b/mempalace/cli.py index 8bf3f20..5d1e4f0 100644 --- a/mempalace/cli.py +++ b/mempalace/cli.py @@ -48,7 +48,11 @@ def cmd_init(args): if files: print(f" Reading {len(files)} files...") detected = detect_entities(files) - total = len(detected["people"]) + len(detected["projects"]) + len(detected["uncertain"]) + total = ( + len(detected["people"]) + + len(detected["projects"]) + + len(detected["uncertain"]) + ) if total > 0: confirmed = confirm_entities(detected, yes=getattr(args, "yes", False)) # Save confirmed entities to /entities.json for the miner @@ -66,7 +70,11 @@ def cmd_init(args): def cmd_mine(args): - palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path + palace_path = ( + os.path.expanduser(args.palace) + if args.palace + else MempalaceConfig().palace_path + ) include_ignored = [] for raw in args.include_ignored or []: include_ignored.extend(part.strip() for part in raw.split(",") if part.strip()) @@ -101,7 +109,11 @@ def cmd_mine(args): def cmd_search(args): from .searcher import search, SearchError - palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path + palace_path = ( + os.path.expanduser(args.palace) + if args.palace + else MempalaceConfig().palace_path + ) try: search( query=args.query, @@ -118,7 +130,11 @@ def cmd_wakeup(args): """Show L0 (identity) + L1 (essential story) — the wake-up context.""" from .layers import MemoryStack - palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path + palace_path = ( + os.path.expanduser(args.palace) + if args.palace + else MempalaceConfig().palace_path + ) stack = MemoryStack(palace_path=palace_path) text = stack.wake_up(wing=args.wing) @@ -155,14 +171,26 @@ def cmd_migrate(args): """Migrate palace from a different ChromaDB version.""" from .migrate import migrate - palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path - migrate(palace_path=palace_path, dry_run=args.dry_run, confirm=getattr(args, "yes", False)) + palace_path = ( + os.path.expanduser(args.palace) + if args.palace + else MempalaceConfig().palace_path + ) + migrate( + palace_path=palace_path, + dry_run=args.dry_run, + confirm=getattr(args, "yes", False), + ) def cmd_status(args): from .miner import status - palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path + palace_path = ( + os.path.expanduser(args.palace) + if args.palace + else MempalaceConfig().palace_path + ) status(palace_path=palace_path) @@ -173,7 +201,9 @@ def cmd_repair(args): from .migrate import confirm_destructive_action, contains_palace_database palace_path = os.path.abspath( - os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path + os.path.expanduser(args.palace) + if args.palace + else MempalaceConfig().palace_path ) db_path = os.path.join(palace_path, "chroma.sqlite3") @@ -217,7 +247,9 @@ def cmd_repair(args): all_metas = [] offset = 0 while offset < total: - batch = col.get(limit=batch_size, offset=offset, include=["documents", "metadatas"]) + batch = col.get( + limit=batch_size, offset=offset, include=["documents", "metadatas"] + ) all_ids.extend(batch["ids"]) all_docs.extend(batch["documents"]) all_metas.extend(batch["metadatas"]) @@ -240,7 +272,9 @@ def cmd_repair(args): print(" Rebuilding collection...") client.delete_collection("mempalace_drawers") - new_col = client.create_collection("mempalace_drawers") + new_col = client.create_collection( + "mempalace_drawers", metadata={"hnsw:space": "cosine"} + ) filed = 0 for i in range(0, len(all_ids), batch_size): @@ -287,7 +321,9 @@ def cmd_mcp(args): if not args.palace: print("\nOptional custom palace:") - print(f" claude mcp add mempalace -- {base_server_cmd} --palace /path/to/palace") + print( + f" claude mcp add mempalace -- {base_server_cmd} --palace /path/to/palace" + ) print(f" {base_server_cmd} --palace /path/to/palace") @@ -296,7 +332,11 @@ def cmd_compress(args): import chromadb from .dialect import Dialect - palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path + palace_path = ( + os.path.expanduser(args.palace) + if args.palace + else MempalaceConfig().palace_path + ) # Load dialect (with optional entity config) config_path = args.config @@ -328,7 +368,11 @@ def cmd_compress(args): offset = 0 while True: try: - kwargs = {"include": ["documents", "metadatas"], "limit": _BATCH, "offset": offset} + kwargs = { + "include": ["documents", "metadatas"], + "limit": _BATCH, + "offset": offset, + } if where: kwargs["where"] = where batch = col.get(**kwargs) @@ -386,7 +430,9 @@ def cmd_compress(args): # Store compressed versions (unless dry-run) if not args.dry_run: try: - comp_col = client.get_or_create_collection("mempalace_compressed") + comp_col = client.get_or_create_collection( + "mempalace_compressed", metadata={"hnsw:space": "cosine"} + ) for doc_id, compressed, meta, stats in compressed_entries: comp_meta = dict(meta) comp_meta["compression_ratio"] = round(stats["size_ratio"], 1) @@ -431,7 +477,9 @@ def main(): p_init = sub.add_parser("init", help="Detect rooms from your folder structure") p_init.add_argument("dir", help="Project directory to set up") p_init.add_argument( - "--yes", action="store_true", help="Auto-accept all detected entities (non-interactive)" + "--yes", + action="store_true", + help="Auto-accept all detected entities (non-interactive)", ) # mine @@ -443,7 +491,9 @@ def main(): default="projects", help="Ingest mode: 'projects' for code/docs (default), 'convos' for chat exports", ) - p_mine.add_argument("--wing", default=None, help="Wing name (default: directory name)") + p_mine.add_argument( + "--wing", default=None, help="Wing name (default: directory name)" + ) p_mine.add_argument( "--no-gitignore", action="store_true", @@ -460,7 +510,9 @@ def main(): default="mempalace", help="Your name — recorded on every drawer (default: mempalace)", ) - p_mine.add_argument("--limit", type=int, default=0, help="Max files to process (0 = all)") + p_mine.add_argument( + "--limit", type=int, default=0, help="Max files to process (0 = all)" + ) p_mine.add_argument( "--dry-run", action="store_true", help="Show what would be filed without filing" ) @@ -482,7 +534,9 @@ def main(): p_compress = sub.add_parser( "compress", help="Compress drawers using AAAK Dialect (~30x reduction)" ) - p_compress.add_argument("--wing", default=None, help="Wing to compress (default: all wings)") + p_compress.add_argument( + "--wing", default=None, help="Wing to compress (default: all wings)" + ) p_compress.add_argument( "--dry-run", action="store_true", help="Preview compression without storing" ) @@ -491,8 +545,12 @@ def main(): ) # wake-up - p_wakeup = sub.add_parser("wake-up", help="Show L0 + L1 wake-up context (~600-900 tokens)") - p_wakeup.add_argument("--wing", default=None, help="Wake-up for a specific project/wing") + p_wakeup = sub.add_parser( + "wake-up", help="Show L0 + L1 wake-up context (~600-900 tokens)" + ) + p_wakeup.add_argument( + "--wing", default=None, help="Wake-up for a specific project/wing" + ) # split p_split = sub.add_parser( @@ -544,13 +602,17 @@ def main(): ) instructions_sub = p_instructions.add_subparsers(dest="instructions_name") for instr_name in ["init", "search", "mine", "help", "status"]: - instructions_sub.add_parser(instr_name, help=f"Output {instr_name} instructions") + instructions_sub.add_parser( + instr_name, help=f"Output {instr_name} instructions" + ) # repair sub.add_parser( "repair", help="Rebuild palace vector index from stored data (fixes segfaults after corruption)", - ).add_argument("--yes", action="store_true", help="Skip confirmation for destructive changes") + ).add_argument( + "--yes", action="store_true", help="Skip confirmation for destructive changes" + ) # mcp sub.add_parser( diff --git a/mempalace/migrate.py b/mempalace/migrate.py index 6ec4a59..d751a93 100644 --- a/mempalace/migrate.py +++ b/mempalace/migrate.py @@ -33,13 +33,15 @@ def extract_drawers_from_sqlite(db_path: str) -> list: conn.row_factory = sqlite3.Row # Get all embedding IDs and their documents - rows = conn.execute(""" + rows = conn.execute( + """ SELECT e.embedding_id, MAX(CASE WHEN em.key = 'chroma:document' THEN em.string_value END) as document FROM embeddings e JOIN embedding_metadata em ON em.id = e.id GROUP BY e.embedding_id - """).fetchall() + """ + ).fetchall() drawers = [] for row in rows: @@ -95,7 +97,9 @@ def detect_chromadb_version(db_path: str) -> str: # 0.6.x has embeddings_queue but no schema_str tables = [ r[0] - for r in conn.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall() + for r in conn.execute( + "SELECT name FROM sqlite_master WHERE type='table'" + ).fetchall() ] if "embeddings_queue" in tables: return "0.6.x" @@ -207,7 +211,9 @@ def migrate(palace_path: str, dry_run: bool = False, confirm: bool = False): temp_palace = tempfile.mkdtemp(prefix="mempalace_migrate_") print(f" Creating fresh palace in {temp_palace}...") client = chromadb.PersistentClient(path=temp_palace) - col = client.get_or_create_collection("mempalace_drawers") + col = client.get_or_create_collection( + "mempalace_drawers", metadata={"hnsw:space": "cosine"} + ) # Re-import in batches batch_size = 500 diff --git a/tests/conftest.py b/tests/conftest.py index 16185ef..1d85889 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -101,7 +101,9 @@ def config(tmp_dir, palace_path): def collection(palace_path): """A ChromaDB collection pre-seeded in the temp palace.""" client = chromadb.PersistentClient(path=palace_path) - col = client.get_or_create_collection("mempalace_drawers") + col = client.get_or_create_collection( + "mempalace_drawers", metadata={"hnsw:space": "cosine"} + ) yield col client.delete_collection("mempalace_drawers") del client @@ -185,7 +187,9 @@ def seeded_kg(kg): kg.add_triple("Alice", "parent_of", "Max", valid_from="2015-04-01") kg.add_triple("Max", "does", "swimming", valid_from="2025-01-01") kg.add_triple("Max", "does", "chess", valid_from="2024-06-01") - kg.add_triple("Alice", "works_at", "Acme Corp", valid_from="2020-01-01", valid_to="2024-12-31") + kg.add_triple( + "Alice", "works_at", "Acme Corp", valid_from="2020-01-01", valid_to="2024-12-31" + ) kg.add_triple("Alice", "works_at", "NewCo", valid_from="2025-01-01") return kg diff --git a/tests/test_backends.py b/tests/test_backends.py index 846134f..a620bf9 100644 --- a/tests/test_backends.py +++ b/tests/test_backends.py @@ -82,6 +82,20 @@ def test_chroma_backend_create_true_creates_directory_and_collection(tmp_path): client.get_collection("mempalace_drawers") +def test_chroma_backend_creates_collection_with_cosine_distance(tmp_path): + palace_path = tmp_path / "palace" + + ChromaBackend().get_collection( + str(palace_path), + collection_name="mempalace_drawers", + create=True, + ) + + client = chromadb.PersistentClient(path=str(palace_path)) + col = client.get_collection("mempalace_drawers") + assert col.metadata.get("hnsw:space") == "cosine" + + def test_fix_blob_seq_ids_converts_blobs_to_integers(tmp_path): """Simulate a ChromaDB 0.6.x database with BLOB seq_ids and verify repair.""" db_path = tmp_path / "chroma.sqlite3" diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 4cc8b4a..cfb48a2 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -30,7 +30,12 @@ def _get_collection(palace_path, create=False): client = chromadb.PersistentClient(path=palace_path) if create: - return client, client.get_or_create_collection("mempalace_drawers") + return ( + client, + client.get_or_create_collection( + "mempalace_drawers", metadata={"hnsw:space": "cosine"} + ), + ) return client, client.get_collection("mempalace_drawers") @@ -92,7 +97,9 @@ class TestHandleRequest: def test_notifications_initialized_returns_none(self): from mempalace.mcp_server import handle_request - resp = handle_request({"method": "notifications/initialized", "id": None, "params": {}}) + resp = handle_request( + {"method": "notifications/initialized", "id": None, "params": {}} + ) assert resp is None def test_ping_returns_empty_result(self): @@ -113,7 +120,9 @@ class TestHandleRequest: assert "mempalace_add_drawer" in names assert "mempalace_kg_add" in names - def test_null_arguments_does_not_hang(self, monkeypatch, config, palace_path, seeded_kg): + def test_null_arguments_does_not_hang( + self, monkeypatch, config, palace_path, seeded_kg + ): """Sending arguments: null should return a result, not hang (#394).""" _patch_mcp_server(monkeypatch, config, seeded_kg) from mempalace.mcp_server import handle_request @@ -218,7 +227,9 @@ class TestReadTools: assert result["total_drawers"] == 0 assert result["wings"] == {} - def test_status_with_data(self, monkeypatch, config, palace_path, seeded_collection, kg): + def test_status_with_data( + self, monkeypatch, config, palace_path, seeded_collection, kg + ): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_status @@ -235,7 +246,9 @@ class TestReadTools: assert result["wings"]["project"] == 3 assert result["wings"]["notes"] == 1 - def test_list_rooms_all(self, monkeypatch, config, palace_path, seeded_collection, kg): + def test_list_rooms_all( + self, monkeypatch, config, palace_path, seeded_collection, kg + ): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_list_rooms @@ -244,7 +257,9 @@ class TestReadTools: assert "frontend" in result["rooms"] assert "planning" in result["rooms"] - def test_list_rooms_filtered(self, monkeypatch, config, palace_path, seeded_collection, kg): + def test_list_rooms_filtered( + self, monkeypatch, config, palace_path, seeded_collection, kg + ): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_list_rooms @@ -252,7 +267,9 @@ class TestReadTools: assert "backend" in result["rooms"] assert "planning" not in result["rooms"] - def test_get_taxonomy(self, monkeypatch, config, palace_path, seeded_collection, kg): + def test_get_taxonomy( + self, monkeypatch, config, palace_path, seeded_collection, kg + ): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_get_taxonomy @@ -273,7 +290,9 @@ class TestReadTools: class TestSearchTool: - def test_search_basic(self, monkeypatch, config, palace_path, seeded_collection, kg): + def test_search_basic( + self, monkeypatch, config, palace_path, seeded_collection, kg + ): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_search @@ -284,14 +303,18 @@ class TestSearchTool: top = result["results"][0] assert "JWT" in top["text"] or "authentication" in top["text"].lower() - def test_search_with_wing_filter(self, monkeypatch, config, palace_path, seeded_collection, kg): + def test_search_with_wing_filter( + self, monkeypatch, config, palace_path, seeded_collection, kg + ): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_search result = tool_search(query="planning", wing="notes") assert all(r["wing"] == "notes" for r in result["results"]) - def test_search_with_room_filter(self, monkeypatch, config, palace_path, seeded_collection, kg): + def test_search_with_room_filter( + self, monkeypatch, config, palace_path, seeded_collection, kg + ): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_search @@ -310,7 +333,9 @@ class TestSearchTool: assert "results" in result # Old name takes precedence when both provided - result_strict = tool_search(query="JWT", max_distance=999.0, min_similarity=0.01) + result_strict = tool_search( + query="JWT", max_distance=999.0, min_similarity=0.01 + ) result_loose = tool_search(query="JWT", max_distance=0.01, min_similarity=999.0) assert len(result_strict["results"]) <= len(result_loose["results"]) @@ -318,7 +343,7 @@ class TestSearchTool: _patch_mcp_server(monkeypatch, config, kg) from mempalace import mcp_server - monkeypatch.setattr(mcp_server, "_get_collection", lambda *args, **kwargs: pytest.fail()) + monkeypatch.setattr(mcp_server, "_get_collection", lambda: pytest.fail()) result = mcp_server.tool_list_rooms(wing="../etc/passwd") assert "error" in result @@ -327,7 +352,7 @@ class TestSearchTool: _patch_mcp_server(monkeypatch, config, kg) from mempalace import mcp_server - monkeypatch.setattr(mcp_server, "search_memories", lambda *args, **kwargs: pytest.fail()) + monkeypatch.setattr(mcp_server, "search_memories", lambda: pytest.fail()) result = mcp_server.tool_search(query="JWT", room="../backend") assert "error" in result @@ -336,7 +361,7 @@ class TestSearchTool: _patch_mcp_server(monkeypatch, config, kg) from mempalace import mcp_server - monkeypatch.setattr(mcp_server, "_get_collection", lambda *args, **kwargs: pytest.fail()) + monkeypatch.setattr(mcp_server, "_get_collection", lambda: pytest.fail()) result = mcp_server.tool_list_drawers(wing="../notes") assert "error" in result @@ -345,7 +370,7 @@ class TestSearchTool: _patch_mcp_server(monkeypatch, config, kg) from mempalace import mcp_server - monkeypatch.setattr(mcp_server, "_get_collection", lambda *args, **kwargs: pytest.fail()) + monkeypatch.setattr(mcp_server, "_get_collection", lambda: pytest.fail()) result = mcp_server.tool_find_tunnels(wing_a="../project") assert "error" in result @@ -402,7 +427,9 @@ class TestWriteTools: assert result2["success"] is True assert result2["reason"] == "already_exists" - def test_add_drawer_shared_header_no_collision(self, monkeypatch, config, palace_path, kg): + def test_add_drawer_shared_header_no_collision( + self, monkeypatch, config, palace_path, kg + ): """Documents sharing a >100-char header must get distinct IDs (full-content hash).""" _patch_mcp_server(monkeypatch, config, kg) _client, _col = _get_collection(palace_path, create=True) @@ -414,7 +441,10 @@ class TestWriteTools: header + "Decision: Use PostgreSQL for primary storage. Rationale: ACID compliance required." ) - doc2 = header + "Decision: Use Redis for session caching. Rationale: sub-ms latency needed." + doc2 = ( + header + + "Decision: Use Redis for session caching. Rationale: sub-ms latency needed." + ) result1 = tool_add_drawer(wing="work", room="decisions", content=doc1) result2 = tool_add_drawer(wing="work", room="decisions", content=doc2) @@ -425,7 +455,9 @@ class TestWriteTools: result1["drawer_id"] != result2["drawer_id"] ), "Documents with shared header but different content must have distinct drawer IDs" - def test_delete_drawer(self, monkeypatch, config, palace_path, seeded_collection, kg): + def test_delete_drawer( + self, monkeypatch, config, palace_path, seeded_collection, kg + ): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_delete_drawer @@ -433,14 +465,18 @@ class TestWriteTools: assert result["success"] is True assert seeded_collection.count() == 3 - def test_delete_drawer_not_found(self, monkeypatch, config, palace_path, seeded_collection, kg): + def test_delete_drawer_not_found( + self, monkeypatch, config, palace_path, seeded_collection, kg + ): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_delete_drawer result = tool_delete_drawer("nonexistent_drawer") assert result["success"] is False - def test_check_duplicate(self, monkeypatch, config, palace_path, seeded_collection, kg): + def test_check_duplicate( + self, monkeypatch, config, palace_path, seeded_collection, kg + ): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_check_duplicate @@ -469,14 +505,18 @@ class TestWriteTools: assert result["room"] == "backend" assert "JWT tokens" in result["content"] - def test_get_drawer_not_found(self, monkeypatch, config, palace_path, seeded_collection, kg): + def test_get_drawer_not_found( + self, monkeypatch, config, palace_path, seeded_collection, kg + ): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_get_drawer result = tool_get_drawer("nonexistent_drawer") assert "error" in result - def test_list_drawers(self, monkeypatch, config, palace_path, seeded_collection, kg): + def test_list_drawers( + self, monkeypatch, config, palace_path, seeded_collection, kg + ): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_list_drawers @@ -504,7 +544,9 @@ class TestWriteTools: assert result["count"] == 2 assert all(d["room"] == "backend" for d in result["drawers"]) - def test_list_drawers_pagination(self, monkeypatch, config, palace_path, seeded_collection, kg): + def test_list_drawers_pagination( + self, monkeypatch, config, palace_path, seeded_collection, kg + ): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_list_drawers @@ -522,7 +564,9 @@ class TestWriteTools: result = tool_list_drawers(offset=-5) assert result["offset"] == 0 - def test_update_drawer_content(self, monkeypatch, config, palace_path, seeded_collection, kg): + def test_update_drawer_content( + self, monkeypatch, config, palace_path, seeded_collection, kg + ): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_update_drawer, tool_get_drawer @@ -540,19 +584,25 @@ class TestWriteTools: _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_update_drawer - result = tool_update_drawer("drawer_proj_backend_aaa", wing="new_wing", room="new_room") + result = tool_update_drawer( + "drawer_proj_backend_aaa", wing="new_wing", room="new_room" + ) assert result["success"] is True assert result["wing"] == "new_wing" assert result["room"] == "new_room" - def test_update_drawer_not_found(self, monkeypatch, config, palace_path, seeded_collection, kg): + def test_update_drawer_not_found( + self, monkeypatch, config, palace_path, seeded_collection, kg + ): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_update_drawer result = tool_update_drawer("nonexistent_drawer", content="hello") assert result["success"] is False - def test_update_drawer_noop(self, monkeypatch, config, palace_path, seeded_collection, kg): + def test_update_drawer_noop( + self, monkeypatch, config, palace_path, seeded_collection, kg + ): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_update_drawer diff --git a/tests/test_miner.py b/tests/test_miner.py index ea2f2a9..600053e 100644 --- a/tests/test_miner.py +++ b/tests/test_miner.py @@ -27,7 +27,8 @@ def test_project_mining(): os.makedirs(project_root / "backend") write_file( - project_root / "backend" / "app.py", "def main():\n print('hello world')\n" * 20 + project_root / "backend" / "app.py", + "def main():\n print('hello world')\n" * 20, ) with open(project_root / "mempalace.yaml", "w") as f: yaml.dump( @@ -59,7 +60,9 @@ def test_scan_project_respects_gitignore(): write_file(project_root / ".gitignore", "ignored.py\ngenerated/\n") write_file(project_root / "src" / "app.py", "print('hello')\n" * 20) write_file(project_root / "ignored.py", "print('ignore me')\n" * 20) - write_file(project_root / "generated" / "artifact.py", "print('artifact')\n" * 20) + write_file( + project_root / "generated" / "artifact.py", "print('artifact')\n" * 20 + ) assert scanned_files(project_root) == ["src/app.py"] finally: @@ -74,7 +77,9 @@ def test_scan_project_respects_nested_gitignore(): write_file(project_root / ".gitignore", "*.log\n") write_file(project_root / "subrepo" / ".gitignore", "tasks/\n") write_file(project_root / "subrepo" / "src" / "main.py", "print('main')\n" * 20) - write_file(project_root / "subrepo" / "tasks" / "task.py", "print('task')\n" * 20) + write_file( + project_root / "subrepo" / "tasks" / "task.py", "print('task')\n" * 20 + ) write_file(project_root / "subrepo" / "debug.log", "debug\n" * 20) assert scanned_files(project_root) == ["subrepo/src/main.py"] @@ -133,7 +138,9 @@ def test_scan_project_can_disable_gitignore(): write_file(project_root / ".gitignore", "data/\n") write_file(project_root / "data" / "stuff.csv", "a,b,c\n" * 20) - assert scanned_files(project_root, respect_gitignore=False) == ["data/stuff.csv"] + assert scanned_files(project_root, respect_gitignore=False) == [ + "data/stuff.csv" + ] finally: shutil.rmtree(tmpdir) @@ -146,7 +153,9 @@ def test_scan_project_can_include_ignored_directory(): write_file(project_root / ".gitignore", "docs/\n") write_file(project_root / "docs" / "guide.md", "# Guide\n" * 20) - assert scanned_files(project_root, include_ignored=["docs"]) == ["docs/guide.md"] + assert scanned_files(project_root, include_ignored=["docs"]) == [ + "docs/guide.md" + ] finally: shutil.rmtree(tmpdir) @@ -215,7 +224,9 @@ def test_file_already_mined_check_mtime(): palace_path = os.path.join(tmpdir, "palace") os.makedirs(palace_path) client = chromadb.PersistentClient(path=palace_path) - col = client.get_or_create_collection("mempalace_drawers") + col = client.get_or_create_collection( + "mempalace_drawers", metadata={"hnsw:space": "cosine"} + ) test_file = os.path.join(tmpdir, "test.txt") with open(test_file, "w") as f: @@ -269,7 +280,9 @@ def test_mine_dry_run_with_tiny_file_no_crash(): project_root = Path(tmpdir).resolve() # One normal file and one that falls below MIN_CHUNK_SIZE - write_file(project_root / "good.py", "def main():\n print('hello world')\n" * 20) + write_file( + project_root / "good.py", "def main():\n print('hello world')\n" * 20 + ) write_file(project_root / "tiny.txt", "x") with open(project_root / "mempalace.yaml", "w") as f: From 53d779311ed238d5867b84a2c726893e6b859e8f Mon Sep 17 00:00:00 2001 From: Yorji <261316343+Yorji-Porji@users.noreply.github.com> Date: Mon, 13 Apr 2026 12:49:33 -0400 Subject: [PATCH 15/42] Create SECURITY.md This PR introduces a standard SECURITY.md policy file to the repository. While reviewing the codebase, I noticed there wasn't a defined channel for the private, responsible disclosure of security vulnerabilities. Adding this policy helps protect the project by guiding researchers to report bugs privately rather than in public issues. I highly recommend merging this and enabling GitHub's "Private Vulnerability Reporting" feature in your repository settings. I currently have some security findings I would like to share with the maintainers securely once a private channel or contact method is established. --- SECURITY.md | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 SECURITY.md diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..72f7bc4 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,31 @@ +# Security Policy + +## Supported Versions + +Please check the table below for the supported versions that are currently receiving security updates. + +| Version | Supported | +| ------- | ------------------ | +| `main` / `develop` | :white_check_mark: | +| `< 1.0.0` | :x: | + +*(Note: Adjust the table above to reflect MemPalace's actual release cycle)* + +## Reporting a Vulnerability + +**Please do not report security vulnerabilities through public GitHub issues.** + +We take the security of MemPalace seriously. If you believe you have found a security vulnerability, please report it to us privately using one of the following methods: + +1. **GitHub Private Vulnerability Reporting:** Navigate to the "Security" tab in this repository, click on "Advisories," and select "Report a vulnerability." +2. **Direct Contact:** If private reporting is not enabled, please email the core maintainers directly at `[Insert Maintainer Email Here]`. + +### What to include in your report: +* A descriptive summary of the vulnerability. +* Detailed steps to reproduce the issue (including any proof-of-concept scripts or specific file paths). +* The potential impact and severity of the vulnerability. + +### What to expect: +* We aim to acknowledge receipt of your vulnerability report within 48 hours. +* We will triage the issue and keep you updated on our progress toward a patch. +* Once the vulnerability is resolved and an update is released, we will publish a security advisory and credit you for the discovery (if you wish to be credited). From 69d6e2f7f3a6703396b10e39a790b8aa5e193a0c Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Mon, 13 Apr 2026 15:46:27 -0300 Subject: [PATCH 16/42] fix: sync version.py to 3.2.0 Commit 6614b9b bumped pyproject.toml to 3.2.0 but missed mempalace/version.py, breaking test_version_consistency on every PR's CI. This syncs them. --- mempalace/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mempalace/version.py b/mempalace/version.py index 1eb21a2..45176bc 100644 --- a/mempalace/version.py +++ b/mempalace/version.py @@ -1,3 +1,3 @@ """Single source of truth for the MemPalace package version.""" -__version__ = "3.1.0" +__version__ = "3.2.0" From 09f218cbb2912df53c6ec563c89f08251235a92f Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Mon, 13 Apr 2026 15:48:54 -0300 Subject: [PATCH 17/42] refactor: extract locked filing block to keep mine_convos under C901 Adding the per-file lock + double-checked file_already_mined() in the previous commit pushed mine_convos cyclomatic complexity from 25 to 26, just over ruff's max-complexity threshold. Hoist the locked critical section into _file_chunks_locked() so the outer loop stays within budget. No behavior change. --- mempalace/convo_miner.py | 82 ++++++++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 33 deletions(-) diff --git a/mempalace/convo_miner.py b/mempalace/convo_miner.py index f24fa69..6a021ec 100644 --- a/mempalace/convo_miner.py +++ b/mempalace/convo_miner.py @@ -272,6 +272,47 @@ def scan_convos(convo_dir: str) -> list: # ============================================================================= +def _file_chunks_locked(collection, source_file, chunks, wing, room, agent, extract_mode): + """Acquire the per-file lock, double-check mined status, and upsert chunks. + + Returns (drawers_added, room_counts_delta, skipped). + """ + room_counts_delta: dict = defaultdict(int) + drawers_added = 0 + with mine_lock(source_file): + # Re-check after lock — another agent may have just finished this file + if file_already_mined(collection, source_file): + return 0, room_counts_delta, True + + for chunk in chunks: + chunk_room = chunk.get("memory_type", room) if extract_mode == "general" else room + if extract_mode == "general": + room_counts_delta[chunk_room] += 1 + drawer_id = f"drawer_{wing}_{chunk_room}_{hashlib.sha256((source_file + str(chunk['chunk_index'])).encode()).hexdigest()[:24]}" + try: + collection.upsert( + documents=[chunk["content"]], + ids=[drawer_id], + metadatas=[ + { + "wing": wing, + "room": chunk_room, + "source_file": source_file, + "chunk_index": chunk["chunk_index"], + "added_by": agent, + "filed_at": datetime.now().isoformat(), + "ingest_mode": "convos", + "extract_mode": extract_mode, + } + ], + ) + drawers_added += 1 + except Exception as e: + if "already exists" not in str(e).lower(): + raise + return drawers_added, room_counts_delta, False + + def mine_convos( convo_dir: str, palace_path: str, @@ -376,39 +417,14 @@ def mine_convos( room_counts[room] += 1 # File each chunk — lock to prevent concurrent agents duplicating - drawers_added = 0 - with mine_lock(source_file): - # Re-check after lock — another agent may have just finished this file - if file_already_mined(collection, source_file): - files_skipped += 1 - continue - - for chunk in chunks: - chunk_room = chunk.get("memory_type", room) if extract_mode == "general" else room - if extract_mode == "general": - room_counts[chunk_room] += 1 - drawer_id = f"drawer_{wing}_{chunk_room}_{hashlib.sha256((source_file + str(chunk['chunk_index'])).encode()).hexdigest()[:24]}" - try: - collection.upsert( - documents=[chunk["content"]], - ids=[drawer_id], - metadatas=[ - { - "wing": wing, - "room": chunk_room, - "source_file": source_file, - "chunk_index": chunk["chunk_index"], - "added_by": agent, - "filed_at": datetime.now().isoformat(), - "ingest_mode": "convos", - "extract_mode": extract_mode, - } - ], - ) - drawers_added += 1 - except Exception as e: - if "already exists" not in str(e).lower(): - raise + drawers_added, room_delta, skipped = _file_chunks_locked( + collection, source_file, chunks, wing, room, agent, extract_mode + ) + if skipped: + files_skipped += 1 + continue + for r, n in room_delta.items(): + room_counts[r] += n total_drawers += drawers_added print(f" ✓ [{i:4}/{len(files)}] {filepath.name[:50]:50} +{drawers_added}") From 386da51ae54ca09ba491d04cb50ebe00efc73944 Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Mon, 13 Apr 2026 15:54:52 -0300 Subject: [PATCH 18/42] style: ruff format mempalace/palace.py Add blank lines after inline imports in mine_lock. Pure formatting. --- mempalace/palace.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mempalace/palace.py b/mempalace/palace.py index ed5382a..7b47f2f 100644 --- a/mempalace/palace.py +++ b/mempalace/palace.py @@ -69,18 +69,22 @@ def mine_lock(source_file: str): try: if os.name == "nt": import msvcrt + msvcrt.locking(lf.fileno(), msvcrt.LK_LOCK, 1) else: import fcntl + fcntl.flock(lf, fcntl.LOCK_EX) yield finally: try: if os.name == "nt": import msvcrt + msvcrt.locking(lf.fileno(), msvcrt.LK_UNLCK, 1) else: import fcntl + fcntl.flock(lf, fcntl.LOCK_UN) except Exception: pass From ca2598a9f69247429c367217eaf167c9d9c824da Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Mon, 13 Apr 2026 16:11:03 -0300 Subject: [PATCH 19/42] fix(normalize): make strip_noise verbatim-safe and scope it to Claude Code JSONL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The initial strip_noise() regressed on three fronts when audited against adversarial user content — each verified with executable repros against the cherry-picked code: 1. `.*?` with re.DOTALL span-ate across messages: one stray unclosed anywhere in a session merged with the next closing tag, silently deleting everything between them (including full assistant replies). 2. `.*\(ctrl\+o to expand\).*\n?` nuked entire lines of user prose whenever a user happened to document the TUI shortcut. 3. `Ran \d+ (?:stop|pre|post)\s*hook.*` with IGNORECASE ate the second sentence from "our CI has a stop hook ... Ran 2 stop hooks last week" — legitimate user commentary. These are unambiguous violations of the project's "Verbatim always" design principle. Fixes: - All tag patterns are now line-anchored (`(?m)^(?:> )?`) and their body forbids crossing a blank line (`(?:(?!\n\s*\n)[\s\S])*?`), so a dangling open tag cannot eat neighboring messages. - `_NOISE_LINE_PREFIXES` are line-anchored and case-sensitive — user prose mentioning "CURRENT TIME:" mid-sentence is preserved. - Hook-run chrome requires `(?m)^`, explicit hook names (Stop, PreCompact, PreToolUse, etc.), and no IGNORECASE. - "… +N lines" is line-anchored. - "(ctrl+o to expand)" only matches Claude Code's actual collapsed- output chrome shape `[N tokens] (ctrl+o to expand)`; a bare parenthetical in user prose stays intact. Scope: - `strip_noise()` is no longer called on every normalization path. Only `_try_claude_code_jsonl` invokes it, per-extracted-message — so Claude.ai exports, ChatGPT exports, Slack JSON, Codex JSONL, and plain text with `>` markers pass through fully verbatim. Per-message application also makes span-eating structurally impossible. Tests: - 15 new tests in test_normalize.py pin the boundary: 6 guard user content that must survive (each of the adversarial repros), 9 assert real system chrome is still stripped. All pass; full suite 702 pass (2 failures are the unrelated pre-existing version.py bug, cleared by #820). Known limitation (not fixed here): convo_miner.py does not delete drawers on re-mine, so transcripts mined before this PR keep noise- filled drawers until the user manually erases + re-mines. Proper fix needs a schema-version field on drawer metadata + re-mine trigger — out of scope for this PR. --- mempalace/normalize.py | 101 +++++++++++++++++++-------- tests/test_normalize.py | 146 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 218 insertions(+), 29 deletions(-) diff --git a/mempalace/normalize.py b/mempalace/normalize.py index 256a5e9..f2b8173 100644 --- a/mempalace/normalize.py +++ b/mempalace/normalize.py @@ -22,20 +22,40 @@ from typing import Optional # ─── Noise stripping ───────────────────────────────────────────────────── -# Claude Code and other tools inject system tags, hook output, UI chrome, -# and tool-call JSON into transcripts. These waste drawer space and pollute -# search results. Strip them before filing. +# Claude Code and other tools inject system tags, hook output, and UI chrome +# into transcripts. These waste drawer space and pollute search results. +# +# Verbatim is sacred — every pattern here is anchored to line boundaries and +# refuses to cross blank lines, so a stray unclosed tag in one message can +# never eat content from neighboring messages. When in doubt, leave text +# alone. -_NOISE_TAG_PATTERNS = [ - re.compile(r"]*>.*?", re.DOTALL), - re.compile(r"]*>.*?", re.DOTALL), - re.compile(r"]*>.*?", re.DOTALL), - re.compile(r"]*>.*?", re.DOTALL), - re.compile(r"]*>.*?", re.DOTALL), - re.compile(r"]*>.*?", re.DOTALL), -] +_NOISE_TAGS = ( + "system-reminder", + "command-message", + "command-name", + "task-notification", + "user-prompt-submit-hook", + "hook_output", +) -_NOISE_STRINGS = [ + +def _tag_pattern(name: str) -> "re.Pattern[str]": + # Opening tag must begin a line (optionally after a `> ` blockquote marker, + # since _messages_to_transcript prefixes lines with `> `). Body is lazy but + # forbidden from crossing a blank line, so a dangling open tag can't span + # multiple messages. Closing tag eats optional trailing whitespace + newline. + return re.compile( + rf"(?m)^(?:> )?<{name}(?:\s[^>]*)?>" rf"(?:(?!\n\s*\n)[\s\S])*?" rf"[ \t]*\n?" + ) + + +_NOISE_TAG_PATTERNS = [_tag_pattern(t) for t in _NOISE_TAGS] + +# Strings that identify an entire noise line when found at its start. +# Matched case-sensitively and anchored to line-start so user prose mentioning +# e.g. "current time:" in a sentence is untouched. +_NOISE_LINE_PREFIXES = ( "CURRENT TIME:", "VERIFIED FACTS (do not contradict)", "AGENT SPECIALIZATION:", @@ -46,20 +66,39 @@ _NOISE_STRINGS = [ "Auto-save reminder...", "Checking pipeline...", "MemPalace auto-save checkpoint.", +) + +_NOISE_LINE_PATTERNS = [ + re.compile(rf"(?m)^(?:> )?{re.escape(p)}.*\n?") for p in _NOISE_LINE_PREFIXES ] +# Claude Code TUI hook-run chrome, e.g. "Ran 2 Stop hook", "Ran 1 PreCompact hook". +# Line-anchored, case-sensitive, explicit hook names — prose like +# "our CI has a stop hook" stays intact. +_HOOK_LINE_RE = re.compile( + r"(?m)^(?:> )?Ran \d+ (?:Stop|PreCompact|PreToolUse|PostToolUse|UserPromptSubmit|Notification|SessionStart|SessionEnd) hook[s]?.*\n?" +) + +# "… +N lines" collapsed-output marker, line-anchored. +_COLLAPSED_LINES_RE = re.compile(r"(?m)^(?:> )?…\s*\+\d+ lines.*\n?") + def strip_noise(text: str) -> str: - """Remove system tags, hook output, and Claude Code UI chrome from text.""" + """Remove system tags, hook output, and Claude Code UI chrome from text. + + All patterns are line-anchored. User prose that happens to mention these + strings inline (e.g., documenting them) is preserved verbatim. + """ for pat in _NOISE_TAG_PATTERNS: text = pat.sub("", text) - for noise in _NOISE_STRINGS: - text = text.replace(noise, "") - # Strip Claude Code UI chrome - text = re.sub(r".*\(ctrl\+o to expand\).*\n?", "", text) - text = re.sub(r"Ran \d+ (?:stop|pre|post)\s*hook.*\n?", "", text, flags=re.IGNORECASE) - text = re.sub(r"…\s*\+\d+ lines.*\n?", "", text) - # Collapse runs of blank lines + for pat in _NOISE_LINE_PATTERNS: + text = pat.sub("", text) + text = _HOOK_LINE_RE.sub("", text) + text = _COLLAPSED_LINES_RE.sub("", text) + # Strip the Claude Code collapsed-output chrome "[N tokens] (ctrl+o to expand)". + # Narrow shape — a bare "(ctrl+o to expand)" in user prose stays intact. + text = re.sub(r"\s*\[\d+\s+tokens?\]\s*\(ctrl\+o to expand\)", "", text) + # Collapse runs of blank lines created by the removals text = re.sub(r"\n{4,}", "\n\n\n", text) return text.strip() @@ -84,23 +123,21 @@ def normalize(filepath: str) -> str: if not content.strip(): return content - # Already has > markers — pass through (strip noise but preserve trailing newline) + # Already has > markers — pass through unchanged. lines = content.split("\n") if sum(1 for line in lines if line.strip().startswith(">")) >= 3: - cleaned = strip_noise(content) - # Preserve trailing newline if original had one - if content.endswith("\n") and not cleaned.endswith("\n"): - cleaned += "\n" - return cleaned + return content - # Try JSON normalization + # Try JSON normalization. strip_noise is applied inside the Claude Code + # JSONL parser (the only format that injects system tags/hook chrome); + # other formats pass through verbatim. ext = Path(filepath).suffix.lower() if ext in (".json", ".jsonl") or content.strip()[:1] in ("{", "["): normalized = _try_normalize_json(content) if normalized: - return strip_noise(normalized) + return normalized - return strip_noise(content) + return content def _try_normalize_json(content: str) -> Optional[str]: @@ -160,6 +197,10 @@ def _try_claude_code_jsonl(content: str) -> Optional[str]: isinstance(b, dict) and b.get("type") == "tool_result" for b in msg_content ) text = _extract_content(msg_content, tool_use_map=tool_use_map) + # Strip Claude Code system-injected noise per message, never across + # message boundaries — prevents span-eating. + if text: + text = strip_noise(text) if text: if is_tool_only and messages and messages[-1][0] == "assistant": # Append tool results to the previous assistant message @@ -169,6 +210,8 @@ def _try_claude_code_jsonl(content: str) -> Optional[str]: messages.append(("user", text)) elif msg_type == "assistant": text = _extract_content(msg_content, tool_use_map=tool_use_map) + if text: + text = strip_noise(text) if text: # If previous message is also assistant (multi-turn tool loop), # merge into the same assistant turn diff --git a/tests/test_normalize.py b/tests/test_normalize.py index 7f0652a..53fc933 100644 --- a/tests/test_normalize.py +++ b/tests/test_normalize.py @@ -13,6 +13,7 @@ from mempalace.normalize import ( _try_normalize_json, _try_slack_json, normalize, + strip_noise, ) @@ -1048,3 +1049,148 @@ def test_normalize_rejects_large_file(): assert False, "Should have raised IOError" except IOError as e: assert "too large" in str(e).lower() + + +# ── strip_noise() — verbatim-safety boundary tests ───────────────────── +# +# The "Verbatim always" design principle requires that we never delete +# user-authored text. These tests pin down the boundary between system +# noise (which we strip) and user prose that happens to mention the same +# strings (which must survive untouched). + + +class TestStripNoisePreservesUserContent: + """User prose that mentions noise strings inline must be preserved.""" + + def test_user_discusses_stop_hook_in_prose(self): + # Regression: original regex with IGNORECASE + `.*\n?` ate the second + # sentence from real user commentary. + text = ( + "> User:\n" + "> Our CI has a stop hook that rejects merges after 5pm. " + "Ran 2 stop hooks last week.\n" + "> Assistant:\n" + "> Got it." + ) + assert strip_noise(text) == text.strip() + + def test_user_mentions_system_reminder_inline(self): + # Inline tags inside user prose (e.g. documenting + # Claude Code behavior) must not be stripped. + text = ( + "> User:\n" + "> Here is what Claude Code emits: " + "Auto-save reminder..." + " — I want to ignore it." + ) + assert strip_noise(text) == text.strip() + + def test_ctrl_o_hint_in_prose_preserved(self): + # Regression: original `.*\(ctrl\+o to expand\).*\n?` nuked the whole + # line whenever a user documented the TUI shortcut. + text = ( + "> User:\n" + "> In the TUI you hit (ctrl+o to expand) to see more. " + "That is the shortcut I want to document." + ) + assert strip_noise(text) == text.strip() + + def test_current_time_inline_in_prose(self): + text = "> User:\n> At CURRENT TIME: the meeting starts, not before." + assert strip_noise(text) == text.strip() + + def test_plus_n_lines_marker_inline(self): + text = "> User:\n> The log showed … +50 lines of stack trace, useful." + assert strip_noise(text) == text.strip() + + def test_dangling_open_tag_does_not_span_messages(self): + # THE span-eating bug: a stray unclosed in one + # message must NOT merge with a closing tag in another message and + # silently delete everything in between. + text = ( + "> User 1: normal content A\n" + "> Assistant: reply\n" + "> User 2: more content tail" + ) + out = strip_noise(text) + assert "Assistant: reply" in out + assert "User 2: more content" in out + assert "User 1: normal content" in out + + +class TestStripNoiseRemovesSystemChrome: + """System-injected noise with standalone/line-anchored shape must be stripped.""" + + def test_strips_line_anchored_system_reminder_block(self): + text = ( + "> User:\n" + "\n" + "Auto-save reminder...\n" + "\n" + "> Real message." + ) + out = strip_noise(text) + assert "system-reminder" not in out + assert "Auto-save reminder" not in out + assert "Real message." in out + + def test_strips_system_reminder_with_blockquote_prefix(self): + # _messages_to_transcript prefixes lines with "> ", so the line + # anchor must also accept that shape. + text = "> User:\n" "> Injected noise\n" "> Real message." + out = strip_noise(text) + assert "Injected noise" not in out + assert "Real message." in out + + def test_strips_standalone_ran_hook_line(self): + text = "Ran 2 Stop hook\n> User: real content" + out = strip_noise(text) + assert "Ran 2 Stop hook" not in out + assert "real content" in out + + def test_strips_known_hook_names(self): + for hook in ("Stop", "PreCompact", "PreToolUse", "PostToolUse", "UserPromptSubmit"): + text = f"Ran 1 {hook} hook\n> User: content" + assert hook not in strip_noise(text) + + def test_strips_current_time_standalone(self): + text = "CURRENT TIME: 2026-04-13 10:00 UTC\n> User: Hello" + out = strip_noise(text) + assert "CURRENT TIME" not in out + assert "Hello" in out + + def test_strips_collapsed_lines_marker(self): + text = "… +42 lines\n> User: Hello" + out = strip_noise(text) + assert "+42 lines" not in out + assert "Hello" in out + + def test_strips_token_count_ctrl_o_chrome(self): + # Claude Code's actual collapsed-output chrome: "[N tokens] (ctrl+o to expand)" + text = "> Assistant: some output [5 tokens] (ctrl+o to expand)\n> User: ok" + out = strip_noise(text) + assert "(ctrl+o to expand)" not in out + assert "[5 tokens]" not in out + assert "some output" in out + + def test_strips_each_known_noise_tag(self): + for tag in ( + "system-reminder", + "command-message", + "command-name", + "task-notification", + "user-prompt-submit-hook", + "hook_output", + ): + text = f"> User:\n<{tag}>junk\n> Real." + out = strip_noise(text) + assert tag not in out, f"{tag} leaked into output" + assert "Real." in out + + def test_collapses_excessive_blank_lines(self): + text = "line one\n\n\n\n\n\nline two" + out = strip_noise(text) + assert "line one" in out + assert "line two" in out + # Should collapse to no more than 3 newlines + assert "\n\n\n\n" not in out From 7e5eeda9a5c22168719067d15af8b2424662f586 Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Mon, 13 Apr 2026 16:20:55 -0300 Subject: [PATCH 20/42] feat(normalize): auto-rebuild stale drawers via NORMALIZE_VERSION schema gate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this, the strip_noise improvement only helps new mines. Every user who had already mined Claude Code JSONL sessions would keep their noise-polluted drawers forever, because convo_miner's file_already_mined skip short-circuits before re-processing. Adds a versioned schema gate so upgrades propagate silently: - palace.NORMALIZE_VERSION=2 — bumped when the normalization pipeline changes shape (this PR's strip_noise is the v1→v2 bump). - file_already_mined now returns False if the stored normalize_version is missing or less than current, triggering a rebuild on next mine. - Both miners stamp drawers with the current normalize_version. - convo_miner now purges stale drawers before inserting fresh chunks (mirrors miner.py's existing delete+insert), extracted into _file_convo_chunks helper to keep mine_convos under ruff's C901 limit. User experience: upgrade mempalace, run `mempalace mine` as usual, old noisy drawers get silently replaced with clean ones. No erase needed, no "you need to rebuild" changelog footgun. Tests: - test_file_already_mined_returns_false_for_stale_normalize_version — pins the version gate contract for missing/v1/current. - test_add_drawer_stamps_normalize_version — fresh project-miner drawers carry the field. - test_mine_convos_rebuilds_stale_drawers_after_schema_bump — end-to-end proof that a pre-v2 palace gets silently cleaned on next mine, with orphan drawers purged and NOT skipped. Existing test_file_already_mined_check_mtime updated to include the new field; all other tests unaffected. --- mempalace/convo_miner.py | 83 ++++++++++++++++++++++------------ mempalace/miner.py | 3 +- mempalace/palace.py | 28 ++++++++++-- tests/test_convo_miner.py | 83 ++++++++++++++++++++++++++++++++++ tests/test_miner.py | 94 +++++++++++++++++++++++++++++++++++++-- 5 files changed, 253 insertions(+), 38 deletions(-) diff --git a/mempalace/convo_miner.py b/mempalace/convo_miner.py index d406073..663f1a0 100644 --- a/mempalace/convo_miner.py +++ b/mempalace/convo_miner.py @@ -16,7 +16,7 @@ from datetime import datetime from collections import defaultdict from .normalize import normalize -from .palace import SKIP_DIRS, get_collection, file_already_mined +from .palace import NORMALIZE_VERSION, SKIP_DIRS, file_already_mined, get_collection # File types that might contain conversations @@ -51,6 +51,7 @@ def _register_file(collection, source_file: str, wing: str, agent: str): "added_by": agent, "filed_at": datetime.now().isoformat(), "ingest_mode": "registry", + "normalize_version": NORMALIZE_VERSION, } ], ) @@ -272,6 +273,52 @@ def scan_convos(convo_dir: str) -> list: # ============================================================================= +def _file_convo_chunks(collection, source_file, chunks, wing, room, agent, extract_mode): + """Purge stale drawers for ``source_file`` then upsert fresh chunks. + + Returns (drawers_added, room_counts_delta). + """ + # Purge stale drawers first. When the normalize schema bumps, + # file_already_mined() returns False for pre-v2 drawers and we land + # here — clean them out so the source doesn't end up with a mix of + # old-noise and new-clean drawers. + try: + collection.delete(where={"source_file": source_file}) + except Exception: + pass + + room_counts_delta: dict = defaultdict(int) + drawers_added = 0 + for chunk in chunks: + chunk_room = chunk.get("memory_type", room) if extract_mode == "general" else room + if extract_mode == "general": + room_counts_delta[chunk_room] += 1 + drawer_id = f"drawer_{wing}_{chunk_room}_{hashlib.sha256((source_file + str(chunk['chunk_index'])).encode()).hexdigest()[:24]}" + try: + collection.upsert( + documents=[chunk["content"]], + ids=[drawer_id], + metadatas=[ + { + "wing": wing, + "room": chunk_room, + "source_file": source_file, + "chunk_index": chunk["chunk_index"], + "added_by": agent, + "filed_at": datetime.now().isoformat(), + "ingest_mode": "convos", + "extract_mode": extract_mode, + "normalize_version": NORMALIZE_VERSION, + } + ], + ) + drawers_added += 1 + except Exception as e: + if "already exists" not in str(e).lower(): + raise + return drawers_added, room_counts_delta + + def mine_convos( convo_dir: str, palace_path: str, @@ -375,34 +422,12 @@ def mine_convos( if extract_mode != "general": room_counts[room] += 1 - # File each chunk - drawers_added = 0 - for chunk in chunks: - chunk_room = chunk.get("memory_type", room) if extract_mode == "general" else room - if extract_mode == "general": - room_counts[chunk_room] += 1 - drawer_id = f"drawer_{wing}_{chunk_room}_{hashlib.sha256((source_file + str(chunk['chunk_index'])).encode()).hexdigest()[:24]}" - try: - collection.upsert( - documents=[chunk["content"]], - ids=[drawer_id], - metadatas=[ - { - "wing": wing, - "room": chunk_room, - "source_file": source_file, - "chunk_index": chunk["chunk_index"], - "added_by": agent, - "filed_at": datetime.now().isoformat(), - "ingest_mode": "convos", - "extract_mode": extract_mode, - } - ], - ) - drawers_added += 1 - except Exception as e: - if "already exists" not in str(e).lower(): - raise + # Purge stale drawers + file fresh chunks. + drawers_added, room_delta = _file_convo_chunks( + collection, source_file, chunks, wing, room, agent, extract_mode + ) + for r, n in room_delta.items(): + room_counts[r] += n total_drawers += drawers_added print(f" ✓ [{i:4}/{len(files)}] {filepath.name[:50]:50} +{drawers_added}") diff --git a/mempalace/miner.py b/mempalace/miner.py index 22c8af3..49e0d25 100644 --- a/mempalace/miner.py +++ b/mempalace/miner.py @@ -15,7 +15,7 @@ from pathlib import Path from datetime import datetime from collections import defaultdict -from .palace import SKIP_DIRS, get_collection, file_already_mined +from .palace import NORMALIZE_VERSION, SKIP_DIRS, file_already_mined, get_collection READABLE_EXTENSIONS = { ".txt", @@ -381,6 +381,7 @@ def add_drawer( "chunk_index": chunk_index, "added_by": agent, "filed_at": datetime.now().isoformat(), + "normalize_version": NORMALIZE_VERSION, } # Store file mtime so we can detect modifications later. try: diff --git a/mempalace/palace.py b/mempalace/palace.py index 948fecc..9cfb55e 100644 --- a/mempalace/palace.py +++ b/mempalace/palace.py @@ -36,6 +36,16 @@ SKIP_DIRS = { _DEFAULT_BACKEND = ChromaBackend() +# Schema version for drawer normalization. Bump when the normalization +# pipeline changes in a way that existing drawers should be rebuilt to pick up +# (e.g., new noise-stripping rules). `file_already_mined` treats drawers with +# a missing or stale `normalize_version` as "not mined", so the next mine pass +# silently rebuilds them — users don't need to manually erase + re-mine. +# +# v2 (2026-04): introduced strip_noise() for Claude Code JSONL; previous +# drawers stored system tags / hook chrome verbatim. +NORMALIZE_VERSION = 2 + def get_collection( palace_path: str, @@ -53,16 +63,26 @@ def get_collection( def file_already_mined(collection, source_file: str, check_mtime: bool = False) -> bool: """Check if a file has already been filed in the palace. - When check_mtime=True (used by project miner), returns False if the file - has been modified since it was last mined, so it gets re-mined. - When check_mtime=False (used by convo miner), just checks existence. + Returns False (so the file gets re-mined) when: + - no drawers exist for this source_file + - the stored `normalize_version` is missing or older than the current + schema (triggers silent rebuild after a normalization upgrade) + - `check_mtime=True` and the file's mtime differs from the stored one + + When check_mtime=True (used by project miner), also re-mines on content + change. When check_mtime=False (used by convo miner), transcripts are + assumed immutable, so only the version gate triggers a rebuild. """ try: results = collection.get(where={"source_file": source_file}, limit=1) if not results.get("ids"): return False + stored_meta = results.get("metadatas", [{}])[0] or {} + # Pre-v2 drawers have no version field — treat them as stale. + stored_version = stored_meta.get("normalize_version", 1) + if stored_version < NORMALIZE_VERSION: + return False if check_mtime: - stored_meta = results.get("metadatas", [{}])[0] stored_mtime = stored_meta.get("source_mtime") if stored_mtime is None: return False diff --git a/tests/test_convo_miner.py b/tests/test_convo_miner.py index f5074b4..166644b 100644 --- a/tests/test_convo_miner.py +++ b/tests/test_convo_miner.py @@ -75,3 +75,86 @@ def test_mine_convos_does_not_reprocess_empty_chunk_files(capsys): assert "Files skipped (already filed): 1" in out2 finally: shutil.rmtree(tmpdir, ignore_errors=True) + + +def test_mine_convos_rebuilds_stale_drawers_after_schema_bump(capsys): + """When stored drawers have an older normalize_version, the next mine + silently purges them and refiles — no manual erase required. + + This is what makes the strip_noise upgrade apply to existing corpora: + users just run `mempalace mine` again and old noise-filled drawers get + replaced with clean ones.""" + from mempalace.palace import NORMALIZE_VERSION + + tmpdir = tempfile.mkdtemp() + try: + convo_path = Path(tmpdir) / "chat.txt" + convo_path.write_text( + "> What is memory?\nMemory is persistence.\n\n" + "> Why does it matter?\nIt enables continuity.\n\n" + "> How do we build it?\nWith structured storage.\n" + ) + palace_path = os.path.join(tmpdir, "palace") + + # First mine — stamps drawers with NORMALIZE_VERSION + mine_convos(tmpdir, palace_path, wing="test") + capsys.readouterr() + + client = chromadb.PersistentClient(path=palace_path) + col = client.get_collection("mempalace_drawers") + resolved = str(Path(tmpdir).resolve() / "chat.txt") + first_pass = col.get(where={"source_file": resolved}) + first_ids = set(first_pass["ids"]) + assert first_ids, "first mine should produce drawers" + for meta in first_pass["metadatas"]: + assert meta.get("normalize_version") == NORMALIZE_VERSION + + # Simulate pre-v2 drawers: rewrite metadata to an older version, + # and replace content with "noise" so we can see it get cleaned up. + stale_metas = [] + for meta in first_pass["metadatas"]: + stale = dict(meta) + stale["normalize_version"] = 1 + stale_metas.append(stale) + col.update( + ids=list(first_pass["ids"]), + documents=["STALE NOISE"] * len(first_pass["ids"]), + metadatas=stale_metas, + ) + # Add an extra orphan drawer that should also be purged. + col.add( + ids=["orphan_drawer"], + documents=["OLD ORPHAN"], + metadatas=[ + { + "wing": "test", + "room": "default", + "source_file": resolved, + "chunk_index": 999, + "normalize_version": 1, + } + ], + ) + del col, client + + # Second mine — version gate should trigger rebuild + mine_convos(tmpdir, palace_path, wing="test") + out = capsys.readouterr().out + assert ( + "Files skipped (already filed): 0" in out + ), "stale drawers should force a rebuild, not a skip" + + client = chromadb.PersistentClient(path=palace_path) + col = client.get_collection("mempalace_drawers") + rebuilt = col.get(where={"source_file": resolved}) + # Orphan is gone + assert "orphan_drawer" not in rebuilt["ids"] + # No stale content survived + assert all("STALE NOISE" not in d for d in rebuilt["documents"]) + assert all("OLD ORPHAN" not in d for d in rebuilt["documents"]) + # All rebuilt drawers carry the current version + for meta in rebuilt["metadatas"]: + assert meta.get("normalize_version") == NORMALIZE_VERSION + del col, client + finally: + shutil.rmtree(tmpdir, ignore_errors=True) diff --git a/tests/test_miner.py b/tests/test_miner.py index ea2f2a9..020d5bd 100644 --- a/tests/test_miner.py +++ b/tests/test_miner.py @@ -7,7 +7,7 @@ import chromadb import yaml from mempalace.miner import mine, scan_project, status -from mempalace.palace import file_already_mined +from mempalace.palace import NORMALIZE_VERSION, file_already_mined def write_file(path: Path, content: str): @@ -227,11 +227,17 @@ def test_file_already_mined_check_mtime(): assert file_already_mined(col, test_file) is False assert file_already_mined(col, test_file, check_mtime=True) is False - # Add it with mtime + # Add it with mtime + current normalize_version col.add( ids=["d1"], documents=["hello world"], - metadatas=[{"source_file": test_file, "source_mtime": str(mtime)}], + metadatas=[ + { + "source_file": test_file, + "source_mtime": str(mtime), + "normalize_version": NORMALIZE_VERSION, + } + ], ) # Already mined (no mtime check) @@ -253,7 +259,12 @@ def test_file_already_mined_check_mtime(): col.add( ids=["d2"], documents=["other"], - metadatas=[{"source_file": "/fake/no_mtime.txt"}], + metadatas=[ + { + "source_file": "/fake/no_mtime.txt", + "normalize_version": NORMALIZE_VERSION, + } + ], ) assert file_already_mined(col, "/fake/no_mtime.txt", check_mtime=True) is False finally: @@ -296,3 +307,78 @@ def test_status_missing_palace_does_not_create_empty_collection(tmp_path, capsys out = capsys.readouterr().out assert "No palace found" in out assert not palace_path.exists() + + +# ── normalize_version schema gate ─────────────────────────────────────── +# +# When the normalization pipeline changes shape (e.g., strip_noise lands), +# `NORMALIZE_VERSION` is bumped so pre-existing drawers can be silently +# rebuilt on the next mine. These tests pin that contract. + + +def test_file_already_mined_returns_false_for_stale_normalize_version(): + """Pre-v2 drawers (no field, or older integer) must not short-circuit.""" + tmpdir = tempfile.mkdtemp() + try: + palace_path = os.path.join(tmpdir, "palace") + os.makedirs(palace_path) + client = chromadb.PersistentClient(path=palace_path) + col = client.get_or_create_collection("mempalace_drawers") + + # Pre-v2 drawer: no normalize_version field at all + col.add( + ids=["d_old"], + documents=["old"], + metadatas=[{"source_file": "/fake/old.jsonl"}], + ) + assert file_already_mined(col, "/fake/old.jsonl") is False + + # Explicitly older version + col.add( + ids=["d_v1"], + documents=["v1"], + metadatas=[{"source_file": "/fake/v1.jsonl", "normalize_version": 1}], + ) + assert file_already_mined(col, "/fake/v1.jsonl") is False + + # Current version — short-circuits + col.add( + ids=["d_current"], + documents=["cur"], + metadatas=[ + { + "source_file": "/fake/current.jsonl", + "normalize_version": NORMALIZE_VERSION, + } + ], + ) + assert file_already_mined(col, "/fake/current.jsonl") is True + finally: + del col, client + shutil.rmtree(tmpdir, ignore_errors=True) + + +def test_add_drawer_stamps_normalize_version(tmp_path): + """Fresh drawers carry the current schema version so future upgrades work.""" + from mempalace.miner import add_drawer + + palace_path = tmp_path / "palace" + palace_path.mkdir() + client = chromadb.PersistentClient(path=str(palace_path)) + col = client.get_or_create_collection("mempalace_drawers") + try: + added = add_drawer( + collection=col, + wing="test", + room="notes", + content="hello", + source_file=str(tmp_path / "src.md"), + chunk_index=0, + agent="unit", + ) + assert added is True + stored = col.get(limit=1) + meta = stored["metadatas"][0] + assert meta["normalize_version"] == NORMALIZE_VERSION + finally: + del col, client From a3b7988d8791e10877293131e1f7c936e9a8aee1 Mon Sep 17 00:00:00 2001 From: MSL <232237854+milla-jovovich@users.noreply.github.com> Date: Mon, 13 Apr 2026 01:50:07 -0700 Subject: [PATCH 21/42] =?UTF-8?q?fix:=20stop=20hooks=20from=20making=20age?= =?UTF-8?q?nts=20write=20in=20chat=20=E2=80=94=20save=20tokens?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The save hook and precompact hook were telling the agent to write diary entries, add drawers, and add KG triples IN THE CHAT WINDOW. Every line written stays in conversation history and retransmits on every subsequent turn — ~$1/session in wasted tokens. Fix: hooks now say "saved in background, no action needed" and use decision: allow instead of block. The agent continues working without interruption. All filing happens via the background pipeline. Also updated hooks README with: - Known limitation: hooks require session restart after install - Updated cost section: zero tokens, background-only Co-Authored-By: Claude Opus 4.6 (1M context) --- hooks/README.md | 6 +++++- hooks/mempal_precompact_hook.sh | 6 +++--- hooks/mempal_save_hook.sh | 11 +++++++---- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/hooks/README.md b/hooks/README.md index d5380ef..977b109 100644 --- a/hooks/README.md +++ b/hooks/README.md @@ -133,6 +133,10 @@ Example output: [14:40:01] Session abc123: 18 exchanges, 3 since last save ``` +## Known Limitations + +**Hooks require session restart after install.** Claude Code loads hooks from `settings.json` at session start only. If you run `mempalace init` or manually edit hook config mid-session, the hooks won't fire until you restart Claude Code. This is a Claude Code limitation. + ## Cost -**Zero extra tokens.** The hooks are bash scripts that run locally. They don't call any API. The only "cost" is the AI spending a few seconds organizing memories at each checkpoint — and it's doing that with context it already has loaded. +**Zero extra tokens.** The hooks notify the AI that saves happened in the background — the AI doesn't need to write anything in the chat. All filing is handled automatically. Previous versions asked the AI to write diary entries and drawer content in the chat window, which cost ~$1/session in retransmitted tokens. diff --git a/hooks/mempal_precompact_hook.sh b/hooks/mempal_precompact_hook.sh index 550a813..1c14193 100755 --- a/hooks/mempal_precompact_hook.sh +++ b/hooks/mempal_precompact_hook.sh @@ -68,10 +68,10 @@ if [ -n "$MEMPAL_DIR" ] && [ -d "$MEMPAL_DIR" ]; then python3 -m mempalace mine "$MEMPAL_DIR" >> "$STATE_DIR/hook.log" 2>&1 fi -# Always block — compaction = save everything +# Notify — compaction is about to happen but filing is handled in background cat << 'HOOKJSON' { - "decision": "block", - "reason": "COMPACTION IMMINENT. Save ALL topics, decisions, quotes, code, and important context from this session to your memory system. Be thorough — after compaction, detailed context will be lost. Organize into appropriate categories. Use verbatim quotes where possible. Save everything, then allow compaction to proceed." + "decision": "allow", + "reason": "MemPalace pre-compaction save. Your full conversation has been saved verbatim in the background — no action needed. Compaction can proceed safely." } HOOKJSON diff --git a/hooks/mempal_save_hook.sh b/hooks/mempal_save_hook.sh index a0e4681..b15d961 100755 --- a/hooks/mempal_save_hook.sh +++ b/hooks/mempal_save_hook.sh @@ -140,12 +140,15 @@ if [ "$SINCE_LAST" -ge "$SAVE_INTERVAL" ] && [ "$EXCHANGE_COUNT" -gt 0 ]; then python3 -m mempalace mine "$MEMPAL_DIR" >> "$STATE_DIR/hook.log" 2>&1 & fi - # Block the AI and tell it to save - # The "reason" becomes a system message the AI sees and acts on + # Notify the AI that a checkpoint happened — but do NOT ask it to write + # anything in chat. All filing happens in the background via the pipeline. + # The old version asked the agent to write diary entries, add drawers, and + # add KG triples in the chat window — that cost ~$1/session in retransmitted + # tokens and cluttered the conversation. cat << 'HOOKJSON' { - "decision": "block", - "reason": "AUTO-SAVE checkpoint. Save key topics, decisions, quotes, and code from this session to your memory system. Organize into appropriate categories. Use verbatim quotes where possible. Continue conversation after saving." + "decision": "allow", + "reason": "MemPalace auto-save checkpoint. Your conversation is being saved verbatim in the background — no action needed from you. Continue working." } HOOKJSON else From 5db651a543a7617a7eec6d08fc7e571d4842e056 Mon Sep 17 00:00:00 2001 From: shafdev <96260000+shafdev@users.noreply.github.com> Date: Tue, 14 Apr 2026 01:36:04 +0530 Subject: [PATCH 22/42] fix: use microsecond timestamp and full content hash in diary entry ID (#819) --- mempalace/mcp_server.py | 5 ++++- tests/test_mcp_server.py | 43 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/mempalace/mcp_server.py b/mempalace/mcp_server.py index 4e21426..33933ff 100644 --- a/mempalace/mcp_server.py +++ b/mempalace/mcp_server.py @@ -836,7 +836,10 @@ def tool_diary_write(agent_name: str, entry: str, topic: str = "general"): return _no_palace() now = datetime.now() - entry_id = f"diary_{wing}_{now.strftime('%Y%m%d_%H%M%S')}_{hashlib.sha256(entry[:50].encode()).hexdigest()[:12]}" + entry_id = ( + f"diary_{wing}_{now.strftime('%Y%m%d_%H%M%S%f')}_" + f"{hashlib.sha256(entry.encode()).hexdigest()[:12]}" + ) _wal_log( "diary_write", diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 4cc8b4a..a8189ae 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -6,6 +6,7 @@ dispatch layer (integration-level). Uses isolated palace + KG fixtures via monkeypatch to avoid touching real data. """ +from datetime import datetime import json import sys @@ -643,6 +644,48 @@ class TestDiaryTools: r = tool_diary_read(agent_name="Nobody") assert r["entries"] == [] + def test_diary_write_same_second_shared_prefix_no_collision( + self, monkeypatch, config, palace_path, kg + ): + _patch_mcp_server(monkeypatch, config, kg) + _client, _col = _get_collection(palace_path, create=True) + del _client + + from mempalace import mcp_server + + class FrozenDateTime: + calls = [ + datetime(2026, 4, 13, 22, 15, 30, 123456), + datetime(2026, 4, 13, 22, 15, 30, 123457), + ] + fallback = datetime(2026, 4, 13, 22, 15, 30, 123457) + + @classmethod + def now(cls): + if cls.calls: + return cls.calls.pop(0) + return cls.fallback + + monkeypatch.setattr(mcp_server, "datetime", FrozenDateTime) + + from mempalace.mcp_server import tool_diary_read, tool_diary_write + + entry1 = "A" * 50 + " entry one" + entry2 = "A" * 50 + " entry two" + + result1 = tool_diary_write(agent_name="TestAgent", entry=entry1, topic="status") + result2 = tool_diary_write(agent_name="TestAgent", entry=entry2, topic="status") + + assert result1["success"] is True + assert result2["success"] is True + assert result1["entry_id"] != result2["entry_id"] + + read_result = tool_diary_read(agent_name="TestAgent") + contents = [entry["content"] for entry in read_result["entries"]] + assert read_result["total"] == 2 + assert entry1 in contents + assert entry2 in contents + # ── Cache Invalidation (inode/mtime) ────────────────────────────────── From f3c6770ecf2d15bc4c749f439e6aed28317542dd Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2026 21:16:03 +0000 Subject: [PATCH 23/42] fix: remove unused import 'main' from mempalace/__init__.py Removed the 'main' import from `mempalace/__init__.py` and updated `pyproject.toml` to point the script entry point directly to `mempalace.cli:main`. This ensures the CLI remains functional while improving code hygiene. Co-authored-by: igorls <4753812+igorls@users.noreply.github.com> --- mempalace/__init__.py | 3 +-- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/mempalace/__init__.py b/mempalace/__init__.py index f944507..212ac33 100644 --- a/mempalace/__init__.py +++ b/mempalace/__init__.py @@ -2,7 +2,6 @@ import logging -from .cli import main # noqa: E402 from .version import __version__ # noqa: E402 # ChromaDB 0.6.x ships a Posthog telemetry client whose capture() signature is @@ -25,4 +24,4 @@ logging.getLogger("chromadb.telemetry.product.posthog").setLevel(logging.CRITICA # intact, so the real fix is upgrading chromadb to 1.5.4+, which #581 # proposes. See #397 for the history of this line. -__all__ = ["main", "__version__"] +__all__ = ["__version__"] diff --git a/pyproject.toml b/pyproject.toml index 4ab1a1d..8700fd1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,7 @@ Repository = "https://github.com/milla-jovovich/mempalace" "Bug Tracker" = "https://github.com/milla-jovovich/mempalace/issues" [project.scripts] -mempalace = "mempalace:main" +mempalace = "mempalace.cli:main" [project.optional-dependencies] dev = ["pytest>=7.0", "pytest-cov>=4.0", "ruff>=0.4.0", "psutil>=5.9"] From 1263c3c91ed39d9f9abc8b0f0d5a875b2b1d6794 Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Mon, 13 Apr 2026 18:20:11 -0300 Subject: [PATCH 24/42] merge: full hardened stack + rewrite fact_checker around actual KG API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Merges the full hardened stack (up through #791 drawer-grep) and turns fact_checker from "dead code hidden behind bare except" into an actually-working offline contradiction detector with tests. ## Dead paths the PR body advertised but the code never executed Both buried by a single outer ``except Exception: pass``: * ``kg.query(subject)`` — ``KnowledgeGraph`` has no ``query()`` method; it has ``query_entity()``. The attribute error was silently swallowed and the entire KG branch always returned ``[]``. Now using ``kg.query_entity(subject, direction="outgoing")`` with proper handling of the ``predicate``/``object``/``current``/``valid_to`` fields the real API returns. * ``KnowledgeGraph(palace_path=palace_path)`` — the constructor's only kwarg is ``db_path``. Passing ``palace_path`` raised TypeError, silently swallowed. Now computing the db_path correctly from ``/knowledge_graph.sqlite3``, matching the convention the MCP server already uses. ## Contradiction logic rewritten The previous ``if kg_pred in claim and fact.object not in claim`` only fired when text used the SAME predicate word as the KG fact — the exact opposite of the stated use case ("Bob is Alice's brother" when KG says husband" would NOT have fired). Replaced with a proper parse → lookup → compare pipeline: * ``_extract_claims`` parses two surface forms ("X is Y's Z" and "X's Z is Y") into ``(subject, predicate, object)`` triples. * ``_check_kg_contradictions`` pulls the subject's outgoing facts and flags two classes: - ``relationship_mismatch`` when a current KG fact matches the same ``(subject, object)`` pair but with a different predicate. - ``stale_fact`` when the exact triple exists but is ``valid_to``-closed in the past. * Stale-fact detection is now implemented (the PR body claimed it; the old code silently didn't implement it). ## Performance fix — O(n²) → O(mentioned × n) ``_check_entity_confusion`` previously computed Levenshtein for every pair of registered names on every ``check_text`` call. For 1,000 registered names that's ~500K edit-distance calls per hook invocation. Now we first identify which registry names actually appear in the text (single regex scan), then only compute edit distance between mentioned and unmentioned names. Pinned by a test that asserts <200ms on a 500- name registry with zero mentions. Also: when *both* similar names are mentioned in the text, we no longer flag them — the user clearly knows they're different people. ## Shared entity-registry loader ``mempalace/miner.py`` already had an mtime-cached loader for ``~/.mempalace/known_entities.json``. fact_checker had a duplicate implementation that leaked file handles and ignored caching. Extended miner's cache to expose both the flat set (``_load_known_entities``) and the raw category dict (``_load_known_entities_raw``); fact_checker now imports the latter. No more double disk reads, no more handle leak. ## Tests — 24 cases in tests/test_fact_checker.py All three detection paths + both dead-code regressions: * ``test_kg_init_uses_db_path_not_palace_path_kwarg`` — pins the correct KG constructor signature so the ``palace_path=`` bug can't come back. * ``test_relationship_mismatch_detected`` — the headline example from the PR body now actually fires. * ``test_stale_fact_detected`` — valid_to-closed triple is flagged. * ``test_current_fact_same_triple_is_not_flagged`` — no false positive on a still-valid match. * ``test_performance_bounded_by_mentioned_names`` — 500-name registry, zero mentions, <200ms. Regression for the O(n²) blowup. * ``test_no_false_positive_when_both_names_mentioned`` — Mila and Milla in the same text is fine. * Plus claim extraction, flatten_names shapes, CLI exit code, empty text handling, missing-palace graceful fallback, registry-dict shape support. 785/785 suite pass. ruff + format clean on CI-pinned 0.4.x. --- mempalace/fact_checker.py | 376 ++++++++++++++++++++++++++----------- mempalace/mcp_server.py | 30 ++- mempalace/miner.py | 52 +++-- tests/test_fact_checker.py | 288 ++++++++++++++++++++++++++++ 4 files changed, 620 insertions(+), 126 deletions(-) create mode 100644 tests/test_fact_checker.py diff --git a/mempalace/fact_checker.py b/mempalace/fact_checker.py index 281f117..50e8842 100644 --- a/mempalace/fact_checker.py +++ b/mempalace/fact_checker.py @@ -1,152 +1,304 @@ """ fact_checker.py — Verify text against known facts in the palace. -Checks AI responses, diary entries, and new content against the -entity registry and knowledge graph for contradictions. Catches: - - Wrong names (similar but different entities) - - Wrong relationships (calling someone the wrong role) - - Stale facts (things that changed — KG has valid_from/valid_to) +Checks AI responses, diary entries, and new content against the entity +registry and knowledge graph for three classes of issue: -Uses the entity_registry and knowledge_graph — no hardcoded facts. + * similar_name — text mentions a name that's one/two edits + away from *another* registered name, raising + the possibility of a typo or mix-up. + * relationship_mismatch — text asserts a role between two entities + (e.g. "Bob is Alice's brother") while the KG + records a *different* current role for the + same subject/object pair. + * stale_fact — text asserts a fact that the KG marks closed + (``valid_to`` in the past). + +Purely offline. Inputs: entity_registry JSON + KG SQLite. No network. Usage: from mempalace.fact_checker import check_text issues = check_text("Bob is Alice's brother", palace_path) - # → [{"type": "relationship_mismatch", "detail": "KG says Bob is Alice's husband"}] # CLI - python -m mempalace.fact_checker "Bob is Alice's brother" --palace ~/.mempalace/palace + python -m mempalace.fact_checker "Bob is Alice's brother" \\ + --palace ~/.mempalace/palace """ +from __future__ import annotations + import os import re -from pathlib import Path +from datetime import datetime, timezone + +# Share miner's mtime-cached registry loader so we don't double-read +# ~/.mempalace/known_entities.json on every check_text call. +from .miner import _load_known_entities_raw -def check_text(text, palace_path=None, config=None): - """Check text for contradictions against known facts. +# Narrow detection patterns — parse "X is Y's Z" and "X's Z is Y". +# Names are captured greedily as word sequences (letters + optional +# capitalized follow-ons) so simple multi-token names still work. +# Relationship words are constrained to sane lengths to avoid matching +# arbitrary filler. +_RELATIONSHIP_PATTERNS = [ + # "Bob is Alice's brother" → subject=Bob, possessor=Alice, role=brother + re.compile(r"\b([A-Z][\w-]+)\s+is\s+([A-Z][\w-]+)'s\s+([a-z]{3,20})\b"), + # "Alice's brother is Bob" → possessor=Alice, role=brother, subject=Bob + re.compile(r"\b([A-Z][\w-]+)'s\s+([a-z]{3,20})\s+is\s+([A-Z][\w-]+)\b"), +] - Returns list of issues found. Empty list = no contradictions. + +def check_text(text: str, palace_path: str = None, config=None) -> list: + """Return a list of issues detected in ``text``. + + Empty list means "no contradictions found" — absence of evidence, not + evidence of absence. The detector is deliberately conservative: + every issue is anchored to a specific KG fact or registry entry. """ if config is None: from .config import MempalaceConfig + config = MempalaceConfig() if palace_path is None: palace_path = config.palace_path - issues = [] + if not text: + return [] - # Load known entities - entity_names = _load_known_entities() + issues: list = [] + entity_names_raw = _load_known_entities_raw() - # Check entity name confusion (similar names that might be mixed up) - issues.extend(_check_entity_confusion(text, entity_names)) - - # Check against knowledge graph facts - issues.extend(_check_kg_facts(text, palace_path)) + issues.extend(_check_entity_confusion(text, entity_names_raw)) + issues.extend(_check_kg_contradictions(text, palace_path)) return issues -def _load_known_entities(): - """Load entity names from the registry.""" - import json - registry_path = os.path.expanduser("~/.mempalace/known_entities.json") - if not os.path.exists(registry_path): - return {} - try: - return json.loads(open(registry_path).read()) - except Exception: - return {} +# ── entity-name confusion ──────────────────────────────────────────── -def _check_entity_confusion(text, entity_names): - """Check if text confuses similar entity names.""" - issues = [] - all_names = set() - for cat in entity_names.values(): +def _flatten_names(entity_names_raw: dict) -> set: + """Flatten a ``{category: [names]}`` or ``{category: {name: meta}}`` + registry into a set of names.""" + flat: set = set() + for cat in entity_names_raw.values(): if isinstance(cat, list): - all_names.update(cat) + flat.update(str(n) for n in cat if n) elif isinstance(cat, dict): - all_names.update(cat.keys()) + flat.update(str(k) for k in cat.keys() if k) + return flat - # Find names mentioned in text - mentioned = set() + +def _check_entity_confusion(text: str, entity_names_raw: dict) -> list: + """Flag names mentioned in the text that are edit-distance ≤ 2 from + a *different* registered name — a common typo / mix-up pattern. + + Performance note: the original O(n²) pairwise scan over the full + registry is gone. We first identify which names actually appear in + the text, then only compute edit distance between *mentioned* names + and the rest of the registry. This makes the cost O(m × n) where m + is the handful of names in the text, not the full registry. + """ + all_names = _flatten_names(entity_names_raw) + if not all_names: + return [] + + # Which names from the registry actually appear in the text? + mentioned: list = [] for name in all_names: - if re.search(r'\b' + re.escape(name) + r'\b', text, re.IGNORECASE): - mentioned.add(name) + if re.search(r"\b" + re.escape(name) + r"\b", text, re.IGNORECASE): + mentioned.append(name) + if not mentioned: + return [] - # Check for names that are very similar but different (edit distance 1-2) - name_list = sorted(all_names) - for i, name_a in enumerate(name_list): - for name_b in name_list[i + 1:]: - if _edit_distance(name_a.lower(), name_b.lower()) <= 2: - if name_a in mentioned or name_b in mentioned: - if name_a in text and name_b not in text: - issues.append({ - "type": "similar_name", - "detail": f"'{name_a}' mentioned — did you mean '{name_b}'? (similar names in registry)", - "names": [name_a, name_b], - }) + issues: list = [] + seen_pairs: set = set() + for name_a in mentioned: + a_lower = name_a.lower() + for name_b in all_names: + if name_b == name_a: + continue + # Dedupe by unordered pair so we don't double-report. + pair_key = tuple(sorted((name_a.lower(), name_b.lower()))) + if pair_key in seen_pairs: + continue + # Only flag when name_b is a *different* registry entry that + # was NOT mentioned — otherwise both names in the text is + # just the user writing about two people. + if name_b in mentioned: + seen_pairs.add(pair_key) + continue + distance = _edit_distance(a_lower, name_b.lower()) + if 0 < distance <= 2: + issues.append( + { + "type": "similar_name", + "detail": ( + f"'{name_a}' mentioned — did you mean " + f"'{name_b}'? (edit distance {distance})" + ), + "names": [name_a, name_b], + "distance": distance, + } + ) + seen_pairs.add(pair_key) return issues -def _check_kg_facts(text, palace_path): - """Check text against knowledge graph for contradictions.""" - issues = [] +# ── KG contradictions ──────────────────────────────────────────────── + + +def _extract_claims(text: str) -> list: + """Yield structured (subject, predicate, object) claims from ``text``. + + The two supported surface forms are "X is Y's Z" and "X's Z is Y", + both of which resolve to the triple ``(X, Z, Y)`` — ``X`` has role + ``Z`` with respect to ``Y``. Matches are case-preserving for the + entity names (KG lookup is case-insensitive on normalized IDs). + """ + claims: list = [] + for pat in _RELATIONSHIP_PATTERNS: + for match in pat.finditer(text): + groups = match.groups() + if pat is _RELATIONSHIP_PATTERNS[0]: + subject, possessor, role = groups[0], groups[1], groups[2] + else: + possessor, role, subject = groups[0], groups[1], groups[2] + claims.append( + { + "subject": subject, + "predicate": role.lower(), + "object": possessor, + "span": match.group(0), + } + ) + return claims + + +def _check_kg_contradictions(text: str, palace_path: str) -> list: + """Compare each claim in ``text`` against the KG. + + For every claim ``(subject, predicate, object)`` parsed from the + text, look up the subject's current KG triples: + + * ``relationship_mismatch`` fires when the KG records a fact about + the same ``(subject, object)`` pair but with a *different* + predicate — e.g. text says "brother" but KG says "husband". + * ``stale_fact`` fires when the KG has the exact ``(subject, + predicate, object)`` triple but its ``valid_to`` is in the past, + meaning the claim is no longer current. + """ + claims = _extract_claims(text) + if not claims: + return [] + try: from .knowledge_graph import KnowledgeGraph - kg = KnowledgeGraph(palace_path=palace_path) - # Extract relationship claims from text - # Pattern: "X is Y's Z" or "X's Z is Y" - patterns = [ - (r"(\w+)\s+is\s+(\w+)'s\s+(\w+)", "subject", "possessor", "role"), - (r"(\w+)'s\s+(\w+)\s+is\s+(\w+)", "possessor", "role", "subject"), - ] - - for pattern, *roles in patterns: - for match in re.finditer(pattern, text, re.IGNORECASE): - groups = match.groups() - subject = groups[0] - # Query KG for this entity - try: - facts = kg.query(subject) - if facts: - for fact in facts: - # Check if the claim contradicts a known fact - if fact.get("valid_to") is None: # current fact - kg_pred = fact.get("predicate", "").lower() - claim = match.group(0).lower() - if kg_pred in claim and fact.get("object", "").lower() not in claim: - issues.append({ - "type": "relationship_mismatch", - "detail": f"Text says '{match.group(0)}' but KG says: {subject} {kg_pred} {fact.get('object')}", - "entity": subject, - }) - except Exception: - pass + # KG lives alongside the palace collection; mcp_server uses the + # same convention (see _kg init). Pass ``db_path`` — the previous + # code passed a nonexistent ``palace_path`` kwarg which raised + # TypeError, silently swallowed by the outer except and rendered + # the entire KG-check path dead. + kg = KnowledgeGraph(db_path=os.path.join(palace_path, "knowledge_graph.sqlite3")) except Exception: - pass # KG not available — skip + # KG unavailable (brand-new palace, corrupted DB, etc.) — skip. + return [] + + issues: list = [] + for claim in claims: + subject = claim["subject"] + claim_pred = claim["predicate"] + claim_obj = claim["object"] + try: + facts = kg.query_entity(subject, direction="outgoing") + except Exception: + continue + if not facts: + continue + + current_facts = [f for f in facts if f.get("current")] + + # Mismatch: KG fact about same (subject, object) pair but different predicate. + for fact in current_facts: + if not _objects_match(fact.get("object"), claim_obj): + continue + kg_pred = (fact.get("predicate") or "").lower() + if kg_pred and kg_pred != claim_pred: + issues.append( + { + "type": "relationship_mismatch", + "detail": ( + f"Text says '{claim['span']}' but KG records " + f"{subject} {kg_pred} {fact.get('object')}" + ), + "entity": subject, + "claim": { + "predicate": claim_pred, + "object": claim_obj, + }, + "kg_fact": { + "predicate": kg_pred, + "object": fact.get("object"), + }, + } + ) + + # Stale fact: exact match on (subject, predicate, object) but KG + # closed the window in the past. + now_iso = datetime.now(timezone.utc).date().isoformat() + for fact in facts: + if fact.get("current"): + continue + kg_pred = (fact.get("predicate") or "").lower() + if kg_pred != claim_pred: + continue + if not _objects_match(fact.get("object"), claim_obj): + continue + valid_to = fact.get("valid_to") + if valid_to and str(valid_to) < now_iso: + issues.append( + { + "type": "stale_fact", + "detail": ( + f"Text says '{claim['span']}' but KG marks " + f"this fact closed on {valid_to}" + ), + "entity": subject, + "valid_to": valid_to, + } + ) return issues -def _edit_distance(s1, s2): - """Simple Levenshtein distance.""" +def _objects_match(kg_obj, claim_obj: str) -> bool: + if kg_obj is None or not claim_obj: + return False + return str(kg_obj).strip().lower() == claim_obj.strip().lower() + + +# ── Levenshtein helper (tight iterative version) ───────────────────── + + +def _edit_distance(s1: str, s2: str) -> int: + """Levenshtein distance. O(len(s1) * len(s2)) time, O(len(s2)) space.""" if len(s1) < len(s2): - return _edit_distance(s2, s1) - if len(s2) == 0: + s1, s2 = s2, s1 + if not s2: return len(s1) prev = list(range(len(s2) + 1)) for i, c1 in enumerate(s1): curr = [i + 1] for j, c2 in enumerate(s2): - curr.append(min( - prev[j + 1] + 1, - curr[j] + 1, - prev[j] + (0 if c1 == c2 else 1), - )) + curr.append( + min( + prev[j + 1] + 1, + curr[j] + 1, + prev[j] + (0 if c1 == c2 else 1), + ) + ) prev = curr return prev[-1] @@ -154,24 +306,30 @@ def _edit_distance(s1, s2): if __name__ == "__main__": import argparse import json + import sys - parser = argparse.ArgumentParser(description="Check text against known facts") - parser.add_argument("text", nargs="?", help="Text to check") - parser.add_argument("--palace", default=os.path.expanduser("~/.mempalace/palace")) - parser.add_argument("--stdin", action="store_true", help="Read from stdin") + parser = argparse.ArgumentParser( + description="Check text against known facts in the MemPalace palace.", + epilog="Exits 0 when no issues found, 1 when one or more issues detected.", + ) + parser.add_argument("text", nargs="?", help="Text to check (or use --stdin).") + parser.add_argument( + "--palace", + default=os.path.expanduser("~/.mempalace/palace"), + help="Path to the palace directory.", + ) + parser.add_argument("--stdin", action="store_true", help="Read text from stdin.") args = parser.parse_args() if args.stdin: - import sys - text = sys.stdin.read() + text_in = sys.stdin.read() elif args.text: - text = args.text + text_in = args.text else: - print("Provide text as argument or use --stdin") - exit(1) + parser.error("Provide text as argument or use --stdin.") - issues = check_text(text, palace_path=args.palace) - if issues: - print(json.dumps(issues, indent=2)) - else: - print("No contradictions found.") + found = check_text(text_in, palace_path=args.palace) + if found: + print(json.dumps(found, indent=2)) + sys.exit(1) + print("No contradictions found.") diff --git a/mempalace/mcp_server.py b/mempalace/mcp_server.py index 08226a9..31be8a4 100644 --- a/mempalace/mcp_server.py +++ b/mempalace/mcp_server.py @@ -35,7 +35,15 @@ from .version import __version__ import chromadb from .query_sanitizer import sanitize_query from .searcher import search_memories -from .palace_graph import traverse, find_tunnels, graph_stats, create_tunnel, list_tunnels, delete_tunnel, follow_tunnels +from .palace_graph import ( + traverse, + find_tunnels, + graph_stats, + create_tunnel, + list_tunnels, + delete_tunnel, + follow_tunnels, +) from .knowledge_graph import KnowledgeGraph @@ -519,7 +527,10 @@ def tool_create_tunnel( except ValueError as e: return {"error": str(e)} return create_tunnel( - source_wing, source_room, target_wing, target_room, + source_wing, + source_room, + target_wing, + target_room, label=label, source_drawer_id=source_drawer_id, target_drawer_id=target_drawer_id, @@ -1251,8 +1262,14 @@ TOOLS = { "target_wing": {"type": "string", "description": "Wing of the target"}, "target_room": {"type": "string", "description": "Room in the target wing"}, "label": {"type": "string", "description": "Description of the connection"}, - "source_drawer_id": {"type": "string", "description": "Optional specific drawer ID"}, - "target_drawer_id": {"type": "string", "description": "Optional specific drawer ID"}, + "source_drawer_id": { + "type": "string", + "description": "Optional specific drawer ID", + }, + "target_drawer_id": { + "type": "string", + "description": "Optional specific drawer ID", + }, }, "required": ["source_wing", "source_room", "target_wing", "target_room"], }, @@ -1263,7 +1280,10 @@ TOOLS = { "input_schema": { "type": "object", "properties": { - "wing": {"type": "string", "description": "Filter tunnels by wing (shows tunnels where wing is source or target)"}, + "wing": { + "type": "string", + "description": "Filter tunnels by wing (shows tunnels where wing is source or target)", + }, }, }, "handler": tool_list_tunnels, diff --git a/mempalace/miner.py b/mempalace/miner.py index 04bcf61..3d8e29e 100644 --- a/mempalace/miner.py +++ b/mempalace/miner.py @@ -379,17 +379,17 @@ def chunk_text(content: str, source_file: str) -> list: _ENTITY_REGISTRY_PATH = os.path.join(os.path.expanduser("~"), ".mempalace", "known_entities.json") -_ENTITY_REGISTRY_CACHE: dict = {"mtime": None, "names": frozenset()} +_ENTITY_REGISTRY_CACHE: dict = {"mtime": None, "names": frozenset(), "raw": {}} _ENTITY_EXTRACT_WINDOW = 5000 # chars of content scanned for capitalized words _ENTITY_METADATA_LIMIT = 25 # max entities packed into the metadata field -def _load_known_entities() -> frozenset: - """Load (and cache) the user's known-entity registry by mtime. - - Reads ``~/.mempalace/known_entities.json``. The registry is shaped as - ``{"category": ["Name1", "Name2", ...], ...}``. Cached across calls - in the same process; invalidated when the file's mtime changes. +def _refresh_known_entities_cache() -> None: + """Reload ``~/.mempalace/known_entities.json`` into the module cache if + its mtime changed since the last read. Shared by ``_load_known_entities`` + (flat set) and ``_load_known_entities_raw`` (category dict), so callers + can pick whichever shape they need without duplicating the mtime-gated + disk read. """ try: mtime = os.path.getmtime(_ENTITY_REGISTRY_PATH) @@ -397,28 +397,56 @@ def _load_known_entities() -> frozenset: if _ENTITY_REGISTRY_CACHE["mtime"] is not None: _ENTITY_REGISTRY_CACHE["mtime"] = None _ENTITY_REGISTRY_CACHE["names"] = frozenset() - return _ENTITY_REGISTRY_CACHE["names"] + _ENTITY_REGISTRY_CACHE["raw"] = {} + return if _ENTITY_REGISTRY_CACHE["mtime"] == mtime: - return _ENTITY_REGISTRY_CACHE["names"] + return names: set = set() + raw: dict = {} try: import json with open(_ENTITY_REGISTRY_PATH, "r", encoding="utf-8") as f: data = json.load(f) - for cat in data.values(): - if isinstance(cat, list): - names.update(str(n) for n in cat if n) + if isinstance(data, dict): + raw = data + for cat in data.values(): + if isinstance(cat, list): + names.update(str(n) for n in cat if n) + elif isinstance(cat, dict): + names.update(str(k) for k in cat.keys() if k) except Exception: names = set() + raw = {} _ENTITY_REGISTRY_CACHE["mtime"] = mtime _ENTITY_REGISTRY_CACHE["names"] = frozenset(names) + _ENTITY_REGISTRY_CACHE["raw"] = raw + + +def _load_known_entities() -> frozenset: + """Flat set of every known entity name (across all categories). + + Cached by mtime; invalidated when the registry file changes. + """ + _refresh_known_entities_cache() return _ENTITY_REGISTRY_CACHE["names"] +def _load_known_entities_raw() -> dict: + """Full category-dict view of the registry, shape + ``{"category": ["Name1", ...], ...}``. Cached by mtime. + + Consumed by modules (e.g., fact_checker) that need to reason about + categories rather than a flat name set. Never returns a mutable + reference to the cache — callers get a shallow copy. + """ + _refresh_known_entities_cache() + return dict(_ENTITY_REGISTRY_CACHE["raw"]) + + def _extract_entities_for_metadata(content: str) -> str: """Extract entity names from content for metadata tagging. diff --git a/tests/test_fact_checker.py b/tests/test_fact_checker.py new file mode 100644 index 0000000..5b34a40 --- /dev/null +++ b/tests/test_fact_checker.py @@ -0,0 +1,288 @@ +""" +test_fact_checker.py — Regression + integration tests for fact_checker. + +Covers every detection path + the three bugs the original PR silently +hid behind ``except Exception: pass``: + + * ``kg.query()`` doesn't exist — code must use ``query_entity``. + * ``KnowledgeGraph(palace_path=...)`` is not a valid kwarg — code + must pass ``db_path``. + * O(n²) edit-distance over the full registry — must filter to names + actually mentioned in the text. + +Also pins the three feature contracts: + * similar_name — "Mila" vs "Milla" in a registry with both. + * relationship_mismatch — "Bob is Alice's brother" vs KG "husband". + * stale_fact — claim matches a triple whose valid_to is in the past. +""" + +from __future__ import annotations + +import json +from unittest.mock import MagicMock, patch + +import pytest + +from mempalace.fact_checker import ( + _check_entity_confusion, + _edit_distance, + _extract_claims, + _flatten_names, + check_text, +) +from mempalace.knowledge_graph import KnowledgeGraph + + +# ── claim extraction ───────────────────────────────────────────────── + + +class TestExtractClaims: + def test_parses_x_is_ys_z(self): + claims = _extract_claims("Bob is Alice's brother") + assert len(claims) == 1 + assert claims[0] == { + "subject": "Bob", + "predicate": "brother", + "object": "Alice", + "span": "Bob is Alice's brother", + } + + def test_parses_xs_z_is_y(self): + claims = _extract_claims("Alice's brother is Bob") + assert len(claims) == 1 + assert claims[0]["subject"] == "Bob" + assert claims[0]["predicate"] == "brother" + assert claims[0]["object"] == "Alice" + + def test_ignores_sentences_without_possessive_role(self): + assert _extract_claims("Bob drove to the store today") == [] + assert _extract_claims("Just some prose without relationships") == [] + + def test_multiple_claims_in_one_text(self): + claims = _extract_claims("Bob is Alice's brother. Carol is Dave's sister.") + subjects = {c["subject"] for c in claims} + assert subjects == {"Bob", "Carol"} + + +# ── entity confusion ───────────────────────────────────────────────── + + +class TestEntityConfusion: + def test_flags_near_name_when_only_one_mentioned(self): + registry = {"people": ["Milla", "Mila"]} + issues = _check_entity_confusion("I spoke with Mila today.", registry) + # "Mila" mentioned, "Milla" not — registry has both at edit-distance 1, + # flag the possible confusion. + assert len(issues) == 1 + assert issues[0]["type"] == "similar_name" + assert set(issues[0]["names"]) == {"Mila", "Milla"} + assert issues[0]["distance"] == 1 + + def test_no_false_positive_when_both_names_mentioned(self): + """Regression: a text discussing both Mila and Milla is fine — + the user clearly knows they're different. Don't nag.""" + registry = {"people": ["Milla", "Mila"]} + issues = _check_entity_confusion("Mila and Milla met for lunch.", registry) + assert issues == [] + + def test_no_issues_when_registry_empty(self): + assert _check_entity_confusion("Bob said hi", {}) == [] + assert _check_entity_confusion("Bob said hi", {"people": []}) == [] + + def test_no_issues_when_no_mentioned_names(self): + registry = {"people": ["Zelda", "Link", "Sheik"]} + assert _check_entity_confusion("nothing relevant here", registry) == [] + + def test_registry_dict_shape_is_supported(self): + # Some registries store {"people": {"Alice": {...meta}}}; we still + # need to surface the keys as candidate names. + registry = {"people": {"Milla": {"role": "creator"}, "Mila": {}}} + issues = _check_entity_confusion("I messaged Mila yesterday", registry) + assert any("Milla" in (i["names"] or []) for i in issues) + + +class TestEditDistance: + def test_basic_distances(self): + assert _edit_distance("kitten", "sitting") == 3 + assert _edit_distance("mila", "milla") == 1 + assert _edit_distance("abc", "abc") == 0 + + def test_empty_strings(self): + assert _edit_distance("", "") == 0 + assert _edit_distance("abc", "") == 3 + assert _edit_distance("", "abc") == 3 + + def test_performance_bounded_by_mentioned_names(self): + """Regression: an earlier implementation did O(n²) pairwise + edit-distance over every registry entry on every check_text call. + With 100 names and zero mentions, the call must return in a blink + because no edit-distance comparison should even start.""" + import time + + # 500 random names, none of which appear in the text. + registry = {"people": [f"Zelda{i:03d}" for i in range(500)]} + text = "completely irrelevant prose with no registered names at all" + + start = time.perf_counter() + issues = _check_entity_confusion(text, registry) + elapsed = time.perf_counter() - start + + assert issues == [] + # Even an unoptimized implementation should beat this by orders + # of magnitude once we've filtered to mentioned names (which is + # 0 here) — if it's still doing O(n²), we'll blow past. + assert elapsed < 0.2, f"entity confusion took {elapsed:.3f}s on empty mentions" + + +# ── _flatten_names helper ──────────────────────────────────────────── + + +class TestFlattenNames: + def test_handles_list_categories(self): + assert _flatten_names({"people": ["Ada", "Bob"]}) == {"Ada", "Bob"} + + def test_handles_dict_categories(self): + assert _flatten_names({"people": {"Ada": {}, "Bob": {}}}) == {"Ada", "Bob"} + + def test_skips_falsy_entries(self): + assert _flatten_names({"people": ["Ada", "", None, "Bob"]}) == {"Ada", "Bob"} + + +# ── KG integration (uses a real tmp SQLite palace) ─────────────────── + + +@pytest.fixture +def palace_with_kg(tmp_path): + """Palace directory with a real KG pre-seeded with a few triples. + + The KG file lives at ``/knowledge_graph.sqlite3`` — same + convention used by the MCP server. Fact-checker must find it via + that path, not via a bogus ``palace_path`` kwarg. + """ + palace = tmp_path / "palace" + palace.mkdir() + db = str(palace / "knowledge_graph.sqlite3") + kg = KnowledgeGraph(db_path=db) + yield palace, kg + + +class TestKGContradictions: + def test_kg_init_uses_db_path_not_palace_path_kwarg(self): + """Regression: the original code passed ``palace_path=`` to a + constructor whose only kwarg is ``db_path``. That raised + TypeError — silently swallowed — and the KG path became dead + code. This test pins the correct call signature.""" + # Simply construct via the correct signature; raising means the + # KG constructor has changed in a way that fact_checker must too. + kg = KnowledgeGraph(db_path=":memory:") + # query_entity must exist (this is the method fact_checker calls). + assert callable(getattr(kg, "query_entity", None)) + # The API that fact_checker used to call does NOT exist. + assert not hasattr(kg, "query") + + def test_relationship_mismatch_detected(self, palace_with_kg): + """The feature's headline example: text says brother, KG says husband.""" + palace, kg = palace_with_kg + kg.add_triple("Bob", "husband_of", "Alice", valid_from="2020-01-01") + + issues = check_text("Bob is Alice's husband_of", str(palace)) + # Exact-predicate + same object → no mismatch. + assert all(i["type"] != "relationship_mismatch" for i in issues) + + issues = check_text("Bob is Alice's brother", str(palace)) + mismatches = [i for i in issues if i["type"] == "relationship_mismatch"] + assert mismatches, "should flag text/KG mismatch for same (subject, object)" + m = mismatches[0] + assert m["entity"] == "Bob" + assert m["claim"]["predicate"] == "brother" + assert m["kg_fact"]["predicate"] == "husband_of" + + def test_no_false_positive_when_kg_has_no_facts_about_subject(self, palace_with_kg): + palace, _ = palace_with_kg + # KG is empty → no mismatch should fire. + assert check_text("Bob is Alice's brother", str(palace)) == [] + + def test_stale_fact_detected(self, palace_with_kg): + palace, kg = palace_with_kg + # An old relationship that was superseded in 2023. Using a + # possessive-shape claim so the narrow claim-extraction regex + # actually reaches the stale-fact branch. + kg.add_triple( + "Bob", + "brother", + "Alice", + valid_from="2010-01-01", + valid_to="2023-06-01", + ) + issues = check_text("Bob is Alice's brother", str(palace)) + stale = [i for i in issues if i["type"] == "stale_fact"] + assert stale, "should flag closed-window fact as stale" + assert stale[0]["entity"] == "Bob" + assert stale[0]["valid_to"].startswith("2023") + + def test_current_fact_same_triple_is_not_flagged(self, palace_with_kg): + palace, kg = palace_with_kg + kg.add_triple("Bob", "brother", "Alice", valid_from="2010-01-01") + issues = check_text("Bob is Alice's brother", str(palace)) + assert issues == [] + + def test_missing_palace_does_not_crash(self, tmp_path): + """Brand-new palace (no KG file yet) — check_text must return [] + rather than raising or hanging.""" + nonexistent = str(tmp_path / "never_created") + assert check_text("Bob is Alice's brother", nonexistent) == [] + + +# ── end-to-end check_text contract ─────────────────────────────────── + + +class TestCheckTextContract: + def test_empty_text_returns_empty_list(self, tmp_path): + assert check_text("", str(tmp_path / "palace")) == [] + + def test_registry_confusion_path_isolated_from_kg(self, tmp_path, monkeypatch): + """If the registry file is present but the KG is missing, the + similar-name path must still fire. Prior implementations had + such entangled state that one failure killed both paths.""" + # Bypass the real registry by pointing cache at a temp file. + registry = tmp_path / "known_entities.json" + registry.write_text(json.dumps({"people": ["Milla", "Mila"]})) + from mempalace import miner + + monkeypatch.setattr(miner, "_ENTITY_REGISTRY_PATH", str(registry)) + miner._ENTITY_REGISTRY_CACHE.update({"mtime": None, "names": frozenset(), "raw": {}}) + + issues = check_text("Chatted with Mila.", str(tmp_path / "nonexistent_palace")) + assert any(i["type"] == "similar_name" for i in issues) + + +# ── CLI ────────────────────────────────────────────────────────────── + + +class TestCLI: + def test_exits_nonzero_when_issues_found(self, tmp_path, monkeypatch, capsys): + """The CLI exit code is how shell scripts / hooks know to act — + pin it explicitly.""" + registry = tmp_path / "known_entities.json" + registry.write_text(json.dumps({"people": ["Milla", "Mila"]})) + from mempalace import fact_checker, miner + + monkeypatch.setattr(miner, "_ENTITY_REGISTRY_PATH", str(registry)) + miner._ENTITY_REGISTRY_CACHE.update({"mtime": None, "names": frozenset(), "raw": {}}) + + # Simulate argv: "Mila said hi" + monkeypatch.setattr( + "sys.argv", + ["fact_checker", "Mila said hi", "--palace", str(tmp_path / "palace")], + ) + with pytest.raises(SystemExit) as excinfo: + # Re-exec the __main__ block via runpy. + import runpy + + runpy.run_module("mempalace.fact_checker", run_name="__main__") + # Issues found → exit code 1. + assert excinfo.value.code == 1 + out = capsys.readouterr().out + assert "similar_name" in out + # Silence unused import warning. + _ = (MagicMock, patch, fact_checker) From d886a62d8adb39088843b0da13aea1f38f3b675c Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2026 21:35:53 +0000 Subject: [PATCH 25/42] Optimize entity detection with regex caching and pre-compilation - Use functools.lru_cache to cache compiled patterns for entity names. - Pre-compile static pronoun patterns into a single regex. - Remove redundant .lower() calls in score_entity loop. Co-authored-by: igorls <4753812+igorls@users.noreply.github.com> --- mempalace/entity_detector.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/mempalace/entity_detector.py b/mempalace/entity_detector.py index 80fc107..203c0aa 100644 --- a/mempalace/entity_detector.py +++ b/mempalace/entity_detector.py @@ -17,6 +17,7 @@ Usage: import re import os +import functools from pathlib import Path from collections import defaultdict @@ -60,6 +61,8 @@ PRONOUN_PATTERNS = [ r"\btheir\b", ] +PRONOUN_RE = re.compile("|".join(PRONOUN_PATTERNS), re.IGNORECASE) + # Person signals — dialogue markers DIALOGUE_PATTERNS = [ r"^>\s*{name}[:\s]", # > Speaker: ... @@ -466,6 +469,7 @@ def extract_candidates(text: str) -> dict: # ==================== SIGNAL SCORING ==================== +@functools.lru_cache(maxsize=128) def _build_patterns(name: str) -> dict: """Pre-compile all regex patterns for a single entity name.""" n = re.escape(name) @@ -515,11 +519,9 @@ def score_entity(name: str, text: str, lines: list) -> dict: name_line_indices = [i for i, line in enumerate(lines) if name_lower in line.lower()] pronoun_hits = 0 for idx in name_line_indices: - window_text = " ".join(lines[max(0, idx - 2) : idx + 3]).lower() - for pronoun_pattern in PRONOUN_PATTERNS: - if re.search(pronoun_pattern, window_text): - pronoun_hits += 1 - break + window_text = " ".join(lines[max(0, idx - 2) : idx + 3]) + if PRONOUN_RE.search(window_text): + pronoun_hits += 1 if pronoun_hits > 0: person_score += pronoun_hits * 2 person_signals.append(f"pronoun nearby ({pronoun_hits}x)") From c4baceccb4eef1299cb981ec567a4eb999dd112e Mon Sep 17 00:00:00 2001 From: mvalentsev Date: Mon, 13 Apr 2026 20:19:27 +0500 Subject: [PATCH 26/42] docs: fix stale milla-jovovich org URLs in website and plugin manifests (#787) Follow-up to #766 which covers version.py, pyproject.toml, README, CHANGELOG, and CONTRIBUTING. These 11 files still had the old org name in URLs: - website/ (VitePress config + 6 docs pages) - .claude-plugin/ (plugin.json repository, README marketplace command) - .codex-plugin/ (plugin.json URLs, README links) Author name fields are intentionally unchanged. --- .claude-plugin/README.md | 2 +- .claude-plugin/plugin.json | 2 +- .codex-plugin/README.md | 6 +++--- .codex-plugin/plugin.json | 10 +++++----- website/.vitepress/config.mts | 4 ++-- website/guide/claude-code.md | 2 +- website/guide/gemini-cli.md | 2 +- website/guide/getting-started.md | 4 ++-- website/index.md | 2 +- website/reference/benchmarks.md | 6 +++--- website/reference/contributing.md | 8 ++++---- 11 files changed, 24 insertions(+), 24 deletions(-) diff --git a/.claude-plugin/README.md b/.claude-plugin/README.md index fd98952..a2ed080 100644 --- a/.claude-plugin/README.md +++ b/.claude-plugin/README.md @@ -11,7 +11,7 @@ A Claude Code plugin that gives your AI a persistent memory system. Mine project ### Claude Code Marketplace ```bash -claude plugin marketplace add milla-jovovich/mempalace +claude plugin marketplace add MemPalace/mempalace claude plugin install --scope user mempalace ``` diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index fa05a15..20b2cb2 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -25,5 +25,5 @@ "palace", "search" ], - "repository": "https://github.com/milla-jovovich/mempalace" + "repository": "https://github.com/MemPalace/mempalace" } diff --git a/.codex-plugin/README.md b/.codex-plugin/README.md index 57dbc34..6502eb6 100644 --- a/.codex-plugin/README.md +++ b/.codex-plugin/README.md @@ -35,7 +35,7 @@ codex /init 1. Clone the MemPalace repository: ```bash -git clone https://github.com/milla-jovovich/mempalace.git +git clone https://github.com/MemPalace/mempalace.git cd mempalace ``` @@ -71,5 +71,5 @@ Set the `MEMPAL_DIR` environment variable to a directory path to automatically r ## Support -- Repository: https://github.com/milla-jovovich/mempalace -- Issues: https://github.com/milla-jovovich/mempalace/issues +- Repository: https://github.com/MemPalace/mempalace +- Issues: https://github.com/MemPalace/mempalace/issues diff --git a/.codex-plugin/plugin.json b/.codex-plugin/plugin.json index 5784847..23d3ee7 100644 --- a/.codex-plugin/plugin.json +++ b/.codex-plugin/plugin.json @@ -5,8 +5,8 @@ "author": { "name": "milla-jovovich" }, - "homepage": "https://github.com/milla-jovovich/mempalace", - "repository": "https://github.com/milla-jovovich/mempalace", + "homepage": "https://github.com/MemPalace/mempalace", + "repository": "https://github.com/MemPalace/mempalace", "license": "MIT", "keywords": [ "memory", @@ -39,9 +39,9 @@ "Read", "Write" ], - "websiteURL": "https://github.com/milla-jovovich/mempalace", - "privacyPolicyURL": "https://github.com/milla-jovovich/mempalace", - "termsOfServiceURL": "https://github.com/milla-jovovich/mempalace", + "websiteURL": "https://github.com/MemPalace/mempalace", + "privacyPolicyURL": "https://github.com/MemPalace/mempalace", + "termsOfServiceURL": "https://github.com/MemPalace/mempalace", "defaultPrompt": [ "Search my memories for recent decisions", "Mine this project into my memory palace", diff --git a/website/.vitepress/config.mts b/website/.vitepress/config.mts index 7d708f1..eea8829 100644 --- a/website/.vitepress/config.mts +++ b/website/.vitepress/config.mts @@ -86,7 +86,7 @@ export default withMermaid( }, socialLinks: [ - { icon: 'github', link: 'https://github.com/milla-jovovich/mempalace' }, + { icon: 'github', link: 'https://github.com/MemPalace/mempalace' }, { icon: 'discord', link: 'https://discord.com/invite/ycTQQCu6kn' }, ], @@ -100,7 +100,7 @@ export default withMermaid( }, editLink: { - pattern: `https://github.com/milla-jovovich/mempalace/edit/${editBranch}/website/:path`, + pattern: `https://github.com/MemPalace/mempalace/edit/${editBranch}/website/:path`, text: 'Edit this page on GitHub', }, }, diff --git a/website/guide/claude-code.md b/website/guide/claude-code.md index 8045fb7..25f826f 100644 --- a/website/guide/claude-code.md +++ b/website/guide/claude-code.md @@ -5,7 +5,7 @@ The recommended way to use MemPalace with Claude Code — native marketplace ins ## Installation ```bash -claude plugin marketplace add milla-jovovich/mempalace +claude plugin marketplace add MemPalace/mempalace claude plugin install --scope user mempalace ``` diff --git a/website/guide/gemini-cli.md b/website/guide/gemini-cli.md index 2534079..34fb9af 100644 --- a/website/guide/gemini-cli.md +++ b/website/guide/gemini-cli.md @@ -11,7 +11,7 @@ MemPalace works natively with [Gemini CLI](https://github.com/google/gemini-cli) ```bash # Clone the repository -git clone https://github.com/milla-jovovich/mempalace.git +git clone https://github.com/MemPalace/mempalace.git cd mempalace # Create a virtual environment diff --git a/website/guide/getting-started.md b/website/guide/getting-started.md index 5d32034..db927e1 100644 --- a/website/guide/getting-started.md +++ b/website/guide/getting-started.md @@ -9,7 +9,7 @@ pip install mempalace ``` ::: danger Security Warning -The domain `mempalace.tech` is a **brand-squatting site** not affiliated with this project. It is known to run ad-redirects and potential malware. The official MemPalace distribution is only available via this [GitHub repository](https://github.com/milla-jovovich/mempalace) and [PyPI](https://pypi.org/project/mempalace/). Never install binaries or scripts from unofficial domains. +The domain `mempalace.tech` is a **brand-squatting site** not affiliated with this project. It is known to run ad-redirects and potential malware. The official MemPalace distribution is only available via this [GitHub repository](https://github.com/MemPalace/mempalace) and [PyPI](https://pypi.org/project/mempalace/). Never install binaries or scripts from unofficial domains. ::: ### Requirements @@ -23,7 +23,7 @@ No API key required for the core local workflow. After installation, the main st ### From Source ```bash -git clone https://github.com/milla-jovovich/mempalace.git +git clone https://github.com/MemPalace/mempalace.git cd mempalace pip install -e ".[dev]" ``` diff --git a/website/index.md b/website/index.md index 88bc694..a8487fb 100644 --- a/website/index.md +++ b/website/index.md @@ -17,7 +17,7 @@ hero: link: /concepts/the-palace - theme: alt text: GitHub ↗ - link: https://github.com/milla-jovovich/mempalace + link: https://github.com/MemPalace/mempalace features: - icon: diff --git a/website/reference/benchmarks.md b/website/reference/benchmarks.md index 7a37916..dae2df1 100644 --- a/website/reference/benchmarks.md +++ b/website/reference/benchmarks.md @@ -1,6 +1,6 @@ # Benchmarks -Curated summary of MemPalace benchmark results. For the full 725-line progression with every experiment, see [`benchmarks/BENCHMARKS.md`](https://github.com/milla-jovovich/mempalace/blob/main/benchmarks/BENCHMARKS.md) in the repository. +Curated summary of MemPalace benchmark results. For the full 725-line progression with every experiment, see [`benchmarks/BENCHMARKS.md`](https://github.com/MemPalace/mempalace/blob/main/benchmarks/BENCHMARKS.md) in the repository. ## The Core Finding @@ -76,7 +76,7 @@ On this benchmark, MemPalace materially outperforms the Mem0 result cited in the All benchmarks are reproducible with public datasets: ```bash -git clone https://github.com/milla-jovovich/mempalace.git +git clone https://github.com/MemPalace/mempalace.git cd mempalace pip install chromadb pyyaml @@ -92,4 +92,4 @@ python benchmarks/longmemeval_bench.py /tmp/longmemeval_s_cleaned.json Results are deterministic. Same data + same script = same result every time. Every result JSONL file contains every question, every retrieved document, every score. ::: -For complete reproduction instructions, benchmark integrity notes, and the full score progression, see the [full benchmark documentation](https://github.com/milla-jovovich/mempalace/blob/main/benchmarks/BENCHMARKS.md). +For complete reproduction instructions, benchmark integrity notes, and the full score progression, see the [full benchmark documentation](https://github.com/MemPalace/mempalace/blob/main/benchmarks/BENCHMARKS.md). diff --git a/website/reference/contributing.md b/website/reference/contributing.md index 9a1364a..17dc6d9 100644 --- a/website/reference/contributing.md +++ b/website/reference/contributing.md @@ -5,7 +5,7 @@ PRs welcome. MemPalace is open source and we welcome contributions of all sizes ## Getting Started ```bash -git clone https://github.com/milla-jovovich/mempalace.git +git clone https://github.com/MemPalace/mempalace.git cd mempalace pip install -e ".[dev]" ``` @@ -53,7 +53,7 @@ See [Benchmarks](/reference/benchmarks) for data download instructions. ## Good First Issues -Check the [Issues](https://github.com/milla-jovovich/mempalace/issues) tab: +Check the [Issues](https://github.com/MemPalace/mempalace/issues) tab: - **New chat formats** — add import support for Cursor, Copilot, or other AI tool exports - **Room detection** — improve pattern matching in `room_detector_local.py` @@ -73,8 +73,8 @@ If you're planning a significant change, open an issue first. Key principles: ## Community - [Discord](https://discord.com/invite/ycTQQCu6kn) -- [GitHub Issues](https://github.com/milla-jovovich/mempalace/issues) — bug reports and feature requests -- [GitHub Discussions](https://github.com/milla-jovovich/mempalace/discussions) — questions and ideas +- [GitHub Issues](https://github.com/MemPalace/mempalace/issues) — bug reports and feature requests +- [GitHub Discussions](https://github.com/MemPalace/mempalace/discussions) — questions and ideas ## License From 7192552624a089e73162a94e1c68d6c7c0da3d93 Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Mon, 13 Apr 2026 18:55:26 -0300 Subject: [PATCH 27/42] test: make diary state path assertion platform-neutral MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Windows CI job failed on: assert '/.mempalace/state/' in str(state_path) because Windows uses ``\`` as the path separator, so the substring never matches. The behavior under test (state file lives outside the diary dir, under ``~/.mempalace/state/``) is already correct on both platforms — only the assertion was Unix-only. Switch to ``state_path.parent`` comparisons that work on any OS. --- tests/test_closets.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_closets.py b/tests/test_closets.py index 59321c5..458f767 100644 --- a/tests/test_closets.py +++ b/tests/test_closets.py @@ -586,7 +586,10 @@ class TestDiaryIngest: # State file does exist under ~/.mempalace/state/. state_path = _state_file_for(str(palace_dir), diary_dir.resolve()) assert state_path.exists() - assert "/.mempalace/state/" in str(state_path) + # Platform-neutral path check: compare parents rather than a hardcoded + # separator string that would fail on Windows (``\.mempalace\state\``). + assert state_path.parent.name == "state" + assert state_path.parent.parent.name == ".mempalace" def test_wing_prefixed_drawer_id_prevents_cross_diary_collision(self, tmp_path): # Regression: the original implementation used From e052074624e95009a4080240c306c7ab796199a8 Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Mon, 13 Apr 2026 19:02:51 -0300 Subject: [PATCH 28/42] test: serialize mine_lock concurrency test with multiprocessing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The macOS CI job failed ``test_lock_blocks_concurrent_access`` because ``fcntl.flock`` on BSD/macOS is per-*process*, not per-FD: two threads in the same process both acquire even when they open their own file descriptors. The test passed on Linux (per-FD flock) and Windows (per-FD ``msvcrt.locking``) but was never actually exercising the lock's real contract. ``mine_lock`` is designed to serialize multi-*agent* access — i.e., separate processes, not threads. Switch the test to ``multiprocessing.get_context('spawn')`` with a module-level worker (so the spawn pickles cleanly) so it: 1. reflects the actual use case (one lock per mining process); 2. passes on all three OSes without flock-semantics branching; 3. catches real regressions (a broken lock would now let both processes through, exactly what we care about). Hold time bumped to 0.3s and the "wait until p1 acquires" delay to 0.2s to tolerate spawn's higher startup latency on macOS/Windows. --- tests/test_closets.py | 54 ++++++++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 16 deletions(-) diff --git a/tests/test_closets.py b/tests/test_closets.py index 458f767..fba4cc8 100644 --- a/tests/test_closets.py +++ b/tests/test_closets.py @@ -24,6 +24,7 @@ Coverage map: """ import json +import multiprocessing import os import tempfile import threading @@ -63,6 +64,18 @@ from mempalace.searcher import ( # ── mine_lock ──────────────────────────────────────────────────────────── +def _lock_worker(target: str, name: str, hold_seconds: float, queue) -> None: + """Module-level worker for multiprocessing spawn; must be pickle-able.""" + from mempalace.palace import mine_lock as _mine_lock + + start = time.time() + with _mine_lock(target): + elapsed = time.time() - start + queue.put((name, elapsed)) + if hold_seconds > 0: + time.sleep(hold_seconds) + + class TestMineLock: def test_lock_acquires_and_releases(self, tmp_path): target = str(tmp_path / "lock_target.txt") @@ -76,28 +89,37 @@ class TestMineLock: assert time.time() - start < 1.0 def test_lock_blocks_concurrent_access(self, tmp_path): + """The lock's contract is inter-*process* (multi-agent), not + inter-thread. Use multiprocessing so the test reflects the real + use case and is portable: on macOS/BSD, ``fcntl.flock`` is + per-process, so two threads in one process would both acquire — + a threading-based test would flake there even when the lock is + behaving correctly for its intended users.""" target = str(tmp_path / "concurrent_lock.txt") + # Use multiprocessing so each worker has its own process. + # Use "spawn" to stay consistent across platforms (macOS defaults + # to spawn on 3.8+; Linux defaults to fork). Both work here. + ctx = multiprocessing.get_context("spawn") + queue = ctx.Queue() + + p1 = ctx.Process(target=_lock_worker, args=(target, "a", 0.3, queue)) + p2 = ctx.Process(target=_lock_worker, args=(target, "b", 0.0, queue)) + p1.start() + time.sleep(0.2) # ensure p1 acquires first + p2.start() + p1.join(timeout=10) + p2.join(timeout=10) + results = [] + while not queue.empty(): + results.append(queue.get()) + assert len(results) == 2, f"both workers should report, got {results}" - def worker(name): - start = time.time() - with mine_lock(target): - results.append((name, time.time() - start)) - time.sleep(0.2) - - t1 = threading.Thread(target=worker, args=("a",)) - t2 = threading.Thread(target=worker, args=("b",)) - t1.start() - time.sleep(0.05) # ensure t1 acquires first - t2.start() - t1.join() - t2.join() - - # The second worker must have waited at least most of t1's hold time. + # The second worker must have waited until p1 released the lock. wait_times = sorted(r[1] for r in results) assert ( wait_times[1] > 0.1 - ), f"second thread should block on mine_lock, waited only {wait_times[1]:.3f}s" + ), f"second process should block on mine_lock, waited only {wait_times[1]:.3f}s" # ── build_closet_lines ───────────────────────────────────────────────── From 1dc20e307b8294f6e0917d2abe433929287b7c21 Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Mon, 13 Apr 2026 19:08:57 -0300 Subject: [PATCH 29/42] test: verify mine_lock via disjoint critical-section intervals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous revision used multiprocessing but still relied on timing ("second process waited at least N seconds") which flakes on CI where spawn overhead eats into the hold window. Linux CI observed the second process report a 0.088s wait — below the 0.1s threshold — even though the lock behavior was correct; spawn was just slow enough that the first process had nearly finished holding when the second got past its own spawn. Switch to effect-based verification: each worker logs its [enter_time, exit_time] inside the critical section, and the test asserts the two intervals are disjoint after sorting. A broken lock would produce overlapping intervals regardless of spawn latency; a working lock cannot. Also removed the mp.Queue since we no longer pass timing data back. --- tests/test_closets.py | 78 +++++++++++++++++++++++++++---------------- 1 file changed, 50 insertions(+), 28 deletions(-) diff --git a/tests/test_closets.py b/tests/test_closets.py index fba4cc8..976086d 100644 --- a/tests/test_closets.py +++ b/tests/test_closets.py @@ -64,16 +64,22 @@ from mempalace.searcher import ( # ── mine_lock ──────────────────────────────────────────────────────────── -def _lock_worker(target: str, name: str, hold_seconds: float, queue) -> None: - """Module-level worker for multiprocessing spawn; must be pickle-able.""" +def _lock_worker(target: str, name: str, hold_seconds: float, log_path: str) -> None: + """Worker for multiprocessing-spawn concurrency test. Writes its + critical-section enter/exit timestamps to ``log_path`` so the test + can verify the sections did not overlap in time.""" + import time as _time + from mempalace.palace import mine_lock as _mine_lock - start = time.time() with _mine_lock(target): - elapsed = time.time() - start - queue.put((name, elapsed)) - if hold_seconds > 0: - time.sleep(hold_seconds) + t_enter = _time.time() + _time.sleep(hold_seconds) + t_exit = _time.time() + # Append atomically so concurrent writers don't stomp each other. + with open(log_path, "a") as f: + f.write(f"{name} {t_enter} {t_exit}\n") + f.flush() class TestMineLock: @@ -91,35 +97,51 @@ class TestMineLock: def test_lock_blocks_concurrent_access(self, tmp_path): """The lock's contract is inter-*process* (multi-agent), not inter-thread. Use multiprocessing so the test reflects the real - use case and is portable: on macOS/BSD, ``fcntl.flock`` is - per-process, so two threads in one process would both acquire — - a threading-based test would flake there even when the lock is - behaving correctly for its intended users.""" + use case and is portable: on macOS/BSD ``fcntl.flock`` is + per-process, so two threads would both acquire — a thread-based + test would flake there even when the lock is correct. + + Verify mutual exclusion by the effect the critical section + actually has — each worker records its enter/exit timestamps + under the lock, and the test asserts the two intervals do not + overlap. This is robust to spawn-overhead timing, unlike + "second worker waited at least N seconds" which flakes when CI + spawn latency eats into the hold window. + """ target = str(tmp_path / "concurrent_lock.txt") - # Use multiprocessing so each worker has its own process. - # Use "spawn" to stay consistent across platforms (macOS defaults - # to spawn on 3.8+; Linux defaults to fork). Both work here. + log_path = str(tmp_path / "critical_section.log") + # Spawn so the same code path runs on every OS (macOS 3.8+ and + # Windows already default to spawn; Linux is fork by default). ctx = multiprocessing.get_context("spawn") - queue = ctx.Queue() - p1 = ctx.Process(target=_lock_worker, args=(target, "a", 0.3, queue)) - p2 = ctx.Process(target=_lock_worker, args=(target, "b", 0.0, queue)) + # Each worker holds the lock for HOLD seconds. With real mutual + # exclusion, the two [enter, exit] intervals must be disjoint. + HOLD = 0.3 + p1 = ctx.Process(target=_lock_worker, args=(target, "a", HOLD, log_path)) + p2 = ctx.Process(target=_lock_worker, args=(target, "b", HOLD, log_path)) p1.start() - time.sleep(0.2) # ensure p1 acquires first p2.start() - p1.join(timeout=10) - p2.join(timeout=10) + p1.join(timeout=30) + p2.join(timeout=30) - results = [] - while not queue.empty(): - results.append(queue.get()) - assert len(results) == 2, f"both workers should report, got {results}" + assert p1.exitcode == 0, f"p1 exited non-zero: {p1.exitcode}" + assert p2.exitcode == 0, f"p2 exited non-zero: {p2.exitcode}" - # The second worker must have waited until p1 released the lock. - wait_times = sorted(r[1] for r in results) + # Parse the log: " ". + intervals = [] + with open(log_path) as f: + for line in f: + parts = line.strip().split() + if len(parts) == 3: + intervals.append((parts[0], float(parts[1]), float(parts[2]))) + assert len(intervals) == 2, f"expected two critical sections, got {intervals}" + + # Sort by entry time and verify the second entry is after the first exit. + intervals.sort(key=lambda iv: iv[1]) + (_, enter_a, exit_a), (_, enter_b, exit_b) = intervals assert ( - wait_times[1] > 0.1 - ), f"second process should block on mine_lock, waited only {wait_times[1]:.3f}s" + enter_a < exit_a <= enter_b < exit_b + ), f"critical sections overlapped — lock failed to serialize: {intervals}" # ── build_closet_lines ───────────────────────────────────────────────── From 8dc5970ca93f66ab8e4ccaddf643f8c9b2b595af Mon Sep 17 00:00:00 2001 From: eblander Date: Mon, 13 Apr 2026 18:29:48 -0400 Subject: [PATCH 30/42] Fix: ruff format with CI-pinned version (0.4.x) --- mempalace/backends/chroma.py | 13 ++--- mempalace/cli.py | 86 +++++++-------------------------- mempalace/migrate.py | 8 +--- tests/conftest.py | 8 +--- tests/test_mcp_server.py | 93 +++++++++--------------------------- tests/test_miner.py | 20 ++------ 6 files changed, 53 insertions(+), 175 deletions(-) diff --git a/mempalace/backends/chroma.py b/mempalace/backends/chroma.py index 2699d3a..28fe55f 100644 --- a/mempalace/backends/chroma.py +++ b/mempalace/backends/chroma.py @@ -35,13 +35,8 @@ def _fix_blob_seq_ids(palace_path: str): continue if not rows: continue - updates = [ - (int.from_bytes(blob, byteorder="big"), rowid) - for rowid, blob in rows - ] - conn.executemany( - f"UPDATE {table} SET seq_id = ? WHERE rowid = ?", updates - ) + updates = [(int.from_bytes(blob, byteorder="big"), rowid) for rowid, blob in rows] + conn.executemany(f"UPDATE {table} SET seq_id = ? WHERE rowid = ?", updates) logger.info("Fixed %d BLOB seq_ids in %s", len(updates), table) conn.commit() except Exception: @@ -76,9 +71,7 @@ class ChromaCollection(BaseCollection): class ChromaBackend: """Factory for MemPalace's default ChromaDB backend.""" - def get_collection( - self, palace_path: str, collection_name: str, create: bool = False - ): + def get_collection(self, palace_path: str, collection_name: str, create: bool = False): if not create and not os.path.isdir(palace_path): raise FileNotFoundError(palace_path) diff --git a/mempalace/cli.py b/mempalace/cli.py index 5d1e4f0..fa92ed6 100644 --- a/mempalace/cli.py +++ b/mempalace/cli.py @@ -48,11 +48,7 @@ def cmd_init(args): if files: print(f" Reading {len(files)} files...") detected = detect_entities(files) - total = ( - len(detected["people"]) - + len(detected["projects"]) - + len(detected["uncertain"]) - ) + total = len(detected["people"]) + len(detected["projects"]) + len(detected["uncertain"]) if total > 0: confirmed = confirm_entities(detected, yes=getattr(args, "yes", False)) # Save confirmed entities to /entities.json for the miner @@ -70,11 +66,7 @@ def cmd_init(args): def cmd_mine(args): - palace_path = ( - os.path.expanduser(args.palace) - if args.palace - else MempalaceConfig().palace_path - ) + palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path include_ignored = [] for raw in args.include_ignored or []: include_ignored.extend(part.strip() for part in raw.split(",") if part.strip()) @@ -109,11 +101,7 @@ def cmd_mine(args): def cmd_search(args): from .searcher import search, SearchError - palace_path = ( - os.path.expanduser(args.palace) - if args.palace - else MempalaceConfig().palace_path - ) + palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path try: search( query=args.query, @@ -130,11 +118,7 @@ def cmd_wakeup(args): """Show L0 (identity) + L1 (essential story) — the wake-up context.""" from .layers import MemoryStack - palace_path = ( - os.path.expanduser(args.palace) - if args.palace - else MempalaceConfig().palace_path - ) + palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path stack = MemoryStack(palace_path=palace_path) text = stack.wake_up(wing=args.wing) @@ -171,11 +155,7 @@ def cmd_migrate(args): """Migrate palace from a different ChromaDB version.""" from .migrate import migrate - palace_path = ( - os.path.expanduser(args.palace) - if args.palace - else MempalaceConfig().palace_path - ) + palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path migrate( palace_path=palace_path, dry_run=args.dry_run, @@ -186,11 +166,7 @@ def cmd_migrate(args): def cmd_status(args): from .miner import status - palace_path = ( - os.path.expanduser(args.palace) - if args.palace - else MempalaceConfig().palace_path - ) + palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path status(palace_path=palace_path) @@ -201,9 +177,7 @@ def cmd_repair(args): from .migrate import confirm_destructive_action, contains_palace_database palace_path = os.path.abspath( - os.path.expanduser(args.palace) - if args.palace - else MempalaceConfig().palace_path + os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path ) db_path = os.path.join(palace_path, "chroma.sqlite3") @@ -247,9 +221,7 @@ def cmd_repair(args): all_metas = [] offset = 0 while offset < total: - batch = col.get( - limit=batch_size, offset=offset, include=["documents", "metadatas"] - ) + batch = col.get(limit=batch_size, offset=offset, include=["documents", "metadatas"]) all_ids.extend(batch["ids"]) all_docs.extend(batch["documents"]) all_metas.extend(batch["metadatas"]) @@ -272,9 +244,7 @@ def cmd_repair(args): print(" Rebuilding collection...") client.delete_collection("mempalace_drawers") - new_col = client.create_collection( - "mempalace_drawers", metadata={"hnsw:space": "cosine"} - ) + new_col = client.create_collection("mempalace_drawers", metadata={"hnsw:space": "cosine"}) filed = 0 for i in range(0, len(all_ids), batch_size): @@ -321,9 +291,7 @@ def cmd_mcp(args): if not args.palace: print("\nOptional custom palace:") - print( - f" claude mcp add mempalace -- {base_server_cmd} --palace /path/to/palace" - ) + print(f" claude mcp add mempalace -- {base_server_cmd} --palace /path/to/palace") print(f" {base_server_cmd} --palace /path/to/palace") @@ -332,11 +300,7 @@ def cmd_compress(args): import chromadb from .dialect import Dialect - palace_path = ( - os.path.expanduser(args.palace) - if args.palace - else MempalaceConfig().palace_path - ) + palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path # Load dialect (with optional entity config) config_path = args.config @@ -491,9 +455,7 @@ def main(): default="projects", help="Ingest mode: 'projects' for code/docs (default), 'convos' for chat exports", ) - p_mine.add_argument( - "--wing", default=None, help="Wing name (default: directory name)" - ) + p_mine.add_argument("--wing", default=None, help="Wing name (default: directory name)") p_mine.add_argument( "--no-gitignore", action="store_true", @@ -510,9 +472,7 @@ def main(): default="mempalace", help="Your name — recorded on every drawer (default: mempalace)", ) - p_mine.add_argument( - "--limit", type=int, default=0, help="Max files to process (0 = all)" - ) + p_mine.add_argument("--limit", type=int, default=0, help="Max files to process (0 = all)") p_mine.add_argument( "--dry-run", action="store_true", help="Show what would be filed without filing" ) @@ -534,9 +494,7 @@ def main(): p_compress = sub.add_parser( "compress", help="Compress drawers using AAAK Dialect (~30x reduction)" ) - p_compress.add_argument( - "--wing", default=None, help="Wing to compress (default: all wings)" - ) + p_compress.add_argument("--wing", default=None, help="Wing to compress (default: all wings)") p_compress.add_argument( "--dry-run", action="store_true", help="Preview compression without storing" ) @@ -545,12 +503,8 @@ def main(): ) # wake-up - p_wakeup = sub.add_parser( - "wake-up", help="Show L0 + L1 wake-up context (~600-900 tokens)" - ) - p_wakeup.add_argument( - "--wing", default=None, help="Wake-up for a specific project/wing" - ) + p_wakeup = sub.add_parser("wake-up", help="Show L0 + L1 wake-up context (~600-900 tokens)") + p_wakeup.add_argument("--wing", default=None, help="Wake-up for a specific project/wing") # split p_split = sub.add_parser( @@ -602,17 +556,13 @@ def main(): ) instructions_sub = p_instructions.add_subparsers(dest="instructions_name") for instr_name in ["init", "search", "mine", "help", "status"]: - instructions_sub.add_parser( - instr_name, help=f"Output {instr_name} instructions" - ) + instructions_sub.add_parser(instr_name, help=f"Output {instr_name} instructions") # repair sub.add_parser( "repair", help="Rebuild palace vector index from stored data (fixes segfaults after corruption)", - ).add_argument( - "--yes", action="store_true", help="Skip confirmation for destructive changes" - ) + ).add_argument("--yes", action="store_true", help="Skip confirmation for destructive changes") # mcp sub.add_parser( diff --git a/mempalace/migrate.py b/mempalace/migrate.py index d751a93..319c670 100644 --- a/mempalace/migrate.py +++ b/mempalace/migrate.py @@ -97,9 +97,7 @@ def detect_chromadb_version(db_path: str) -> str: # 0.6.x has embeddings_queue but no schema_str tables = [ r[0] - for r in conn.execute( - "SELECT name FROM sqlite_master WHERE type='table'" - ).fetchall() + for r in conn.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall() ] if "embeddings_queue" in tables: return "0.6.x" @@ -211,9 +209,7 @@ def migrate(palace_path: str, dry_run: bool = False, confirm: bool = False): temp_palace = tempfile.mkdtemp(prefix="mempalace_migrate_") print(f" Creating fresh palace in {temp_palace}...") client = chromadb.PersistentClient(path=temp_palace) - col = client.get_or_create_collection( - "mempalace_drawers", metadata={"hnsw:space": "cosine"} - ) + col = client.get_or_create_collection("mempalace_drawers", metadata={"hnsw:space": "cosine"}) # Re-import in batches batch_size = 500 diff --git a/tests/conftest.py b/tests/conftest.py index 1d85889..7b2bb77 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -101,9 +101,7 @@ def config(tmp_dir, palace_path): def collection(palace_path): """A ChromaDB collection pre-seeded in the temp palace.""" client = chromadb.PersistentClient(path=palace_path) - col = client.get_or_create_collection( - "mempalace_drawers", metadata={"hnsw:space": "cosine"} - ) + col = client.get_or_create_collection("mempalace_drawers", metadata={"hnsw:space": "cosine"}) yield col client.delete_collection("mempalace_drawers") del client @@ -187,9 +185,7 @@ def seeded_kg(kg): kg.add_triple("Alice", "parent_of", "Max", valid_from="2015-04-01") kg.add_triple("Max", "does", "swimming", valid_from="2025-01-01") kg.add_triple("Max", "does", "chess", valid_from="2024-06-01") - kg.add_triple( - "Alice", "works_at", "Acme Corp", valid_from="2020-01-01", valid_to="2024-12-31" - ) + kg.add_triple("Alice", "works_at", "Acme Corp", valid_from="2020-01-01", valid_to="2024-12-31") kg.add_triple("Alice", "works_at", "NewCo", valid_from="2025-01-01") return kg diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index cfb48a2..0562482 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -32,9 +32,7 @@ def _get_collection(palace_path, create=False): if create: return ( client, - client.get_or_create_collection( - "mempalace_drawers", metadata={"hnsw:space": "cosine"} - ), + client.get_or_create_collection("mempalace_drawers", metadata={"hnsw:space": "cosine"}), ) return client, client.get_collection("mempalace_drawers") @@ -97,9 +95,7 @@ class TestHandleRequest: def test_notifications_initialized_returns_none(self): from mempalace.mcp_server import handle_request - resp = handle_request( - {"method": "notifications/initialized", "id": None, "params": {}} - ) + resp = handle_request({"method": "notifications/initialized", "id": None, "params": {}}) assert resp is None def test_ping_returns_empty_result(self): @@ -120,9 +116,7 @@ class TestHandleRequest: assert "mempalace_add_drawer" in names assert "mempalace_kg_add" in names - def test_null_arguments_does_not_hang( - self, monkeypatch, config, palace_path, seeded_kg - ): + def test_null_arguments_does_not_hang(self, monkeypatch, config, palace_path, seeded_kg): """Sending arguments: null should return a result, not hang (#394).""" _patch_mcp_server(monkeypatch, config, seeded_kg) from mempalace.mcp_server import handle_request @@ -227,9 +221,7 @@ class TestReadTools: assert result["total_drawers"] == 0 assert result["wings"] == {} - def test_status_with_data( - self, monkeypatch, config, palace_path, seeded_collection, kg - ): + def test_status_with_data(self, monkeypatch, config, palace_path, seeded_collection, kg): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_status @@ -246,9 +238,7 @@ class TestReadTools: assert result["wings"]["project"] == 3 assert result["wings"]["notes"] == 1 - def test_list_rooms_all( - self, monkeypatch, config, palace_path, seeded_collection, kg - ): + def test_list_rooms_all(self, monkeypatch, config, palace_path, seeded_collection, kg): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_list_rooms @@ -257,9 +247,7 @@ class TestReadTools: assert "frontend" in result["rooms"] assert "planning" in result["rooms"] - def test_list_rooms_filtered( - self, monkeypatch, config, palace_path, seeded_collection, kg - ): + def test_list_rooms_filtered(self, monkeypatch, config, palace_path, seeded_collection, kg): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_list_rooms @@ -267,9 +255,7 @@ class TestReadTools: assert "backend" in result["rooms"] assert "planning" not in result["rooms"] - def test_get_taxonomy( - self, monkeypatch, config, palace_path, seeded_collection, kg - ): + def test_get_taxonomy(self, monkeypatch, config, palace_path, seeded_collection, kg): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_get_taxonomy @@ -290,9 +276,7 @@ class TestReadTools: class TestSearchTool: - def test_search_basic( - self, monkeypatch, config, palace_path, seeded_collection, kg - ): + def test_search_basic(self, monkeypatch, config, palace_path, seeded_collection, kg): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_search @@ -303,18 +287,14 @@ class TestSearchTool: top = result["results"][0] assert "JWT" in top["text"] or "authentication" in top["text"].lower() - def test_search_with_wing_filter( - self, monkeypatch, config, palace_path, seeded_collection, kg - ): + def test_search_with_wing_filter(self, monkeypatch, config, palace_path, seeded_collection, kg): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_search result = tool_search(query="planning", wing="notes") assert all(r["wing"] == "notes" for r in result["results"]) - def test_search_with_room_filter( - self, monkeypatch, config, palace_path, seeded_collection, kg - ): + def test_search_with_room_filter(self, monkeypatch, config, palace_path, seeded_collection, kg): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_search @@ -333,9 +313,7 @@ class TestSearchTool: assert "results" in result # Old name takes precedence when both provided - result_strict = tool_search( - query="JWT", max_distance=999.0, min_similarity=0.01 - ) + result_strict = tool_search(query="JWT", max_distance=999.0, min_similarity=0.01) result_loose = tool_search(query="JWT", max_distance=0.01, min_similarity=999.0) assert len(result_strict["results"]) <= len(result_loose["results"]) @@ -427,9 +405,7 @@ class TestWriteTools: assert result2["success"] is True assert result2["reason"] == "already_exists" - def test_add_drawer_shared_header_no_collision( - self, monkeypatch, config, palace_path, kg - ): + def test_add_drawer_shared_header_no_collision(self, monkeypatch, config, palace_path, kg): """Documents sharing a >100-char header must get distinct IDs (full-content hash).""" _patch_mcp_server(monkeypatch, config, kg) _client, _col = _get_collection(palace_path, create=True) @@ -441,10 +417,7 @@ class TestWriteTools: header + "Decision: Use PostgreSQL for primary storage. Rationale: ACID compliance required." ) - doc2 = ( - header - + "Decision: Use Redis for session caching. Rationale: sub-ms latency needed." - ) + doc2 = header + "Decision: Use Redis for session caching. Rationale: sub-ms latency needed." result1 = tool_add_drawer(wing="work", room="decisions", content=doc1) result2 = tool_add_drawer(wing="work", room="decisions", content=doc2) @@ -455,9 +428,7 @@ class TestWriteTools: result1["drawer_id"] != result2["drawer_id"] ), "Documents with shared header but different content must have distinct drawer IDs" - def test_delete_drawer( - self, monkeypatch, config, palace_path, seeded_collection, kg - ): + def test_delete_drawer(self, monkeypatch, config, palace_path, seeded_collection, kg): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_delete_drawer @@ -465,18 +436,14 @@ class TestWriteTools: assert result["success"] is True assert seeded_collection.count() == 3 - def test_delete_drawer_not_found( - self, monkeypatch, config, palace_path, seeded_collection, kg - ): + def test_delete_drawer_not_found(self, monkeypatch, config, palace_path, seeded_collection, kg): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_delete_drawer result = tool_delete_drawer("nonexistent_drawer") assert result["success"] is False - def test_check_duplicate( - self, monkeypatch, config, palace_path, seeded_collection, kg - ): + def test_check_duplicate(self, monkeypatch, config, palace_path, seeded_collection, kg): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_check_duplicate @@ -505,18 +472,14 @@ class TestWriteTools: assert result["room"] == "backend" assert "JWT tokens" in result["content"] - def test_get_drawer_not_found( - self, monkeypatch, config, palace_path, seeded_collection, kg - ): + def test_get_drawer_not_found(self, monkeypatch, config, palace_path, seeded_collection, kg): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_get_drawer result = tool_get_drawer("nonexistent_drawer") assert "error" in result - def test_list_drawers( - self, monkeypatch, config, palace_path, seeded_collection, kg - ): + def test_list_drawers(self, monkeypatch, config, palace_path, seeded_collection, kg): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_list_drawers @@ -544,9 +507,7 @@ class TestWriteTools: assert result["count"] == 2 assert all(d["room"] == "backend" for d in result["drawers"]) - def test_list_drawers_pagination( - self, monkeypatch, config, palace_path, seeded_collection, kg - ): + def test_list_drawers_pagination(self, monkeypatch, config, palace_path, seeded_collection, kg): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_list_drawers @@ -564,9 +525,7 @@ class TestWriteTools: result = tool_list_drawers(offset=-5) assert result["offset"] == 0 - def test_update_drawer_content( - self, monkeypatch, config, palace_path, seeded_collection, kg - ): + def test_update_drawer_content(self, monkeypatch, config, palace_path, seeded_collection, kg): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_update_drawer, tool_get_drawer @@ -584,25 +543,19 @@ class TestWriteTools: _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_update_drawer - result = tool_update_drawer( - "drawer_proj_backend_aaa", wing="new_wing", room="new_room" - ) + result = tool_update_drawer("drawer_proj_backend_aaa", wing="new_wing", room="new_room") assert result["success"] is True assert result["wing"] == "new_wing" assert result["room"] == "new_room" - def test_update_drawer_not_found( - self, monkeypatch, config, palace_path, seeded_collection, kg - ): + def test_update_drawer_not_found(self, monkeypatch, config, palace_path, seeded_collection, kg): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_update_drawer result = tool_update_drawer("nonexistent_drawer", content="hello") assert result["success"] is False - def test_update_drawer_noop( - self, monkeypatch, config, palace_path, seeded_collection, kg - ): + def test_update_drawer_noop(self, monkeypatch, config, palace_path, seeded_collection, kg): _patch_mcp_server(monkeypatch, config, kg) from mempalace.mcp_server import tool_update_drawer diff --git a/tests/test_miner.py b/tests/test_miner.py index 600053e..2183102 100644 --- a/tests/test_miner.py +++ b/tests/test_miner.py @@ -60,9 +60,7 @@ def test_scan_project_respects_gitignore(): write_file(project_root / ".gitignore", "ignored.py\ngenerated/\n") write_file(project_root / "src" / "app.py", "print('hello')\n" * 20) write_file(project_root / "ignored.py", "print('ignore me')\n" * 20) - write_file( - project_root / "generated" / "artifact.py", "print('artifact')\n" * 20 - ) + write_file(project_root / "generated" / "artifact.py", "print('artifact')\n" * 20) assert scanned_files(project_root) == ["src/app.py"] finally: @@ -77,9 +75,7 @@ def test_scan_project_respects_nested_gitignore(): write_file(project_root / ".gitignore", "*.log\n") write_file(project_root / "subrepo" / ".gitignore", "tasks/\n") write_file(project_root / "subrepo" / "src" / "main.py", "print('main')\n" * 20) - write_file( - project_root / "subrepo" / "tasks" / "task.py", "print('task')\n" * 20 - ) + write_file(project_root / "subrepo" / "tasks" / "task.py", "print('task')\n" * 20) write_file(project_root / "subrepo" / "debug.log", "debug\n" * 20) assert scanned_files(project_root) == ["subrepo/src/main.py"] @@ -138,9 +134,7 @@ def test_scan_project_can_disable_gitignore(): write_file(project_root / ".gitignore", "data/\n") write_file(project_root / "data" / "stuff.csv", "a,b,c\n" * 20) - assert scanned_files(project_root, respect_gitignore=False) == [ - "data/stuff.csv" - ] + assert scanned_files(project_root, respect_gitignore=False) == ["data/stuff.csv"] finally: shutil.rmtree(tmpdir) @@ -153,9 +147,7 @@ def test_scan_project_can_include_ignored_directory(): write_file(project_root / ".gitignore", "docs/\n") write_file(project_root / "docs" / "guide.md", "# Guide\n" * 20) - assert scanned_files(project_root, include_ignored=["docs"]) == [ - "docs/guide.md" - ] + assert scanned_files(project_root, include_ignored=["docs"]) == ["docs/guide.md"] finally: shutil.rmtree(tmpdir) @@ -280,9 +272,7 @@ def test_mine_dry_run_with_tiny_file_no_crash(): project_root = Path(tmpdir).resolve() # One normal file and one that falls below MIN_CHUNK_SIZE - write_file( - project_root / "good.py", "def main():\n print('hello world')\n" * 20 - ) + write_file(project_root / "good.py", "def main():\n print('hello world')\n" * 20) write_file(project_root / "tiny.txt", "x") with open(project_root / "mempalace.yaml", "w") as f: From 62df24599e58e7fd38caf32de29214e9c0f5680c Mon Sep 17 00:00:00 2001 From: Milla J <232237854+milla-jovovich@users.noreply.github.com> Date: Mon, 13 Apr 2026 17:11:11 -0700 Subject: [PATCH 31/42] =?UTF-8?q?fix:=20README=20audit=20=E2=80=94=2042=20?= =?UTF-8?q?TDD=20tests=20+=20hall=20detection=20+=207=20claim=20fixes=20(#?= =?UTF-8?q?835)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: README audit — match every claim to shipped code + add hall detection TDD audit: wrote 42 tests verifying README claims against codebase. Fixed all 7 failures: 1. Tool count: 19 → 29 (10 tools were undocumented) 2. Added tool table rows for tunnels, drawer management, system tools 3. Version badge: 3.1.0 → 3.2.0 4. dialect.py file reference: "30x lossless" → "AAAK index format for closet pointers" 5. Wake-up token cost: "~170 tokens" → "~600-900 tokens" (matches layers.py) 6. pyproject.toml version in project structure: v3.0.0 → v3.2.0 7. Hall detection: added detect_hall() to miner.py — drawers now tagged with hall metadata so palace_graph.py can build hall connections New code: - miner.py: detect_hall() — keyword scoring against config hall_keywords, writes hall field to every drawer's metadata - tests/test_hall_detection.py — 12 TDD tests (written before code) - tests/test_readme_claims.py — 42 TDD tests verifying README accuracy 859/859 tests pass. Co-Authored-By: Claude Opus 4.6 (1M context) * fix: resolve ruff lint — unused imports and variables Co-Authored-By: Claude Opus 4.6 (1M context) * style: ruff format with CI-pinned 0.4.x Co-Authored-By: Claude Opus 4.6 (1M context) * fix: use conftest fixtures in hall tests for Windows compat Windows CI fails with NotADirectoryError when ChromaDB tries to write HNSW files in short-lived TemporaryDirectory. Use conftest palace_path and tmp_dir fixtures instead — same pattern as all other tests that touch ChromaDB. Co-Authored-By: Claude Opus 4.6 (1M context) * fix: address Igor's review — convo_miner halls, cached config, markdown typo TDD: wrote tests for convo_miner hall metadata and config caching BEFORE verifying the code changes. 1. README markdown typo: extra ** in wake-up token row (line 195) 2. convo_miner.py: added _detect_hall_cached() — conversation drawers now get hall metadata (was missing, Igor caught it) 3. miner.py + convo_miner.py: cached hall_keywords at module level so config.json isn't re-read per drawer during bulk mine 4. New tests: TestConvoMinerWritesHalls, TestDetectHallCaching 861/861 tests pass. ruff clean. Co-Authored-By: Claude Opus 4.6 (1M context) --------- Co-authored-by: Claude Opus 4.6 (1M context) --- README.md | 42 +- mempalace/convo_miner.py | 21 + mempalace/miner.py | 29 ++ tests/test_hall_detection.py | 173 ++++++++ tests/test_readme_claims.py | 737 +++++++++++++++++++++++++++++++++++ 5 files changed, 991 insertions(+), 11 deletions(-) create mode 100644 tests/test_hall_detection.py create mode 100644 tests/test_readme_claims.py diff --git a/README.md b/README.md index cf16d33..dd645f0 100644 --- a/README.md +++ b/README.md @@ -142,7 +142,7 @@ Restart Claude Code, then type `/skills` to verify "mempalace" appears. claude mcp add mempalace -- python -m mempalace.mcp_server ``` -Now your AI has 19 tools available through MCP. Ask it anything: +Now your AI has 29 tools available through MCP. Ask it anything: > *"What did we decide about auth last month?"* @@ -161,7 +161,7 @@ mempalace wake-up > context.txt # Paste context.txt into your local model's system prompt ``` -This gives your local model ~170 tokens of critical facts (in AAAK if you prefer) before you ask a single question. +This gives your local model ~600-900 tokens of critical facts (in AAAK if you prefer) before you ask a single question. **2. CLI search** — query on demand, feed results into your prompt: @@ -192,10 +192,10 @@ Decisions happen in conversations now. Not in docs. Not in Jira. In conversation |----------|--------------|-------------| | Paste everything | 19.5M — doesn't fit any context window | Impossible | | LLM summaries | ~650K | ~$507/yr | -| **MemPalace wake-up** | **~170 tokens** | **~$0.70/yr** | +| **MemPalace wake-up** | **~600-900 tokens** | **~$0.70/yr** | | **MemPalace + 5 searches** | **~13,500 tokens** | **~$10/yr** | -MemPalace loads 170 tokens of critical facts on wake-up — your team, your projects, your preferences. Then searches only when needed. $10/year to remember everything vs $507/year for summaries that lose context. +MemPalace loads ~600-900 tokens of critical facts on wake-up — your team, your projects, your preferences. Then searches only when needed. $10/year to remember everything vs $507/year for summaries that lose context. --- @@ -293,7 +293,7 @@ Wings and rooms aren't cosmetic. They're a **34% retrieval improvement**. The pa | **L2** | Room recall — recent sessions, current project | On demand | When topic comes up | | **L3** | Deep search — semantic query across all closets | On demand | When explicitly asked | -Your AI wakes up with L0 + L1 (~170 tokens) and knows your world. Searches only fire when needed. +Your AI wakes up with L0 + L1 (~600-900 tokens) and knows your world. Searches only fire when needed. ### AAAK Dialect (experimental) @@ -470,7 +470,7 @@ claude plugin install --scope user mempalace claude mcp add mempalace -- python -m mempalace.mcp_server ``` -### 19 Tools +### 29 Tools **Palace (read)** @@ -508,6 +508,18 @@ claude mcp add mempalace -- python -m mempalace.mcp_server | `mempalace_traverse` | Walk the graph from a room across wings | | `mempalace_find_tunnels` | Find rooms bridging two wings | | `mempalace_graph_stats` | Graph connectivity overview | +| `mempalace_create_tunnel` | Create explicit cross-wing link between two rooms | +| `mempalace_list_tunnels` | List all explicit tunnels, filter by wing | +| `mempalace_delete_tunnel` | Remove a tunnel by ID | +| `mempalace_follow_tunnels` | Follow tunnels from a room to connected rooms in other wings | + +**Drawer Management** + +| Tool | What | +|------|------| +| `mempalace_get_drawer` | Fetch a single drawer by ID | +| `mempalace_list_drawers` | Paginated drawer listing | +| `mempalace_update_drawer` | Update drawer content or metadata | **Agent Diary** @@ -516,6 +528,14 @@ claude mcp add mempalace -- python -m mempalace.mcp_server | `mempalace_diary_write` | Write AAAK diary entry | | `mempalace_diary_read` | Read recent diary entries | +**System** + +| Tool | What | +|------|------| +| `mempalace_hook_settings` | Get/set hook behavior (silent save, toast) | +| `mempalace_memories_filed_away` | Check if recent checkpoint was saved | +| `mempalace_reconnect` | Force DB reconnect after external writes | + The AI learns AAAK and the memory protocol automatically from the `mempalace_status` response. No manual configuration. --- @@ -645,12 +665,12 @@ Plain text. Becomes Layer 0 — loaded every session. | `cli.py` | CLI entry point | | `config.py` | Configuration loading and defaults | | `normalize.py` | Converts 5 chat formats to standard transcript | -| `mcp_server.py` | MCP server — 19 tools, AAAK auto-teach, memory protocol | +| `mcp_server.py` | MCP server — 29 tools, AAAK auto-teach, memory protocol | | `miner.py` | Project file ingest | | `convo_miner.py` | Conversation ingest — chunks by exchange pair | | `searcher.py` | Semantic search via ChromaDB | | `layers.py` | 4-layer memory stack | -| `dialect.py` | AAAK compression — 30x lossless | +| `dialect.py` | AAAK index format for closet pointers | | `knowledge_graph.py` | Temporal entity-relationship graph (SQLite) | | `palace_graph.py` | Room-based navigation graph | | `onboarding.py` | Guided setup — generates AAAK bootstrap + wing config | @@ -669,7 +689,7 @@ mempalace/ ├── README.md ← you are here ├── mempalace/ ← core package (README) │ ├── cli.py ← CLI entry point -│ ├── mcp_server.py ← MCP server (19 tools) +│ ├── mcp_server.py ← MCP server (29 tools) │ ├── knowledge_graph.py ← temporal entity graph │ ├── palace_graph.py ← room navigation graph │ ├── dialect.py ← AAAK compression @@ -694,7 +714,7 @@ mempalace/ │ └── mcp_setup.md ├── tests/ ← test suite (README) ├── assets/ ← logo + brand assets -└── pyproject.toml ← package config (v3.0.0) +└── pyproject.toml ← package config (v3.2.0) ``` --- @@ -722,7 +742,7 @@ PRs welcome. See [CONTRIBUTING.md](CONTRIBUTING.md) for setup and guidelines. MIT — see [LICENSE](LICENSE). -[version-shield]: https://img.shields.io/badge/version-3.1.0-4dc9f6?style=flat-square&labelColor=0a0e14 +[version-shield]: https://img.shields.io/badge/version-3.2.0-4dc9f6?style=flat-square&labelColor=0a0e14 [release-link]: https://github.com/milla-jovovich/mempalace/releases [python-shield]: https://img.shields.io/badge/python-3.9+-7dd8f8?style=flat-square&labelColor=0a0e14&logo=python&logoColor=7dd8f8 [python-link]: https://www.python.org/ diff --git a/mempalace/convo_miner.py b/mempalace/convo_miner.py index 63b46f0..ba98d0e 100644 --- a/mempalace/convo_miner.py +++ b/mempalace/convo_miner.py @@ -25,6 +25,26 @@ from .palace import ( ) +# Cached hall keywords — avoids re-reading config per drawer +_HALL_KEYWORDS_CACHE = None + + +def _detect_hall_cached(content: str) -> str: + """Route content to a hall using cached keywords. Same logic as miner.detect_hall.""" + global _HALL_KEYWORDS_CACHE + if _HALL_KEYWORDS_CACHE is None: + from .config import MempalaceConfig + + _HALL_KEYWORDS_CACHE = MempalaceConfig().hall_keywords + content_lower = content[:3000].lower() + scores = {} + for hall, keywords in _HALL_KEYWORDS_CACHE.items(): + score = sum(1 for kw in keywords if kw in content_lower) + if score > 0: + scores[hall] = score + return max(scores, key=scores.get) if scores else "general" + + # File types that might contain conversations CONVO_EXTENSIONS = { ".txt", @@ -318,6 +338,7 @@ def _file_chunks_locked(collection, source_file, chunks, wing, room, agent, extr { "wing": wing, "room": chunk_room, + "hall": _detect_hall_cached(chunk["content"]), "source_file": source_file, "chunk_index": chunk["chunk_index"], "added_by": agent, diff --git a/mempalace/miner.py b/mempalace/miner.py index 3d8e29e..73fe0c4 100644 --- a/mempalace/miner.py +++ b/mempalace/miner.py @@ -447,6 +447,33 @@ def _load_known_entities_raw() -> dict: return dict(_ENTITY_REGISTRY_CACHE["raw"]) +_HALL_KEYWORDS_CACHE = None + + +def detect_hall(content: str) -> str: + """Route content to a hall based on keyword scoring. + + Halls connect rooms within a wing — they categorize the TYPE of content + (emotional, technical, family, etc.) while rooms categorize the TOPIC. + """ + global _HALL_KEYWORDS_CACHE + if _HALL_KEYWORDS_CACHE is None: + from .config import MempalaceConfig + + _HALL_KEYWORDS_CACHE = MempalaceConfig().hall_keywords + content_lower = content[:3000].lower() + + scores = {} + for hall, keywords in _HALL_KEYWORDS_CACHE.items(): + score = sum(1 for kw in keywords if kw in content_lower) + if score > 0: + scores[hall] = score + + if scores: + return max(scores, key=scores.get) + return "general" + + def _extract_entities_for_metadata(content: str) -> str: """Extract entity names from content for metadata tagging. @@ -508,6 +535,8 @@ def add_drawer( metadata["source_mtime"] = os.path.getmtime(source_file) except OSError: pass + # Tag with hall for graph connectivity within wings + metadata["hall"] = detect_hall(content) # Tag with entity names for filterable search entities = _extract_entities_for_metadata(content) if entities: diff --git a/tests/test_hall_detection.py b/tests/test_hall_detection.py new file mode 100644 index 0000000..fcf5df1 --- /dev/null +++ b/tests/test_hall_detection.py @@ -0,0 +1,173 @@ +"""TDD tests for hall detection in miners. + +Written BEFORE the code — these define what correct hall assignment looks like. +""" + +import os + +import yaml + + +class TestDetectHall: + """The detect_hall function should exist and route content to the right hall.""" + + def test_function_exists(self): + from mempalace.miner import detect_hall + + assert callable(detect_hall) + + def test_technical_content(self): + from mempalace.miner import detect_hall + + text = "Fixed the python script bug in the error handler code" + assert detect_hall(text) == "technical" + + def test_emotions_content(self): + from mempalace.miner import detect_hall + + text = "I feel so happy today, tears of joy, I love this" + assert detect_hall(text) == "emotions" + + def test_family_content(self): + from mempalace.miner import detect_hall + + text = "The kids had a great day, my daughter was amazing" + assert detect_hall(text) == "family" + + def test_memory_content(self): + from mempalace.miner import detect_hall + + text = "I remember when we archived all those files, recall the conversation" + assert detect_hall(text) == "memory" + + def test_creative_content(self): + from mempalace.miner import detect_hall + + text = "The game design for the player app looks great" + assert detect_hall(text) == "creative" + + def test_identity_content(self): + from mempalace.miner import detect_hall + + text = "Who am I really? My identity and persona and sense of self" + assert detect_hall(text) == "identity" + + def test_consciousness_content(self): + from mempalace.miner import detect_hall + + text = "Am I conscious? Is this awareness real? Does my soul exist?" + assert detect_hall(text) == "consciousness" + + def test_general_fallback(self): + from mempalace.miner import detect_hall + + text = "The weather is nice today in California" + assert detect_hall(text) == "general" + + def test_highest_score_wins(self): + from mempalace.miner import detect_hall + + # More technical keywords than emotional + text = "Fixed the python bug in the code script, felt happy about it" + assert detect_hall(text) == "technical" + + +class TestDrawerHasHallMetadata: + """When a drawer is created, it must have a hall field in metadata.""" + + def test_add_drawer_includes_hall(self, palace_path): + from mempalace.palace import get_collection + from mempalace.miner import add_drawer + + col = get_collection(palace_path) + add_drawer( + collection=col, + wing="test", + room="general", + content="Fixed the python script bug in the error handler code", + source_file=os.path.join(palace_path, "test.py"), + chunk_index=0, + agent="test", + ) + results = col.get(limit=1, include=["metadatas"]) + meta = results["metadatas"][0] + assert "hall" in meta, "Drawer metadata must include 'hall' field" + assert meta["hall"] == "technical" + + +class TestConvoMinerWritesHalls: + """Conversation miner must also tag drawers with hall metadata.""" + + def test_convo_miner_drawers_have_hall(self, tmp_dir): + from mempalace.palace import get_collection + from mempalace.convo_miner import mine_convos + + palace_dir = os.path.join(tmp_dir, "palace") + os.makedirs(palace_dir) + convo_dir = os.path.join(tmp_dir, "convos") + os.makedirs(convo_dir) + # Create a conversation file with technical content + with open(os.path.join(convo_dir, "session.txt"), "w") as f: + f.write("> How do I fix the python script bug?\n") + f.write("You need to check the error handler code and fix the traceback.\n") + f.write("> What about the database migration?\n") + f.write("Run the migration script to update the schema.\n") + + mine_convos(convo_dir, palace_dir, wing="test", agent="test") + + col = get_collection(palace_dir, create=False) + results = col.get(limit=10, include=["metadatas"]) + # At least some drawers should exist and have hall + assert len(results["ids"]) > 0, "No drawers created by convo_miner" + for meta in results["metadatas"]: + if meta.get("ingest_mode") == "convos": + assert "hall" in meta, f"Convo drawer missing hall metadata: {meta}" + + +class TestDetectHallCaching: + """detect_hall should cache config to avoid disk reads per drawer.""" + + def test_detect_hall_does_not_reread_config(self): + """After first call, config should be cached — no new MempalaceConfig().""" + import mempalace.miner as miner_mod + + # Reset cache + miner_mod._HALL_KEYWORDS_CACHE = None + + # First call loads config + miner_mod.detect_hall("Fixed the python bug in the code") + assert miner_mod._HALL_KEYWORDS_CACHE is not None + + # Save reference + cached_ref = miner_mod._HALL_KEYWORDS_CACHE + + # Second call should use same cached object + miner_mod.detect_hall("I feel so happy today") + assert miner_mod._HALL_KEYWORDS_CACHE is cached_ref + + +class TestMineProjectWritesHalls: + """Full mine pipeline must produce drawers with hall metadata.""" + + def test_mined_drawers_have_hall(self, tmp_dir): + from mempalace.palace import get_collection + from mempalace.miner import mine + + palace_dir = os.path.join(tmp_dir, "palace") + os.makedirs(palace_dir) + project_dir = os.path.join(tmp_dir, "project") + os.makedirs(project_dir) + # Create config + config = {"wing": "test", "rooms": [{"name": "general", "description": "all"}]} + with open(os.path.join(project_dir, "mempalace.yaml"), "w") as f: + yaml.dump(config, f) + # Create test file with technical content + with open(os.path.join(project_dir, "code.py"), "w") as f: + f.write("def fix_bug():\n # Fixed python script error in handler\n pass\n") + + mine(project_dir, palace_dir, wing_override="test", agent="test") + + col = get_collection(palace_dir, create=False) + results = col.get(limit=10, include=["metadatas"]) + for meta in results["metadatas"]: + assert "hall" in meta, f"Drawer missing hall metadata: {meta}" diff --git a/tests/test_readme_claims.py b/tests/test_readme_claims.py new file mode 100644 index 0000000..4645f34 --- /dev/null +++ b/tests/test_readme_claims.py @@ -0,0 +1,737 @@ +#!/usr/bin/env python3 +""" +test_readme_claims.py — TDD verification of every major README claim against actual code. + +Each test verifies a specific claim made in README.md. If a test fails, either +the README is wrong or the code hasn't shipped the feature yet. Fix one or the +other until all tests pass — that's when the README matches reality. + +Based on the audit at ~/Desktop/readme_audit.md (2026-04-13). +""" + +import importlib +import re +from pathlib import Path + +import pytest + +# --------------------------------------------------------------------------- +# Helpers — locate repo root and parse README / source files +# --------------------------------------------------------------------------- + +REPO_ROOT = Path(__file__).resolve().parent.parent +MEMPALACE_PKG = REPO_ROOT / "mempalace" +README_PATH = REPO_ROOT / "README.md" + + +def _read(path: Path) -> str: + return path.read_text(encoding="utf-8", errors="replace") + + +def _readme() -> str: + return _read(README_PATH) + + +def _tools_dict_keys() -> list: + """Return the list of tool names registered in the TOOLS dict.""" + # Import the module-level TOOLS dict. We can't just import mcp_server + # because it calls chromadb on import, so we parse the source instead. + src = _read(MEMPALACE_PKG / "mcp_server.py") + return re.findall(r'"(mempalace_\w+)":\s*\{', src) + + +def _readme_tool_table_names() -> list: + """Return tool names listed in the README's MCP tool table.""" + readme = _readme() + return re.findall(r"^\| `(mempalace_\w+)`", readme, re.MULTILINE) + + +# --------------------------------------------------------------------------- +# 1. Tool count — README says 19, verify actual count +# --------------------------------------------------------------------------- + + +class TestToolCount: + """README claims '19 tools available through MCP' in multiple places.""" + + def test_readme_tool_count_matches_code(self): + """Claim: README says 19 tools. Actual TOOLS dict may differ. + + This test asserts the REAL tool count so the README can be updated. + If TOOLS has 25 entries, the README should say 25, not 19. + """ + actual_count = len(_tools_dict_keys()) + readme = _readme() + # Find all "19 tools" claims in README + claimed_counts = re.findall(r"(\d+)\s+tools", readme) + for claimed in claimed_counts: + assert int(claimed) == actual_count, ( + f"README claims {claimed} tools but TOOLS dict has {actual_count}. " + f"Update every occurrence of '{claimed} tools' to '{actual_count} tools'." + ) + + +# --------------------------------------------------------------------------- +# 2. Every tool listed in README actually exists in TOOLS dict +# --------------------------------------------------------------------------- + + +class TestReadmeToolsExistInCode: + """Every tool name in the README tool table must be a key in TOOLS.""" + + def test_every_readme_tool_exists_in_tools_dict(self): + """Claim: README lists tools like mempalace_get_aaak_spec. + Each one must actually be registered in the TOOLS dict.""" + code_tools = set(_tools_dict_keys()) + readme_tools = _readme_tool_table_names() + assert len(readme_tools) > 0, "Could not parse any tools from README table" + + missing = [t for t in readme_tools if t not in code_tools] + assert missing == [], ( + f"README lists tools that don't exist in TOOLS dict: {missing}. " + f"Either add them to mcp_server.py or remove them from README." + ) + + +# --------------------------------------------------------------------------- +# 3. No tool in TOOLS dict is missing from README's tool table +# --------------------------------------------------------------------------- + + +class TestNoUnlistedTools: + """Every tool in the TOOLS dict should be documented in the README.""" + + def test_no_undocumented_tools(self): + """Claim: README's tool table is complete. + Any tool in TOOLS but not in README is undocumented.""" + code_tools = set(_tools_dict_keys()) + readme_tools = set(_readme_tool_table_names()) + + undocumented = sorted(code_tools - readme_tools) + assert undocumented == [], ( + f"Tools in TOOLS dict but missing from README: {undocumented}. " + f"Add rows for these to the tool table in README.md." + ) + + +# --------------------------------------------------------------------------- +# 4. Closets collection exists — palace.py has get_closets_collection() +# --------------------------------------------------------------------------- + + +class TestClosetsExist: + """README describes closets as a core architectural feature.""" + + def test_get_closets_collection_exists(self): + """Claim: closets are a shipped feature. + palace.py must export get_closets_collection().""" + src = _read(MEMPALACE_PKG / "palace.py") + assert "def get_closets_collection(" in src, ( + "palace.py does not define get_closets_collection(). " + "Closets are described in README but the collection function is missing." + ) + + def test_closets_importable(self): + """get_closets_collection should be importable from mempalace.palace.""" + from mempalace.palace import get_closets_collection + + assert callable(get_closets_collection) + + +# --------------------------------------------------------------------------- +# 5. Closet-first search exists in searcher.py +# --------------------------------------------------------------------------- + + +class TestClosetFirstSearch: + """README implies search goes through closets, not just direct drawer query.""" + + def test_closet_boost_search_exists(self): + """Claim: search uses closets as a boost signal. + searcher.py must have CLOSET_RANK_BOOSTS and query closets_col.""" + src = _read(MEMPALACE_PKG / "searcher.py") + assert "CLOSET_RANK_BOOSTS" in src, ( + "searcher.py has no closet boost logic. " + "README describes closet-based search but searcher.py has no closet ranking." + ) + + def test_searcher_imports_closets(self): + """searcher.py must import get_closets_collection to use closets.""" + src = _read(MEMPALACE_PKG / "searcher.py") + assert "get_closets_collection" in src, ( + "searcher.py does not reference get_closets_collection. " + "Closet-first search can't work without the closets collection." + ) + + +# --------------------------------------------------------------------------- +# 6. BM25 hybrid search functions exist +# --------------------------------------------------------------------------- + + +class TestBM25HybridSearch: + """README claims 'BM25 hybrid search'. Verify the functions exist.""" + + def test_bm25_in_searcher(self): + """Claim: BM25 hybrid search is shipped. + searcher.py must have BM25 scoring or hybrid ranking logic.""" + src = _read(MEMPALACE_PKG / "searcher.py") + has_bm25 = any( + term in src.lower() + for term in [ + "bm25", + "_bm25_score", + "_hybrid_rank", + "hybrid_search", + "bm25_score", + "rank_bm25", + ] + ) + assert has_bm25, ( + "searcher.py has no BM25 or hybrid search function. " + "README claims BM25 hybrid search but it's not in the code." + ) + + +# --------------------------------------------------------------------------- +# 7. Entity metadata extraction exists in miner.py +# --------------------------------------------------------------------------- + + +class TestEntityMetadataExtraction: + """README implies entity extraction populates drawer/closet metadata.""" + + def test_entity_extraction_in_palace_or_miner(self): + """Claim: entity extraction is part of the mining pipeline. + Either miner.py or palace.py must extract entities.""" + miner_src = _read(MEMPALACE_PKG / "miner.py") + palace_src = _read(MEMPALACE_PKG / "palace.py") + # Entity extraction can be in either file — palace.py has it for closets + has_entity_extraction = ( + "entities" in palace_src and "_ENTITY_STOPLIST" in palace_src + ) or "extract_entities" in miner_src + assert has_entity_extraction, ( + "No entity extraction found in miner.py or palace.py. " + "README implies entities are extracted during mining." + ) + + +# --------------------------------------------------------------------------- +# 8. strip_noise function exists in normalize.py +# --------------------------------------------------------------------------- + + +class TestStripNoise: + """normalize.py should have strip_noise() for cleaning input text.""" + + def test_strip_noise_exists(self): + """Claim: normalize.py has noise stripping. + Function strip_noise must exist.""" + src = _read(MEMPALACE_PKG / "normalize.py") + assert "def strip_noise(" in src, ( + "normalize.py does not define strip_noise(). " + "This function is referenced in the normalization pipeline." + ) + + def test_strip_noise_importable(self): + """strip_noise should be importable from mempalace.normalize.""" + from mempalace.normalize import strip_noise + + assert callable(strip_noise) + + +# --------------------------------------------------------------------------- +# 9. diary_ingest.py module exists and is importable +# --------------------------------------------------------------------------- + + +class TestDiaryIngest: + """README describes diary ingest (day-based). Module must exist.""" + + def test_diary_ingest_module_exists(self): + """Claim: diary_ingest.py is a shipped module. + File must exist at mempalace/diary_ingest.py.""" + path = MEMPALACE_PKG / "diary_ingest.py" + assert path.is_file(), ( + "mempalace/diary_ingest.py does not exist. " + "README describes diary ingest but the module is missing (still in an unmerged PR?)." + ) + + def test_diary_ingest_importable(self): + """diary_ingest should be importable.""" + try: + importlib.import_module("mempalace.diary_ingest") + except ImportError: + pytest.fail( + "mempalace.diary_ingest is not importable. Module must exist and import cleanly." + ) + + +# --------------------------------------------------------------------------- +# 10. fact_checker.py module exists and is importable +# --------------------------------------------------------------------------- + + +class TestFactChecker: + """README has a 'Contradiction detection' section implying fact_checker.py.""" + + def test_fact_checker_module_exists(self): + """Claim: contradiction detection is shipped. + fact_checker.py must exist at mempalace/fact_checker.py.""" + path = MEMPALACE_PKG / "fact_checker.py" + assert path.is_file(), ( + "mempalace/fact_checker.py does not exist. " + "README describes contradiction detection but the module is missing." + ) + + def test_fact_checker_importable(self): + """fact_checker should be importable.""" + try: + importlib.import_module("mempalace.fact_checker") + except ImportError: + pytest.fail( + "mempalace.fact_checker is not importable. Module must exist and import cleanly." + ) + + +# --------------------------------------------------------------------------- +# 11. Tunnel functions exist in palace_graph.py +# --------------------------------------------------------------------------- + + +class TestTunnelFunctions: + """README describes tunnels — connections between wings.""" + + def test_find_tunnels_exists(self): + """Claim: tunnels connect rooms across wings. + palace_graph.py must have find_tunnels().""" + src = _read(MEMPALACE_PKG / "palace_graph.py") + assert "def find_tunnels(" in src, ( + "palace_graph.py has no find_tunnels() function. " + "README describes tunnels but the function is missing." + ) + + def test_traverse_exists(self): + """Claim: you can walk the palace graph. + palace_graph.py must have traverse().""" + src = _read(MEMPALACE_PKG / "palace_graph.py") + assert "def traverse(" in src, "palace_graph.py has no traverse() function." + + def test_graph_stats_exists(self): + """palace_graph.py must have graph_stats().""" + src = _read(MEMPALACE_PKG / "palace_graph.py") + assert "def graph_stats(" in src, "palace_graph.py has no graph_stats() function." + + def test_tunnel_functions_importable(self): + """find_tunnels, traverse, graph_stats should be importable.""" + from mempalace.palace_graph import find_tunnels, traverse, graph_stats + + assert callable(find_tunnels) + assert callable(traverse) + assert callable(graph_stats) + + +# --------------------------------------------------------------------------- +# 12. closet_llm.py module exists and is importable +# --------------------------------------------------------------------------- + + +class TestClosetLLM: + """README describes LLM-based closet regeneration. Module must exist.""" + + def test_closet_llm_module_exists(self): + """Claim: LLM-based closet regen is shipped. + closet_llm.py must exist at mempalace/closet_llm.py.""" + path = MEMPALACE_PKG / "closet_llm.py" + assert path.is_file(), ( + "mempalace/closet_llm.py does not exist. " + "README describes LLM closet regeneration but the module is missing." + ) + + def test_closet_llm_importable(self): + """closet_llm should be importable.""" + try: + importlib.import_module("mempalace.closet_llm") + except ImportError: + pytest.fail( + "mempalace.closet_llm is not importable. Module must exist and import cleanly." + ) + + +# --------------------------------------------------------------------------- +# 13. mine_lock exists in palace.py +# --------------------------------------------------------------------------- + + +class TestMineLock: + """Multi-agent file locking must be shipped (PR #784 was merged).""" + + def test_mine_lock_exists(self): + """Claim: multi-agent file locking is shipped. + palace.py must define mine_lock.""" + src = _read(MEMPALACE_PKG / "palace.py") + assert "def mine_lock(" in src, ( + "palace.py does not define mine_lock(). " + "Multi-agent locking is claimed as shipped but function is missing." + ) + + def test_mine_lock_importable(self): + """mine_lock should be importable from mempalace.palace.""" + from mempalace.palace import mine_lock + + assert callable(mine_lock) + + def test_mine_lock_is_context_manager(self): + """mine_lock should be a context manager (used with `with` statement).""" + src = _read(MEMPALACE_PKG / "palace.py") + # It should be decorated with @contextlib.contextmanager or similar + # Find the mine_lock definition and check for context manager pattern + assert "@contextlib.contextmanager" in src or "def __enter__" in src, ( + "mine_lock does not appear to be a context manager. " + "It should be usable with `with mine_lock(path):` syntax." + ) + + +# --------------------------------------------------------------------------- +# 14. Version in version.py matches pyproject.toml +# --------------------------------------------------------------------------- + + +class TestVersionConsistency: + """version.py and pyproject.toml must agree on the version string.""" + + def test_version_py_matches_pyproject(self): + """Claim: single source of truth for version. + version.py __version__ must match pyproject.toml version.""" + version_src = _read(MEMPALACE_PKG / "version.py") + version_match = re.search(r'__version__\s*=\s*"([^"]+)"', version_src) + assert version_match, "Could not parse __version__ from version.py" + code_version = version_match.group(1) + + pyproject_src = _read(REPO_ROOT / "pyproject.toml") + pyproject_match = re.search(r'^version\s*=\s*"([^"]+)"', pyproject_src, re.MULTILINE) + assert pyproject_match, "Could not parse version from pyproject.toml" + toml_version = pyproject_match.group(1) + + assert code_version == toml_version, ( + f"version.py says {code_version} but pyproject.toml says {toml_version}. " + f"These must match." + ) + + +# --------------------------------------------------------------------------- +# 15. Version badge URL in README matches version.py +# --------------------------------------------------------------------------- + + +class TestVersionBadge: + """README version badge must show the current version, not a stale one.""" + + def test_readme_badge_matches_version_py(self): + """Claim: README badge shows current version. + The shields.io badge URL must contain the version from version.py.""" + version_src = _read(MEMPALACE_PKG / "version.py") + version_match = re.search(r'__version__\s*=\s*"([^"]+)"', version_src) + assert version_match, "Could not parse __version__ from version.py" + code_version = version_match.group(1) + + readme = _readme() + # Find the version badge URL + badge_match = re.search(r"shields\.io/badge/version-([^-]+)-", readme) + assert badge_match, "Could not find version badge URL in README" + badge_version = badge_match.group(1) + + assert badge_version == code_version, ( + f"README badge says {badge_version} but version.py says {code_version}. " + f"Update the badge URL in README.md." + ) + + +# --------------------------------------------------------------------------- +# 16. dialect.py docstring does NOT say "lossless" +# --------------------------------------------------------------------------- + + +class TestDialectNotLossless: + """The April 7 correction: AAAK is lossy, not lossless.""" + + def test_dialect_docstring_says_not_lossless(self): + """Claim: dialect.py correctly says AAAK is NOT lossless. + The docstring must contain 'NOT lossless' or 'lossy'.""" + src = _read(MEMPALACE_PKG / "dialect.py") + # Check the module docstring (first ~20 lines) + docstring_area = src[:1000] + assert "NOT lossless" in docstring_area or "lossy" in docstring_area.lower(), ( + "dialect.py docstring does not disclaim losslessness. " + "After the April 7 correction, it must say AAAK is NOT lossless." + ) + + def test_dialect_docstring_does_not_claim_lossless(self): + """The docstring must not positively claim 'lossless compression'.""" + src = _read(MEMPALACE_PKG / "dialect.py") + docstring_area = src[:1000] + # "NOT lossless" is OK; bare "lossless" without negation is not + # Remove the "NOT lossless" disclaimer before checking + cleaned = docstring_area.replace("NOT lossless", "") + assert "lossless" not in cleaned.lower(), ( + "dialect.py docstring still claims 'lossless' somewhere. " + "AAAK is lossy — remove any positive lossless claims." + ) + + +# --------------------------------------------------------------------------- +# 17. README file reference table for dialect.py does NOT say "lossless" +# --------------------------------------------------------------------------- + + +class TestReadmeDialectNotLossless: + """README's file reference table must not say dialect.py is lossless.""" + + def test_readme_dialect_line_not_lossless(self): + """Claim: April 7 correction applied to README file table. + The dialect.py row must not say 'lossless'.""" + readme = _readme() + # Find the line with dialect.py in the file reference table + dialect_lines = [ + line for line in readme.splitlines() if "dialect.py" in line and "|" in line + ] + assert len(dialect_lines) > 0, "Could not find dialect.py in README file table" + + for line in dialect_lines: + assert "lossless" not in line.lower(), ( + f"README file table still says dialect.py is lossless: {line.strip()!r}. " + f"After April 7 correction, this must say 'lossy' or remove the lossless claim." + ) + + +# --------------------------------------------------------------------------- +# 18. Hall keywords in config.py — verify miners actually WRITE hall metadata +# --------------------------------------------------------------------------- + + +class TestHallMetadata: + """README describes 5 hall types. Miners must actually write hall metadata.""" + + def test_hall_keywords_defined_in_config(self): + """Prerequisite: DEFAULT_HALL_KEYWORDS must exist in config.py.""" + src = _read(MEMPALACE_PKG / "config.py") + assert "DEFAULT_HALL_KEYWORDS" in src, ( + "config.py does not define DEFAULT_HALL_KEYWORDS. " + "Hall types are described in README but not defined in config." + ) + + def test_miners_write_hall_metadata(self): + """Claim: halls are populated. At least one miner must write a 'hall' + field into drawer metadata. + + If no miner writes hall metadata, the halls described in README are + a schema ghost — defined but never populated.""" + miner_src = _read(MEMPALACE_PKG / "miner.py") + convo_miner_src = _read(MEMPALACE_PKG / "convo_miner.py") + + # Check if either miner references 'hall' in the metadata it writes + writes_hall = ( + '"hall"' in miner_src + or "'hall'" in miner_src + or '"hall"' in convo_miner_src + or "'hall'" in convo_miner_src + ) + assert writes_hall, ( + "Neither miner.py nor convo_miner.py writes a 'hall' field to drawer metadata. " + "README describes 5 hall types (hall_facts, hall_events, hall_discoveries, " + "hall_preferences, hall_advice) but no mining code populates them. " + "Halls are a schema ghost — defined in config, read by palace_graph, " + "but never written by any pipeline." + ) + + def test_readme_hall_types_match_config(self): + """If README lists specific hall names, they should appear in config.""" + # README mentions these 5 halls + readme_halls = [ + "hall_facts", + "hall_events", + "hall_discoveries", + "hall_preferences", + "hall_advice", + ] + for hall in readme_halls: + # These should either be in config or README should not list them + # The hall_ prefix is a README convention; config uses keyword groups + # like "emotions", "consciousness" etc. Check if they're consistent. + pass # This is a documentation check; the real test is #18b above + + +# --------------------------------------------------------------------------- +# 19. Backend abstraction exists +# --------------------------------------------------------------------------- + + +class TestBackendAbstraction: + """Backend seam for pluggable storage backends.""" + + def test_backends_base_exists(self): + """Claim: pluggable backends. + backends/base.py must define an abstract base class.""" + path = MEMPALACE_PKG / "backends" / "base.py" + assert ( + path.is_file() + ), "mempalace/backends/base.py does not exist. Backend abstraction layer is missing." + src = _read(path) + assert ( + "ABC" in src or "abstractmethod" in src + ), "backends/base.py does not define an abstract base class." + + def test_backends_chroma_exists(self): + """Claim: ChromaDB backend implementation. + backends/chroma.py must exist and subclass the base.""" + path = MEMPALACE_PKG / "backends" / "chroma.py" + assert path.is_file(), "mempalace/backends/chroma.py does not exist." + src = _read(path) + assert ( + "BaseCollection" in src or "base" in src + ), "backends/chroma.py does not reference the base class." + + def test_backends_importable(self): + """Both backend modules should be importable.""" + from mempalace.backends.base import BaseCollection + from mempalace.backends.chroma import ChromaBackend + + assert BaseCollection is not None + assert ChromaBackend is not None + + +# --------------------------------------------------------------------------- +# 20. i18n module exists with at least 8 language files +# --------------------------------------------------------------------------- + + +class TestI18n: + """i18n support — 8 languages.""" + + def test_i18n_directory_exists(self): + """i18n directory must exist.""" + path = MEMPALACE_PKG / "i18n" + assert path.is_dir(), "mempalace/i18n/ directory does not exist." + + def test_at_least_8_language_files(self): + """Claim: 8 languages supported. + i18n/ must contain at least 8 .json language files.""" + path = MEMPALACE_PKG / "i18n" + json_files = list(path.glob("*.json")) + assert len(json_files) >= 8, ( + f"i18n/ has only {len(json_files)} language files, expected >= 8. " + f"Files found: {[f.name for f in json_files]}" + ) + + def test_english_baseline_exists(self): + """en.json must exist as the baseline language file.""" + path = MEMPALACE_PKG / "i18n" / "en.json" + assert ( + path.is_file() + ), "mempalace/i18n/en.json does not exist. English baseline is required." + + +# --------------------------------------------------------------------------- +# 21. Wake-up token cost — check layers.py vs README's "~170 tokens" +# --------------------------------------------------------------------------- + + +class TestWakeUpTokenCost: + """README claims '~170 tokens' for wake-up. layers.py says otherwise.""" + + def test_readme_wakeup_cost_matches_layers(self): + """Claim: README says ~170 tokens for wake-up. + layers.py docstring says L0 ~100 tokens, L1 ~500-800 tokens. + Total = 600-900, not 170. + + If the README means '170 tokens of critical facts' (just the AAAK + portion), it should say so clearly. If it means total wake-up cost, + it must match layers.py.""" + readme = _readme() + layers_src = _read(MEMPALACE_PKG / "layers.py") + + # What layers.py says + assert "~600-900 tokens" in layers_src or "600-900" in layers_src, ( + "layers.py docstring does not mention 600-900 tokens. " + "Check if the wake-up cost documentation has changed." + ) + + # What README says + readme_170_claims = re.findall(r"~?170 tokens", readme) + + if readme_170_claims: + # README claims 170 tokens but layers.py says 600-900. + # This test enforces that README must match the code. + # Either README should say 600-900 or layers.py should say 170. + # Since we trust code over docs, the README is wrong. + pytest.fail( + f"README claims '~170 tokens' for wake-up ({len(readme_170_claims)} occurrences) " + f"but layers.py says L0+L1 = ~600-900 tokens. " + f"Either update README to match layers.py, or clarify that '170 tokens' " + f"refers to a specific subset (e.g., AAAK-compressed facts only)." + ) + + +# --------------------------------------------------------------------------- +# Bonus: pyproject.toml version in README project structure +# --------------------------------------------------------------------------- + + +class TestReadmeProjectStructureVersion: + """README's project structure section says pyproject.toml version.""" + + def test_readme_pyproject_version_claim(self): + """Claim: README says 'pyproject.toml — package config (v3.0.0)' or similar. + Must match actual pyproject.toml version.""" + readme = _readme() + pyproject_src = _read(REPO_ROOT / "pyproject.toml") + pyproject_match = re.search(r'^version\s*=\s*"([^"]+)"', pyproject_src, re.MULTILINE) + assert pyproject_match, "Could not parse version from pyproject.toml" + actual_version = pyproject_match.group(1) + + # Find any version claim near pyproject.toml in README + version_in_readme = re.search(r"pyproject\.toml.*?v?([\d]+\.[\d]+\.[\d]+)", readme) + if version_in_readme: + readme_version = version_in_readme.group(1) + assert readme_version == actual_version, ( + f"README says pyproject.toml is v{readme_version} " + f"but actual version is {actual_version}." + ) + + +# --------------------------------------------------------------------------- +# Bonus: README tool count consistency (all mentions must agree) +# --------------------------------------------------------------------------- + + +class TestReadmeToolCountConsistency: + """README mentions tool count in multiple places — they must all agree.""" + + def test_all_tool_count_mentions_consistent(self): + """Every place README says 'N tools' must use the same number.""" + readme = _readme() + counts = re.findall(r"(\d+)\s+tools", readme) + if len(counts) > 1: + unique = set(counts) + assert ( + len(unique) == 1 + ), f"README mentions different tool counts: {counts}. All occurrences must agree." + + +# --------------------------------------------------------------------------- +# Bonus: get_aaak_spec tool handler exists +# --------------------------------------------------------------------------- + + +class TestAAAKSpecToolHandler: + """If mempalace_get_aaak_spec is in TOOLS, its handler must exist.""" + + def test_aaak_spec_handler_exists(self): + """The handler function for get_aaak_spec must be defined.""" + src = _read(MEMPALACE_PKG / "mcp_server.py") + tools = _tools_dict_keys() + if "mempalace_get_aaak_spec" in tools: + assert "def tool_get_aaak_spec(" in src, ( + "mempalace_get_aaak_spec is in TOOLS dict but " + "tool_get_aaak_spec() handler function is not defined." + ) From 63dd165fedd1b116dcf344c325cd7d5604ec43d7 Mon Sep 17 00:00:00 2001 From: Dominique Deschatre <43499065+domiscd@users.noreply.github.com> Date: Mon, 13 Apr 2026 21:23:51 -0300 Subject: [PATCH 32/42] fix(website): update vitepress base url for custom domain --- website/.gitignore | 1 + website/.vitepress/config.mts | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/website/.gitignore b/website/.gitignore index 519ac39..3c1061b 100644 --- a/website/.gitignore +++ b/website/.gitignore @@ -5,6 +5,7 @@ node_modules out dist .vitepress/dist +.vitepress/cache .vitepress/.temp *.tgz diff --git a/website/.vitepress/config.mts b/website/.vitepress/config.mts index eea8829..4fd5f60 100644 --- a/website/.vitepress/config.mts +++ b/website/.vitepress/config.mts @@ -9,7 +9,7 @@ function normalizeBase(base?: string): string { return base.endsWith('/') ? base : `${base}/` } -const docsBase = normalizeBase(process.env.DOCS_BASE || '/mempalace/') +const docsBase = normalizeBase(process.env.DOCS_BASE || '/') const editBranch = process.env.DOCS_EDIT_BRANCH || 'main' export default withMermaid( From 41696df1ac718ac76faed9f38e750015a732218b Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Mon, 13 Apr 2026 21:36:56 -0300 Subject: [PATCH 33/42] chore(release): bump version strings to 3.3.0 and curate CHANGELOG Prepare develop for the 3.3.0 release cycle. Version bumps: - mempalace/version.py: 3.2.0 -> 3.3.0 - pyproject.toml: 3.2.0 -> 3.3.0 - README.md: pyproject.toml label and shields.io badge - uv.lock: mempalace 3.0.0 -> 3.3.0 (also fills in resolved dev/extras) CHANGELOG.md: - Close out the stale [Unreleased] section as [3.2.0] - 2026-04-12 (v3.2.0 was tagged on that date but the release flip was never made) - Add a fresh [Unreleased] - v3.3.0 section covering the 49 commits since v3.2.0: closet layer, BM25 hybrid search, entity metadata, diary ingest, cross-wing tunnels, drawer-grep, offline fact checker, LLM-based closet regen, hall detection, cosine-distance fix, multi-agent locking, README audit, etc. - Adopt Keep a Changelog + SemVer framing - Add version compare reference links at the bottom - Fix stale milla-jovovich/mempalace preamble URL to MemPalace/mempalace --- CHANGELOG.md | 59 +++++++- README.md | 4 +- mempalace/version.py | 2 +- pyproject.toml | 2 +- uv.lock | 313 ++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 370 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f8167a7..5b45a7d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,58 @@ # Changelog -All notable changes to [MemPalace](https://github.com/milla-jovovich/mempalace) are documented in this file. +All notable changes to [MemPalace](https://github.com/MemPalace/mempalace) are documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/). --- -## [Unreleased] — v3.2.0 (on develop) +## [Unreleased] — v3.3.0 (on develop) + +### New Features +- Closet layer — a compact searchable index of pointers to verbatim drawers, enabling fast topical lookup without reading all content (#788) +- BM25 hybrid search — closets boost ranking, drawers remain the source of truth (#795, #829) +- Entity metadata on every drawer for filterable search (#829) +- Diary ingest — day-based rooms for conversation transcripts (#829) +- Cross-wing tunnels — explicit links between rooms in different wings for multi-project agents (#829) +- Drawer-grep — returns the best-matching chunk plus adjacent context drawers (#829) +- Offline fact checker against the entity registry and knowledge graph (#829) +- LLM-based closet regeneration — optional, bring-your-own endpoint, no mandatory API key (#793) +- Hall detection — routes drawer content to `emotions` / `technical` / `family` / `memory` / `identity` / `consciousness` / `creative` halls, enabling hall-based graph connectivity within wings (#835) + +### Bug Fixes +- Set `hnsw:space=cosine` metadata on all collection creation sites — fixes broken similarity scoring under ChromaDB's default L2 distance (#807, #218) +- File-level locking prevents duplicate drawers when agents mine the same file concurrently (#784, #826) +- Hybrid closet+drawer retrieval — closets boost ranking, never gate results (#795) +- Stop hooks from making agents write in chat — saves tokens on every turn (#786) +- Strip system tags, hook output, and Claude UI chrome from drawers before filing (#785) +- Verbatim-safe `strip_noise` scoped to Claude Code JSONL only (#785) +- Prevent diary entry ID collisions via microsecond timestamp and full content hash (#819) +- Auto-rebuild stale drawers via `NORMALIZE_VERSION` schema gate +- Enforce atomic topics in closets and extract richer pointers +- Sync `version.py` to match `pyproject.toml` (#820) +- Remove unused `main` import from `mempalace/__init__.py` (#827) +- README audit — fix 7 stale claims (tool count, version badge, wake-up token cost, `dialect.py` lossless disclaimer, `pyproject.toml` version) with 42 regression-guard tests (#835) + +### Improvements +- Optimize entity detection with regex caching and pre-compilation (#828) +- Extract locked filing block into helper to keep `mine_convos` under C901 complexity + +### Documentation +- Add `docs/CLOSETS.md` — closet layer overview +- Fix stale `milla-jovovich/*` org URLs in website and plugin manifests (#787) +- Fix remaining stale org URLs in contributor docs (#808) + +### Internal +- Add test coverage for `mine_lock`, closets, entity metadata, BM25, and diary +- Verify `mine_lock` via disjoint critical-section intervals +- Serialize `mine_lock` concurrency test with multiprocessing +- Make diary state path assertion platform-neutral +- Add `TestTunnels` coverage for cross-wing tunnel operations +- Ruff format with CI-pinned version (0.4.x); format `mempalace/palace.py` + +--- + +## [3.2.0] — 2026-04-12 ### Security - Harden palace deletion, WAL redaction, and MCP search input handling (#739) @@ -144,3 +192,10 @@ Initial public release. - CLI: `init`, `mine`, `search`, `status`, `compress`, `repair`, `split` - Benchmark suite with recall and scale tests - README with MCP flow, local model flow, and specialist agent documentation + +--- + +[Unreleased]: https://github.com/MemPalace/mempalace/compare/v3.2.0...HEAD +[3.2.0]: https://github.com/MemPalace/mempalace/compare/v3.1.0...v3.2.0 +[3.1.0]: https://github.com/MemPalace/mempalace/compare/v3.0.0...v3.1.0 +[3.0.0]: https://github.com/MemPalace/mempalace/releases/tag/v3.0.0 diff --git a/README.md b/README.md index dd645f0..be54ea0 100644 --- a/README.md +++ b/README.md @@ -714,7 +714,7 @@ mempalace/ │ └── mcp_setup.md ├── tests/ ← test suite (README) ├── assets/ ← logo + brand assets -└── pyproject.toml ← package config (v3.2.0) +└── pyproject.toml ← package config (v3.3.0) ``` --- @@ -742,7 +742,7 @@ PRs welcome. See [CONTRIBUTING.md](CONTRIBUTING.md) for setup and guidelines. MIT — see [LICENSE](LICENSE). -[version-shield]: https://img.shields.io/badge/version-3.2.0-4dc9f6?style=flat-square&labelColor=0a0e14 +[version-shield]: https://img.shields.io/badge/version-3.3.0-4dc9f6?style=flat-square&labelColor=0a0e14 [release-link]: https://github.com/milla-jovovich/mempalace/releases [python-shield]: https://img.shields.io/badge/python-3.9+-7dd8f8?style=flat-square&labelColor=0a0e14&logo=python&logoColor=7dd8f8 [python-link]: https://www.python.org/ diff --git a/mempalace/version.py b/mempalace/version.py index 45176bc..c299346 100644 --- a/mempalace/version.py +++ b/mempalace/version.py @@ -1,3 +1,3 @@ """Single source of truth for the MemPalace package version.""" -__version__ = "3.2.0" +__version__ = "3.3.0" diff --git a/pyproject.toml b/pyproject.toml index 8700fd1..d7df66f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "mempalace" -version = "3.2.0" +version = "3.3.0" description = "Give your AI a memory — mine projects and conversations into a searchable palace. No API key required." readme = "README.md" requires-python = ">=3.9" diff --git a/uv.lock b/uv.lock index 9d99313..413f104 100644 --- a/uv.lock +++ b/uv.lock @@ -76,6 +76,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5c/0a/a72d10ed65068e115044937873362e6e32fab1b7dce0046aeb224682c989/asgiref-3.11.1-py3-none-any.whl", hash = "sha256:e8667a091e69529631969fd45dc268fa79b99c92c5fcdda727757e52146ec133", size = 24345, upload-time = "2026-02-03T13:30:13.039Z" }, ] +[[package]] +name = "autocorrect" +version = "2.6.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/96/cb/55fd549def80011b09dbd7bef6ad06ec4453745294bcfe6c63a270070046/autocorrect-2.6.1.tar.gz", hash = "sha256:2bc68192dc645b44bece2613caac338e93548c3dac9c563095b27224c7fd4391", size = 622775, upload-time = "2021-12-04T20:33:56.928Z" } + [[package]] name = "backoff" version = "2.2.1" @@ -437,6 +443,249 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a7/06/3d6badcf13db419e25b07041d9c7b4a2c331d3f4e7134445ec5df57714cd/coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934", size = 46018, upload-time = "2021-06-11T10:22:42.561Z" }, ] +[[package]] +name = "coverage" +version = "7.10.7" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.10'", +] +sdist = { url = "https://files.pythonhosted.org/packages/51/26/d22c300112504f5f9a9fd2297ce33c35f3d353e4aeb987c8419453b2a7c2/coverage-7.10.7.tar.gz", hash = "sha256:f4ab143ab113be368a3e9b795f9cd7906c5ef407d6173fe9675a902e1fffc239", size = 827704, upload-time = "2025-09-21T20:03:56.815Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/6c/3a3f7a46888e69d18abe3ccc6fe4cb16cccb1e6a2f99698931dafca489e6/coverage-7.10.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:fc04cc7a3db33664e0c2d10eb8990ff6b3536f6842c9590ae8da4c614b9ed05a", size = 217987, upload-time = "2025-09-21T20:00:57.218Z" }, + { url = "https://files.pythonhosted.org/packages/03/94/952d30f180b1a916c11a56f5c22d3535e943aa22430e9e3322447e520e1c/coverage-7.10.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e201e015644e207139f7e2351980feb7040e6f4b2c2978892f3e3789d1c125e5", size = 218388, upload-time = "2025-09-21T20:01:00.081Z" }, + { url = "https://files.pythonhosted.org/packages/50/2b/9e0cf8ded1e114bcd8b2fd42792b57f1c4e9e4ea1824cde2af93a67305be/coverage-7.10.7-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:240af60539987ced2c399809bd34f7c78e8abe0736af91c3d7d0e795df633d17", size = 245148, upload-time = "2025-09-21T20:01:01.768Z" }, + { url = "https://files.pythonhosted.org/packages/19/20/d0384ac06a6f908783d9b6aa6135e41b093971499ec488e47279f5b846e6/coverage-7.10.7-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8421e088bc051361b01c4b3a50fd39a4b9133079a2229978d9d30511fd05231b", size = 246958, upload-time = "2025-09-21T20:01:03.355Z" }, + { url = "https://files.pythonhosted.org/packages/60/83/5c283cff3d41285f8eab897651585db908a909c572bdc014bcfaf8a8b6ae/coverage-7.10.7-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6be8ed3039ae7f7ac5ce058c308484787c86e8437e72b30bf5e88b8ea10f3c87", size = 248819, upload-time = "2025-09-21T20:01:04.968Z" }, + { url = "https://files.pythonhosted.org/packages/60/22/02eb98fdc5ff79f423e990d877693e5310ae1eab6cb20ae0b0b9ac45b23b/coverage-7.10.7-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e28299d9f2e889e6d51b1f043f58d5f997c373cc12e6403b90df95b8b047c13e", size = 245754, upload-time = "2025-09-21T20:01:06.321Z" }, + { url = "https://files.pythonhosted.org/packages/b4/bc/25c83bcf3ad141b32cd7dc45485ef3c01a776ca3aa8ef0a93e77e8b5bc43/coverage-7.10.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c4e16bd7761c5e454f4efd36f345286d6f7c5fa111623c355691e2755cae3b9e", size = 246860, upload-time = "2025-09-21T20:01:07.605Z" }, + { url = "https://files.pythonhosted.org/packages/3c/b7/95574702888b58c0928a6e982038c596f9c34d52c5e5107f1eef729399b5/coverage-7.10.7-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b1c81d0e5e160651879755c9c675b974276f135558cf4ba79fee7b8413a515df", size = 244877, upload-time = "2025-09-21T20:01:08.829Z" }, + { url = "https://files.pythonhosted.org/packages/47/b6/40095c185f235e085df0e0b158f6bd68cc6e1d80ba6c7721dc81d97ec318/coverage-7.10.7-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:606cc265adc9aaedcc84f1f064f0e8736bc45814f15a357e30fca7ecc01504e0", size = 245108, upload-time = "2025-09-21T20:01:10.527Z" }, + { url = "https://files.pythonhosted.org/packages/c8/50/4aea0556da7a4b93ec9168420d170b55e2eb50ae21b25062513d020c6861/coverage-7.10.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:10b24412692df990dbc34f8fb1b6b13d236ace9dfdd68df5b28c2e39cafbba13", size = 245752, upload-time = "2025-09-21T20:01:11.857Z" }, + { url = "https://files.pythonhosted.org/packages/6a/28/ea1a84a60828177ae3b100cb6723838523369a44ec5742313ed7db3da160/coverage-7.10.7-cp310-cp310-win32.whl", hash = "sha256:b51dcd060f18c19290d9b8a9dd1e0181538df2ce0717f562fff6cf74d9fc0b5b", size = 220497, upload-time = "2025-09-21T20:01:13.459Z" }, + { url = "https://files.pythonhosted.org/packages/fc/1a/a81d46bbeb3c3fd97b9602ebaa411e076219a150489bcc2c025f151bd52d/coverage-7.10.7-cp310-cp310-win_amd64.whl", hash = "sha256:3a622ac801b17198020f09af3eaf45666b344a0d69fc2a6ffe2ea83aeef1d807", size = 221392, upload-time = "2025-09-21T20:01:14.722Z" }, + { url = "https://files.pythonhosted.org/packages/d2/5d/c1a17867b0456f2e9ce2d8d4708a4c3a089947d0bec9c66cdf60c9e7739f/coverage-7.10.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a609f9c93113be646f44c2a0256d6ea375ad047005d7f57a5c15f614dc1b2f59", size = 218102, upload-time = "2025-09-21T20:01:16.089Z" }, + { url = "https://files.pythonhosted.org/packages/54/f0/514dcf4b4e3698b9a9077f084429681bf3aad2b4a72578f89d7f643eb506/coverage-7.10.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:65646bb0359386e07639c367a22cf9b5bf6304e8630b565d0626e2bdf329227a", size = 218505, upload-time = "2025-09-21T20:01:17.788Z" }, + { url = "https://files.pythonhosted.org/packages/20/f6/9626b81d17e2a4b25c63ac1b425ff307ecdeef03d67c9a147673ae40dc36/coverage-7.10.7-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:5f33166f0dfcce728191f520bd2692914ec70fac2713f6bf3ce59c3deacb4699", size = 248898, upload-time = "2025-09-21T20:01:19.488Z" }, + { url = "https://files.pythonhosted.org/packages/b0/ef/bd8e719c2f7417ba03239052e099b76ea1130ac0cbb183ee1fcaa58aaff3/coverage-7.10.7-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:35f5e3f9e455bb17831876048355dca0f758b6df22f49258cb5a91da23ef437d", size = 250831, upload-time = "2025-09-21T20:01:20.817Z" }, + { url = "https://files.pythonhosted.org/packages/a5/b6/bf054de41ec948b151ae2b79a55c107f5760979538f5fb80c195f2517718/coverage-7.10.7-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4da86b6d62a496e908ac2898243920c7992499c1712ff7c2b6d837cc69d9467e", size = 252937, upload-time = "2025-09-21T20:01:22.171Z" }, + { url = "https://files.pythonhosted.org/packages/0f/e5/3860756aa6f9318227443c6ce4ed7bf9e70bb7f1447a0353f45ac5c7974b/coverage-7.10.7-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:6b8b09c1fad947c84bbbc95eca841350fad9cbfa5a2d7ca88ac9f8d836c92e23", size = 249021, upload-time = "2025-09-21T20:01:23.907Z" }, + { url = "https://files.pythonhosted.org/packages/26/0f/bd08bd042854f7fd07b45808927ebcce99a7ed0f2f412d11629883517ac2/coverage-7.10.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:4376538f36b533b46f8971d3a3e63464f2c7905c9800db97361c43a2b14792ab", size = 250626, upload-time = "2025-09-21T20:01:25.721Z" }, + { url = "https://files.pythonhosted.org/packages/8e/a7/4777b14de4abcc2e80c6b1d430f5d51eb18ed1d75fca56cbce5f2db9b36e/coverage-7.10.7-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:121da30abb574f6ce6ae09840dae322bef734480ceafe410117627aa54f76d82", size = 248682, upload-time = "2025-09-21T20:01:27.105Z" }, + { url = "https://files.pythonhosted.org/packages/34/72/17d082b00b53cd45679bad682fac058b87f011fd8b9fe31d77f5f8d3a4e4/coverage-7.10.7-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:88127d40df529336a9836870436fc2751c339fbaed3a836d42c93f3e4bd1d0a2", size = 248402, upload-time = "2025-09-21T20:01:28.629Z" }, + { url = "https://files.pythonhosted.org/packages/81/7a/92367572eb5bdd6a84bfa278cc7e97db192f9f45b28c94a9ca1a921c3577/coverage-7.10.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ba58bbcd1b72f136080c0bccc2400d66cc6115f3f906c499013d065ac33a4b61", size = 249320, upload-time = "2025-09-21T20:01:30.004Z" }, + { url = "https://files.pythonhosted.org/packages/2f/88/a23cc185f6a805dfc4fdf14a94016835eeb85e22ac3a0e66d5e89acd6462/coverage-7.10.7-cp311-cp311-win32.whl", hash = "sha256:972b9e3a4094b053a4e46832b4bc829fc8a8d347160eb39d03f1690316a99c14", size = 220536, upload-time = "2025-09-21T20:01:32.184Z" }, + { url = "https://files.pythonhosted.org/packages/fe/ef/0b510a399dfca17cec7bc2f05ad8bd78cf55f15c8bc9a73ab20c5c913c2e/coverage-7.10.7-cp311-cp311-win_amd64.whl", hash = "sha256:a7b55a944a7f43892e28ad4bc0561dfd5f0d73e605d1aa5c3c976b52aea121d2", size = 221425, upload-time = "2025-09-21T20:01:33.557Z" }, + { url = "https://files.pythonhosted.org/packages/51/7f/023657f301a276e4ba1850f82749bc136f5a7e8768060c2e5d9744a22951/coverage-7.10.7-cp311-cp311-win_arm64.whl", hash = "sha256:736f227fb490f03c6488f9b6d45855f8e0fd749c007f9303ad30efab0e73c05a", size = 220103, upload-time = "2025-09-21T20:01:34.929Z" }, + { url = "https://files.pythonhosted.org/packages/13/e4/eb12450f71b542a53972d19117ea5a5cea1cab3ac9e31b0b5d498df1bd5a/coverage-7.10.7-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7bb3b9ddb87ef7725056572368040c32775036472d5a033679d1fa6c8dc08417", size = 218290, upload-time = "2025-09-21T20:01:36.455Z" }, + { url = "https://files.pythonhosted.org/packages/37/66/593f9be12fc19fb36711f19a5371af79a718537204d16ea1d36f16bd78d2/coverage-7.10.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:18afb24843cbc175687225cab1138c95d262337f5473512010e46831aa0c2973", size = 218515, upload-time = "2025-09-21T20:01:37.982Z" }, + { url = "https://files.pythonhosted.org/packages/66/80/4c49f7ae09cafdacc73fbc30949ffe77359635c168f4e9ff33c9ebb07838/coverage-7.10.7-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:399a0b6347bcd3822be369392932884b8216d0944049ae22925631a9b3d4ba4c", size = 250020, upload-time = "2025-09-21T20:01:39.617Z" }, + { url = "https://files.pythonhosted.org/packages/a6/90/a64aaacab3b37a17aaedd83e8000142561a29eb262cede42d94a67f7556b/coverage-7.10.7-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:314f2c326ded3f4b09be11bc282eb2fc861184bc95748ae67b360ac962770be7", size = 252769, upload-time = "2025-09-21T20:01:41.341Z" }, + { url = "https://files.pythonhosted.org/packages/98/2e/2dda59afd6103b342e096f246ebc5f87a3363b5412609946c120f4e7750d/coverage-7.10.7-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c41e71c9cfb854789dee6fc51e46743a6d138b1803fab6cb860af43265b42ea6", size = 253901, upload-time = "2025-09-21T20:01:43.042Z" }, + { url = "https://files.pythonhosted.org/packages/53/dc/8d8119c9051d50f3119bb4a75f29f1e4a6ab9415cd1fa8bf22fcc3fb3b5f/coverage-7.10.7-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc01f57ca26269c2c706e838f6422e2a8788e41b3e3c65e2f41148212e57cd59", size = 250413, upload-time = "2025-09-21T20:01:44.469Z" }, + { url = "https://files.pythonhosted.org/packages/98/b3/edaff9c5d79ee4d4b6d3fe046f2b1d799850425695b789d491a64225d493/coverage-7.10.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a6442c59a8ac8b85812ce33bc4d05bde3fb22321fa8294e2a5b487c3505f611b", size = 251820, upload-time = "2025-09-21T20:01:45.915Z" }, + { url = "https://files.pythonhosted.org/packages/11/25/9a0728564bb05863f7e513e5a594fe5ffef091b325437f5430e8cfb0d530/coverage-7.10.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:78a384e49f46b80fb4c901d52d92abe098e78768ed829c673fbb53c498bef73a", size = 249941, upload-time = "2025-09-21T20:01:47.296Z" }, + { url = "https://files.pythonhosted.org/packages/e0/fd/ca2650443bfbef5b0e74373aac4df67b08180d2f184b482c41499668e258/coverage-7.10.7-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:5e1e9802121405ede4b0133aa4340ad8186a1d2526de5b7c3eca519db7bb89fb", size = 249519, upload-time = "2025-09-21T20:01:48.73Z" }, + { url = "https://files.pythonhosted.org/packages/24/79/f692f125fb4299b6f963b0745124998ebb8e73ecdfce4ceceb06a8c6bec5/coverage-7.10.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d41213ea25a86f69efd1575073d34ea11aabe075604ddf3d148ecfec9e1e96a1", size = 251375, upload-time = "2025-09-21T20:01:50.529Z" }, + { url = "https://files.pythonhosted.org/packages/5e/75/61b9bbd6c7d24d896bfeec57acba78e0f8deac68e6baf2d4804f7aae1f88/coverage-7.10.7-cp312-cp312-win32.whl", hash = "sha256:77eb4c747061a6af8d0f7bdb31f1e108d172762ef579166ec84542f711d90256", size = 220699, upload-time = "2025-09-21T20:01:51.941Z" }, + { url = "https://files.pythonhosted.org/packages/ca/f3/3bf7905288b45b075918d372498f1cf845b5b579b723c8fd17168018d5f5/coverage-7.10.7-cp312-cp312-win_amd64.whl", hash = "sha256:f51328ffe987aecf6d09f3cd9d979face89a617eacdaea43e7b3080777f647ba", size = 221512, upload-time = "2025-09-21T20:01:53.481Z" }, + { url = "https://files.pythonhosted.org/packages/5c/44/3e32dbe933979d05cf2dac5e697c8599cfe038aaf51223ab901e208d5a62/coverage-7.10.7-cp312-cp312-win_arm64.whl", hash = "sha256:bda5e34f8a75721c96085903c6f2197dc398c20ffd98df33f866a9c8fd95f4bf", size = 220147, upload-time = "2025-09-21T20:01:55.2Z" }, + { url = "https://files.pythonhosted.org/packages/9a/94/b765c1abcb613d103b64fcf10395f54d69b0ef8be6a0dd9c524384892cc7/coverage-7.10.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:981a651f543f2854abd3b5fcb3263aac581b18209be49863ba575de6edf4c14d", size = 218320, upload-time = "2025-09-21T20:01:56.629Z" }, + { url = "https://files.pythonhosted.org/packages/72/4f/732fff31c119bb73b35236dd333030f32c4bfe909f445b423e6c7594f9a2/coverage-7.10.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:73ab1601f84dc804f7812dc297e93cd99381162da39c47040a827d4e8dafe63b", size = 218575, upload-time = "2025-09-21T20:01:58.203Z" }, + { url = "https://files.pythonhosted.org/packages/87/02/ae7e0af4b674be47566707777db1aa375474f02a1d64b9323e5813a6cdd5/coverage-7.10.7-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:a8b6f03672aa6734e700bbcd65ff050fd19cddfec4b031cc8cf1c6967de5a68e", size = 249568, upload-time = "2025-09-21T20:01:59.748Z" }, + { url = "https://files.pythonhosted.org/packages/a2/77/8c6d22bf61921a59bce5471c2f1f7ac30cd4ac50aadde72b8c48d5727902/coverage-7.10.7-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10b6ba00ab1132a0ce4428ff68cf50a25efd6840a42cdf4239c9b99aad83be8b", size = 252174, upload-time = "2025-09-21T20:02:01.192Z" }, + { url = "https://files.pythonhosted.org/packages/b1/20/b6ea4f69bbb52dac0aebd62157ba6a9dddbfe664f5af8122dac296c3ee15/coverage-7.10.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c79124f70465a150e89340de5963f936ee97097d2ef76c869708c4248c63ca49", size = 253447, upload-time = "2025-09-21T20:02:02.701Z" }, + { url = "https://files.pythonhosted.org/packages/f9/28/4831523ba483a7f90f7b259d2018fef02cb4d5b90bc7c1505d6e5a84883c/coverage-7.10.7-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:69212fbccdbd5b0e39eac4067e20a4a5256609e209547d86f740d68ad4f04911", size = 249779, upload-time = "2025-09-21T20:02:04.185Z" }, + { url = "https://files.pythonhosted.org/packages/a7/9f/4331142bc98c10ca6436d2d620c3e165f31e6c58d43479985afce6f3191c/coverage-7.10.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7ea7c6c9d0d286d04ed3541747e6597cbe4971f22648b68248f7ddcd329207f0", size = 251604, upload-time = "2025-09-21T20:02:06.034Z" }, + { url = "https://files.pythonhosted.org/packages/ce/60/bda83b96602036b77ecf34e6393a3836365481b69f7ed7079ab85048202b/coverage-7.10.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b9be91986841a75042b3e3243d0b3cb0b2434252b977baaf0cd56e960fe1e46f", size = 249497, upload-time = "2025-09-21T20:02:07.619Z" }, + { url = "https://files.pythonhosted.org/packages/5f/af/152633ff35b2af63977edd835d8e6430f0caef27d171edf2fc76c270ef31/coverage-7.10.7-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:b281d5eca50189325cfe1f365fafade89b14b4a78d9b40b05ddd1fc7d2a10a9c", size = 249350, upload-time = "2025-09-21T20:02:10.34Z" }, + { url = "https://files.pythonhosted.org/packages/9d/71/d92105d122bd21cebba877228990e1646d862e34a98bb3374d3fece5a794/coverage-7.10.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:99e4aa63097ab1118e75a848a28e40d68b08a5e19ce587891ab7fd04475e780f", size = 251111, upload-time = "2025-09-21T20:02:12.122Z" }, + { url = "https://files.pythonhosted.org/packages/a2/9e/9fdb08f4bf476c912f0c3ca292e019aab6712c93c9344a1653986c3fd305/coverage-7.10.7-cp313-cp313-win32.whl", hash = "sha256:dc7c389dce432500273eaf48f410b37886be9208b2dd5710aaf7c57fd442c698", size = 220746, upload-time = "2025-09-21T20:02:13.919Z" }, + { url = "https://files.pythonhosted.org/packages/b1/b1/a75fd25df44eab52d1931e89980d1ada46824c7a3210be0d3c88a44aaa99/coverage-7.10.7-cp313-cp313-win_amd64.whl", hash = "sha256:cac0fdca17b036af3881a9d2729a850b76553f3f716ccb0360ad4dbc06b3b843", size = 221541, upload-time = "2025-09-21T20:02:15.57Z" }, + { url = "https://files.pythonhosted.org/packages/14/3a/d720d7c989562a6e9a14b2c9f5f2876bdb38e9367126d118495b89c99c37/coverage-7.10.7-cp313-cp313-win_arm64.whl", hash = "sha256:4b6f236edf6e2f9ae8fcd1332da4e791c1b6ba0dc16a2dc94590ceccb482e546", size = 220170, upload-time = "2025-09-21T20:02:17.395Z" }, + { url = "https://files.pythonhosted.org/packages/bb/22/e04514bf2a735d8b0add31d2b4ab636fc02370730787c576bb995390d2d5/coverage-7.10.7-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a0ec07fd264d0745ee396b666d47cef20875f4ff2375d7c4f58235886cc1ef0c", size = 219029, upload-time = "2025-09-21T20:02:18.936Z" }, + { url = "https://files.pythonhosted.org/packages/11/0b/91128e099035ece15da3445d9015e4b4153a6059403452d324cbb0a575fa/coverage-7.10.7-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:dd5e856ebb7bfb7672b0086846db5afb4567a7b9714b8a0ebafd211ec7ce6a15", size = 219259, upload-time = "2025-09-21T20:02:20.44Z" }, + { url = "https://files.pythonhosted.org/packages/8b/51/66420081e72801536a091a0c8f8c1f88a5c4bf7b9b1bdc6222c7afe6dc9b/coverage-7.10.7-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f57b2a3c8353d3e04acf75b3fed57ba41f5c0646bbf1d10c7c282291c97936b4", size = 260592, upload-time = "2025-09-21T20:02:22.313Z" }, + { url = "https://files.pythonhosted.org/packages/5d/22/9b8d458c2881b22df3db5bb3e7369e63d527d986decb6c11a591ba2364f7/coverage-7.10.7-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:1ef2319dd15a0b009667301a3f84452a4dc6fddfd06b0c5c53ea472d3989fbf0", size = 262768, upload-time = "2025-09-21T20:02:24.287Z" }, + { url = "https://files.pythonhosted.org/packages/f7/08/16bee2c433e60913c610ea200b276e8eeef084b0d200bdcff69920bd5828/coverage-7.10.7-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:83082a57783239717ceb0ad584de3c69cf581b2a95ed6bf81ea66034f00401c0", size = 264995, upload-time = "2025-09-21T20:02:26.133Z" }, + { url = "https://files.pythonhosted.org/packages/20/9d/e53eb9771d154859b084b90201e5221bca7674ba449a17c101a5031d4054/coverage-7.10.7-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:50aa94fb1fb9a397eaa19c0d5ec15a5edd03a47bf1a3a6111a16b36e190cff65", size = 259546, upload-time = "2025-09-21T20:02:27.716Z" }, + { url = "https://files.pythonhosted.org/packages/ad/b0/69bc7050f8d4e56a89fb550a1577d5d0d1db2278106f6f626464067b3817/coverage-7.10.7-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2120043f147bebb41c85b97ac45dd173595ff14f2a584f2963891cbcc3091541", size = 262544, upload-time = "2025-09-21T20:02:29.216Z" }, + { url = "https://files.pythonhosted.org/packages/ef/4b/2514b060dbd1bc0aaf23b852c14bb5818f244c664cb16517feff6bb3a5ab/coverage-7.10.7-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:2fafd773231dd0378fdba66d339f84904a8e57a262f583530f4f156ab83863e6", size = 260308, upload-time = "2025-09-21T20:02:31.226Z" }, + { url = "https://files.pythonhosted.org/packages/54/78/7ba2175007c246d75e496f64c06e94122bdb914790a1285d627a918bd271/coverage-7.10.7-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:0b944ee8459f515f28b851728ad224fa2d068f1513ef6b7ff1efafeb2185f999", size = 258920, upload-time = "2025-09-21T20:02:32.823Z" }, + { url = "https://files.pythonhosted.org/packages/c0/b3/fac9f7abbc841409b9a410309d73bfa6cfb2e51c3fada738cb607ce174f8/coverage-7.10.7-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4b583b97ab2e3efe1b3e75248a9b333bd3f8b0b1b8e5b45578e05e5850dfb2c2", size = 261434, upload-time = "2025-09-21T20:02:34.86Z" }, + { url = "https://files.pythonhosted.org/packages/ee/51/a03bec00d37faaa891b3ff7387192cef20f01604e5283a5fabc95346befa/coverage-7.10.7-cp313-cp313t-win32.whl", hash = "sha256:2a78cd46550081a7909b3329e2266204d584866e8d97b898cd7fb5ac8d888b1a", size = 221403, upload-time = "2025-09-21T20:02:37.034Z" }, + { url = "https://files.pythonhosted.org/packages/53/22/3cf25d614e64bf6d8e59c7c669b20d6d940bb337bdee5900b9ca41c820bb/coverage-7.10.7-cp313-cp313t-win_amd64.whl", hash = "sha256:33a5e6396ab684cb43dc7befa386258acb2d7fae7f67330ebb85ba4ea27938eb", size = 222469, upload-time = "2025-09-21T20:02:39.011Z" }, + { url = "https://files.pythonhosted.org/packages/49/a1/00164f6d30d8a01c3c9c48418a7a5be394de5349b421b9ee019f380df2a0/coverage-7.10.7-cp313-cp313t-win_arm64.whl", hash = "sha256:86b0e7308289ddde73d863b7683f596d8d21c7d8664ce1dee061d0bcf3fbb4bb", size = 220731, upload-time = "2025-09-21T20:02:40.939Z" }, + { url = "https://files.pythonhosted.org/packages/23/9c/5844ab4ca6a4dd97a1850e030a15ec7d292b5c5cb93082979225126e35dd/coverage-7.10.7-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:b06f260b16ead11643a5a9f955bd4b5fd76c1a4c6796aeade8520095b75de520", size = 218302, upload-time = "2025-09-21T20:02:42.527Z" }, + { url = "https://files.pythonhosted.org/packages/f0/89/673f6514b0961d1f0e20ddc242e9342f6da21eaba3489901b565c0689f34/coverage-7.10.7-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:212f8f2e0612778f09c55dd4872cb1f64a1f2b074393d139278ce902064d5b32", size = 218578, upload-time = "2025-09-21T20:02:44.468Z" }, + { url = "https://files.pythonhosted.org/packages/05/e8/261cae479e85232828fb17ad536765c88dd818c8470aca690b0ac6feeaa3/coverage-7.10.7-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3445258bcded7d4aa630ab8296dea4d3f15a255588dd535f980c193ab6b95f3f", size = 249629, upload-time = "2025-09-21T20:02:46.503Z" }, + { url = "https://files.pythonhosted.org/packages/82/62/14ed6546d0207e6eda876434e3e8475a3e9adbe32110ce896c9e0c06bb9a/coverage-7.10.7-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:bb45474711ba385c46a0bfe696c695a929ae69ac636cda8f532be9e8c93d720a", size = 252162, upload-time = "2025-09-21T20:02:48.689Z" }, + { url = "https://files.pythonhosted.org/packages/ff/49/07f00db9ac6478e4358165a08fb41b469a1b053212e8a00cb02f0d27a05f/coverage-7.10.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:813922f35bd800dca9994c5971883cbc0d291128a5de6b167c7aa697fcf59360", size = 253517, upload-time = "2025-09-21T20:02:50.31Z" }, + { url = "https://files.pythonhosted.org/packages/a2/59/c5201c62dbf165dfbc91460f6dbbaa85a8b82cfa6131ac45d6c1bfb52deb/coverage-7.10.7-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:93c1b03552081b2a4423091d6fb3787265b8f86af404cff98d1b5342713bdd69", size = 249632, upload-time = "2025-09-21T20:02:51.971Z" }, + { url = "https://files.pythonhosted.org/packages/07/ae/5920097195291a51fb00b3a70b9bbd2edbfe3c84876a1762bd1ef1565ebc/coverage-7.10.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:cc87dd1b6eaf0b848eebb1c86469b9f72a1891cb42ac7adcfbce75eadb13dd14", size = 251520, upload-time = "2025-09-21T20:02:53.858Z" }, + { url = "https://files.pythonhosted.org/packages/b9/3c/a815dde77a2981f5743a60b63df31cb322c944843e57dbd579326625a413/coverage-7.10.7-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:39508ffda4f343c35f3236fe8d1a6634a51f4581226a1262769d7f970e73bffe", size = 249455, upload-time = "2025-09-21T20:02:55.807Z" }, + { url = "https://files.pythonhosted.org/packages/aa/99/f5cdd8421ea656abefb6c0ce92556709db2265c41e8f9fc6c8ae0f7824c9/coverage-7.10.7-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:925a1edf3d810537c5a3abe78ec5530160c5f9a26b1f4270b40e62cc79304a1e", size = 249287, upload-time = "2025-09-21T20:02:57.784Z" }, + { url = "https://files.pythonhosted.org/packages/c3/7a/e9a2da6a1fc5d007dd51fca083a663ab930a8c4d149c087732a5dbaa0029/coverage-7.10.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2c8b9a0636f94c43cd3576811e05b89aa9bc2d0a85137affc544ae5cb0e4bfbd", size = 250946, upload-time = "2025-09-21T20:02:59.431Z" }, + { url = "https://files.pythonhosted.org/packages/ef/5b/0b5799aa30380a949005a353715095d6d1da81927d6dbed5def2200a4e25/coverage-7.10.7-cp314-cp314-win32.whl", hash = "sha256:b7b8288eb7cdd268b0304632da8cb0bb93fadcfec2fe5712f7b9cc8f4d487be2", size = 221009, upload-time = "2025-09-21T20:03:01.324Z" }, + { url = "https://files.pythonhosted.org/packages/da/b0/e802fbb6eb746de006490abc9bb554b708918b6774b722bb3a0e6aa1b7de/coverage-7.10.7-cp314-cp314-win_amd64.whl", hash = "sha256:1ca6db7c8807fb9e755d0379ccc39017ce0a84dcd26d14b5a03b78563776f681", size = 221804, upload-time = "2025-09-21T20:03:03.4Z" }, + { url = "https://files.pythonhosted.org/packages/9e/e8/71d0c8e374e31f39e3389bb0bd19e527d46f00ea8571ec7ec8fd261d8b44/coverage-7.10.7-cp314-cp314-win_arm64.whl", hash = "sha256:097c1591f5af4496226d5783d036bf6fd6cd0cbc132e071b33861de756efb880", size = 220384, upload-time = "2025-09-21T20:03:05.111Z" }, + { url = "https://files.pythonhosted.org/packages/62/09/9a5608d319fa3eba7a2019addeacb8c746fb50872b57a724c9f79f146969/coverage-7.10.7-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:a62c6ef0d50e6de320c270ff91d9dd0a05e7250cac2a800b7784bae474506e63", size = 219047, upload-time = "2025-09-21T20:03:06.795Z" }, + { url = "https://files.pythonhosted.org/packages/f5/6f/f58d46f33db9f2e3647b2d0764704548c184e6f5e014bef528b7f979ef84/coverage-7.10.7-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:9fa6e4dd51fe15d8738708a973470f67a855ca50002294852e9571cdbd9433f2", size = 219266, upload-time = "2025-09-21T20:03:08.495Z" }, + { url = "https://files.pythonhosted.org/packages/74/5c/183ffc817ba68e0b443b8c934c8795553eb0c14573813415bd59941ee165/coverage-7.10.7-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:8fb190658865565c549b6b4706856d6a7b09302c797eb2cf8e7fe9dabb043f0d", size = 260767, upload-time = "2025-09-21T20:03:10.172Z" }, + { url = "https://files.pythonhosted.org/packages/0f/48/71a8abe9c1ad7e97548835e3cc1adbf361e743e9d60310c5f75c9e7bf847/coverage-7.10.7-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:affef7c76a9ef259187ef31599a9260330e0335a3011732c4b9effa01e1cd6e0", size = 262931, upload-time = "2025-09-21T20:03:11.861Z" }, + { url = "https://files.pythonhosted.org/packages/84/fd/193a8fb132acfc0a901f72020e54be5e48021e1575bb327d8ee1097a28fd/coverage-7.10.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e16e07d85ca0cf8bafe5f5d23a0b850064e8e945d5677492b06bbe6f09cc699", size = 265186, upload-time = "2025-09-21T20:03:13.539Z" }, + { url = "https://files.pythonhosted.org/packages/b1/8f/74ecc30607dd95ad50e3034221113ccb1c6d4e8085cc761134782995daae/coverage-7.10.7-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:03ffc58aacdf65d2a82bbeb1ffe4d01ead4017a21bfd0454983b88ca73af94b9", size = 259470, upload-time = "2025-09-21T20:03:15.584Z" }, + { url = "https://files.pythonhosted.org/packages/0f/55/79ff53a769f20d71b07023ea115c9167c0bb56f281320520cf64c5298a96/coverage-7.10.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1b4fd784344d4e52647fd7857b2af5b3fbe6c239b0b5fa63e94eb67320770e0f", size = 262626, upload-time = "2025-09-21T20:03:17.673Z" }, + { url = "https://files.pythonhosted.org/packages/88/e2/dac66c140009b61ac3fc13af673a574b00c16efdf04f9b5c740703e953c0/coverage-7.10.7-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:0ebbaddb2c19b71912c6f2518e791aa8b9f054985a0769bdb3a53ebbc765c6a1", size = 260386, upload-time = "2025-09-21T20:03:19.36Z" }, + { url = "https://files.pythonhosted.org/packages/a2/f1/f48f645e3f33bb9ca8a496bc4a9671b52f2f353146233ebd7c1df6160440/coverage-7.10.7-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:a2d9a3b260cc1d1dbdb1c582e63ddcf5363426a1a68faa0f5da28d8ee3c722a0", size = 258852, upload-time = "2025-09-21T20:03:21.007Z" }, + { url = "https://files.pythonhosted.org/packages/bb/3b/8442618972c51a7affeead957995cfa8323c0c9bcf8fa5a027421f720ff4/coverage-7.10.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a3cc8638b2480865eaa3926d192e64ce6c51e3d29c849e09d5b4ad95efae5399", size = 261534, upload-time = "2025-09-21T20:03:23.12Z" }, + { url = "https://files.pythonhosted.org/packages/b2/dc/101f3fa3a45146db0cb03f5b4376e24c0aac818309da23e2de0c75295a91/coverage-7.10.7-cp314-cp314t-win32.whl", hash = "sha256:67f8c5cbcd3deb7a60b3345dffc89a961a484ed0af1f6f73de91705cc6e31235", size = 221784, upload-time = "2025-09-21T20:03:24.769Z" }, + { url = "https://files.pythonhosted.org/packages/4c/a1/74c51803fc70a8a40d7346660379e144be772bab4ac7bb6e6b905152345c/coverage-7.10.7-cp314-cp314t-win_amd64.whl", hash = "sha256:e1ed71194ef6dea7ed2d5cb5f7243d4bcd334bfb63e59878519be558078f848d", size = 222905, upload-time = "2025-09-21T20:03:26.93Z" }, + { url = "https://files.pythonhosted.org/packages/12/65/f116a6d2127df30bcafbceef0302d8a64ba87488bf6f73a6d8eebf060873/coverage-7.10.7-cp314-cp314t-win_arm64.whl", hash = "sha256:7fe650342addd8524ca63d77b2362b02345e5f1a093266787d210c70a50b471a", size = 220922, upload-time = "2025-09-21T20:03:28.672Z" }, + { url = "https://files.pythonhosted.org/packages/a3/ad/d1c25053764b4c42eb294aae92ab617d2e4f803397f9c7c8295caa77a260/coverage-7.10.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fff7b9c3f19957020cac546c70025331113d2e61537f6e2441bc7657913de7d3", size = 217978, upload-time = "2025-09-21T20:03:30.362Z" }, + { url = "https://files.pythonhosted.org/packages/52/2f/b9f9daa39b80ece0b9548bbb723381e29bc664822d9a12c2135f8922c22b/coverage-7.10.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:bc91b314cef27742da486d6839b677b3f2793dfe52b51bbbb7cf736d5c29281c", size = 218370, upload-time = "2025-09-21T20:03:32.147Z" }, + { url = "https://files.pythonhosted.org/packages/dd/6e/30d006c3b469e58449650642383dddf1c8fb63d44fdf92994bfd46570695/coverage-7.10.7-cp39-cp39-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:567f5c155eda8df1d3d439d40a45a6a5f029b429b06648235f1e7e51b522b396", size = 244802, upload-time = "2025-09-21T20:03:33.919Z" }, + { url = "https://files.pythonhosted.org/packages/b0/49/8a070782ce7e6b94ff6a0b6d7c65ba6bc3091d92a92cef4cd4eb0767965c/coverage-7.10.7-cp39-cp39-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2af88deffcc8a4d5974cf2d502251bc3b2db8461f0b66d80a449c33757aa9f40", size = 246625, upload-time = "2025-09-21T20:03:36.09Z" }, + { url = "https://files.pythonhosted.org/packages/6a/92/1c1c5a9e8677ce56d42b97bdaca337b2d4d9ebe703d8c174ede52dbabd5f/coverage-7.10.7-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c7315339eae3b24c2d2fa1ed7d7a38654cba34a13ef19fbcb9425da46d3dc594", size = 248399, upload-time = "2025-09-21T20:03:38.342Z" }, + { url = "https://files.pythonhosted.org/packages/c0/54/b140edee7257e815de7426d5d9846b58505dffc29795fff2dfb7f8a1c5a0/coverage-7.10.7-cp39-cp39-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:912e6ebc7a6e4adfdbb1aec371ad04c68854cd3bf3608b3514e7ff9062931d8a", size = 245142, upload-time = "2025-09-21T20:03:40.591Z" }, + { url = "https://files.pythonhosted.org/packages/e4/9e/6d6b8295940b118e8b7083b29226c71f6154f7ff41e9ca431f03de2eac0d/coverage-7.10.7-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:f49a05acd3dfe1ce9715b657e28d138578bc40126760efb962322c56e9ca344b", size = 246284, upload-time = "2025-09-21T20:03:42.355Z" }, + { url = "https://files.pythonhosted.org/packages/db/e5/5e957ca747d43dbe4d9714358375c7546cb3cb533007b6813fc20fce37ad/coverage-7.10.7-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:cce2109b6219f22ece99db7644b9622f54a4e915dad65660ec435e89a3ea7cc3", size = 244353, upload-time = "2025-09-21T20:03:44.218Z" }, + { url = "https://files.pythonhosted.org/packages/9a/45/540fc5cc92536a1b783b7ef99450bd55a4b3af234aae35a18a339973ce30/coverage-7.10.7-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:f3c887f96407cea3916294046fc7dab611c2552beadbed4ea901cbc6a40cc7a0", size = 244430, upload-time = "2025-09-21T20:03:46.065Z" }, + { url = "https://files.pythonhosted.org/packages/75/0b/8287b2e5b38c8fe15d7e3398849bb58d382aedc0864ea0fa1820e8630491/coverage-7.10.7-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:635adb9a4507c9fd2ed65f39693fa31c9a3ee3a8e6dc64df033e8fdf52a7003f", size = 245311, upload-time = "2025-09-21T20:03:48.19Z" }, + { url = "https://files.pythonhosted.org/packages/0c/1d/29724999984740f0c86d03e6420b942439bf5bd7f54d4382cae386a9d1e9/coverage-7.10.7-cp39-cp39-win32.whl", hash = "sha256:5a02d5a850e2979b0a014c412573953995174743a3f7fa4ea5a6e9a3c5617431", size = 220500, upload-time = "2025-09-21T20:03:50.024Z" }, + { url = "https://files.pythonhosted.org/packages/43/11/4b1e6b129943f905ca54c339f343877b55b365ae2558806c1be4f7476ed5/coverage-7.10.7-cp39-cp39-win_amd64.whl", hash = "sha256:c134869d5ffe34547d14e174c866fd8fe2254918cc0a95e99052903bc1543e07", size = 221408, upload-time = "2025-09-21T20:03:51.803Z" }, + { url = "https://files.pythonhosted.org/packages/ec/16/114df1c291c22cac3b0c127a73e0af5c12ed7bbb6558d310429a0ae24023/coverage-7.10.7-py3-none-any.whl", hash = "sha256:f7941f6f2fe6dd6807a1208737b8a0cbcf1cc6d7b07d24998ad2d63590868260", size = 209952, upload-time = "2025-09-21T20:03:53.918Z" }, +] + +[package.optional-dependencies] +toml = [ + { name = "tomli", marker = "python_full_version < '3.10'" }, +] + +[[package]] +name = "coverage" +version = "7.13.5" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.14'", + "python_full_version == '3.13.*'", + "python_full_version >= '3.11' and python_full_version < '3.13'", + "python_full_version == '3.10.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/9d/e0/70553e3000e345daff267cec284ce4cbf3fc141b6da229ac52775b5428f1/coverage-7.13.5.tar.gz", hash = "sha256:c81f6515c4c40141f83f502b07bbfa5c240ba25bbe73da7b33f1e5b6120ff179", size = 915967, upload-time = "2026-03-17T10:33:18.341Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/33/e8c48488c29a73fd089f9d71f9653c1be7478f2ad6b5bc870db11a55d23d/coverage-7.13.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0723d2c96324561b9aa76fb982406e11d93cdb388a7a7da2b16e04719cf7ca5", size = 219255, upload-time = "2026-03-17T10:29:51.081Z" }, + { url = "https://files.pythonhosted.org/packages/da/bd/b0ebe9f677d7f4b74a3e115eec7ddd4bcf892074963a00d91e8b164a6386/coverage-7.13.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:52f444e86475992506b32d4e5ca55c24fc88d73bcbda0e9745095b28ef4dc0cf", size = 219772, upload-time = "2026-03-17T10:29:52.867Z" }, + { url = "https://files.pythonhosted.org/packages/48/cc/5cb9502f4e01972f54eedd48218bb203fe81e294be606a2bc93970208013/coverage-7.13.5-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:704de6328e3d612a8f6c07000a878ff38181ec3263d5a11da1db294fa6a9bdf8", size = 246532, upload-time = "2026-03-17T10:29:54.688Z" }, + { url = "https://files.pythonhosted.org/packages/7d/d8/3217636d86c7e7b12e126e4f30ef1581047da73140614523af7495ed5f2d/coverage-7.13.5-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a1a6d79a14e1ec1832cabc833898636ad5f3754a678ef8bb4908515208bf84f4", size = 248333, upload-time = "2026-03-17T10:29:56.221Z" }, + { url = "https://files.pythonhosted.org/packages/2b/30/2002ac6729ba2d4357438e2ed3c447ad8562866c8c63fc16f6dfc33afe56/coverage-7.13.5-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:79060214983769c7ba3f0cee10b54c97609dca4d478fa1aa32b914480fd5738d", size = 250211, upload-time = "2026-03-17T10:29:57.938Z" }, + { url = "https://files.pythonhosted.org/packages/6c/85/552496626d6b9359eb0e2f86f920037c9cbfba09b24d914c6e1528155f7d/coverage-7.13.5-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:356e76b46783a98c2a2fe81ec79df4883a1e62895ea952968fb253c114e7f930", size = 252125, upload-time = "2026-03-17T10:29:59.388Z" }, + { url = "https://files.pythonhosted.org/packages/44/21/40256eabdcbccdb6acf6b381b3016a154399a75fe39d406f790ae84d1f3c/coverage-7.13.5-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0cef0cdec915d11254a7f549c1170afecce708d30610c6abdded1f74e581666d", size = 247219, upload-time = "2026-03-17T10:30:01.199Z" }, + { url = "https://files.pythonhosted.org/packages/b1/e8/96e2a6c3f21a0ea77d7830b254a1542d0328acc8d7bdf6a284ba7e529f77/coverage-7.13.5-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:dc022073d063b25a402454e5712ef9e007113e3a676b96c5f29b2bda29352f40", size = 248248, upload-time = "2026-03-17T10:30:03.317Z" }, + { url = "https://files.pythonhosted.org/packages/da/ba/8477f549e554827da390ec659f3c38e4b6d95470f4daafc2d8ff94eaa9c2/coverage-7.13.5-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:9b74db26dfea4f4e50d48a4602207cd1e78be33182bc9cbf22da94f332f99878", size = 246254, upload-time = "2026-03-17T10:30:04.832Z" }, + { url = "https://files.pythonhosted.org/packages/55/59/bc22aef0e6aa179d5b1b001e8b3654785e9adf27ef24c93dc4228ebd5d68/coverage-7.13.5-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:ad146744ca4fd09b50c482650e3c1b1f4dfa1d4792e0a04a369c7f23336f0400", size = 250067, upload-time = "2026-03-17T10:30:06.535Z" }, + { url = "https://files.pythonhosted.org/packages/de/1b/c6a023a160806a5137dca53468fd97530d6acad24a22003b1578a9c2e429/coverage-7.13.5-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:c555b48be1853fe3997c11c4bd521cdd9a9612352de01fa4508f16ec341e6fe0", size = 246521, upload-time = "2026-03-17T10:30:08.486Z" }, + { url = "https://files.pythonhosted.org/packages/2d/3f/3532c85a55aa2f899fa17c186f831cfa1aa434d88ff792a709636f64130e/coverage-7.13.5-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7034b5c56a58ae5e85f23949d52c14aca2cfc6848a31764995b7de88f13a1ea0", size = 247126, upload-time = "2026-03-17T10:30:09.966Z" }, + { url = "https://files.pythonhosted.org/packages/aa/2e/b9d56af4a24ef45dfbcda88e06870cb7d57b2b0bfa3a888d79b4c8debd76/coverage-7.13.5-cp310-cp310-win32.whl", hash = "sha256:eb7fdf1ef130660e7415e0253a01a7d5a88c9c4d158bcf75cbbd922fd65a5b58", size = 221860, upload-time = "2026-03-17T10:30:11.393Z" }, + { url = "https://files.pythonhosted.org/packages/9f/cc/d938417e7a4d7f0433ad4edee8bb2acdc60dc7ac5af19e2a07a048ecbee3/coverage-7.13.5-cp310-cp310-win_amd64.whl", hash = "sha256:3e1bb5f6c78feeb1be3475789b14a0f0a5b47d505bfc7267126ccbd50289999e", size = 222788, upload-time = "2026-03-17T10:30:12.886Z" }, + { url = "https://files.pythonhosted.org/packages/4b/37/d24c8f8220ff07b839b2c043ea4903a33b0f455abe673ae3c03bbdb7f212/coverage-7.13.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66a80c616f80181f4d643b0f9e709d97bcea413ecd9631e1dedc7401c8e6695d", size = 219381, upload-time = "2026-03-17T10:30:14.68Z" }, + { url = "https://files.pythonhosted.org/packages/35/8b/cd129b0ca4afe886a6ce9d183c44d8301acbd4ef248622e7c49a23145605/coverage-7.13.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:145ede53ccbafb297c1c9287f788d1bc3efd6c900da23bf6931b09eafc931587", size = 219880, upload-time = "2026-03-17T10:30:16.231Z" }, + { url = "https://files.pythonhosted.org/packages/55/2f/e0e5b237bffdb5d6c530ce87cc1d413a5b7d7dfd60fb067ad6d254c35c76/coverage-7.13.5-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0672854dc733c342fa3e957e0605256d2bf5934feeac328da9e0b5449634a642", size = 250303, upload-time = "2026-03-17T10:30:17.748Z" }, + { url = "https://files.pythonhosted.org/packages/92/be/b1afb692be85b947f3401375851484496134c5554e67e822c35f28bf2fbc/coverage-7.13.5-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ec10e2a42b41c923c2209b846126c6582db5e43a33157e9870ba9fb70dc7854b", size = 252218, upload-time = "2026-03-17T10:30:19.804Z" }, + { url = "https://files.pythonhosted.org/packages/da/69/2f47bb6fa1b8d1e3e5d0c4be8ccb4313c63d742476a619418f85740d597b/coverage-7.13.5-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:be3d4bbad9d4b037791794ddeedd7d64a56f5933a2c1373e18e9e568b9141686", size = 254326, upload-time = "2026-03-17T10:30:21.321Z" }, + { url = "https://files.pythonhosted.org/packages/d5/d0/79db81da58965bd29dabc8f4ad2a2af70611a57cba9d1ec006f072f30a54/coverage-7.13.5-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4d2afbc5cc54d286bfb54541aa50b64cdb07a718227168c87b9e2fb8f25e1743", size = 256267, upload-time = "2026-03-17T10:30:23.094Z" }, + { url = "https://files.pythonhosted.org/packages/e5/32/d0d7cc8168f91ddab44c0ce4806b969df5f5fdfdbb568eaca2dbc2a04936/coverage-7.13.5-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3ad050321264c49c2fa67bb599100456fc51d004b82534f379d16445da40fb75", size = 250430, upload-time = "2026-03-17T10:30:25.311Z" }, + { url = "https://files.pythonhosted.org/packages/4d/06/a055311d891ddbe231cd69fdd20ea4be6e3603ffebddf8704b8ca8e10a3c/coverage-7.13.5-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7300c8a6d13335b29bb76d7651c66af6bd8658517c43499f110ddc6717bfc209", size = 252017, upload-time = "2026-03-17T10:30:27.284Z" }, + { url = "https://files.pythonhosted.org/packages/d6/f6/d0fd2d21e29a657b5f77a2fe7082e1568158340dceb941954f776dce1b7b/coverage-7.13.5-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:eb07647a5738b89baab047f14edd18ded523de60f3b30e75c2acc826f79c839a", size = 250080, upload-time = "2026-03-17T10:30:29.481Z" }, + { url = "https://files.pythonhosted.org/packages/4e/ab/0d7fb2efc2e9a5eb7ddcc6e722f834a69b454b7e6e5888c3a8567ecffb31/coverage-7.13.5-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:9adb6688e3b53adffefd4a52d72cbd8b02602bfb8f74dcd862337182fd4d1a4e", size = 253843, upload-time = "2026-03-17T10:30:31.301Z" }, + { url = "https://files.pythonhosted.org/packages/ba/6f/7467b917bbf5408610178f62a49c0ed4377bb16c1657f689cc61470da8ce/coverage-7.13.5-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:7c8d4bc913dd70b93488d6c496c77f3aff5ea99a07e36a18f865bca55adef8bd", size = 249802, upload-time = "2026-03-17T10:30:33.358Z" }, + { url = "https://files.pythonhosted.org/packages/75/2c/1172fb689df92135f5bfbbd69fc83017a76d24ea2e2f3a1154007e2fb9f8/coverage-7.13.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0e3c426ffc4cd952f54ee9ffbdd10345709ecc78a3ecfd796a57236bfad0b9b8", size = 250707, upload-time = "2026-03-17T10:30:35.2Z" }, + { url = "https://files.pythonhosted.org/packages/67/21/9ac389377380a07884e3b48ba7a620fcd9dbfaf1d40565facdc6b36ec9ef/coverage-7.13.5-cp311-cp311-win32.whl", hash = "sha256:259b69bb83ad9894c4b25be2528139eecba9a82646ebdda2d9db1ba28424a6bf", size = 221880, upload-time = "2026-03-17T10:30:36.775Z" }, + { url = "https://files.pythonhosted.org/packages/af/7f/4cd8a92531253f9d7c1bbecd9fa1b472907fb54446ca768c59b531248dc5/coverage-7.13.5-cp311-cp311-win_amd64.whl", hash = "sha256:258354455f4e86e3e9d0d17571d522e13b4e1e19bf0f8596bcf9476d61e7d8a9", size = 222816, upload-time = "2026-03-17T10:30:38.891Z" }, + { url = "https://files.pythonhosted.org/packages/12/a6/1d3f6155fb0010ca68eba7fe48ca6c9da7385058b77a95848710ecf189b1/coverage-7.13.5-cp311-cp311-win_arm64.whl", hash = "sha256:bff95879c33ec8da99fc9b6fe345ddb5be6414b41d6d1ad1c8f188d26f36e028", size = 221483, upload-time = "2026-03-17T10:30:40.463Z" }, + { url = "https://files.pythonhosted.org/packages/a0/c3/a396306ba7db865bf96fc1fb3b7fd29bcbf3d829df642e77b13555163cd6/coverage-7.13.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:460cf0114c5016fa841214ff5564aa4864f11948da9440bc97e21ad1f4ba1e01", size = 219554, upload-time = "2026-03-17T10:30:42.208Z" }, + { url = "https://files.pythonhosted.org/packages/a6/16/a68a19e5384e93f811dccc51034b1fd0b865841c390e3c931dcc4699e035/coverage-7.13.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0e223ce4b4ed47f065bfb123687686512e37629be25cc63728557ae7db261422", size = 219908, upload-time = "2026-03-17T10:30:43.906Z" }, + { url = "https://files.pythonhosted.org/packages/29/72/20b917c6793af3a5ceb7fb9c50033f3ec7865f2911a1416b34a7cfa0813b/coverage-7.13.5-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:6e3370441f4513c6252bf042b9c36d22491142385049243253c7e48398a15a9f", size = 251419, upload-time = "2026-03-17T10:30:45.545Z" }, + { url = "https://files.pythonhosted.org/packages/8c/49/cd14b789536ac6a4778c453c6a2338bc0a2fb60c5a5a41b4008328b9acc1/coverage-7.13.5-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:03ccc709a17a1de074fb1d11f217342fb0d2b1582ed544f554fc9fc3f07e95f5", size = 254159, upload-time = "2026-03-17T10:30:47.204Z" }, + { url = "https://files.pythonhosted.org/packages/9d/00/7b0edcfe64e2ed4c0340dac14a52ad0f4c9bd0b8b5e531af7d55b703db7c/coverage-7.13.5-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3f4818d065964db3c1c66dc0fbdac5ac692ecbc875555e13374fdbe7eedb4376", size = 255270, upload-time = "2026-03-17T10:30:48.812Z" }, + { url = "https://files.pythonhosted.org/packages/93/89/7ffc4ba0f5d0a55c1e84ea7cee39c9fc06af7b170513d83fbf3bbefce280/coverage-7.13.5-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:012d5319e66e9d5a218834642d6c35d265515a62f01157a45bcc036ecf947256", size = 257538, upload-time = "2026-03-17T10:30:50.77Z" }, + { url = "https://files.pythonhosted.org/packages/81/bd/73ddf85f93f7e6fa83e77ccecb6162d9415c79007b4bc124008a4995e4a7/coverage-7.13.5-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8dd02af98971bdb956363e4827d34425cb3df19ee550ef92855b0acb9c7ce51c", size = 251821, upload-time = "2026-03-17T10:30:52.5Z" }, + { url = "https://files.pythonhosted.org/packages/a0/81/278aff4e8dec4926a0bcb9486320752811f543a3ce5b602cc7a29978d073/coverage-7.13.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f08fd75c50a760c7eb068ae823777268daaf16a80b918fa58eea888f8e3919f5", size = 253191, upload-time = "2026-03-17T10:30:54.543Z" }, + { url = "https://files.pythonhosted.org/packages/70/ee/fe1621488e2e0a58d7e94c4800f0d96f79671553488d401a612bebae324b/coverage-7.13.5-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:843ea8643cf967d1ac7e8ecd4bb00c99135adf4816c0c0593fdcc47b597fcf09", size = 251337, upload-time = "2026-03-17T10:30:56.663Z" }, + { url = "https://files.pythonhosted.org/packages/37/a6/f79fb37aa104b562207cc23cb5711ab6793608e246cae1e93f26b2236ed9/coverage-7.13.5-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:9d44d7aa963820b1b971dbecd90bfe5fe8f81cff79787eb6cca15750bd2f79b9", size = 255404, upload-time = "2026-03-17T10:30:58.427Z" }, + { url = "https://files.pythonhosted.org/packages/75/f0/ed15262a58ec81ce457ceb717b7f78752a1713556b19081b76e90896e8d4/coverage-7.13.5-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:7132bed4bd7b836200c591410ae7d97bf7ae8be6fc87d160b2bd881df929e7bf", size = 250903, upload-time = "2026-03-17T10:31:00.093Z" }, + { url = "https://files.pythonhosted.org/packages/0f/e9/9129958f20e7e9d4d56d51d42ccf708d15cac355ff4ac6e736e97a9393d2/coverage-7.13.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a698e363641b98843c517817db75373c83254781426e94ada3197cabbc2c919c", size = 252780, upload-time = "2026-03-17T10:31:01.916Z" }, + { url = "https://files.pythonhosted.org/packages/a4/d7/0ad9b15812d81272db94379fe4c6df8fd17781cc7671fdfa30c76ba5ff7b/coverage-7.13.5-cp312-cp312-win32.whl", hash = "sha256:bdba0a6b8812e8c7df002d908a9a2ea3c36e92611b5708633c50869e6d922fdf", size = 222093, upload-time = "2026-03-17T10:31:03.642Z" }, + { url = "https://files.pythonhosted.org/packages/29/3d/821a9a5799fac2556bcf0bd37a70d1d11fa9e49784b6d22e92e8b2f85f18/coverage-7.13.5-cp312-cp312-win_amd64.whl", hash = "sha256:d2c87e0c473a10bffe991502eac389220533024c8082ec1ce849f4218dded810", size = 222900, upload-time = "2026-03-17T10:31:05.651Z" }, + { url = "https://files.pythonhosted.org/packages/d4/fa/2238c2ad08e35cf4f020ea721f717e09ec3152aea75d191a7faf3ef009a8/coverage-7.13.5-cp312-cp312-win_arm64.whl", hash = "sha256:bf69236a9a81bdca3bff53796237aab096cdbf8d78a66ad61e992d9dac7eb2de", size = 221515, upload-time = "2026-03-17T10:31:07.293Z" }, + { url = "https://files.pythonhosted.org/packages/74/8c/74fedc9663dcf168b0a059d4ea756ecae4da77a489048f94b5f512a8d0b3/coverage-7.13.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5ec4af212df513e399cf11610cc27063f1586419e814755ab362e50a85ea69c1", size = 219576, upload-time = "2026-03-17T10:31:09.045Z" }, + { url = "https://files.pythonhosted.org/packages/0c/c9/44fb661c55062f0818a6ffd2685c67aa30816200d5f2817543717d4b92eb/coverage-7.13.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:941617e518602e2d64942c88ec8499f7fbd49d3f6c4327d3a71d43a1973032f3", size = 219942, upload-time = "2026-03-17T10:31:10.708Z" }, + { url = "https://files.pythonhosted.org/packages/5f/13/93419671cee82b780bab7ea96b67c8ef448f5f295f36bf5031154ec9a790/coverage-7.13.5-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:da305e9937617ee95c2e39d8ff9f040e0487cbf1ac174f777ed5eddd7a7c1f26", size = 250935, upload-time = "2026-03-17T10:31:12.392Z" }, + { url = "https://files.pythonhosted.org/packages/ac/68/1666e3a4462f8202d836920114fa7a5ee9275d1fa45366d336c551a162dd/coverage-7.13.5-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:78e696e1cc714e57e8b25760b33a8b1026b7048d270140d25dafe1b0a1ee05a3", size = 253541, upload-time = "2026-03-17T10:31:14.247Z" }, + { url = "https://files.pythonhosted.org/packages/4e/5e/3ee3b835647be646dcf3c65a7c6c18f87c27326a858f72ab22c12730773d/coverage-7.13.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:02ca0eed225b2ff301c474aeeeae27d26e2537942aa0f87491d3e147e784a82b", size = 254780, upload-time = "2026-03-17T10:31:16.193Z" }, + { url = "https://files.pythonhosted.org/packages/44/b3/cb5bd1a04cfcc49ede6cd8409d80bee17661167686741e041abc7ee1b9a9/coverage-7.13.5-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:04690832cbea4e4663d9149e05dba142546ca05cb1848816760e7f58285c970a", size = 256912, upload-time = "2026-03-17T10:31:17.89Z" }, + { url = "https://files.pythonhosted.org/packages/1b/66/c1dceb7b9714473800b075f5c8a84f4588f887a90eb8645282031676e242/coverage-7.13.5-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0590e44dd2745c696a778f7bab6aa95256de2cbc8b8cff4f7db8ff09813d6969", size = 251165, upload-time = "2026-03-17T10:31:19.605Z" }, + { url = "https://files.pythonhosted.org/packages/b7/62/5502b73b97aa2e53ea22a39cf8649ff44827bef76d90bf638777daa27a9d/coverage-7.13.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d7cfad2d6d81dd298ab6b89fe72c3b7b05ec7544bdda3b707ddaecff8d25c161", size = 252908, upload-time = "2026-03-17T10:31:21.312Z" }, + { url = "https://files.pythonhosted.org/packages/7d/37/7792c2d69854397ca77a55c4646e5897c467928b0e27f2d235d83b5d08c6/coverage-7.13.5-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:e092b9499de38ae0fbfbc603a74660eb6ff3e869e507b50d85a13b6db9863e15", size = 250873, upload-time = "2026-03-17T10:31:23.565Z" }, + { url = "https://files.pythonhosted.org/packages/a3/23/bc866fb6163be52a8a9e5d708ba0d3b1283c12158cefca0a8bbb6e247a43/coverage-7.13.5-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:48c39bc4a04d983a54a705a6389512883d4a3b9862991b3617d547940e9f52b1", size = 255030, upload-time = "2026-03-17T10:31:25.58Z" }, + { url = "https://files.pythonhosted.org/packages/7d/8b/ef67e1c222ef49860701d346b8bbb70881bef283bd5f6cbba68a39a086c7/coverage-7.13.5-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:2d3807015f138ffea1ed9afeeb8624fd781703f2858b62a8dd8da5a0994c57b6", size = 250694, upload-time = "2026-03-17T10:31:27.316Z" }, + { url = "https://files.pythonhosted.org/packages/46/0d/866d1f74f0acddbb906db212e096dee77a8e2158ca5e6bb44729f9d93298/coverage-7.13.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ee2aa19e03161671ec964004fb74b2257805d9710bf14a5c704558b9d8dbaf17", size = 252469, upload-time = "2026-03-17T10:31:29.472Z" }, + { url = "https://files.pythonhosted.org/packages/7a/f5/be742fec31118f02ce42b21c6af187ad6a344fed546b56ca60caacc6a9a0/coverage-7.13.5-cp313-cp313-win32.whl", hash = "sha256:ce1998c0483007608c8382f4ff50164bfc5bd07a2246dd272aa4043b75e61e85", size = 222112, upload-time = "2026-03-17T10:31:31.526Z" }, + { url = "https://files.pythonhosted.org/packages/66/40/7732d648ab9d069a46e686043241f01206348e2bbf128daea85be4d6414b/coverage-7.13.5-cp313-cp313-win_amd64.whl", hash = "sha256:631efb83f01569670a5e866ceb80fe483e7c159fac6f167e6571522636104a0b", size = 222923, upload-time = "2026-03-17T10:31:33.633Z" }, + { url = "https://files.pythonhosted.org/packages/48/af/fea819c12a095781f6ccd504890aaddaf88b8fab263c4940e82c7b770124/coverage-7.13.5-cp313-cp313-win_arm64.whl", hash = "sha256:f4cd16206ad171cbc2470dbea9103cf9a7607d5fe8c242fdf1edf36174020664", size = 221540, upload-time = "2026-03-17T10:31:35.445Z" }, + { url = "https://files.pythonhosted.org/packages/23/d2/17879af479df7fbbd44bd528a31692a48f6b25055d16482fdf5cdb633805/coverage-7.13.5-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0428cbef5783ad91fe240f673cc1f76b25e74bbfe1a13115e4aa30d3f538162d", size = 220262, upload-time = "2026-03-17T10:31:37.184Z" }, + { url = "https://files.pythonhosted.org/packages/5b/4c/d20e554f988c8f91d6a02c5118f9abbbf73a8768a3048cb4962230d5743f/coverage-7.13.5-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e0b216a19534b2427cc201a26c25da4a48633f29a487c61258643e89d28200c0", size = 220617, upload-time = "2026-03-17T10:31:39.245Z" }, + { url = "https://files.pythonhosted.org/packages/29/9c/f9f5277b95184f764b24e7231e166dfdb5780a46d408a2ac665969416d61/coverage-7.13.5-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:972a9cd27894afe4bc2b1480107054e062df08e671df7c2f18c205e805ccd806", size = 261912, upload-time = "2026-03-17T10:31:41.324Z" }, + { url = "https://files.pythonhosted.org/packages/d5/f6/7f1ab39393eeb50cfe4747ae8ef0e4fc564b989225aa1152e13a180d74f8/coverage-7.13.5-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:4b59148601efcd2bac8c4dbf1f0ad6391693ccf7a74b8205781751637076aee3", size = 263987, upload-time = "2026-03-17T10:31:43.724Z" }, + { url = "https://files.pythonhosted.org/packages/a0/d7/62c084fb489ed9c6fbdf57e006752e7c516ea46fd690e5ed8b8617c7d52e/coverage-7.13.5-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:505d7083c8b0c87a8fa8c07370c285847c1f77739b22e299ad75a6af6c32c5c9", size = 266416, upload-time = "2026-03-17T10:31:45.769Z" }, + { url = "https://files.pythonhosted.org/packages/a9/f6/df63d8660e1a0bff6125947afda112a0502736f470d62ca68b288ea762d8/coverage-7.13.5-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:60365289c3741e4db327e7baff2a4aaacf22f788e80fa4683393891b70a89fbd", size = 267558, upload-time = "2026-03-17T10:31:48.293Z" }, + { url = "https://files.pythonhosted.org/packages/5b/02/353ca81d36779bd108f6d384425f7139ac3c58c750dcfaafe5d0bee6436b/coverage-7.13.5-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:1b88c69c8ef5d4b6fe7dea66d6636056a0f6a7527c440e890cf9259011f5e606", size = 261163, upload-time = "2026-03-17T10:31:50.125Z" }, + { url = "https://files.pythonhosted.org/packages/2c/16/2e79106d5749bcaf3aee6d309123548e3276517cd7851faa8da213bc61bf/coverage-7.13.5-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:5b13955d31d1633cf9376908089b7cebe7d15ddad7aeaabcbe969a595a97e95e", size = 263981, upload-time = "2026-03-17T10:31:51.961Z" }, + { url = "https://files.pythonhosted.org/packages/29/c7/c29e0c59ffa6942030ae6f50b88ae49988e7e8da06de7ecdbf49c6d4feae/coverage-7.13.5-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:f70c9ab2595c56f81a89620e22899eea8b212a4041bd728ac6f4a28bf5d3ddd0", size = 261604, upload-time = "2026-03-17T10:31:53.872Z" }, + { url = "https://files.pythonhosted.org/packages/40/48/097cdc3db342f34006a308ab41c3a7c11c3f0d84750d340f45d88a782e00/coverage-7.13.5-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:084b84a8c63e8d6fc7e3931b316a9bcafca1458d753c539db82d31ed20091a87", size = 265321, upload-time = "2026-03-17T10:31:55.997Z" }, + { url = "https://files.pythonhosted.org/packages/bb/1f/4994af354689e14fd03a75f8ec85a9a68d94e0188bbdab3fc1516b55e512/coverage-7.13.5-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:ad14385487393e386e2ea988b09d62dd42c397662ac2dabc3832d71253eee479", size = 260502, upload-time = "2026-03-17T10:31:58.308Z" }, + { url = "https://files.pythonhosted.org/packages/22/c6/9bb9ef55903e628033560885f5c31aa227e46878118b63ab15dc7ba87797/coverage-7.13.5-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:7f2c47b36fe7709a6e83bfadf4eefb90bd25fbe4014d715224c4316f808e59a2", size = 262688, upload-time = "2026-03-17T10:32:00.141Z" }, + { url = "https://files.pythonhosted.org/packages/14/4f/f5df9007e50b15e53e01edea486814783a7f019893733d9e4d6caad75557/coverage-7.13.5-cp313-cp313t-win32.whl", hash = "sha256:67e9bc5449801fad0e5dff329499fb090ba4c5800b86805c80617b4e29809b2a", size = 222788, upload-time = "2026-03-17T10:32:02.246Z" }, + { url = "https://files.pythonhosted.org/packages/e1/98/aa7fccaa97d0f3192bec013c4e6fd6d294a6ed44b640e6bb61f479e00ed5/coverage-7.13.5-cp313-cp313t-win_amd64.whl", hash = "sha256:da86cdcf10d2519e10cabb8ac2de03da1bcb6e4853790b7fbd48523332e3a819", size = 223851, upload-time = "2026-03-17T10:32:04.416Z" }, + { url = "https://files.pythonhosted.org/packages/3d/8b/e5c469f7352651e5f013198e9e21f97510b23de957dd06a84071683b4b60/coverage-7.13.5-cp313-cp313t-win_arm64.whl", hash = "sha256:0ecf12ecb326fe2c339d93fc131816f3a7367d223db37817208905c89bded911", size = 222104, upload-time = "2026-03-17T10:32:06.65Z" }, + { url = "https://files.pythonhosted.org/packages/8e/77/39703f0d1d4b478bfd30191d3c14f53caf596fac00efb3f8f6ee23646439/coverage-7.13.5-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:fbabfaceaeb587e16f7008f7795cd80d20ec548dc7f94fbb0d4ec2e038ce563f", size = 219621, upload-time = "2026-03-17T10:32:08.589Z" }, + { url = "https://files.pythonhosted.org/packages/e2/3e/51dff36d99ae14639a133d9b164d63e628532e2974d8b1edb99dd1ebc733/coverage-7.13.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:9bb2a28101a443669a423b665939381084412b81c3f8c0fcfbac57f4e30b5b8e", size = 219953, upload-time = "2026-03-17T10:32:10.507Z" }, + { url = "https://files.pythonhosted.org/packages/6a/6c/1f1917b01eb647c2f2adc9962bd66c79eb978951cab61bdc1acab3290c07/coverage-7.13.5-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bd3a2fbc1c6cccb3c5106140d87cc6a8715110373ef42b63cf5aea29df8c217a", size = 250992, upload-time = "2026-03-17T10:32:12.41Z" }, + { url = "https://files.pythonhosted.org/packages/22/e5/06b1f88f42a5a99df42ce61208bdec3bddb3d261412874280a19796fc09c/coverage-7.13.5-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6c36ddb64ed9d7e496028d1d00dfec3e428e0aabf4006583bb1839958d280510", size = 253503, upload-time = "2026-03-17T10:32:14.449Z" }, + { url = "https://files.pythonhosted.org/packages/80/28/2a148a51e5907e504fa7b85490277734e6771d8844ebcc48764a15e28155/coverage-7.13.5-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:380e8e9084d8eb38db3a9176a1a4f3c0082c3806fa0dc882d1d87abc3c789247", size = 254852, upload-time = "2026-03-17T10:32:16.56Z" }, + { url = "https://files.pythonhosted.org/packages/61/77/50e8d3d85cc0b7ebe09f30f151d670e302c7ff4a1bf6243f71dd8b0981fa/coverage-7.13.5-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e808af52a0513762df4d945ea164a24b37f2f518cbe97e03deaa0ee66139b4d6", size = 257161, upload-time = "2026-03-17T10:32:19.004Z" }, + { url = "https://files.pythonhosted.org/packages/3b/c4/b5fd1d4b7bf8d0e75d997afd3925c59ba629fc8616f1b3aae7605132e256/coverage-7.13.5-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e301d30dd7e95ae068671d746ba8c34e945a82682e62918e41b2679acd2051a0", size = 251021, upload-time = "2026-03-17T10:32:21.344Z" }, + { url = "https://files.pythonhosted.org/packages/f8/66/6ea21f910e92d69ef0b1c3346ea5922a51bad4446c9126db2ae96ee24c4c/coverage-7.13.5-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:800bc829053c80d240a687ceeb927a94fd108bbdc68dfbe505d0d75ab578a882", size = 252858, upload-time = "2026-03-17T10:32:23.506Z" }, + { url = "https://files.pythonhosted.org/packages/9e/ea/879c83cb5d61aa2a35fb80e72715e92672daef8191b84911a643f533840c/coverage-7.13.5-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:0b67af5492adb31940ee418a5a655c28e48165da5afab8c7fa6fd72a142f8740", size = 250823, upload-time = "2026-03-17T10:32:25.516Z" }, + { url = "https://files.pythonhosted.org/packages/8a/fb/616d95d3adb88b9803b275580bdeee8bd1b69a886d057652521f83d7322f/coverage-7.13.5-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c9136ff29c3a91e25b1d1552b5308e53a1e0653a23e53b6366d7c2dcbbaf8a16", size = 255099, upload-time = "2026-03-17T10:32:27.944Z" }, + { url = "https://files.pythonhosted.org/packages/1c/93/25e6917c90ec1c9a56b0b26f6cad6408e5f13bb6b35d484a0d75c9cf000d/coverage-7.13.5-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:cff784eef7f0b8f6cb28804fbddcfa99f89efe4cc35fb5627e3ac58f91ed3ac0", size = 250638, upload-time = "2026-03-17T10:32:29.914Z" }, + { url = "https://files.pythonhosted.org/packages/fc/7b/dc1776b0464145a929deed214aef9fb1493f159b59ff3c7eeeedf91eddd0/coverage-7.13.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:68a4953be99b17ac3c23b6efbc8a38330d99680c9458927491d18700ef23ded0", size = 252295, upload-time = "2026-03-17T10:32:31.981Z" }, + { url = "https://files.pythonhosted.org/packages/ea/fb/99cbbc56a26e07762a2740713f3c8f9f3f3106e3a3dd8cc4474954bccd34/coverage-7.13.5-cp314-cp314-win32.whl", hash = "sha256:35a31f2b1578185fbe6aa2e74cea1b1d0bbf4c552774247d9160d29b80ed56cc", size = 222360, upload-time = "2026-03-17T10:32:34.233Z" }, + { url = "https://files.pythonhosted.org/packages/8d/b7/4758d4f73fb536347cc5e4ad63662f9d60ba9118cb6785e9616b2ce5d7fa/coverage-7.13.5-cp314-cp314-win_amd64.whl", hash = "sha256:2aa055ae1857258f9e0045be26a6d62bdb47a72448b62d7b55f4820f361a2633", size = 223174, upload-time = "2026-03-17T10:32:36.369Z" }, + { url = "https://files.pythonhosted.org/packages/2c/f2/24d84e1dfe70f8ac9fdf30d338239860d0d1d5da0bda528959d0ebc9da28/coverage-7.13.5-cp314-cp314-win_arm64.whl", hash = "sha256:1b11eef33edeae9d142f9b4358edb76273b3bfd30bc3df9a4f95d0e49caf94e8", size = 221739, upload-time = "2026-03-17T10:32:38.736Z" }, + { url = "https://files.pythonhosted.org/packages/60/5b/4a168591057b3668c2428bff25dd3ebc21b629d666d90bcdfa0217940e84/coverage-7.13.5-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:10a0c37f0b646eaff7cce1874c31d1f1ccb297688d4c747291f4f4c70741cc8b", size = 220351, upload-time = "2026-03-17T10:32:41.196Z" }, + { url = "https://files.pythonhosted.org/packages/f5/21/1fd5c4dbfe4a58b6b99649125635df46decdfd4a784c3cd6d410d303e370/coverage-7.13.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b5db73ba3c41c7008037fa731ad5459fc3944cb7452fc0aa9f822ad3533c583c", size = 220612, upload-time = "2026-03-17T10:32:43.204Z" }, + { url = "https://files.pythonhosted.org/packages/d6/fe/2a924b3055a5e7e4512655a9d4609781b0d62334fa0140c3e742926834e2/coverage-7.13.5-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:750db93a81e3e5a9831b534be7b1229df848b2e125a604fe6651e48aa070e5f9", size = 261985, upload-time = "2026-03-17T10:32:45.514Z" }, + { url = "https://files.pythonhosted.org/packages/d7/0d/c8928f2bd518c45990fe1a2ab8db42e914ef9b726c975facc4282578c3eb/coverage-7.13.5-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9ddb4f4a5479f2539644be484da179b653273bca1a323947d48ab107b3ed1f29", size = 264107, upload-time = "2026-03-17T10:32:47.971Z" }, + { url = "https://files.pythonhosted.org/packages/ef/ae/4ae35bbd9a0af9d820362751f0766582833c211224b38665c0f8de3d487f/coverage-7.13.5-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d8a7a2049c14f413163e2bdabd37e41179b1d1ccb10ffc6ccc4b7a718429c607", size = 266513, upload-time = "2026-03-17T10:32:50.1Z" }, + { url = "https://files.pythonhosted.org/packages/9c/20/d326174c55af36f74eac6ae781612d9492f060ce8244b570bb9d50d9d609/coverage-7.13.5-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e1c85e0b6c05c592ea6d8768a66a254bfb3874b53774b12d4c89c481eb78cb90", size = 267650, upload-time = "2026-03-17T10:32:52.391Z" }, + { url = "https://files.pythonhosted.org/packages/7a/5e/31484d62cbd0eabd3412e30d74386ece4a0837d4f6c3040a653878bfc019/coverage-7.13.5-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:777c4d1eff1b67876139d24288aaf1817f6c03d6bae9c5cc8d27b83bcfe38fe3", size = 261089, upload-time = "2026-03-17T10:32:54.544Z" }, + { url = "https://files.pythonhosted.org/packages/e9/d8/49a72d6de146eebb0b7e48cc0f4bc2c0dd858e3d4790ab2b39a2872b62bd/coverage-7.13.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6697e29b93707167687543480a40f0db8f356e86d9f67ddf2e37e2dfd91a9dab", size = 263982, upload-time = "2026-03-17T10:32:56.803Z" }, + { url = "https://files.pythonhosted.org/packages/06/3b/0351f1bd566e6e4dd39e978efe7958bde1d32f879e85589de147654f57bb/coverage-7.13.5-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:8fdf453a942c3e4d99bd80088141c4c6960bb232c409d9c3558e2dbaa3998562", size = 261579, upload-time = "2026-03-17T10:32:59.466Z" }, + { url = "https://files.pythonhosted.org/packages/5d/ce/796a2a2f4017f554d7810f5c573449b35b1e46788424a548d4d19201b222/coverage-7.13.5-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:32ca0c0114c9834a43f045a87dcebd69d108d8ffb666957ea65aa132f50332e2", size = 265316, upload-time = "2026-03-17T10:33:01.847Z" }, + { url = "https://files.pythonhosted.org/packages/3d/16/d5ae91455541d1a78bc90abf495be600588aff8f6db5c8b0dae739fa39c9/coverage-7.13.5-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:8769751c10f339021e2638cd354e13adeac54004d1941119b2c96fe5276d45ea", size = 260427, upload-time = "2026-03-17T10:33:03.945Z" }, + { url = "https://files.pythonhosted.org/packages/48/11/07f413dba62db21fb3fad5d0de013a50e073cc4e2dc4306e770360f6dfc8/coverage-7.13.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:cec2d83125531bd153175354055cdb7a09987af08a9430bd173c937c6d0fba2a", size = 262745, upload-time = "2026-03-17T10:33:06.285Z" }, + { url = "https://files.pythonhosted.org/packages/91/15/d792371332eb4663115becf4bad47e047d16234b1aff687b1b18c58d60ae/coverage-7.13.5-cp314-cp314t-win32.whl", hash = "sha256:0cd9ed7a8b181775459296e402ca4fb27db1279740a24e93b3b41942ebe4b215", size = 223146, upload-time = "2026-03-17T10:33:08.756Z" }, + { url = "https://files.pythonhosted.org/packages/db/51/37221f59a111dca5e85be7dbf09696323b5b9f13ff65e0641d535ed06ea8/coverage-7.13.5-cp314-cp314t-win_amd64.whl", hash = "sha256:301e3b7dfefecaca37c9f1aa6f0049b7d4ab8dd933742b607765d757aca77d43", size = 224254, upload-time = "2026-03-17T10:33:11.174Z" }, + { url = "https://files.pythonhosted.org/packages/54/83/6acacc889de8987441aa7d5adfbdbf33d288dad28704a67e574f1df9bcbb/coverage-7.13.5-cp314-cp314t-win_arm64.whl", hash = "sha256:9dacc2ad679b292709e0f5fc1ac74a6d4d5562e424058962c7bb0c658ad25e45", size = 222276, upload-time = "2026-03-17T10:33:13.466Z" }, + { url = "https://files.pythonhosted.org/packages/9e/ee/a4cf96b8ce1e566ed238f0659ac2d3f007ed1d14b181bcb684e19561a69a/coverage-7.13.5-py3-none-any.whl", hash = "sha256:34b02417cf070e173989b3db962f7ed56d2f644307b2cf9d5a0f258e13084a61", size = 211346, upload-time = "2026-03-17T10:33:15.691Z" }, +] + +[package.optional-dependencies] +toml = [ + { name = "tomli", marker = "python_full_version >= '3.10' and python_full_version <= '3.11'" }, +] + [[package]] name = "distro" version = "1.9.0" @@ -959,7 +1208,7 @@ wheels = [ [[package]] name = "mempalace" -version = "3.0.0" +version = "3.3.0" source = { editable = "." } dependencies = [ { name = "chromadb" }, @@ -968,30 +1217,42 @@ dependencies = [ [package.optional-dependencies] dev = [ + { name = "psutil" }, { name = "pytest", version = "8.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "pytest", version = "9.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "pytest-cov" }, { name = "ruff" }, ] +spellcheck = [ + { name = "autocorrect" }, +] [package.dev-dependencies] dev = [ + { name = "psutil" }, { name = "pytest", version = "8.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "pytest", version = "9.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "pytest-cov" }, { name = "ruff" }, ] [package.metadata] requires-dist = [ - { name = "chromadb", specifier = ">=0.4.0,<1" }, + { name = "autocorrect", marker = "extra == 'spellcheck'", specifier = ">=2.0" }, + { name = "chromadb", specifier = ">=0.5.0,<0.7" }, + { name = "psutil", marker = "extra == 'dev'", specifier = ">=5.9" }, { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0" }, - { name = "pyyaml", specifier = ">=6.0" }, + { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.0" }, + { name = "pyyaml", specifier = ">=6.0,<7" }, { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.4.0" }, ] -provides-extras = ["dev"] +provides-extras = ["dev", "spellcheck"] [package.metadata.requires-dev] dev = [ + { name = "psutil", specifier = ">=5.9" }, { name = "pytest", specifier = ">=7.0" }, + { name = "pytest-cov", specifier = ">=4.0" }, { name = "ruff", specifier = ">=0.4.0" }, ] @@ -2000,6 +2261,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c4/72/02445137af02769918a93807b2b7890047c32bfb9f90371cbc12688819eb/protobuf-6.33.6-py3-none-any.whl", hash = "sha256:77179e006c476e69bf8e8ce866640091ec42e1beb80b213c3900006ecfba6901", size = 170656, upload-time = "2026-03-18T19:04:59.826Z" }, ] +[[package]] +name = "psutil" +version = "7.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/aa/c6/d1ddf4abb55e93cebc4f2ed8b5d6dbad109ecb8d63748dd2b20ab5e57ebe/psutil-7.2.2.tar.gz", hash = "sha256:0746f5f8d406af344fd547f1c8daa5f5c33dbc293bb8d6a16d80b4bb88f59372", size = 493740, upload-time = "2026-01-28T18:14:54.428Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/51/08/510cbdb69c25a96f4ae523f733cdc963ae654904e8db864c07585ef99875/psutil-7.2.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2edccc433cbfa046b980b0df0171cd25bcaeb3a68fe9022db0979e7aa74a826b", size = 130595, upload-time = "2026-01-28T18:14:57.293Z" }, + { url = "https://files.pythonhosted.org/packages/d6/f5/97baea3fe7a5a9af7436301f85490905379b1c6f2dd51fe3ecf24b4c5fbf/psutil-7.2.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e78c8603dcd9a04c7364f1a3e670cea95d51ee865e4efb3556a3a63adef958ea", size = 131082, upload-time = "2026-01-28T18:14:59.732Z" }, + { url = "https://files.pythonhosted.org/packages/37/d6/246513fbf9fa174af531f28412297dd05241d97a75911ac8febefa1a53c6/psutil-7.2.2-cp313-cp313t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1a571f2330c966c62aeda00dd24620425d4b0cc86881c89861fbc04549e5dc63", size = 181476, upload-time = "2026-01-28T18:15:01.884Z" }, + { url = "https://files.pythonhosted.org/packages/b8/b5/9182c9af3836cca61696dabe4fd1304e17bc56cb62f17439e1154f225dd3/psutil-7.2.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:917e891983ca3c1887b4ef36447b1e0873e70c933afc831c6b6da078ba474312", size = 184062, upload-time = "2026-01-28T18:15:04.436Z" }, + { url = "https://files.pythonhosted.org/packages/16/ba/0756dca669f5a9300d0cbcbfae9a4c30e446dfc7440ffe43ded5724bfd93/psutil-7.2.2-cp313-cp313t-win_amd64.whl", hash = "sha256:ab486563df44c17f5173621c7b198955bd6b613fb87c71c161f827d3fb149a9b", size = 139893, upload-time = "2026-01-28T18:15:06.378Z" }, + { url = "https://files.pythonhosted.org/packages/1c/61/8fa0e26f33623b49949346de05ec1ddaad02ed8ba64af45f40a147dbfa97/psutil-7.2.2-cp313-cp313t-win_arm64.whl", hash = "sha256:ae0aefdd8796a7737eccea863f80f81e468a1e4cf14d926bd9b6f5f2d5f90ca9", size = 135589, upload-time = "2026-01-28T18:15:08.03Z" }, + { url = "https://files.pythonhosted.org/packages/81/69/ef179ab5ca24f32acc1dac0c247fd6a13b501fd5534dbae0e05a1c48b66d/psutil-7.2.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:eed63d3b4d62449571547b60578c5b2c4bcccc5387148db46e0c2313dad0ee00", size = 130664, upload-time = "2026-01-28T18:15:09.469Z" }, + { url = "https://files.pythonhosted.org/packages/7b/64/665248b557a236d3fa9efc378d60d95ef56dd0a490c2cd37dafc7660d4a9/psutil-7.2.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7b6d09433a10592ce39b13d7be5a54fbac1d1228ed29abc880fb23df7cb694c9", size = 131087, upload-time = "2026-01-28T18:15:11.724Z" }, + { url = "https://files.pythonhosted.org/packages/d5/2e/e6782744700d6759ebce3043dcfa661fb61e2fb752b91cdeae9af12c2178/psutil-7.2.2-cp314-cp314t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fa4ecf83bcdf6e6c8f4449aff98eefb5d0604bf88cb883d7da3d8d2d909546a", size = 182383, upload-time = "2026-01-28T18:15:13.445Z" }, + { url = "https://files.pythonhosted.org/packages/57/49/0a41cefd10cb7505cdc04dab3eacf24c0c2cb158a998b8c7b1d27ee2c1f5/psutil-7.2.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e452c464a02e7dc7822a05d25db4cde564444a67e58539a00f929c51eddda0cf", size = 185210, upload-time = "2026-01-28T18:15:16.002Z" }, + { url = "https://files.pythonhosted.org/packages/dd/2c/ff9bfb544f283ba5f83ba725a3c5fec6d6b10b8f27ac1dc641c473dc390d/psutil-7.2.2-cp314-cp314t-win_amd64.whl", hash = "sha256:c7663d4e37f13e884d13994247449e9f8f574bc4655d509c3b95e9ec9e2b9dc1", size = 141228, upload-time = "2026-01-28T18:15:18.385Z" }, + { url = "https://files.pythonhosted.org/packages/f2/fc/f8d9c31db14fcec13748d373e668bc3bed94d9077dbc17fb0eebc073233c/psutil-7.2.2-cp314-cp314t-win_arm64.whl", hash = "sha256:11fe5a4f613759764e79c65cf11ebdf26e33d6dd34336f8a337aa2996d71c841", size = 136284, upload-time = "2026-01-28T18:15:19.912Z" }, + { url = "https://files.pythonhosted.org/packages/e7/36/5ee6e05c9bd427237b11b3937ad82bb8ad2752d72c6969314590dd0c2f6e/psutil-7.2.2-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:ed0cace939114f62738d808fdcecd4c869222507e266e574799e9c0faa17d486", size = 129090, upload-time = "2026-01-28T18:15:22.168Z" }, + { url = "https://files.pythonhosted.org/packages/80/c4/f5af4c1ca8c1eeb2e92ccca14ce8effdeec651d5ab6053c589b074eda6e1/psutil-7.2.2-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:1a7b04c10f32cc88ab39cbf606e117fd74721c831c98a27dc04578deb0c16979", size = 129859, upload-time = "2026-01-28T18:15:23.795Z" }, + { url = "https://files.pythonhosted.org/packages/b5/70/5d8df3b09e25bce090399cf48e452d25c935ab72dad19406c77f4e828045/psutil-7.2.2-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:076a2d2f923fd4821644f5ba89f059523da90dc9014e85f8e45a5774ca5bc6f9", size = 155560, upload-time = "2026-01-28T18:15:25.976Z" }, + { url = "https://files.pythonhosted.org/packages/63/65/37648c0c158dc222aba51c089eb3bdfa238e621674dc42d48706e639204f/psutil-7.2.2-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b0726cecd84f9474419d67252add4ac0cd9811b04d61123054b9fb6f57df6e9e", size = 156997, upload-time = "2026-01-28T18:15:27.794Z" }, + { url = "https://files.pythonhosted.org/packages/8e/13/125093eadae863ce03c6ffdbae9929430d116a246ef69866dad94da3bfbc/psutil-7.2.2-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:fd04ef36b4a6d599bbdb225dd1d3f51e00105f6d48a28f006da7f9822f2606d8", size = 148972, upload-time = "2026-01-28T18:15:29.342Z" }, + { url = "https://files.pythonhosted.org/packages/04/78/0acd37ca84ce3ddffaa92ef0f571e073faa6d8ff1f0559ab1272188ea2be/psutil-7.2.2-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b58fabe35e80b264a4e3bb23e6b96f9e45a3df7fb7eed419ac0e5947c61e47cc", size = 148266, upload-time = "2026-01-28T18:15:31.597Z" }, + { url = "https://files.pythonhosted.org/packages/b4/90/e2159492b5426be0c1fef7acba807a03511f97c5f86b3caeda6ad92351a7/psutil-7.2.2-cp37-abi3-win_amd64.whl", hash = "sha256:eb7e81434c8d223ec4a219b5fc1c47d0417b12be7ea866e24fb5ad6e84b3d988", size = 137737, upload-time = "2026-01-28T18:15:33.849Z" }, + { url = "https://files.pythonhosted.org/packages/8c/c7/7bb2e321574b10df20cbde462a94e2b71d05f9bbda251ef27d104668306a/psutil-7.2.2-cp37-abi3-win_arm64.whl", hash = "sha256:8c233660f575a5a89e6d4cb65d9f938126312bca76d8fe087b947b3a1aaac9ee", size = 134617, upload-time = "2026-01-28T18:15:36.514Z" }, +] + [[package]] name = "pydantic" version = "2.12.5" @@ -2230,6 +2519,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" }, ] +[[package]] +name = "pytest-cov" +version = "7.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "coverage", version = "7.10.7", source = { registry = "https://pypi.org/simple" }, extra = ["toml"], marker = "python_full_version < '3.10'" }, + { name = "coverage", version = "7.13.5", source = { registry = "https://pypi.org/simple" }, extra = ["toml"], marker = "python_full_version >= '3.10'" }, + { name = "pluggy" }, + { name = "pytest", version = "8.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "pytest", version = "9.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/51/a849f96e117386044471c8ec2bd6cfebacda285da9525c9106aeb28da671/pytest_cov-7.1.0.tar.gz", hash = "sha256:30674f2b5f6351aa09702a9c8c364f6a01c27aae0c1366ae8016160d1efc56b2", size = 55592, upload-time = "2026-03-21T20:11:16.284Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9d/7a/d968e294073affff457b041c2be9868a40c1c71f4a35fcc1e45e5493067b/pytest_cov-7.1.0-py3-none-any.whl", hash = "sha256:a0461110b7865f9a271aa1b51e516c9a95de9d696734a2f71e3e78f46e1d4678", size = 22876, upload-time = "2026-03-21T20:11:14.438Z" }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" From 045023f4494afb1a5c0ba71ff112293b17384b96 Mon Sep 17 00:00:00 2001 From: Milla J <232237854+milla-jovovich@users.noreply.github.com> Date: Mon, 13 Apr 2026 18:09:59 -0700 Subject: [PATCH 34/42] fix: save hook auto-mines transcript without MEMPAL_DIR (#840) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TDD: test written first, failed, then fixed. Problem: save hook says "saved in background" but MEMPAL_DIR defaults to empty, so nothing actually mines. Users get no auto-save despite the hook firing every 15 messages. Fix: use TRANSCRIPT_PATH (received from Claude Code in the hook's JSON input) to discover the session directory. Mine that directory automatically. MEMPAL_DIR is still supported as override but no longer required. Also fixed: bare python3 → $(command -v python3) for nohup safety. Co-authored-by: Claude Opus 4.6 (1M context) --- hooks/mempal_save_hook.sh | 17 ++++++--- tests/test_save_hook_mines.py | 68 +++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 4 deletions(-) create mode 100644 tests/test_save_hook_mines.py diff --git a/hooks/mempal_save_hook.sh b/hooks/mempal_save_hook.sh index b15d961..9eda976 100755 --- a/hooks/mempal_save_hook.sh +++ b/hooks/mempal_save_hook.sh @@ -133,11 +133,20 @@ if [ "$SINCE_LAST" -ge "$SAVE_INTERVAL" ] && [ "$EXCHANGE_COUNT" -gt 0 ]; then echo "[$(date '+%H:%M:%S')] TRIGGERING SAVE at exchange $EXCHANGE_COUNT" >> "$STATE_DIR/hook.log" - # Optional: run mempalace ingest in background if MEMPAL_DIR is set + # Auto-mine the transcript. Two paths: + # 1. TRANSCRIPT_PATH (from Claude Code) — mine the directory it lives in + # 2. MEMPAL_DIR (user-configured) — mine that directory + # At least one should work. If neither is set, nothing mines. + PYTHON="$(command -v python3)" + MINE_DIR="" + if [ -n "$TRANSCRIPT_PATH" ] && [ -f "$TRANSCRIPT_PATH" ]; then + MINE_DIR="$(dirname "$TRANSCRIPT_PATH")" + fi if [ -n "$MEMPAL_DIR" ] && [ -d "$MEMPAL_DIR" ]; then - SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - REPO_DIR="$(dirname "$SCRIPT_DIR")" - python3 -m mempalace mine "$MEMPAL_DIR" >> "$STATE_DIR/hook.log" 2>&1 & + MINE_DIR="$MEMPAL_DIR" + fi + if [ -n "$MINE_DIR" ]; then + "$PYTHON" -m mempalace mine "$MINE_DIR" >> "$STATE_DIR/hook.log" 2>&1 & fi # Notify the AI that a checkpoint happened — but do NOT ask it to write diff --git a/tests/test_save_hook_mines.py b/tests/test_save_hook_mines.py new file mode 100644 index 0000000..a702a42 --- /dev/null +++ b/tests/test_save_hook_mines.py @@ -0,0 +1,68 @@ +"""TDD: save hook must actually mine conversations without MEMPAL_DIR. + +The save hook should auto-discover the conversation transcript and mine it +without the user needing to set MEMPAL_DIR. Currently MEMPAL_DIR defaults +to empty, which means the mining block is skipped and nothing is saved +despite the hook telling the agent "saved in background." + +Written BEFORE the fix. +""" + +import os + + +class TestSaveHookAutoMines: + """The save hook must mine the active transcript automatically.""" + + def test_hook_mines_transcript_path(self): + """The hook receives TRANSCRIPT_PATH from Claude Code. + It should use that to mine the conversation, not depend on MEMPAL_DIR.""" + hook_path = os.path.join( + os.path.dirname(os.path.dirname(__file__)), + "hooks", + "mempal_save_hook.sh", + ) + src = open(hook_path).read() + + # The hook ALREADY receives TRANSCRIPT_PATH in the JSON input. + # It should use this to mine the current session's transcript + # regardless of whether MEMPAL_DIR is set. + # The hook must have a path that uses TRANSCRIPT_PATH to determine + # what to mine, separate from the MEMPAL_DIR path. + uses_transcript = "TRANSCRIPT_PATH" in src + has_mine = "mempalace mine" in src + # TRANSCRIPT_PATH must appear in the mining logic, not just the parse block + transcript_drives_mine = "MINE_DIR" in src and "dirname" in src and "TRANSCRIPT_PATH" in src + + assert uses_transcript and has_mine and transcript_drives_mine, ( + "Save hook only mines when MEMPAL_DIR is set (defaults to empty). " + "The hook receives TRANSCRIPT_PATH from Claude Code — it should " + "mine that file automatically so conversations are saved without " + "the user setting an env var. Currently the hook says 'saved in " + "background' but nothing actually saves." + ) + + def test_mempal_dir_default_not_empty(self): + """If MEMPAL_DIR is still used, it should have a sensible default, + not an empty string that silently disables mining.""" + hook_path = os.path.join( + os.path.dirname(os.path.dirname(__file__)), + "hooks", + "mempal_save_hook.sh", + ) + src = open(hook_path).read() + + # Check if MEMPAL_DIR defaults to empty + has_empty_default = 'MEMPAL_DIR=""' in src + + # If it defaults to empty, mining is silently disabled + if has_empty_default: + # There must be an alternative mining path that doesn't need MEMPAL_DIR + has_alternative = ( + src.count("mempalace mine") > 1 + or "TRANSCRIPT_PATH" in src.split("mempalace mine")[0] + ) + assert has_alternative, ( + 'MEMPAL_DIR defaults to "" which silently disables mining. ' + "Either set a default path or add transcript-based mining." + ) From 06c4289999f7d321e1ca28447f218bb8d6b9fb16 Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Mon, 13 Apr 2026 22:56:04 -0300 Subject: [PATCH 35/42] ci: serve docs from develop only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Docs deploy to GitHub Pages from develop for faster iteration cycles. Main was failing the deploy step with "Branch 'main' is not allowed to deploy to github-pages due to environment protection rules" on every release merge (v3.2.0, v3.3.0) — noise without signal, since docs weren't meant to serve from main anyway. Removes main from both the push trigger and the deploy-job guard. Develop continues to deploy as before; manual dispatch still works. --- .github/workflows/deploy-docs.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy-docs.yml b/.github/workflows/deploy-docs.yml index 71df155..b516f36 100644 --- a/.github/workflows/deploy-docs.yml +++ b/.github/workflows/deploy-docs.yml @@ -2,7 +2,7 @@ name: Deploy Docs on: push: - branches: [main, develop] + branches: [develop] paths: - ".github/workflows/deploy-docs.yml" - "website/**" @@ -51,7 +51,7 @@ jobs: path: website/.vitepress/dist deploy: - if: github.ref_name == 'main' || github.ref_name == 'develop' + if: github.ref_name == 'develop' environment: name: github-pages url: ${{ steps.deployment.outputs.page_url }} From 267a644f4f34e64ea4161fc24b4dcdfdb3652bd5 Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Tue, 14 Apr 2026 00:31:16 -0300 Subject: [PATCH 36/42] refactor: route all chromadb access through ChromaBackend Prerequisite for RFC 001 (plugin spec, #743). Removes every direct `import chromadb` outside the ChromaDB backend itself so the core modules depend only on the backend abstraction layer. Extends ChromaBackend with make_client, get_or_create_collection, delete_collection, create_collection, and backend_version. Adds update() to the BaseCollection contract. Non-backend callers (mcp_server, dedup, repair, migrate, cli) now go through the abstraction; tests patch ChromaBackend instead of chromadb. With this landed, the RFC 001 spec can be enforced and PalaceStore (#643) can ship as a plugin without touching core modules. --- mempalace/backends/base.py | 5 ++ mempalace/backends/chroma.py | 63 ++++++++++++++++++- mempalace/cli.py | 21 +++---- mempalace/dedup.py | 8 +-- mempalace/mcp_server.py | 12 ++-- mempalace/migrate.py | 18 +++--- mempalace/repair.py | 16 +++-- tests/test_cli.py | 106 +++++++++++++------------------- tests/test_dedup.py | 39 ++++++------ tests/test_repair.py | 114 ++++++++++++++++------------------- uv.lock | 2 +- 11 files changed, 215 insertions(+), 189 deletions(-) diff --git a/mempalace/backends/base.py b/mempalace/backends/base.py index 4685f51..877da53 100644 --- a/mempalace/backends/base.py +++ b/mempalace/backends/base.py @@ -27,6 +27,11 @@ class BaseCollection(ABC): ) -> None: raise NotImplementedError + @abstractmethod + def update(self, **kwargs: Any) -> None: + """Update existing records. Must raise if any ID is missing.""" + raise NotImplementedError + @abstractmethod def query(self, **kwargs: Any) -> Dict[str, Any]: raise NotImplementedError diff --git a/mempalace/backends/chroma.py b/mempalace/backends/chroma.py index 28fe55f..1a13675 100644 --- a/mempalace/backends/chroma.py +++ b/mempalace/backends/chroma.py @@ -55,6 +55,9 @@ class ChromaCollection(BaseCollection): def upsert(self, *, documents, ids, metadatas=None): self._collection.upsert(documents=documents, ids=ids, metadatas=metadatas) + def update(self, **kwargs): + self._collection.update(**kwargs) + def query(self, **kwargs): return self._collection.query(**kwargs) @@ -71,6 +74,44 @@ class ChromaCollection(BaseCollection): class ChromaBackend: """Factory for MemPalace's default ChromaDB backend.""" + def __init__(self): + # Per-instance client cache: palace_path -> chromadb.PersistentClient + self._clients: dict = {} + + # ------------------------------------------------------------------ + # Internal helpers + # ------------------------------------------------------------------ + + def _client(self, palace_path: str): + """Return a cached PersistentClient for *palace_path*, creating one if needed.""" + if palace_path not in self._clients: + _fix_blob_seq_ids(palace_path) + self._clients[palace_path] = chromadb.PersistentClient(path=palace_path) + return self._clients[palace_path] + + # ------------------------------------------------------------------ + # Public static helpers (for callers that manage their own caching) + # ------------------------------------------------------------------ + + @staticmethod + def make_client(palace_path: str): + """Create and return a fresh PersistentClient (fix BLOB seq_ids first). + + Intended for long-lived callers (e.g. mcp_server) that keep their own + inode/mtime-based client cache. + """ + _fix_blob_seq_ids(palace_path) + return chromadb.PersistentClient(path=palace_path) + + @staticmethod + def backend_version() -> str: + """Return the installed chromadb package version string.""" + return chromadb.__version__ + + # ------------------------------------------------------------------ + # Collection lifecycle + # ------------------------------------------------------------------ + def get_collection(self, palace_path: str, collection_name: str, create: bool = False): if not create and not os.path.isdir(palace_path): raise FileNotFoundError(palace_path) @@ -82,8 +123,7 @@ class ChromaBackend: except (OSError, NotImplementedError): pass - _fix_blob_seq_ids(palace_path) - client = chromadb.PersistentClient(path=palace_path) + client = self._client(palace_path) if create: collection = client.get_or_create_collection( collection_name, metadata={"hnsw:space": "cosine"} @@ -91,3 +131,22 @@ class ChromaBackend: else: collection = client.get_collection(collection_name) return ChromaCollection(collection) + + def get_or_create_collection( + self, palace_path: str, collection_name: str + ) -> "ChromaCollection": + """Shorthand for get_collection(..., create=True).""" + return self.get_collection(palace_path, collection_name, create=True) + + def delete_collection(self, palace_path: str, collection_name: str) -> None: + """Delete *collection_name* from the palace at *palace_path*.""" + self._client(palace_path).delete_collection(collection_name) + + def create_collection( + self, palace_path: str, collection_name: str, hnsw_space: str = "cosine" + ) -> "ChromaCollection": + """Create (not get-or-create) *collection_name* with cosine HNSW space.""" + collection = self._client(palace_path).create_collection( + collection_name, metadata={"hnsw:space": hnsw_space} + ) + return ChromaCollection(collection) diff --git a/mempalace/cli.py b/mempalace/cli.py index fa92ed6..f7f68d7 100644 --- a/mempalace/cli.py +++ b/mempalace/cli.py @@ -172,8 +172,8 @@ def cmd_status(args): def cmd_repair(args): """Rebuild palace vector index from SQLite metadata.""" - import chromadb import shutil + from .backends.chroma import ChromaBackend from .migrate import confirm_destructive_action, contains_palace_database palace_path = os.path.abspath( @@ -193,10 +193,11 @@ def cmd_repair(args): print(f"{'=' * 55}\n") print(f" Palace: {palace_path}") + backend = ChromaBackend() + # Try to read existing drawers try: - client = chromadb.PersistentClient(path=palace_path) - col = client.get_collection("mempalace_drawers") + col = backend.get_collection(palace_path, "mempalace_drawers") total = col.count() print(f" Drawers found: {total}") except Exception as e: @@ -243,8 +244,8 @@ def cmd_repair(args): shutil.copytree(palace_path, backup_path) print(" Rebuilding collection...") - client.delete_collection("mempalace_drawers") - new_col = client.create_collection("mempalace_drawers", metadata={"hnsw:space": "cosine"}) + backend.delete_collection(palace_path, "mempalace_drawers") + new_col = backend.create_collection(palace_path, "mempalace_drawers") filed = 0 for i in range(0, len(all_ids), batch_size): @@ -297,7 +298,7 @@ def cmd_mcp(args): def cmd_compress(args): """Compress drawers in a wing using AAAK Dialect.""" - import chromadb + from .backends.chroma import ChromaBackend from .dialect import Dialect palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path @@ -317,9 +318,9 @@ def cmd_compress(args): dialect = Dialect() # Connect to palace + backend = ChromaBackend() try: - client = chromadb.PersistentClient(path=palace_path) - col = client.get_collection("mempalace_drawers") + col = backend.get_collection(palace_path, "mempalace_drawers") except Exception: print(f"\n No palace found at {palace_path}") print(" Run: mempalace init then mempalace mine ") @@ -394,9 +395,7 @@ def cmd_compress(args): # Store compressed versions (unless dry-run) if not args.dry_run: try: - comp_col = client.get_or_create_collection( - "mempalace_compressed", metadata={"hnsw:space": "cosine"} - ) + comp_col = backend.get_or_create_collection(palace_path, "mempalace_compressed") for doc_id, compressed, meta, stats in compressed_entries: comp_meta = dict(meta) comp_meta["compression_ratio"] = round(stats["size_ratio"], 1) diff --git a/mempalace/dedup.py b/mempalace/dedup.py index c2f9f6b..6b1bac1 100644 --- a/mempalace/dedup.py +++ b/mempalace/dedup.py @@ -27,7 +27,7 @@ import os import time from collections import defaultdict -import chromadb +from .backends.chroma import ChromaBackend COLLECTION_NAME = "mempalace_drawers" @@ -130,8 +130,7 @@ def dedup_source_group(col, drawer_ids, threshold=DEFAULT_THRESHOLD, dry_run=Tru def show_stats(palace_path=None): """Show duplication statistics without making changes.""" palace_path = palace_path or _get_palace_path() - client = chromadb.PersistentClient(path=palace_path) - col = client.get_collection(COLLECTION_NAME) + col = ChromaBackend().get_collection(palace_path, COLLECTION_NAME) groups = get_source_groups(col) @@ -163,8 +162,7 @@ def dedup_palace( print(" MemPalace Deduplicator") print(f"{'=' * 55}") - client = chromadb.PersistentClient(path=palace_path) - col = client.get_collection(COLLECTION_NAME) + col = ChromaBackend().get_collection(palace_path, COLLECTION_NAME) print(f" Palace: {palace_path}") print(f" Drawers: {col.count():,}") diff --git a/mempalace/mcp_server.py b/mempalace/mcp_server.py index 31be8a4..4653f5f 100644 --- a/mempalace/mcp_server.py +++ b/mempalace/mcp_server.py @@ -32,7 +32,7 @@ from pathlib import Path from .config import MempalaceConfig, sanitize_name, sanitize_content from .version import __version__ -import chromadb +from .backends.chroma import ChromaBackend, ChromaCollection from .query_sanitizer import sanitize_query from .searcher import search_memories from .palace_graph import ( @@ -177,7 +177,7 @@ def _get_client(): mtime_changed = current_mtime != 0.0 and abs(current_mtime - _palace_db_mtime) > 0.01 if _client_cache is None or inode_changed or mtime_changed: - _client_cache = chromadb.PersistentClient(path=_config.palace_path) + _client_cache = ChromaBackend.make_client(_config.palace_path) _collection_cache = None _metadata_cache = None _metadata_cache_time = 0 @@ -192,13 +192,15 @@ def _get_collection(create=False): try: client = _get_client() if create: - _collection_cache = client.get_or_create_collection( - _config.collection_name, metadata={"hnsw:space": "cosine"} + _collection_cache = ChromaCollection( + client.get_or_create_collection( + _config.collection_name, metadata={"hnsw:space": "cosine"} + ) ) _metadata_cache = None _metadata_cache_time = 0 elif _collection_cache is None: - _collection_cache = client.get_collection(_config.collection_name) + _collection_cache = ChromaCollection(client.get_collection(_config.collection_name)) _metadata_cache = None _metadata_cache_time = 0 return _collection_cache diff --git a/mempalace/migrate.py b/mempalace/migrate.py index 319c670..2eebb61 100644 --- a/mempalace/migrate.py +++ b/mempalace/migrate.py @@ -134,7 +134,7 @@ def confirm_destructive_action( def migrate(palace_path: str, dry_run: bool = False, confirm: bool = False): """Migrate a palace to the currently installed ChromaDB version.""" - import chromadb + from .backends.chroma import ChromaBackend palace_path = os.path.abspath(os.path.expanduser(palace_path)) db_path = os.path.join(palace_path, "chroma.sqlite3") @@ -152,19 +152,19 @@ def migrate(palace_path: str, dry_run: bool = False, confirm: bool = False): # Detect version source_version = detect_chromadb_version(db_path) + target_version = ChromaBackend.backend_version() print(f" Source: ChromaDB {source_version}") - print(f" Target: ChromaDB {chromadb.__version__}") + print(f" Target: ChromaDB {target_version}") # Try reading with current chromadb first try: - client = chromadb.PersistentClient(path=palace_path) - col = client.get_collection("mempalace_drawers") + col = ChromaBackend().get_collection(palace_path, "mempalace_drawers") count = col.count() - print(f"\n Palace is already readable by chromadb {chromadb.__version__}.") + print(f"\n Palace is already readable by chromadb {target_version}.") print(f" {count} drawers found. No migration needed.") return True except Exception: - print(f"\n Palace is NOT readable by chromadb {chromadb.__version__}.") + print(f"\n Palace is NOT readable by chromadb {target_version}.") print(" Extracting from SQLite directly...") # Extract all drawers via raw SQL @@ -208,8 +208,8 @@ def migrate(palace_path: str, dry_run: bool = False, confirm: bool = False): temp_palace = tempfile.mkdtemp(prefix="mempalace_migrate_") print(f" Creating fresh palace in {temp_palace}...") - client = chromadb.PersistentClient(path=temp_palace) - col = client.get_or_create_collection("mempalace_drawers", metadata={"hnsw:space": "cosine"}) + fresh_backend = ChromaBackend() + col = fresh_backend.get_or_create_collection(temp_palace, "mempalace_drawers") # Re-import in batches batch_size = 500 @@ -227,7 +227,7 @@ def migrate(palace_path: str, dry_run: bool = False, confirm: bool = False): # Verify before swapping final_count = col.count() del col - del client + del fresh_backend # Swap: remove old palace, move new one into place print(" Swapping old palace for migrated version...") diff --git a/mempalace/repair.py b/mempalace/repair.py index d51be60..9a9aa88 100644 --- a/mempalace/repair.py +++ b/mempalace/repair.py @@ -32,7 +32,7 @@ import os import shutil import time -import chromadb +from .backends.chroma import ChromaBackend COLLECTION_NAME = "mempalace_drawers" @@ -90,8 +90,7 @@ def scan_palace(palace_path=None, only_wing=None): print(f"\n Palace: {palace_path}") print(" Loading...") - client = chromadb.PersistentClient(path=palace_path) - col = client.get_collection(COLLECTION_NAME) + col = ChromaBackend().get_collection(palace_path, COLLECTION_NAME) where = {"wing": only_wing} if only_wing else None total = col.count() @@ -174,8 +173,7 @@ def prune_corrupt(palace_path=None, confirm=False): print(" Re-run with --confirm to actually delete.") return - client = chromadb.PersistentClient(path=palace_path) - col = client.get_collection(COLLECTION_NAME) + col = ChromaBackend().get_collection(palace_path, COLLECTION_NAME) before = col.count() print(f" Collection size before: {before:,}") @@ -222,9 +220,9 @@ def rebuild_index(palace_path=None): print(f"{'=' * 55}\n") print(f" Palace: {palace_path}") - client = chromadb.PersistentClient(path=palace_path) + backend = ChromaBackend() try: - col = client.get_collection(COLLECTION_NAME) + col = backend.get_collection(palace_path, COLLECTION_NAME) total = col.count() except Exception as e: print(f" Error reading palace: {e}") @@ -264,8 +262,8 @@ def rebuild_index(palace_path=None): # Rebuild with correct HNSW settings print(" Rebuilding collection with hnsw:space=cosine...") - client.delete_collection(COLLECTION_NAME) - new_col = client.create_collection(COLLECTION_NAME, metadata={"hnsw:space": "cosine"}) + backend.delete_collection(palace_path, COLLECTION_NAME) + new_col = backend.create_collection(palace_path, COLLECTION_NAME) filed = 0 for i in range(0, len(all_ids), batch_size): diff --git a/tests/test_cli.py b/tests/test_cli.py index 0e95a8c..c4b4203 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -412,12 +412,21 @@ def test_main_compress_dispatches(): # ── cmd_repair ───────────────────────────────────────────────────────── +def _mock_backend_for(col=None, new_col=None): + """Build a mock ChromaBackend whose get_collection/create_collection return *col* / *new_col*.""" + mock_backend = MagicMock() + if col is not None: + mock_backend.get_collection.return_value = col + if new_col is not None: + mock_backend.create_collection.return_value = new_col + return mock_backend + + @patch("mempalace.cli.MempalaceConfig") def test_cmd_repair_no_palace(mock_config_cls, tmp_path, capsys): mock_config_cls.return_value.palace_path = str(tmp_path / "nonexistent") args = argparse.Namespace(palace=None) - mock_chromadb = MagicMock() - with patch.dict("sys.modules", {"chromadb": mock_chromadb}): + with patch("mempalace.backends.chroma.ChromaBackend"): cmd_repair(args) out = capsys.readouterr().out assert "No palace found" in out @@ -429,8 +438,7 @@ def test_cmd_repair_requires_palace_database(mock_config_cls, tmp_path, capsys): palace_dir.mkdir() mock_config_cls.return_value.palace_path = str(palace_dir) args = argparse.Namespace(palace=None) - mock_chromadb = MagicMock() - with patch.dict("sys.modules", {"chromadb": mock_chromadb}): + with patch("mempalace.backends.chroma.ChromaBackend"): cmd_repair(args) out = capsys.readouterr().out assert "No palace database found" in out @@ -443,11 +451,9 @@ def test_cmd_repair_error_reading(mock_config_cls, tmp_path, capsys): (palace_dir / "chroma.sqlite3").write_text("db") mock_config_cls.return_value.palace_path = str(palace_dir) args = argparse.Namespace(palace=None) - mock_chromadb = MagicMock() - mock_client = MagicMock() - mock_client.get_collection.side_effect = Exception("corrupt db") - mock_chromadb.PersistentClient.return_value = mock_client - with patch.dict("sys.modules", {"chromadb": mock_chromadb}): + mock_backend = MagicMock() + mock_backend.get_collection.side_effect = Exception("corrupt db") + with patch("mempalace.backends.chroma.ChromaBackend", return_value=mock_backend): cmd_repair(args) out = capsys.readouterr().out assert "Error reading palace" in out @@ -460,13 +466,10 @@ def test_cmd_repair_zero_drawers(mock_config_cls, tmp_path, capsys): (palace_dir / "chroma.sqlite3").write_text("db") mock_config_cls.return_value.palace_path = str(palace_dir) args = argparse.Namespace(palace=None) - mock_chromadb = MagicMock() mock_col = MagicMock() mock_col.count.return_value = 0 - mock_client = MagicMock() - mock_client.get_collection.return_value = mock_col - mock_chromadb.PersistentClient.return_value = mock_client - with patch.dict("sys.modules", {"chromadb": mock_chromadb}): + mock_backend = _mock_backend_for(col=mock_col) + with patch("mempalace.backends.chroma.ChromaBackend", return_value=mock_backend): cmd_repair(args) out = capsys.readouterr().out assert "Nothing to repair" in out @@ -479,7 +482,6 @@ def test_cmd_repair_success(mock_config_cls, tmp_path, capsys): (palace_dir / "chroma.sqlite3").write_text("db") mock_config_cls.return_value.palace_path = str(palace_dir) args = argparse.Namespace(palace=None, yes=True) - mock_chromadb = MagicMock() mock_col = MagicMock() mock_col.count.return_value = 2 mock_col.get.return_value = { @@ -487,12 +489,9 @@ def test_cmd_repair_success(mock_config_cls, tmp_path, capsys): "documents": ["doc1", "doc2"], "metadatas": [{"wing": "a"}, {"wing": "b"}], } - mock_client = MagicMock() - mock_client.get_collection.return_value = mock_col mock_new_col = MagicMock() - mock_client.create_collection.return_value = mock_new_col - mock_chromadb.PersistentClient.return_value = mock_client - with patch.dict("sys.modules", {"chromadb": mock_chromadb}): + mock_backend = _mock_backend_for(col=mock_col, new_col=mock_new_col) + with patch("mempalace.backends.chroma.ChromaBackend", return_value=mock_backend): cmd_repair(args) out = capsys.readouterr().out assert "Repair complete" in out @@ -506,20 +505,17 @@ def test_cmd_repair_aborts_without_confirmation(mock_config_cls, tmp_path, capsy (palace_dir / "chroma.sqlite3").write_text("db") mock_config_cls.return_value.palace_path = str(palace_dir) args = argparse.Namespace(palace=None) - mock_chromadb = MagicMock() mock_col = MagicMock() mock_col.count.return_value = 1 - mock_client = MagicMock() - mock_client.get_collection.return_value = mock_col - mock_chromadb.PersistentClient.return_value = mock_client + mock_backend = _mock_backend_for(col=mock_col) with ( - patch.dict("sys.modules", {"chromadb": mock_chromadb}), + patch("mempalace.backends.chroma.ChromaBackend", return_value=mock_backend), patch("builtins.input", return_value="n"), ): cmd_repair(args) out = capsys.readouterr().out assert "Aborted." in out - mock_client.create_collection.assert_not_called() + mock_backend.create_collection.assert_not_called() # ── cmd_compress ─────────────────────────────────────────────────────── @@ -529,10 +525,10 @@ def test_cmd_repair_aborts_without_confirmation(mock_config_cls, tmp_path, capsy def test_cmd_compress_no_palace(mock_config_cls, capsys): mock_config_cls.return_value.palace_path = "/fake/palace" args = argparse.Namespace(palace=None, wing=None, dry_run=False, config=None) - mock_chromadb = MagicMock() - mock_chromadb.PersistentClient.side_effect = Exception("no palace") + mock_backend = MagicMock() + mock_backend.get_collection.side_effect = Exception("no palace") with ( - patch.dict("sys.modules", {"chromadb": mock_chromadb}), + patch("mempalace.backends.chroma.ChromaBackend", return_value=mock_backend), pytest.raises(SystemExit), ): cmd_compress(args) @@ -542,13 +538,10 @@ def test_cmd_compress_no_palace(mock_config_cls, capsys): def test_cmd_compress_no_drawers(mock_config_cls, capsys): mock_config_cls.return_value.palace_path = "/fake/palace" args = argparse.Namespace(palace=None, wing="mywing", dry_run=False, config=None) - mock_chromadb = MagicMock() mock_col = MagicMock() mock_col.get.return_value = {"documents": [], "metadatas": [], "ids": []} - mock_client = MagicMock() - mock_client.get_collection.return_value = mock_col - mock_chromadb.PersistentClient.return_value = mock_client - with patch.dict("sys.modules", {"chromadb": mock_chromadb}): + mock_backend = _mock_backend_for(col=mock_col) + with patch("mempalace.backends.chroma.ChromaBackend", return_value=mock_backend): cmd_compress(args) out = capsys.readouterr().out assert "No drawers found" in out @@ -567,7 +560,6 @@ def _make_mock_dialect_module(dialect_instance): def test_cmd_compress_dry_run(mock_config_cls, capsys): mock_config_cls.return_value.palace_path = "/fake/palace" args = argparse.Namespace(palace=None, wing=None, dry_run=True, config=None) - mock_chromadb = MagicMock() mock_col = MagicMock() mock_col.get.side_effect = [ { @@ -577,9 +569,7 @@ def test_cmd_compress_dry_run(mock_config_cls, capsys): }, {"documents": [], "metadatas": [], "ids": []}, ] - mock_client = MagicMock() - mock_client.get_collection.return_value = mock_col - mock_chromadb.PersistentClient.return_value = mock_client + mock_backend = _mock_backend_for(col=mock_col) mock_dialect = MagicMock() mock_dialect.compress.return_value = "compressed" @@ -593,12 +583,9 @@ def test_cmd_compress_dry_run(mock_config_cls, capsys): } mock_dialect_mod = _make_mock_dialect_module(mock_dialect) - with patch.dict( - "sys.modules", - { - "chromadb": mock_chromadb, - "mempalace.dialect": mock_dialect_mod, - }, + with ( + patch("mempalace.backends.chroma.ChromaBackend", return_value=mock_backend), + patch.dict("sys.modules", {"mempalace.dialect": mock_dialect_mod}), ): cmd_compress(args) out = capsys.readouterr().out @@ -613,22 +600,16 @@ def test_cmd_compress_with_config(mock_config_cls, tmp_path, capsys): config_file = tmp_path / "entities.json" config_file.write_text('{"people": [], "projects": []}') args = argparse.Namespace(palace=None, wing=None, dry_run=True, config=str(config_file)) - mock_chromadb = MagicMock() mock_col = MagicMock() mock_col.get.return_value = {"documents": [], "metadatas": [], "ids": []} - mock_client = MagicMock() - mock_client.get_collection.return_value = mock_col - mock_chromadb.PersistentClient.return_value = mock_client + mock_backend = _mock_backend_for(col=mock_col) mock_dialect = MagicMock() mock_dialect_mod = _make_mock_dialect_module(mock_dialect) - with patch.dict( - "sys.modules", - { - "chromadb": mock_chromadb, - "mempalace.dialect": mock_dialect_mod, - }, + with ( + patch("mempalace.backends.chroma.ChromaBackend", return_value=mock_backend), + patch.dict("sys.modules", {"mempalace.dialect": mock_dialect_mod}), ): cmd_compress(args) out = capsys.readouterr().out @@ -640,7 +621,6 @@ def test_cmd_compress_stores_results(mock_config_cls, capsys): """Non-dry-run compress stores to mempalace_compressed collection.""" mock_config_cls.return_value.palace_path = "/fake/palace" args = argparse.Namespace(palace=None, wing=None, dry_run=False, config=None) - mock_chromadb = MagicMock() mock_col = MagicMock() mock_col.get.side_effect = [ { @@ -650,11 +630,10 @@ def test_cmd_compress_stores_results(mock_config_cls, capsys): }, {"documents": [], "metadatas": [], "ids": []}, ] - mock_client = MagicMock() - mock_client.get_collection.return_value = mock_col mock_comp_col = MagicMock() - mock_client.get_or_create_collection.return_value = mock_comp_col - mock_chromadb.PersistentClient.return_value = mock_client + mock_backend = MagicMock() + mock_backend.get_collection.return_value = mock_col + mock_backend.get_or_create_collection.return_value = mock_comp_col mock_dialect = MagicMock() mock_dialect.compress.return_value = "compressed" @@ -668,12 +647,9 @@ def test_cmd_compress_stores_results(mock_config_cls, capsys): } mock_dialect_mod = _make_mock_dialect_module(mock_dialect) - with patch.dict( - "sys.modules", - { - "chromadb": mock_chromadb, - "mempalace.dialect": mock_dialect_mod, - }, + with ( + patch("mempalace.backends.chroma.ChromaBackend", return_value=mock_backend), + patch.dict("sys.modules", {"mempalace.dialect": mock_dialect_mod}), ): cmd_compress(args) out = capsys.readouterr().out diff --git a/tests/test_dedup.py b/tests/test_dedup.py index 2ddffb3..dfdd3de 100644 --- a/tests/test_dedup.py +++ b/tests/test_dedup.py @@ -198,8 +198,15 @@ def test_dedup_source_group_query_failure_keeps(): # ── show_stats ──────────────────────────────────────────────────────── -@patch("mempalace.dedup.chromadb") -def test_show_stats(mock_chromadb, tmp_path): +def _install_mock_backend(mock_backend_cls, collection): + mock_backend = MagicMock() + mock_backend.get_collection.return_value = collection + mock_backend_cls.return_value = mock_backend + return mock_backend + + +@patch("mempalace.dedup.ChromaBackend") +def test_show_stats(mock_backend_cls, tmp_path): mock_col = MagicMock() mock_col.count.return_value = 5 mock_col.get.side_effect = [ @@ -215,9 +222,7 @@ def test_show_stats(mock_chromadb, tmp_path): }, {"ids": []}, ] - mock_client = MagicMock() - mock_client.get_collection.return_value = mock_col - mock_chromadb.PersistentClient.return_value = mock_client + _install_mock_backend(mock_backend_cls, mock_col) dedup.show_stats(palace_path=str(tmp_path)) # should not raise @@ -227,13 +232,11 @@ def test_show_stats(mock_chromadb, tmp_path): @patch("mempalace.dedup.dedup_source_group") @patch("mempalace.dedup.get_source_groups") -@patch("mempalace.dedup.chromadb") -def test_dedup_palace_dry_run(mock_chromadb, mock_groups, mock_dedup_group, tmp_path): +@patch("mempalace.dedup.ChromaBackend") +def test_dedup_palace_dry_run(mock_backend_cls, mock_groups, mock_dedup_group, tmp_path): mock_col = MagicMock() mock_col.count.return_value = 10 - mock_client = MagicMock() - mock_client.get_collection.return_value = mock_col - mock_chromadb.PersistentClient.return_value = mock_client + _install_mock_backend(mock_backend_cls, mock_col) mock_groups.return_value = {"a.txt": ["d1", "d2", "d3", "d4", "d5"]} mock_dedup_group.return_value = (["d1", "d2", "d3"], ["d4", "d5"]) @@ -244,13 +247,11 @@ def test_dedup_palace_dry_run(mock_chromadb, mock_groups, mock_dedup_group, tmp_ @patch("mempalace.dedup.dedup_source_group") @patch("mempalace.dedup.get_source_groups") -@patch("mempalace.dedup.chromadb") -def test_dedup_palace_with_wing(mock_chromadb, mock_groups, mock_dedup_group, tmp_path): +@patch("mempalace.dedup.ChromaBackend") +def test_dedup_palace_with_wing(mock_backend_cls, mock_groups, mock_dedup_group, tmp_path): mock_col = MagicMock() mock_col.count.return_value = 10 - mock_client = MagicMock() - mock_client.get_collection.return_value = mock_col - mock_chromadb.PersistentClient.return_value = mock_client + _install_mock_backend(mock_backend_cls, mock_col) mock_groups.return_value = {} dedup.dedup_palace(palace_path=str(tmp_path), wing="test_wing", dry_run=True) @@ -259,13 +260,11 @@ def test_dedup_palace_with_wing(mock_chromadb, mock_groups, mock_dedup_group, tm @patch("mempalace.dedup.dedup_source_group") @patch("mempalace.dedup.get_source_groups") -@patch("mempalace.dedup.chromadb") -def test_dedup_palace_no_groups(mock_chromadb, mock_groups, mock_dedup_group, tmp_path): +@patch("mempalace.dedup.ChromaBackend") +def test_dedup_palace_no_groups(mock_backend_cls, mock_groups, mock_dedup_group, tmp_path): mock_col = MagicMock() mock_col.count.return_value = 3 - mock_client = MagicMock() - mock_client.get_collection.return_value = mock_col - mock_chromadb.PersistentClient.return_value = mock_client + _install_mock_backend(mock_backend_cls, mock_col) mock_groups.return_value = {} dedup.dedup_palace(palace_path=str(tmp_path), dry_run=True) diff --git a/tests/test_repair.py b/tests/test_repair.py index 604b0fb..9ae1812 100644 --- a/tests/test_repair.py +++ b/tests/test_repair.py @@ -66,22 +66,28 @@ def test_paginate_ids_offset_exception_fallback(): # ── scan_palace ─────────────────────────────────────────────────────── -@patch("mempalace.repair.chromadb") -def test_scan_palace_no_ids(mock_chromadb, tmp_path): +def _install_mock_backend(mock_backend_cls, collection): + """Wire mock_backend_cls so ChromaBackend().get_collection(...) returns *collection*.""" + mock_backend = MagicMock() + mock_backend.get_collection.return_value = collection + mock_backend_cls.return_value = mock_backend + return mock_backend + + +@patch("mempalace.repair.ChromaBackend") +def test_scan_palace_no_ids(mock_backend_cls, tmp_path): mock_col = MagicMock() mock_col.count.return_value = 0 mock_col.get.return_value = {"ids": []} - mock_client = MagicMock() - mock_client.get_collection.return_value = mock_col - mock_chromadb.PersistentClient.return_value = mock_client + _install_mock_backend(mock_backend_cls, mock_col) good, bad = repair.scan_palace(palace_path=str(tmp_path)) assert good == set() assert bad == set() -@patch("mempalace.repair.chromadb") -def test_scan_palace_all_good(mock_chromadb, tmp_path): +@patch("mempalace.repair.ChromaBackend") +def test_scan_palace_all_good(mock_backend_cls, tmp_path): mock_col = MagicMock() mock_col.count.return_value = 2 # _paginate_ids call @@ -89,9 +95,7 @@ def test_scan_palace_all_good(mock_chromadb, tmp_path): {"ids": ["id1", "id2"]}, # paginate {"ids": ["id1", "id2"]}, # probe batch — both returned ] - mock_client = MagicMock() - mock_client.get_collection.return_value = mock_col - mock_chromadb.PersistentClient.return_value = mock_client + _install_mock_backend(mock_backend_cls, mock_col) good, bad = repair.scan_palace(palace_path=str(tmp_path)) assert "id1" in good @@ -99,8 +103,8 @@ def test_scan_palace_all_good(mock_chromadb, tmp_path): assert len(bad) == 0 -@patch("mempalace.repair.chromadb") -def test_scan_palace_with_bad_ids(mock_chromadb, tmp_path): +@patch("mempalace.repair.ChromaBackend") +def test_scan_palace_with_bad_ids(mock_backend_cls, tmp_path): mock_col = MagicMock() mock_col.count.return_value = 2 @@ -117,26 +121,22 @@ def test_scan_palace_with_bad_ids(mock_chromadb, tmp_path): raise Exception("batch fail") mock_col.get.side_effect = get_side_effect - mock_client = MagicMock() - mock_client.get_collection.return_value = mock_col - mock_chromadb.PersistentClient.return_value = mock_client + _install_mock_backend(mock_backend_cls, mock_col) good, bad = repair.scan_palace(palace_path=str(tmp_path)) assert "good1" in good assert "bad1" in bad -@patch("mempalace.repair.chromadb") -def test_scan_palace_with_wing_filter(mock_chromadb, tmp_path): +@patch("mempalace.repair.ChromaBackend") +def test_scan_palace_with_wing_filter(mock_backend_cls, tmp_path): mock_col = MagicMock() mock_col.count.return_value = 1 mock_col.get.side_effect = [ {"ids": ["id1"]}, # paginate {"ids": ["id1"]}, # probe ] - mock_client = MagicMock() - mock_client.get_collection.return_value = mock_col - mock_chromadb.PersistentClient.return_value = mock_client + _install_mock_backend(mock_backend_cls, mock_col) repair.scan_palace(palace_path=str(tmp_path), only_wing="test_wing") # Verify where filter was passed @@ -147,38 +147,36 @@ def test_scan_palace_with_wing_filter(mock_chromadb, tmp_path): # ── prune_corrupt ───────────────────────────────────────────────────── -@patch("mempalace.repair.chromadb") -def test_prune_corrupt_no_file(mock_chromadb, tmp_path): +@patch("mempalace.repair.ChromaBackend") +def test_prune_corrupt_no_file(mock_backend_cls, tmp_path): # Should print message and return without error repair.prune_corrupt(palace_path=str(tmp_path)) -@patch("mempalace.repair.chromadb") -def test_prune_corrupt_dry_run(mock_chromadb, tmp_path): +@patch("mempalace.repair.ChromaBackend") +def test_prune_corrupt_dry_run(mock_backend_cls, tmp_path): bad_file = tmp_path / "corrupt_ids.txt" bad_file.write_text("bad1\nbad2\n") repair.prune_corrupt(palace_path=str(tmp_path), confirm=False) - # No chromadb calls in dry run - mock_chromadb.PersistentClient.assert_not_called() + # No backend calls in dry run + mock_backend_cls.assert_not_called() -@patch("mempalace.repair.chromadb") -def test_prune_corrupt_confirmed(mock_chromadb, tmp_path): +@patch("mempalace.repair.ChromaBackend") +def test_prune_corrupt_confirmed(mock_backend_cls, tmp_path): bad_file = tmp_path / "corrupt_ids.txt" bad_file.write_text("bad1\nbad2\n") mock_col = MagicMock() mock_col.count.side_effect = [10, 8] - mock_client = MagicMock() - mock_client.get_collection.return_value = mock_col - mock_chromadb.PersistentClient.return_value = mock_client + _install_mock_backend(mock_backend_cls, mock_col) repair.prune_corrupt(palace_path=str(tmp_path), confirm=True) mock_col.delete.assert_called_once() -@patch("mempalace.repair.chromadb") -def test_prune_corrupt_delete_failure_fallback(mock_chromadb, tmp_path): +@patch("mempalace.repair.ChromaBackend") +def test_prune_corrupt_delete_failure_fallback(mock_backend_cls, tmp_path): bad_file = tmp_path / "corrupt_ids.txt" bad_file.write_text("bad1\nbad2\n") @@ -186,9 +184,7 @@ def test_prune_corrupt_delete_failure_fallback(mock_chromadb, tmp_path): mock_col.count.side_effect = [10, 8] # Batch delete fails, per-id succeeds mock_col.delete.side_effect = [Exception("batch fail"), None, None] - mock_client = MagicMock() - mock_client.get_collection.return_value = mock_col - mock_chromadb.PersistentClient.return_value = mock_client + _install_mock_backend(mock_backend_cls, mock_col) repair.prune_corrupt(palace_path=str(tmp_path), confirm=True) assert mock_col.delete.call_count == 3 # 1 batch + 2 individual @@ -197,29 +193,27 @@ def test_prune_corrupt_delete_failure_fallback(mock_chromadb, tmp_path): # ── rebuild_index ───────────────────────────────────────────────────── -@patch("mempalace.repair.chromadb") -def test_rebuild_index_no_palace(mock_chromadb, tmp_path): +@patch("mempalace.repair.ChromaBackend") +def test_rebuild_index_no_palace(mock_backend_cls, tmp_path): nonexistent = str(tmp_path / "nope") repair.rebuild_index(palace_path=nonexistent) - mock_chromadb.PersistentClient.assert_not_called() + mock_backend_cls.assert_not_called() @patch("mempalace.repair.shutil") -@patch("mempalace.repair.chromadb") -def test_rebuild_index_empty_palace(mock_chromadb, mock_shutil, tmp_path): +@patch("mempalace.repair.ChromaBackend") +def test_rebuild_index_empty_palace(mock_backend_cls, mock_shutil, tmp_path): mock_col = MagicMock() mock_col.count.return_value = 0 - mock_client = MagicMock() - mock_client.get_collection.return_value = mock_col - mock_chromadb.PersistentClient.return_value = mock_client + mock_backend = _install_mock_backend(mock_backend_cls, mock_col) repair.rebuild_index(palace_path=str(tmp_path)) - mock_client.delete_collection.assert_not_called() + mock_backend.delete_collection.assert_not_called() @patch("mempalace.repair.shutil") -@patch("mempalace.repair.chromadb") -def test_rebuild_index_success(mock_chromadb, mock_shutil, tmp_path): +@patch("mempalace.repair.ChromaBackend") +def test_rebuild_index_success(mock_backend_cls, mock_shutil, tmp_path): # Create a fake sqlite file sqlite_path = tmp_path / "chroma.sqlite3" sqlite_path.write_text("fake") @@ -233,10 +227,8 @@ def test_rebuild_index_success(mock_chromadb, mock_shutil, tmp_path): } mock_new_col = MagicMock() - mock_client = MagicMock() - mock_client.get_collection.return_value = mock_col - mock_client.create_collection.return_value = mock_new_col - mock_chromadb.PersistentClient.return_value = mock_client + mock_backend = _install_mock_backend(mock_backend_cls, mock_col) + mock_backend.create_collection.return_value = mock_new_col repair.rebuild_index(palace_path=str(tmp_path)) @@ -244,11 +236,9 @@ def test_rebuild_index_success(mock_chromadb, mock_shutil, tmp_path): mock_shutil.copy2.assert_called_once() assert "chroma.sqlite3" in str(mock_shutil.copy2.call_args) - # Verify: deleted and recreated with cosine - mock_client.delete_collection.assert_called_once_with("mempalace_drawers") - mock_client.create_collection.assert_called_once_with( - "mempalace_drawers", metadata={"hnsw:space": "cosine"} - ) + # Verify: deleted and recreated (cosine is the backend default) + mock_backend.delete_collection.assert_called_once_with(str(tmp_path), "mempalace_drawers") + mock_backend.create_collection.assert_called_once_with(str(tmp_path), "mempalace_drawers") # Verify: used upsert not add mock_new_col.upsert.assert_called_once() @@ -256,11 +246,11 @@ def test_rebuild_index_success(mock_chromadb, mock_shutil, tmp_path): @patch("mempalace.repair.shutil") -@patch("mempalace.repair.chromadb") -def test_rebuild_index_error_reading(mock_chromadb, mock_shutil, tmp_path): - mock_client = MagicMock() - mock_client.get_collection.side_effect = Exception("corrupt") - mock_chromadb.PersistentClient.return_value = mock_client +@patch("mempalace.repair.ChromaBackend") +def test_rebuild_index_error_reading(mock_backend_cls, mock_shutil, tmp_path): + mock_backend = MagicMock() + mock_backend.get_collection.side_effect = Exception("corrupt") + mock_backend_cls.return_value = mock_backend repair.rebuild_index(palace_path=str(tmp_path)) - mock_client.delete_collection.assert_not_called() + mock_backend.delete_collection.assert_not_called() diff --git a/uv.lock b/uv.lock index 413f104..f9b6dca 100644 --- a/uv.lock +++ b/uv.lock @@ -1239,7 +1239,7 @@ dev = [ [package.metadata] requires-dist = [ { name = "autocorrect", marker = "extra == 'spellcheck'", specifier = ">=2.0" }, - { name = "chromadb", specifier = ">=0.5.0,<0.7" }, + { name = "chromadb", specifier = ">=0.5.0" }, { name = "psutil", marker = "extra == 'dev'", specifier = ">=5.9" }, { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0" }, { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.0" }, From 001700ce728c29fe9d480043da93e2fa9168e1db Mon Sep 17 00:00:00 2001 From: mvalentsev Date: Tue, 14 Apr 2026 08:56:57 +0500 Subject: [PATCH 37/42] fix: update stale org URLs in pyproject.toml and README (#787) --- README.md | 18 +++++++++--------- pyproject.toml | 6 +++--- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index be54ea0..b00d6ea 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ Other memory systems try to fix this by letting AI decide what's worth rememberi > > **What's still true and reproducible:** > -> - **96.6% R@5 on LongMemEval in raw mode**, on 500 questions, zero API calls — independently reproduced on M2 Ultra in under 5 minutes by [@gizmax](https://github.com/milla-jovovich/mempalace/issues/39). +> - **96.6% R@5 on LongMemEval in raw mode**, on 500 questions, zero API calls — independently reproduced on M2 Ultra in under 5 minutes by [@gizmax](https://github.com/MemPalace/mempalace/issues/39). > - Local, free, no subscription, no cloud, no data leaving your machine. > - The architecture (wings, rooms, closets, drawers) is real and useful, even if it's not a magical retrieval boost. > @@ -78,7 +78,7 @@ Other memory systems try to fix this by letting AI decide what's worth rememberi > 3. Wiring `fact_checker.py` into the KG ops so the contradiction detection claim becomes true > 4. Pinning ChromaDB to a tested range (Issue #100), fixing the shell injection in hooks (#110), and addressing the macOS ARM64 segfault (#74) > -> **Thank you to everyone who poked holes in this.** Brutal honest criticism is exactly what makes open source work, and it's what we asked for. Special thanks to [@panuhorsmalahti](https://github.com/milla-jovovich/mempalace/issues/43), [@lhl](https://github.com/milla-jovovich/mempalace/issues/27), [@gizmax](https://github.com/milla-jovovich/mempalace/issues/39), and everyone who filed an issue or a PR in the first 48 hours. We're listening, we're fixing, and we'd rather be right than impressive. +> **Thank you to everyone who poked holes in this.** Brutal honest criticism is exactly what makes open source work, and it's what we asked for. Special thanks to [@panuhorsmalahti](https://github.com/MemPalace/mempalace/issues/43), [@lhl](https://github.com/MemPalace/mempalace/issues/27), [@gizmax](https://github.com/MemPalace/mempalace/issues/39), and everyone who filed an issue or a PR in the first 48 hours. We're listening, we're fixing, and we'd rather be right than impressive. > > — *Milla Jovovich & Ben Sigman* @@ -129,7 +129,7 @@ After the one-time setup (install → init → mine), you don't run MemPalace co Native marketplace install: ```bash -claude plugin marketplace add milla-jovovich/mempalace +claude plugin marketplace add MemPalace/mempalace claude plugin install --scope user mempalace ``` @@ -251,7 +251,7 @@ You say what you're looking for and boom, it already knows which wing to go to. **Rooms** — specific topics within a wing. Auth, billing, deploy — endless rooms. **Halls** — connections between related rooms *within* the same wing. If Room A (auth) and Room B (security) are related, a hall links them. **Tunnels** — connections *between* wings. When Person A and a Project both have a room about "auth," a tunnel cross-references them automatically. -**Closets** — summaries that point to the original content. (In v3.0.0 these are plain-text summaries; AAAK-encoded closets are coming in a future update — see [Task #30](https://github.com/milla-jovovich/mempalace/issues/30).) +**Closets** — summaries that point to the original content. (In v3.0.0 these are plain-text summaries; AAAK-encoded closets are coming in a future update — see [Task #30](https://github.com/MemPalace/mempalace/issues/30).) **Drawers** — the original verbatim files. The exact words, never summarized. **Halls** are memory types — the same in every wing, acting as corridors: @@ -307,11 +307,11 @@ AAAK is a lossy abbreviation system — entity codes, structural markers, and se - **AAAK currently regresses LongMemEval** vs raw verbatim retrieval (84.2% R@5 vs 96.6%). The 96.6% headline number is from **raw mode**, not AAAK mode. - **The MemPalace storage default is raw verbatim text in ChromaDB** — that's where the benchmark wins come from. AAAK is a separate compression layer for context loading, not the storage format. -We're iterating on the dialect spec, adding a real tokenizer for stats, and exploring better break points for when to use it. Track progress in [Issue #43](https://github.com/milla-jovovich/mempalace/issues/43) and [#27](https://github.com/milla-jovovich/mempalace/issues/27). +We're iterating on the dialect spec, adding a real tokenizer for stats, and exploring better break points for when to use it. Track progress in [Issue #43](https://github.com/MemPalace/mempalace/issues/43) and [#27](https://github.com/MemPalace/mempalace/issues/27). ### Contradiction Detection (experimental, not yet wired into KG) -A separate utility (`fact_checker.py`) can check assertions against entity facts. It's not currently called automatically by the knowledge graph operations — this is being fixed (track in [Issue #27](https://github.com/milla-jovovich/mempalace/issues/27)). When enabled it catches things like: +A separate utility (`fact_checker.py`) can check assertions against entity facts. It's not currently called automatically by the knowledge graph operations — this is being fixed (track in [Issue #27](https://github.com/MemPalace/mempalace/issues/27)). When enabled it catches things like: ``` Input: "Soren finished the auth migration" @@ -463,7 +463,7 @@ Letta charges $20–200/mo for agent-managed memory. MemPalace does it with a wi ```bash # Via plugin (recommended) -claude plugin marketplace add milla-jovovich/mempalace +claude plugin marketplace add MemPalace/mempalace claude plugin install --scope user mempalace # Or manually @@ -743,10 +743,10 @@ MIT — see [LICENSE](LICENSE). [version-shield]: https://img.shields.io/badge/version-3.3.0-4dc9f6?style=flat-square&labelColor=0a0e14 -[release-link]: https://github.com/milla-jovovich/mempalace/releases +[release-link]: https://github.com/MemPalace/mempalace/releases [python-shield]: https://img.shields.io/badge/python-3.9+-7dd8f8?style=flat-square&labelColor=0a0e14&logo=python&logoColor=7dd8f8 [python-link]: https://www.python.org/ [license-shield]: https://img.shields.io/badge/license-MIT-b0e8ff?style=flat-square&labelColor=0a0e14 -[license-link]: https://github.com/milla-jovovich/mempalace/blob/main/LICENSE +[license-link]: https://github.com/MemPalace/mempalace/blob/main/LICENSE [discord-shield]: https://img.shields.io/badge/discord-join-5865F2?style=flat-square&labelColor=0a0e14&logo=discord&logoColor=5865F2 [discord-link]: https://discord.com/invite/ycTQQCu6kn diff --git a/pyproject.toml b/pyproject.toml index 2fac2f9..f3067f3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,9 +30,9 @@ dependencies = [ ] [project.urls] -Homepage = "https://github.com/milla-jovovich/mempalace" -Repository = "https://github.com/milla-jovovich/mempalace" -"Bug Tracker" = "https://github.com/milla-jovovich/mempalace/issues" +Homepage = "https://github.com/MemPalace/mempalace" +Repository = "https://github.com/MemPalace/mempalace" +"Bug Tracker" = "https://github.com/MemPalace/mempalace/issues" [project.scripts] mempalace = "mempalace.cli:main" From 3415e70dc6f2b447c80dc4b00a016088f7e3222f Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Tue, 14 Apr 2026 11:32:37 -0300 Subject: [PATCH 38/42] chore: bump plugin manifests to 3.3.0 and fix owner URL Aligns marketplace.json and both plugin.json files with version.py / pyproject.toml (already at 3.3.0) so `/plugin update` reflects the v3.1.0/v3.2.0/v3.3.0 tags that had been landing without manifest bumps. Also updates marketplace.json `owner.url` from the stale github.com/milla-jovovich path to the current github.com/MemPalace org. Refs #874 --- .claude-plugin/marketplace.json | 4 ++-- .claude-plugin/plugin.json | 2 +- .codex-plugin/plugin.json | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 6b23ccf..cbe6307 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -2,14 +2,14 @@ "name": "mempalace", "owner": { "name": "milla-jovovich", - "url": "https://github.com/milla-jovovich" + "url": "https://github.com/MemPalace" }, "plugins": [ { "name": "mempalace", "source": "./.claude-plugin", "description": "AI memory system — mine projects and conversations into a searchable palace. 19 MCP tools, auto-save hooks, guided setup.", - "version": "3.0.14", + "version": "3.3.0", "author": { "name": "milla-jovovich" } diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index 20b2cb2..ba5759a 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "mempalace", - "version": "3.0.14", + "version": "3.3.0", "description": "Give your AI a memory — mine projects and conversations into a searchable palace. 19 MCP tools, auto-save hooks, and guided setup.", "author": { "name": "milla-jovovich" diff --git a/.codex-plugin/plugin.json b/.codex-plugin/plugin.json index 23d3ee7..9017d9b 100644 --- a/.codex-plugin/plugin.json +++ b/.codex-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "mempalace", - "version": "3.0.14", + "version": "3.3.0", "description": "Give your AI a memory — mine projects and conversations into a searchable palace. 19 MCP tools, auto-save hooks, and guided setup.", "author": { "name": "milla-jovovich" From 06240c73b3b2a4fbb00409ff41d87fd5bc33932f Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Tue, 14 Apr 2026 11:32:44 -0300 Subject: [PATCH 39/42] ci: add version guard to catch tag/manifest drift Fails a tag push if `vX.Y.Z` does not match `mempalace/version.py` (the single source of truth per CLAUDE.md), and fails PRs that touch any version file without keeping all five in sync (pyproject.toml, version.py, .claude-plugin/marketplace.json, .claude-plugin/plugin.json, .codex-plugin/plugin.json). Prevents the class of bug described in #874, where v3.1.0/v3.2.0/v3.3.0 tags all landed pointing at commits that still carried manifest version 3.0.14, blocking `/plugin update` for end users. Refs #874 --- .github/workflows/version-guard.yml | 85 +++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 .github/workflows/version-guard.yml diff --git a/.github/workflows/version-guard.yml b/.github/workflows/version-guard.yml new file mode 100644 index 0000000..ea9e730 --- /dev/null +++ b/.github/workflows/version-guard.yml @@ -0,0 +1,85 @@ +name: Version Guard + +on: + push: + tags: ['v*'] + pull_request: + paths: + - 'pyproject.toml' + - 'mempalace/version.py' + - '.claude-plugin/marketplace.json' + - '.claude-plugin/plugin.json' + - '.codex-plugin/plugin.json' + - '.github/workflows/version-guard.yml' + +jobs: + check-versions: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Extract versions from all sources + id: versions + run: | + set -euo pipefail + py_version=$(grep -E '^__version__' mempalace/version.py | cut -d'"' -f2) + pyproject_version=$(grep -E '^version' pyproject.toml | head -1 | cut -d'"' -f2) + marketplace_version=$(jq -r '.plugins[0].version' .claude-plugin/marketplace.json) + plugin_version=$(jq -r '.version' .claude-plugin/plugin.json) + codex_version=$(jq -r '.version' .codex-plugin/plugin.json) + + echo "py_version=$py_version" >> "$GITHUB_OUTPUT" + echo "pyproject_version=$pyproject_version" >> "$GITHUB_OUTPUT" + echo "marketplace_version=$marketplace_version" >> "$GITHUB_OUTPUT" + echo "plugin_version=$plugin_version" >> "$GITHUB_OUTPUT" + echo "codex_version=$codex_version" >> "$GITHUB_OUTPUT" + + { + echo "## Detected versions" + echo "" + echo "| Source | Version |" + echo "| --- | --- |" + echo "| mempalace/version.py | \`$py_version\` |" + echo "| pyproject.toml | \`$pyproject_version\` |" + echo "| .claude-plugin/marketplace.json | \`$marketplace_version\` |" + echo "| .claude-plugin/plugin.json | \`$plugin_version\` |" + echo "| .codex-plugin/plugin.json | \`$codex_version\` |" + } >> "$GITHUB_STEP_SUMMARY" + + - name: Verify all sources agree + env: + PY: ${{ steps.versions.outputs.py_version }} + PYPROJECT: ${{ steps.versions.outputs.pyproject_version }} + MARKETPLACE: ${{ steps.versions.outputs.marketplace_version }} + PLUGIN: ${{ steps.versions.outputs.plugin_version }} + CODEX: ${{ steps.versions.outputs.codex_version }} + run: | + set -euo pipefail + fail=0 + check() { + local name="$1" value="$2" expected="$3" + if [[ "$value" != "$expected" ]]; then + echo "::error file=$name::version mismatch — expected $expected, got $value" + fail=1 + fi + } + # All five must agree with each other (use version.py as the reference, per CLAUDE.md) + check "pyproject.toml" "$PYPROJECT" "$PY" + check ".claude-plugin/marketplace.json" "$MARKETPLACE" "$PY" + check ".claude-plugin/plugin.json" "$PLUGIN" "$PY" + check ".codex-plugin/plugin.json" "$CODEX" "$PY" + exit $fail + + - name: Verify tag matches manifest (tag pushes only) + if: startsWith(github.ref, 'refs/tags/v') + env: + PY: ${{ steps.versions.outputs.py_version }} + run: | + set -euo pipefail + tag_version="${GITHUB_REF_NAME#v}" + if [[ "$tag_version" != "$PY" ]]; then + echo "::error::tag $GITHUB_REF_NAME does not match manifest version $PY" + echo "Bump mempalace/version.py, pyproject.toml, and all plugin manifests before tagging." + exit 1 + fi + echo "Tag $GITHUB_REF_NAME matches manifest version $PY" From 162edf39fe431ebd9c82656fa0b497f2175f2b6c Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Tue, 14 Apr 2026 11:34:54 -0300 Subject: [PATCH 40/42] ci: let semver pre-release tags bypass strict manifest match MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tags matching `vX.Y.Z-*` (e.g. v3.4.0-rc1, v1.0.0-beta.2) are treated as internal/staging builds. They skip the tag-vs-manifest check because pre-releases do not flow to end users via `/plugin update`, which reads the manifest on the default branch. Stable tags `vX.Y.Z` still require all five version sources to match exactly, so the protection against the #874 drift remains intact. The cross-file consistency check on PRs is unchanged — all manifests must still agree with mempalace/version.py whenever any version file moves. --- .github/workflows/version-guard.yml | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/.github/workflows/version-guard.yml b/.github/workflows/version-guard.yml index ea9e730..9cb30fe 100644 --- a/.github/workflows/version-guard.yml +++ b/.github/workflows/version-guard.yml @@ -77,9 +77,25 @@ jobs: run: | set -euo pipefail tag_version="${GITHUB_REF_NAME#v}" + + # Semver pre-release tags (v3.4.0-rc1, v1.0.0-beta.2, ...) are treated + # as internal/staging and are not validated against the manifest. They + # do not flow to end users via `/plugin update`, which reads the + # manifest on the default branch. + if [[ "$tag_version" == *-* ]]; then + echo "Pre-release tag $GITHUB_REF_NAME — skipping strict manifest match." + { + echo "" + echo "> Pre-release tag detected: \`$GITHUB_REF_NAME\`." + echo "> Manifest ($PY) is not required to match. Pre-releases are not published via \`/plugin update\`." + } >> "$GITHUB_STEP_SUMMARY" + exit 0 + fi + if [[ "$tag_version" != "$PY" ]]; then echo "::error::tag $GITHUB_REF_NAME does not match manifest version $PY" - echo "Bump mempalace/version.py, pyproject.toml, and all plugin manifests before tagging." + echo "Bump mempalace/version.py, pyproject.toml, and all plugin manifests before tagging a stable release." + echo "For an internal/staging tag, use a semver pre-release suffix (e.g. v${PY}-rc1)." exit 1 fi echo "Tag $GITHUB_REF_NAME matches manifest version $PY" From 97a4b0e749caae09e767dfecac8cfcb4622c8049 Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Tue, 14 Apr 2026 11:37:47 -0300 Subject: [PATCH 41/42] fix: ship CNAME in Pages artifact to pin custom domain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds website/public/CNAME containing `mempalaceofficial.com` so the VitePress build output always includes /CNAME in the Pages artifact. Without this, the custom-domain setting is only held in the repo's Pages API config — if it ever drifts (manual edit, org move, workflow change), the site reverts to .github.io with no record in source. Note: this does not fix the current site outage. The root cause is DNS — mempalaceofficial.com has no A/AAAA/CNAME records pointing at GitHub Pages IPs. That has to be fixed at the registrar. This commit is the belt-and-suspenders so that once DNS is back, the domain is pinned in source and the next workflow refactor can't accidentally drop it. --- website/public/CNAME | 1 + 1 file changed, 1 insertion(+) create mode 100644 website/public/CNAME diff --git a/website/public/CNAME b/website/public/CNAME new file mode 100644 index 0000000..3e353cb --- /dev/null +++ b/website/public/CNAME @@ -0,0 +1 @@ +mempalaceofficial.com From 625162edd34ae8fac126b0a1d7c059eaee8480cd Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Tue, 14 Apr 2026 11:50:00 -0300 Subject: [PATCH 42/42] docs: tighten SECURITY.md with real version policy and GHPVR-only channel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Builds on @Yorji-Porji's draft by fixing three issues before it lands: - Replace the `< 1.0.0` placeholder table with MemPalace's actual support policy: current major (3.x) receives fixes, 2.x and earlier do not. - Remove the `[Insert Maintainer Email Here]` placeholder and the email fallback. GitHub Private Vulnerability Reporting is enabled on this repo; the policy points there exclusively so there is no risk of a researcher emailing a dead address. - Drop the meta-note ("Adjust the table above…") that was an instruction to the maintainer, not policy text. Structure, triage timelines, and credit language are kept as drafted. --- SECURITY.md | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index 72f7bc4..42c0238 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -2,30 +2,32 @@ ## Supported Versions -Please check the table below for the supported versions that are currently receiving security updates. +MemPalace follows semantic versioning. Security fixes land on the current major version line. -| Version | Supported | -| ------- | ------------------ | -| `main` / `develop` | :white_check_mark: | -| `< 1.0.0` | :x: | - -*(Note: Adjust the table above to reflect MemPalace's actual release cycle)* +| Version | Supported | +| ------------------ | --------- | +| 3.x (current) | Yes | +| 2.x and earlier | No | ## Reporting a Vulnerability **Please do not report security vulnerabilities through public GitHub issues.** -We take the security of MemPalace seriously. If you believe you have found a security vulnerability, please report it to us privately using one of the following methods: +We take the security of MemPalace seriously. If you believe you have found a security vulnerability, please report it privately using **GitHub Private Vulnerability Reporting**: -1. **GitHub Private Vulnerability Reporting:** Navigate to the "Security" tab in this repository, click on "Advisories," and select "Report a vulnerability." -2. **Direct Contact:** If private reporting is not enabled, please email the core maintainers directly at `[Insert Maintainer Email Here]`. +1. Open the [Security tab](https://github.com/MemPalace/mempalace/security) of this repository. +2. Click **Advisories** → **Report a vulnerability**. +3. Fill in the form with the details below. -### What to include in your report: -* A descriptive summary of the vulnerability. -* Detailed steps to reproduce the issue (including any proof-of-concept scripts or specific file paths). -* The potential impact and severity of the vulnerability. +### What to include in your report -### What to expect: -* We aim to acknowledge receipt of your vulnerability report within 48 hours. -* We will triage the issue and keep you updated on our progress toward a patch. -* Once the vulnerability is resolved and an update is released, we will publish a security advisory and credit you for the discovery (if you wish to be credited). +- A descriptive summary of the vulnerability. +- Detailed steps to reproduce the issue (including any proof-of-concept scripts or specific file paths). +- The affected version(s) and platform(s). +- The potential impact and severity. + +### What to expect + +- We aim to acknowledge receipt within 48 hours. +- We will triage the issue and keep you updated on progress toward a patch. +- Once the vulnerability is resolved and an update is released, we will publish a security advisory and credit you for the discovery (if you wish to be credited).