From b68485dfd4673848bcd44064a0c8764d89f6b48a Mon Sep 17 00:00:00 2001 From: Anthony Clendenen Date: Thu, 23 Apr 2026 13:33:28 -0700 Subject: [PATCH 1/2] fix(closet_llm): reject non-http(s) endpoints LLMConfig accepted any URL scheme from LLM_ENDPOINT / --endpoint, so a misconfigured endpoint such as file:///etc/passwd would be passed straight to urllib.request.urlopen. Validate the scheme at construction time and raise ValueError on anything other than http/https, preserving the "privacy by architecture" guarantee. Co-Authored-By: Claude Opus 4.7 (1M context) --- mempalace/closet_llm.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mempalace/closet_llm.py b/mempalace/closet_llm.py index 6274f79..50000c8 100644 --- a/mempalace/closet_llm.py +++ b/mempalace/closet_llm.py @@ -40,6 +40,7 @@ import json import os import re import time +import urllib.parse import urllib.request import urllib.error from datetime import datetime @@ -101,6 +102,14 @@ class LLMConfig: self.endpoint = (endpoint or os.environ.get("LLM_ENDPOINT", "")).rstrip("/") self.key = key or os.environ.get("LLM_KEY", "") self.model = model or os.environ.get("LLM_MODEL", "") + if self.endpoint: + # Privacy-by-architecture: reject file:// and other non-HTTP schemes + # so a misconfigured endpoint cannot exfiltrate local files. + scheme = urllib.parse.urlparse(self.endpoint).scheme.lower() + if scheme not in ("http", "https"): + raise ValueError( + f"LLM_ENDPOINT must use http:// or https:// (got scheme {scheme!r})" + ) def missing(self) -> list: missing = [] From ca5899e361a1bc8823145f6d1efad22f22639409 Mon Sep 17 00:00:00 2001 From: Anthony Clendenen Date: Thu, 23 Apr 2026 13:33:38 -0700 Subject: [PATCH 2/2] refactor: fix ruff bugbear and silent-except findings - B904: chain OSError/collection errors with "raise ... from e" in normalize.py and searcher.py so the original traceback is preserved. - B007: rename unused loop variables to _name in dedup, dialect, layers, and room_detector_local. 
- S110/S112: replace silent "try/except/pass" and "try/except/continue" with logger.debug(..., exc_info=True) in mcp_server, searcher, palace, palace_graph, miner, convo_miner, and fact_checker so background failures are observable without changing behaviour. A module-level logger ("mempalace_mcp", matching mcp_server/searcher) is added to the four files that didn't already have one. Configured ruff checks (E/F/W/C901) and ruff --select B, S110, S112 all pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- mempalace/convo_miner.py | 5 ++++- mempalace/dedup.py | 2 +- mempalace/dialect.py | 2 +- mempalace/fact_checker.py | 4 ++++ mempalace/layers.py | 2 +- mempalace/mcp_server.py | 4 ++-- mempalace/miner.py | 5 ++++- mempalace/normalize.py | 4 ++-- mempalace/palace.py | 7 +++++-- mempalace/palace_graph.py | 2 +- mempalace/room_detector_local.py | 2 +- mempalace/searcher.py | 10 ++++++---- 12 files changed, 32 insertions(+), 17 deletions(-) diff --git a/mempalace/convo_miner.py b/mempalace/convo_miner.py index 2cf57e4..915b4d1 100644 --- a/mempalace/convo_miner.py +++ b/mempalace/convo_miner.py @@ -11,6 +11,7 @@ Same palace as project mining. Different ingest strategy. import os import sys import hashlib +import logging from pathlib import Path from datetime import datetime from collections import defaultdict @@ -24,6 +25,8 @@ from .palace import ( mine_lock, ) +logger = logging.getLogger("mempalace_mcp") + # Cached hall keywords — avoids re-reading config per drawer _HALL_KEYWORDS_CACHE = None @@ -331,7 +334,7 @@ def _file_chunks_locked(collection, source_file, chunks, wing, room, agent, extr try: collection.delete(where={"source_file": source_file}) except Exception: - pass + logger.debug("Stale-drawer purge failed for %s", source_file, exc_info=True) # Batch chunks into bounded upserts so large transcripts keep most of # the embedding speedup without one huge Chroma/SQLite request.
Keep diff --git a/mempalace/dedup.py b/mempalace/dedup.py index 6b1bac1..5e57aff 100644 --- a/mempalace/dedup.py +++ b/mempalace/dedup.py @@ -89,7 +89,7 @@ def dedup_source_group(col, drawer_ids, threshold=DEFAULT_THRESHOLD, dry_run=Tru kept = [] to_delete = [] - for did, doc, meta in items: + for did, doc, _meta in items: if not doc or len(doc) < 20: to_delete.append(did) continue diff --git a/mempalace/dialect.py b/mempalace/dialect.py index b72c52c..e6e214c 100644 --- a/mempalace/dialect.py +++ b/mempalace/dialect.py @@ -873,7 +873,7 @@ class Dialect: for date_key in sorted(by_date.keys()): lines.append(f"=MOMENTS[{date_key}]=") - for z, fnum in by_date[date_key]: + for z, _fnum in by_date[date_key]: entities = [] for p in z.get("people", []): code = self.encode_entity(p) diff --git a/mempalace/fact_checker.py b/mempalace/fact_checker.py index 403d913..8f1c3ba 100644 --- a/mempalace/fact_checker.py +++ b/mempalace/fact_checker.py @@ -27,6 +27,7 @@ Usage: from __future__ import annotations +import logging import os import re from datetime import datetime, timezone @@ -35,6 +36,8 @@ from datetime import datetime, timezone # ~/.mempalace/known_entities.json on every check_text call. from .miner import _load_known_entities_raw +logger = logging.getLogger("mempalace_mcp") + # Narrow detection patterns — parse "X is Y's Z" and "X's Z is Y". 
# Names are captured greedily as word sequences (letters + optional @@ -214,6 +217,7 @@ def _check_kg_contradictions(text: str, palace_path: str) -> list: try: facts = kg.query_entity(subject, direction="outgoing") except Exception: + logger.debug("KG lookup failed for subject %r", subject, exc_info=True) continue if not facts: continue diff --git a/mempalace/layers.py b/mempalace/layers.py index d549afe..b92890a 100644 --- a/mempalace/layers.py +++ b/mempalace/layers.py @@ -157,7 +157,7 @@ class Layer1: lines.append(room_line) total_len += len(room_line) - for imp, meta, doc in entries: + for _imp, meta, doc in entries: source = Path(meta.get("source_file", "")).name if meta.get("source_file") else "" # Truncate doc to keep L1 compact diff --git a/mempalace/mcp_server.py b/mempalace/mcp_server.py index 46982bb..58f9ba9 100644 --- a/mempalace/mcp_server.py +++ b/mempalace/mcp_server.py @@ -900,7 +900,7 @@ def tool_add_drawer( if existing and existing["ids"]: return {"success": True, "reason": "already_exists", "drawer_id": drawer_id} except Exception: - pass + logger.debug("Idempotency pre-check failed for %s", drawer_id, exc_info=True) try: col.upsert( @@ -1418,7 +1418,7 @@ def tool_hook_settings(silent_save: bool = None, desktop_toast: bool = None): try: config = MempalaceConfig() except Exception: - pass + logger.debug("Could not re-read config after update", exc_info=True) result = { "success": True, diff --git a/mempalace/miner.py b/mempalace/miner.py index ba0c630..88734c9 100644 --- a/mempalace/miner.py +++ b/mempalace/miner.py @@ -12,6 +12,7 @@ import sys import shlex import hashlib import fnmatch +import logging from pathlib import Path from datetime import datetime from collections import defaultdict @@ -31,6 +32,8 @@ from .palace import ( upsert_closet_lines, ) +logger = logging.getLogger("mempalace_mcp") + READABLE_EXTENSIONS = { ".txt", ".md", @@ -842,7 +845,7 @@ def process_file( try: collection.delete(where={"source_file": source_file}) except 
Exception: - pass + logger.debug("Stale-drawer purge failed for %s", source_file, exc_info=True) # Batch chunks into bounded upserts so the embedding model sees many # chunks per forward pass without building one huge Chroma/SQLite diff --git a/mempalace/normalize.py b/mempalace/normalize.py index 4252afa..ca62cca 100644 --- a/mempalace/normalize.py +++ b/mempalace/normalize.py @@ -118,14 +118,14 @@ def normalize(filepath: str) -> str: try: file_size = os.path.getsize(filepath) except OSError as e: - raise IOError(f"Could not read {filepath}: {e}") + raise IOError(f"Could not read {filepath}: {e}") from e if file_size > 500 * 1024 * 1024: # 500 MB safety limit raise IOError(f"File too large ({file_size // (1024 * 1024)} MB): {filepath}") try: with open(filepath, "r", encoding="utf-8", errors="replace") as f: content = f.read() except OSError as e: - raise IOError(f"Could not read {filepath}: {e}") + raise IOError(f"Could not read {filepath}: {e}") from e if not content.strip(): return content diff --git a/mempalace/palace.py b/mempalace/palace.py index 97f67ff..e5f6411 100644 --- a/mempalace/palace.py +++ b/mempalace/palace.py @@ -6,12 +6,15 @@ Consolidates collection access patterns used by both miners and the MCP server. 
import contextlib import hashlib +import logging import os import re import threading from .backends.chroma import ChromaBackend +logger = logging.getLogger("mempalace_mcp") + SKIP_DIRS = { ".git", "node_modules", @@ -229,7 +232,7 @@ def purge_file_closets(closets_col, source_file: str) -> None: try: closets_col.delete(where={"source_file": source_file}) except Exception: - pass + logger.debug("Closet purge failed for %s", source_file, exc_info=True) def upsert_closet_lines(closets_col, closet_id_base, lines, metadata): @@ -307,7 +310,7 @@ def mine_lock(source_file: str): fcntl.flock(lf, fcntl.LOCK_UN) except Exception: - pass + logger.debug("Mine-lock release failed", exc_info=True) lf.close() diff --git a/mempalace/palace_graph.py b/mempalace/palace_graph.py index 3296cd5..0fff763 100644 --- a/mempalace/palace_graph.py +++ b/mempalace/palace_graph.py @@ -575,7 +575,7 @@ def follow_tunnels(wing: str, room: str, col=None, config=None): if did and did in drawer_map: c["drawer_preview"] = drawer_map[did][:300] except Exception: - pass + logger.debug("Drawer preview hydration failed", exc_info=True) return connections diff --git a/mempalace/room_detector_local.py b/mempalace/room_detector_local.py index 31d5b05..8e3fc20 100644 --- a/mempalace/room_detector_local.py +++ b/mempalace/room_detector_local.py @@ -202,7 +202,7 @@ def detect_rooms_from_files(project_dir: str) -> list: SKIP_DIRS = {".git", "node_modules", "__pycache__", ".venv", "venv", "dist", "build"} - for root, dirs, filenames in os.walk(project_path): + for _root, dirs, filenames in os.walk(project_path): dirs[:] = [d for d in dirs if d not in SKIP_DIRS] for filename in filenames: name_lower = filename.lower().replace("-", "_").replace(" ", "_") diff --git a/mempalace/searcher.py b/mempalace/searcher.py index ddddc46..536610e 100644 --- a/mempalace/searcher.py +++ b/mempalace/searcher.py @@ -245,7 +245,7 @@ def _expand_with_neighbors(drawers_col, matched_doc: str, matched_meta: dict, ra all_meta = 
drawers_col.get(where={"source_file": src}, include=["metadatas"]) total_drawers = len(all_meta.ids) if all_meta.ids else None except Exception: - pass + logger.debug("total_drawers lookup failed for %s", src, exc_info=True) return { "text": combined_text, @@ -297,10 +297,10 @@ def search(query: str, palace_path: str, wing: str = None, room: str = None, n_r """ try: col = get_collection(palace_path, create=False) - except Exception: + except Exception as e: print(f"\n No palace found at {palace_path}") print(" Run: mempalace init then mempalace mine ") - raise SearchError(f"No palace found at {palace_path}") + raise SearchError(f"No palace found at {palace_path}") from e # Alert the user if this palace predates hnsw:space=cosine being set on # creation — their similarity scores will be junk until they run repair. @@ -795,7 +795,8 @@ def search_memories( if source and source not in closet_boost_by_source: closet_boost_by_source[source] = (rank, cdist, cdoc[:200]) except Exception: - pass # no closets yet — hybrid degrades to pure drawer search + # No closets yet — hybrid degrades to pure drawer search. + logger.debug("Closet collection unavailable; using drawer-only search", exc_info=True) # Rank-based boost. The ordinal signal ("which closet matched best") is # more reliable than absolute distance on narrative content, where @@ -877,6 +878,7 @@ def search_memories( include=["documents", "metadatas"], ) except Exception: + logger.debug("Neighbor fetch failed for %s", full_source, exc_info=True) continue docs = source_drawers.documents metas_ = source_drawers.metadatas