Files
mempalace/mempalace/palace.py
T
Igor Lins e Silva 28e263748b merge: develop (#784 file-locking, #820 version sync)
Non-trivial merge in convo_miner.py: this branch's _file_convo_chunks
(purge stale + upsert with normalize_version) and develop's
_file_chunks_locked (mine_lock + double-checked file_already_mined)
both touched the same critical section. Combined into a single
_file_chunks_locked helper that does lock → double-check → purge →
upsert, preserving both the multi-agent safety guarantee from #784
and the schema-rebuild contract from this PR.

Also folds develop's mine_lock import into both miner.py and
convo_miner.py alongside NORMALIZE_VERSION.

707/707 tests pass, ruff + format clean under CI-pinned 0.4.x.
2026-04-13 16:29:50 -03:00

135 lines
3.9 KiB
Python

"""
palace.py — Shared palace operations.
Consolidates collection access patterns used by both miners and the MCP server.
"""
import contextlib
import hashlib
import os
from .backends.chroma import ChromaBackend
# Directory names collected for skipping (the code that consumes this set
# lives outside this module). Grouped by purpose; order is irrelevant in a set.
SKIP_DIRS = {
    # Version control
    ".git",
    # Installed dependencies and virtual environments
    "node_modules",
    ".venv",
    "venv",
    "env",
    ".eggs",
    # Build output
    "dist",
    "build",
    ".next",
    "target",
    # Tool caches
    "__pycache__",
    ".ruff_cache",
    ".mypy_cache",
    ".pytest_cache",
    ".cache",
    ".ipynb_checkpoints",
    # Test runners and coverage reports
    ".tox",
    ".nox",
    "coverage",
    "htmlcov",
    # Editor / IDE state
    ".idea",
    ".vscode",
    # MemPalace's own data directory
    ".mempalace",
}
# Single backend instance, created once at import time and used by every
# `get_collection` call in this module.
_DEFAULT_BACKEND = ChromaBackend()
# Schema version for drawer normalization. Bump when the normalization
# pipeline changes in a way that existing drawers should be rebuilt to pick up
# (e.g., new noise-stripping rules). `file_already_mined` treats drawers with
# a missing or stale `normalize_version` as "not mined", so the next mine pass
# silently rebuilds them — users don't need to manually erase + re-mine.
# A drawer with no stored version is read as version 1, so it is always
# older than any bumped value.
#
# v2 (2026-04): introduced strip_noise() for Claude Code JSONL; previous
# drawers stored system tags / hook chrome verbatim.
NORMALIZE_VERSION = 2
def get_collection(
    palace_path: str,
    collection_name: str = "mempalace_drawers",
    create: bool = True,
):
    """Fetch a palace collection via the module's default backend.

    Thin forwarding wrapper: ``palace_path`` is passed positionally and
    ``collection_name`` / ``create`` are passed by keyword, unchanged, to the
    backend's own ``get_collection``. Whatever that call returns (or raises)
    is returned (or propagated) as-is.

    Args:
        palace_path: Location of the palace, forwarded to the backend.
        collection_name: Name of the collection to fetch.
        create: Forwarded to the backend as ``create`` — presumably controls
            whether a missing collection is created; confirm against the
            backend implementation.
    """
    forwarded = {"collection_name": collection_name, "create": create}
    return _DEFAULT_BACKEND.get_collection(palace_path, **forwarded)
@contextlib.contextmanager
def mine_lock(source_file: str):
    """Hold an exclusive cross-process lock while mining ``source_file``.

    Prevents multiple agents from mining the same file simultaneously,
    which causes duplicate drawers when the delete+insert cycle interleaves.

    The lock is a file under ``~/.mempalace/locks`` named after the first
    16 hex digits of the SHA-256 of ``source_file``. The path string is
    hashed exactly as given (no normalization), so callers must use the same
    spelling of a path to contend on the same lock. Lock files are left on
    disk after release.

    Args:
        source_file: Path of the file about to be mined.

    Yields:
        None, once the exclusive lock is held. The lock is released and the
        lock file closed when the ``with`` block exits, including on error.

    Raises:
        OSError: If the lock directory or lock file cannot be created, or the
            lock cannot be taken for a reason other than contention.
    """
    lock_dir = os.path.join(os.path.expanduser("~"), ".mempalace", "locks")
    os.makedirs(lock_dir, exist_ok=True)

    # Paths that come from the OS may contain lone surrogates (undecodable
    # filename bytes); a plain .encode() raises UnicodeEncodeError on those.
    # "surrogatepass" yields the same bytes as before for every other string,
    # so existing lock names are unchanged.
    path_bytes = source_file.encode("utf-8", "surrogatepass")
    lock_name = hashlib.sha256(path_bytes).hexdigest()[:16] + ".lock"
    lock_path = os.path.join(lock_dir, lock_name)

    with open(lock_path, "w") as lf:
        try:
            if os.name == "nt":
                import errno
                import msvcrt

                # LK_LOCK retries once per second and raises OSError
                # (EDEADLOCK) after 10 failed attempts. Keep waiting so
                # Windows blocks until the lock is free, like flock() does.
                while True:
                    try:
                        msvcrt.locking(lf.fileno(), msvcrt.LK_LOCK, 1)
                        break
                    except OSError as exc:
                        if exc.errno != errno.EDEADLOCK:
                            raise
            else:
                import fcntl

                fcntl.flock(lf, fcntl.LOCK_EX)
            yield
        finally:
            # Best-effort explicit unlock; closing the file (on leaving the
            # `with`) drops the lock regardless, so failures here are ignored.
            with contextlib.suppress(Exception):
                if os.name == "nt":
                    import msvcrt

                    msvcrt.locking(lf.fileno(), msvcrt.LK_UNLCK, 1)
                else:
                    import fcntl

                    fcntl.flock(lf, fcntl.LOCK_UN)
def file_already_mined(collection, source_file: str, check_mtime: bool = False) -> bool:
    """Report whether ``source_file`` already has up-to-date drawers.

    One drawer whose ``source_file`` metadata matches is fetched and inspected.
    The answer is False (meaning: mine it again) when

    - the lookup returns no ids,
    - that drawer's ``normalize_version`` is absent or below
      ``NORMALIZE_VERSION`` (silent rebuild after a normalization upgrade),
    - ``check_mtime`` is set and the drawer has no ``source_mtime``, or it
      differs from the file's current mtime by 0.001s or more,
    - anything raises along the way (lookup failure, unreadable file,
      malformed metadata).

    With ``check_mtime=True`` (used by the project miner) a content change
    triggers re-mining. With ``check_mtime=False`` (used by the convo miner)
    transcripts are assumed immutable, so only the version gate applies.

    Args:
        collection: Object exposing ``get(where=..., limit=...)`` that returns
            a mapping with ``"ids"`` and ``"metadatas"`` entries.
        source_file: Path used both as the metadata filter and, when
            ``check_mtime`` is set, as the file whose mtime is read.
        check_mtime: Also require the stored mtime to match the file on disk.

    Returns:
        True only if the sampled drawer passes every applicable check.
    """
    try:
        hit = collection.get(where={"source_file": source_file}, limit=1)
        if not hit.get("ids"):
            return False

        meta = hit.get("metadatas", [{}])[0] or {}

        # Drawers written before v2 carry no version field; reading them as
        # version 1 makes them stale.
        if meta.get("normalize_version", 1) < NORMALIZE_VERSION:
            return False

        if not check_mtime:
            return True

        recorded_mtime = meta.get("source_mtime")
        if recorded_mtime is None:
            return False

        drift = float(recorded_mtime) - os.path.getmtime(source_file)
        return abs(drift) < 0.001
    except Exception:
        # Any failure is treated as "not mined" so the caller re-mines.
        return False