Merge pull request #119 from milla-jovovich/fix/repair-split-rooms

fix: repair command, split args, Claude export, room keywords
This commit is contained in:
Ben Sigman
2026-04-07 12:03:49 -07:00
committed by GitHub
3 changed files with 110 additions and 3 deletions
+79 -1
View File
@@ -124,7 +124,7 @@ def cmd_split(args):
import sys
# Rebuild argv for split_mega_files argparse
argv = [args.dir]
argv = ["--source", args.dir]
if args.output_dir:
argv += ["--output-dir", args.output_dir]
if args.dry_run:
@@ -147,6 +147,77 @@ def cmd_status(args):
status(palace_path=palace_path)
def cmd_repair(args):
"""Rebuild palace vector index from SQLite metadata."""
import chromadb
import shutil
palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path
if not os.path.isdir(palace_path):
print(f"\n No palace found at {palace_path}")
return
print(f"\n{'=' * 55}")
print(" MemPalace Repair")
print(f"{'=' * 55}\n")
print(f" Palace: {palace_path}")
# Try to read existing drawers
try:
client = chromadb.PersistentClient(path=palace_path)
col = client.get_collection("mempalace_drawers")
total = col.count()
print(f" Drawers found: {total}")
except Exception as e:
print(f" Error reading palace: {e}")
print(" Cannot recover — palace may need to be re-mined from source files.")
return
if total == 0:
print(" Nothing to repair.")
return
# Extract all drawers in batches
print("\n Extracting drawers...")
batch_size = 5000
all_ids = []
all_docs = []
all_metas = []
offset = 0
while offset < total:
batch = col.get(limit=batch_size, offset=offset, include=["documents", "metadatas"])
all_ids.extend(batch["ids"])
all_docs.extend(batch["documents"])
all_metas.extend(batch["metadatas"])
offset += batch_size
print(f" Extracted {len(all_ids)} drawers")
# Backup and rebuild
backup_path = palace_path + ".backup"
if os.path.exists(backup_path):
shutil.rmtree(backup_path)
print(f" Backing up to {backup_path}...")
shutil.copytree(palace_path, backup_path)
print(" Rebuilding collection...")
client.delete_collection("mempalace_drawers")
new_col = client.create_collection("mempalace_drawers")
filed = 0
for i in range(0, len(all_ids), batch_size):
batch_ids = all_ids[i : i + batch_size]
batch_docs = all_docs[i : i + batch_size]
batch_metas = all_metas[i : i + batch_size]
new_col.add(documents=batch_docs, ids=batch_ids, metadatas=batch_metas)
filed += len(batch_ids)
print(f" Re-filed {filed}/{len(all_ids)} drawers...")
print(f"\n Repair complete. {filed} drawers rebuilt.")
print(f" Backup saved at {backup_path}")
print(f"\n{'=' * 55}\n")
def cmd_compress(args):
"""Compress drawers in a wing using AAAK Dialect."""
import chromadb
@@ -350,6 +421,12 @@ def main():
help="Only split files containing at least N sessions (default: 2)",
)
# repair
sub.add_parser(
"repair",
help="Rebuild palace vector index from stored data (fixes segfaults after corruption)",
)
# status
sub.add_parser("status", help="Show what's been filed")
@@ -366,6 +443,7 @@ def main():
"search": cmd_search,
"compress": cmd_compress,
"wake-up": cmd_wakeup,
"repair": cmd_repair,
"status": cmd_status,
}
dispatch[args.command](args)
+23 -1
View File
@@ -95,11 +95,33 @@ def _try_claude_code_jsonl(content: str) -> Optional[str]:
def _try_claude_ai_json(data) -> Optional[str]:
"""Claude.ai JSON export: [{"role": "user", "content": "..."}]"""
"""Claude.ai JSON export: flat messages list or privacy export with chat_messages."""
if isinstance(data, dict):
data = data.get("messages", data.get("chat_messages", []))
if not isinstance(data, list):
return None
# Privacy export: array of conversation objects with chat_messages inside each
if data and isinstance(data[0], dict) and "chat_messages" in data[0]:
all_messages = []
for convo in data:
if not isinstance(convo, dict):
continue
chat_msgs = convo.get("chat_messages", [])
for item in chat_msgs:
if not isinstance(item, dict):
continue
role = item.get("role", "")
text = _extract_content(item.get("content", ""))
if role in ("user", "human") and text:
all_messages.append(("user", text))
elif role in ("assistant", "ai") and text:
all_messages.append(("assistant", text))
if len(all_messages) >= 2:
return _messages_to_transcript(all_messages)
return None
# Flat messages list
messages = []
for item in data:
if not isinstance(item, dict):
+8 -1
View File
@@ -255,7 +255,14 @@ def get_user_approval(rooms: list) -> list:
def save_config(project_dir: str, project_name: str, rooms: list):
config = {
"wing": project_name,
"rooms": [{"name": r["name"], "description": r["description"]} for r in rooms],
"rooms": [
{
"name": r["name"],
"description": r["description"],
"keywords": r.get("keywords", [r["name"]]),
}
for r in rooms
],
}
config_path = Path(project_dir).expanduser().resolve() / "mempalace.yaml"
with open(config_path, "w") as f: