Merge pull request #119 from milla-jovovich/fix/repair-split-rooms
fix: repair command, split args, Claude export, room keywords
This commit is contained in:
+79
-1
@@ -124,7 +124,7 @@ def cmd_split(args):
|
||||
import sys
|
||||
|
||||
# Rebuild argv for split_mega_files argparse
|
||||
argv = [args.dir]
|
||||
argv = ["--source", args.dir]
|
||||
if args.output_dir:
|
||||
argv += ["--output-dir", args.output_dir]
|
||||
if args.dry_run:
|
||||
@@ -147,6 +147,77 @@ def cmd_status(args):
|
||||
status(palace_path=palace_path)
|
||||
|
||||
|
||||
def cmd_repair(args):
    """Rebuild the ``mempalace_drawers`` collection from its stored drawers.

    Reads every drawer (id, document, metadata) out of the existing
    collection, copies the palace directory to ``<palace>.backup``, then
    deletes the collection and re-adds the drawers to a fresh one. Only
    documents, ids and metadatas are passed to ``add``; embeddings are not
    carried over (presumably chromadb recomputes them on insert -- confirm
    against the embedding function the palace was created with).

    Args:
        args: Parsed CLI namespace. ``args.palace`` optionally overrides the
            palace directory; otherwise ``MempalaceConfig().palace_path`` is
            used.

    Returns:
        None. Progress and errors are reported on stdout.

    Raises:
        Exception: Any error raised while backing up or rebuilding is
            propagated after a message pointing at the backup is printed.
    """
    import shutil

    palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path

    if not os.path.isdir(palace_path):
        print(f"\n No palace found at {palace_path}")
        return

    # Imported only once we know there is a palace to open, so the cheap
    # "nothing here" path does not pay for (or depend on) the chromadb import.
    import chromadb

    collection_name = "mempalace_drawers"
    banner = "=" * 55

    print(f"\n{banner}")
    print(" MemPalace Repair")
    print(f"{banner}\n")
    print(f" Palace: {palace_path}")

    # Try to read existing drawers
    try:
        client = chromadb.PersistentClient(path=palace_path)
        col = client.get_collection(collection_name)
        total = col.count()
        print(f" Drawers found: {total}")
    except Exception as e:
        print(f" Error reading palace: {e}")
        print(" Cannot recover — palace may need to be re-mined from source files.")
        return

    if total == 0:
        print(" Nothing to repair.")
        return

    # Extract all drawers in batches
    print("\n Extracting drawers...")
    batch_size = 5000
    all_ids = []
    all_docs = []
    all_metas = []
    offset = 0
    while offset < total:
        batch = col.get(limit=batch_size, offset=offset, include=["documents", "metadatas"])
        if not batch["ids"]:
            # The store returned fewer rows than count() promised; further
            # offsets cannot yield anything either.
            break
        all_ids.extend(batch["ids"])
        all_docs.extend(batch["documents"])
        all_metas.extend(batch["metadatas"])
        offset += batch_size
    print(f" Extracted {len(all_ids)} drawers")

    if not all_ids:
        # Rebuilding from an empty extraction would just wipe the collection.
        print(" No drawers could be extracted — leaving the palace untouched.")
        return
    if len(all_ids) != total:
        print(f" Warning: expected {total} drawers but extracted {len(all_ids)}.")

    # Backup and rebuild. normpath drops a trailing separator so the backup
    # is created next to the palace rather than inside it.
    backup_path = os.path.normpath(palace_path) + ".backup"
    if os.path.exists(backup_path):
        shutil.rmtree(backup_path)
    print(f" Backing up to {backup_path}...")
    shutil.copytree(palace_path, backup_path)

    print(" Rebuilding collection...")
    filed = 0
    try:
        client.delete_collection(collection_name)
        new_col = client.create_collection(collection_name)

        for start in range(0, len(all_ids), batch_size):
            stop = start + batch_size
            batch_ids = all_ids[start:stop]
            new_col.add(
                documents=all_docs[start:stop],
                ids=batch_ids,
                metadatas=all_metas[start:stop],
            )
            filed += len(batch_ids)
            print(f" Re-filed {filed}/{len(all_ids)} drawers...")
    except Exception:
        # The old collection may already be gone; tell the user where the
        # untouched copy lives before surfacing the original error.
        print(f"\n Repair failed after re-filing {filed}/{len(all_ids)} drawers.")
        print(f" The original palace is preserved at {backup_path}")
        raise

    print(f"\n Repair complete. {filed} drawers rebuilt.")
    print(f" Backup saved at {backup_path}")
    print(f"\n{banner}\n")
|
||||
|
||||
|
||||
def cmd_compress(args):
|
||||
"""Compress drawers in a wing using AAAK Dialect."""
|
||||
import chromadb
|
||||
@@ -350,6 +421,12 @@ def main():
|
||||
help="Only split files containing at least N sessions (default: 2)",
|
||||
)
|
||||
|
||||
# repair
|
||||
sub.add_parser(
|
||||
"repair",
|
||||
help="Rebuild palace vector index from stored data (fixes segfaults after corruption)",
|
||||
)
|
||||
|
||||
# status
|
||||
sub.add_parser("status", help="Show what's been filed")
|
||||
|
||||
@@ -366,6 +443,7 @@ def main():
|
||||
"search": cmd_search,
|
||||
"compress": cmd_compress,
|
||||
"wake-up": cmd_wakeup,
|
||||
"repair": cmd_repair,
|
||||
"status": cmd_status,
|
||||
}
|
||||
dispatch[args.command](args)
|
||||
|
||||
+23
-1
@@ -95,11 +95,33 @@ def _try_claude_code_jsonl(content: str) -> Optional[str]:
|
||||
|
||||
|
||||
def _try_claude_ai_json(data) -> Optional[str]:
|
||||
"""Claude.ai JSON export: [{"role": "user", "content": "..."}]"""
|
||||
"""Claude.ai JSON export: flat messages list or privacy export with chat_messages."""
|
||||
if isinstance(data, dict):
|
||||
data = data.get("messages", data.get("chat_messages", []))
|
||||
if not isinstance(data, list):
|
||||
return None
|
||||
|
||||
# Privacy export: array of conversation objects with chat_messages inside each
|
||||
if data and isinstance(data[0], dict) and "chat_messages" in data[0]:
|
||||
all_messages = []
|
||||
for convo in data:
|
||||
if not isinstance(convo, dict):
|
||||
continue
|
||||
chat_msgs = convo.get("chat_messages", [])
|
||||
for item in chat_msgs:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
role = item.get("role", "")
|
||||
text = _extract_content(item.get("content", ""))
|
||||
if role in ("user", "human") and text:
|
||||
all_messages.append(("user", text))
|
||||
elif role in ("assistant", "ai") and text:
|
||||
all_messages.append(("assistant", text))
|
||||
if len(all_messages) >= 2:
|
||||
return _messages_to_transcript(all_messages)
|
||||
return None
|
||||
|
||||
# Flat messages list
|
||||
messages = []
|
||||
for item in data:
|
||||
if not isinstance(item, dict):
|
||||
|
||||
@@ -255,7 +255,14 @@ def get_user_approval(rooms: list) -> list:
|
||||
def save_config(project_dir: str, project_name: str, rooms: list):
|
||||
config = {
|
||||
"wing": project_name,
|
||||
"rooms": [{"name": r["name"], "description": r["description"]} for r in rooms],
|
||||
"rooms": [
|
||||
{
|
||||
"name": r["name"],
|
||||
"description": r["description"],
|
||||
"keywords": r.get("keywords", [r["name"]]),
|
||||
}
|
||||
for r in rooms
|
||||
],
|
||||
}
|
||||
config_path = Path(project_dir).expanduser().resolve() / "mempalace.yaml"
|
||||
with open(config_path, "w") as f:
|
||||
|
||||
Reference in New Issue
Block a user