fix: repair command, split args, Claude export, room keywords
- Add `mempalace repair` command to rebuild the vector index from SQLite when HNSW files are corrupted after a crash/interrupt (fixes #74, #72, #96)
- Fix split command passing dir as a positional argument instead of via the `--source` flag to split_mega_files (fixes #63)
- Handle the Claude privacy export format (array of conversation objects with chat_messages inside each) in normalize.py (fixes #63)
- Persist room keywords in mempalace.yaml so mine can match files in docs/ to room "documentation" (fixes #108)
This commit is contained in:
+79
-1
@@ -124,7 +124,7 @@ def cmd_split(args):
|
|||||||
import sys
|
import sys
|
||||||
|
|
||||||
# Rebuild argv for split_mega_files argparse
|
# Rebuild argv for split_mega_files argparse
|
||||||
argv = [args.dir]
|
argv = ["--source", args.dir]
|
||||||
if args.output_dir:
|
if args.output_dir:
|
||||||
argv += ["--output-dir", args.output_dir]
|
argv += ["--output-dir", args.output_dir]
|
||||||
if args.dry_run:
|
if args.dry_run:
|
||||||
@@ -147,6 +147,77 @@ def cmd_status(args):
|
|||||||
status(palace_path=palace_path)
|
status(palace_path=palace_path)
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_repair(args):
    """Rebuild the ``mempalace_drawers`` collection from its stored drawers.

    Every drawer (id, document, metadata) is read out of the existing
    collection in batches, the palace directory is copied to a sibling
    ``<palace>.backup`` directory, and the collection is then deleted,
    recreated and refilled with the extracted drawers.

    Args:
        args: Parsed CLI namespace. ``args.palace`` optionally overrides the
            palace path taken from ``MempalaceConfig``.

    Returns:
        None. Progress and failures are reported on stdout.
    """
    import shutil

    palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path

    if not os.path.isdir(palace_path):
        print(f"\n No palace found at {palace_path}")
        return

    # Imported only after the cheap directory check so that a missing palace
    # is reported as such even when chromadb itself cannot be imported.
    import chromadb

    print(f"\n{'=' * 55}")
    print(" MemPalace Repair")
    print(f"{'=' * 55}\n")
    print(f" Palace: {palace_path}")

    # Try to read existing drawers
    try:
        client = chromadb.PersistentClient(path=palace_path)
        col = client.get_collection("mempalace_drawers")
        total = col.count()
        print(f" Drawers found: {total}")
    except Exception as e:
        print(f" Error reading palace: {e}")
        print(" Cannot recover — palace may need to be re-mined from source files.")
        return

    if total == 0:
        print(" Nothing to repair.")
        return

    # Extract all drawers in batches
    print("\n Extracting drawers...")
    batch_size = 5000
    all_ids = []
    all_docs = []
    all_metas = []
    for offset in range(0, total, batch_size):
        batch = col.get(limit=batch_size, offset=offset, include=["documents", "metadatas"])
        all_ids.extend(batch["ids"])
        all_docs.extend(batch["documents"])
        all_metas.extend(batch["metadatas"])
    print(f" Extracted {len(all_ids)} drawers")

    # The rebuild below deletes the collection first; with nothing extracted
    # that would only replace the existing data with an empty collection.
    if not all_ids:
        print(" No drawers could be extracted — palace left untouched.")
        return

    # Backup and rebuild.
    # normpath drops any trailing separator: without it "palace/" + ".backup"
    # would name a directory *inside* the palace and copytree would end up
    # copying the palace into its own source tree.
    backup_path = os.path.normpath(palace_path) + ".backup"
    if os.path.exists(backup_path):
        shutil.rmtree(backup_path)
    print(f" Backing up to {backup_path}...")
    shutil.copytree(palace_path, backup_path)

    print(" Rebuilding collection...")
    client.delete_collection("mempalace_drawers")
    new_col = client.create_collection("mempalace_drawers")

    filed = 0
    for start in range(0, len(all_ids), batch_size):
        stop = start + batch_size
        batch_ids = all_ids[start:stop]
        # Only documents and metadata are passed on; no embeddings are supplied.
        new_col.add(
            documents=all_docs[start:stop],
            ids=batch_ids,
            metadatas=all_metas[start:stop],
        )
        filed += len(batch_ids)
        print(f" Re-filed {filed}/{len(all_ids)} drawers...")

    print(f"\n Repair complete. {filed} drawers rebuilt.")
    print(f" Backup saved at {backup_path}")
    print(f"\n{'=' * 55}\n")
|
||||||
|
|
||||||
|
|
||||||
def cmd_compress(args):
|
def cmd_compress(args):
|
||||||
"""Compress drawers in a wing using AAAK Dialect."""
|
"""Compress drawers in a wing using AAAK Dialect."""
|
||||||
import chromadb
|
import chromadb
|
||||||
@@ -350,6 +421,12 @@ def main():
|
|||||||
help="Only split files containing at least N sessions (default: 2)",
|
help="Only split files containing at least N sessions (default: 2)",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# repair
|
||||||
|
sub.add_parser(
|
||||||
|
"repair",
|
||||||
|
help="Rebuild palace vector index from stored data (fixes segfaults after corruption)",
|
||||||
|
)
|
||||||
|
|
||||||
# status
|
# status
|
||||||
sub.add_parser("status", help="Show what's been filed")
|
sub.add_parser("status", help="Show what's been filed")
|
||||||
|
|
||||||
@@ -366,6 +443,7 @@ def main():
|
|||||||
"search": cmd_search,
|
"search": cmd_search,
|
||||||
"compress": cmd_compress,
|
"compress": cmd_compress,
|
||||||
"wake-up": cmd_wakeup,
|
"wake-up": cmd_wakeup,
|
||||||
|
"repair": cmd_repair,
|
||||||
"status": cmd_status,
|
"status": cmd_status,
|
||||||
}
|
}
|
||||||
dispatch[args.command](args)
|
dispatch[args.command](args)
|
||||||
|
|||||||
+23
-1
@@ -95,11 +95,33 @@ def _try_claude_code_jsonl(content: str) -> Optional[str]:
|
|||||||
|
|
||||||
|
|
||||||
def _try_claude_ai_json(data) -> Optional[str]:
|
def _try_claude_ai_json(data) -> Optional[str]:
|
||||||
"""Claude.ai JSON export: [{"role": "user", "content": "..."}]"""
|
"""Claude.ai JSON export: flat messages list or privacy export with chat_messages."""
|
||||||
if isinstance(data, dict):
|
if isinstance(data, dict):
|
||||||
data = data.get("messages", data.get("chat_messages", []))
|
data = data.get("messages", data.get("chat_messages", []))
|
||||||
if not isinstance(data, list):
|
if not isinstance(data, list):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
# Privacy export: array of conversation objects with chat_messages inside each
|
||||||
|
if data and isinstance(data[0], dict) and "chat_messages" in data[0]:
|
||||||
|
all_messages = []
|
||||||
|
for convo in data:
|
||||||
|
if not isinstance(convo, dict):
|
||||||
|
continue
|
||||||
|
chat_msgs = convo.get("chat_messages", [])
|
||||||
|
for item in chat_msgs:
|
||||||
|
if not isinstance(item, dict):
|
||||||
|
continue
|
||||||
|
role = item.get("role", "")
|
||||||
|
text = _extract_content(item.get("content", ""))
|
||||||
|
if role in ("user", "human") and text:
|
||||||
|
all_messages.append(("user", text))
|
||||||
|
elif role in ("assistant", "ai") and text:
|
||||||
|
all_messages.append(("assistant", text))
|
||||||
|
if len(all_messages) >= 2:
|
||||||
|
return _messages_to_transcript(all_messages)
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Flat messages list
|
||||||
messages = []
|
messages = []
|
||||||
for item in data:
|
for item in data:
|
||||||
if not isinstance(item, dict):
|
if not isinstance(item, dict):
|
||||||
|
|||||||
@@ -255,7 +255,14 @@ def get_user_approval(rooms: list) -> list:
|
|||||||
def save_config(project_dir: str, project_name: str, rooms: list):
|
def save_config(project_dir: str, project_name: str, rooms: list):
|
||||||
config = {
|
config = {
|
||||||
"wing": project_name,
|
"wing": project_name,
|
||||||
"rooms": [{"name": r["name"], "description": r["description"]} for r in rooms],
|
"rooms": [
|
||||||
|
{
|
||||||
|
"name": r["name"],
|
||||||
|
"description": r["description"],
|
||||||
|
"keywords": r.get("keywords", [r["name"]]),
|
||||||
|
}
|
||||||
|
for r in rooms
|
||||||
|
],
|
||||||
}
|
}
|
||||||
config_path = Path(project_dir).expanduser().resolve() / "mempalace.yaml"
|
config_path = Path(project_dir).expanduser().resolve() / "mempalace.yaml"
|
||||||
with open(config_path, "w") as f:
|
with open(config_path, "w") as f:
|
||||||
|
|||||||
Reference in New Issue
Block a user