fix: resolve ruff lint and format errors across codebase

Fix E402 import ordering, F841 unused variable, F541 unnecessary
f-strings, F401 unused import, and auto-format 6 files.
This commit is contained in:
bensig
2026-04-04 18:37:17 -07:00
parent 0f8fa8c7d5
commit 6d8c462219
7 changed files with 333 additions and 200 deletions
+4 -5
View File
@@ -25,20 +25,19 @@ import os
import sys
import json
import shutil
import ssl
import tempfile
import argparse
import urllib.request
import ssl
# Bypass SSL for restricted environments
ssl._create_default_https_context = ssl._create_unverified_context
from pathlib import Path
from collections import defaultdict
from datetime import datetime
import chromadb
# Bypass SSL for restricted environments
ssl._create_default_https_context = ssl._create_unverified_context
sys.path.insert(0, str(Path(__file__).parent.parent))
HF_BASE = "https://huggingface.co/datasets/Salesforce/ConvoMem/resolve/main/core_benchmark/evidence_questions"
+7 -3
View File
@@ -334,15 +334,19 @@ def main():
)
p_split.add_argument("dir", help="Directory containing transcript files")
p_split.add_argument(
"--output-dir", default=None,
"--output-dir",
default=None,
help="Write split files here (default: same directory as source files)",
)
p_split.add_argument(
"--dry-run", action="store_true",
"--dry-run",
action="store_true",
help="Show what would be split without writing files",
)
p_split.add_argument(
"--min-sessions", type=int, default=2,
"--min-sessions",
type=int,
default=2,
help="Only split files containing at least N sessions (default: 2)",
)
+103 -69
View File
@@ -101,16 +101,23 @@ class KnowledgeGraph:
conn = self._conn()
conn.execute(
"INSERT OR REPLACE INTO entities (id, name, type, properties) VALUES (?, ?, ?, ?)",
(eid, name, entity_type, props)
(eid, name, entity_type, props),
)
conn.commit()
conn.close()
return eid
def add_triple(self, subject: str, predicate: str, obj: str,
valid_from: str = None, valid_to: str = None,
confidence: float = 1.0, source_closet: str = None,
source_file: str = None):
def add_triple(
self,
subject: str,
predicate: str,
obj: str,
valid_from: str = None,
valid_to: str = None,
confidence: float = 1.0,
source_closet: str = None,
source_file: str = None,
):
"""
Add a relationship triple: subject → predicate → object.
@@ -125,19 +132,13 @@ class KnowledgeGraph:
# Auto-create entities if they don't exist
conn = self._conn()
conn.execute(
"INSERT OR IGNORE INTO entities (id, name) VALUES (?, ?)",
(sub_id, subject)
)
conn.execute(
"INSERT OR IGNORE INTO entities (id, name) VALUES (?, ?)",
(obj_id, obj)
)
conn.execute("INSERT OR IGNORE INTO entities (id, name) VALUES (?, ?)", (sub_id, subject))
conn.execute("INSERT OR IGNORE INTO entities (id, name) VALUES (?, ?)", (obj_id, obj))
# Check for existing identical triple
existing = conn.execute(
"SELECT id FROM triples WHERE subject=? AND predicate=? AND object=? AND valid_to IS NULL",
(sub_id, pred, obj_id)
(sub_id, pred, obj_id),
).fetchone()
if existing:
@@ -149,7 +150,17 @@ class KnowledgeGraph:
conn.execute(
"""INSERT INTO triples (id, subject, predicate, object, valid_from, valid_to, confidence, source_closet, source_file)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
(triple_id, sub_id, pred, obj_id, valid_from, valid_to, confidence, source_closet, source_file)
(
triple_id,
sub_id,
pred,
obj_id,
valid_from,
valid_to,
confidence,
source_closet,
source_file,
),
)
conn.commit()
conn.close()
@@ -165,7 +176,7 @@ class KnowledgeGraph:
conn = self._conn()
conn.execute(
"UPDATE triples SET valid_to=? WHERE subject=? AND predicate=? AND object=? AND valid_to IS NULL",
(ended, sub_id, pred, obj_id)
(ended, sub_id, pred, obj_id),
)
conn.commit()
conn.close()
@@ -191,17 +202,19 @@ class KnowledgeGraph:
query += " AND (t.valid_from IS NULL OR t.valid_from <= ?) AND (t.valid_to IS NULL OR t.valid_to >= ?)"
params.extend([as_of, as_of])
for row in conn.execute(query, params).fetchall():
results.append({
"direction": "outgoing",
"subject": name,
"predicate": row[2],
"object": row[10], # obj_name
"valid_from": row[4],
"valid_to": row[5],
"confidence": row[6],
"source_closet": row[7],
"current": row[5] is None,
})
results.append(
{
"direction": "outgoing",
"subject": name,
"predicate": row[2],
"object": row[10], # obj_name
"valid_from": row[4],
"valid_to": row[5],
"confidence": row[6],
"source_closet": row[7],
"current": row[5] is None,
}
)
if direction in ("incoming", "both"):
query = "SELECT t.*, e.name as sub_name FROM triples t JOIN entities e ON t.subject = e.id WHERE t.object = ?"
@@ -210,17 +223,19 @@ class KnowledgeGraph:
query += " AND (t.valid_from IS NULL OR t.valid_from <= ?) AND (t.valid_to IS NULL OR t.valid_to >= ?)"
params.extend([as_of, as_of])
for row in conn.execute(query, params).fetchall():
results.append({
"direction": "incoming",
"subject": row[10], # sub_name
"predicate": row[2],
"object": name,
"valid_from": row[4],
"valid_to": row[5],
"confidence": row[6],
"source_closet": row[7],
"current": row[5] is None,
})
results.append(
{
"direction": "incoming",
"subject": row[10], # sub_name
"predicate": row[2],
"object": name,
"valid_from": row[4],
"valid_to": row[5],
"confidence": row[6],
"source_closet": row[7],
"current": row[5] is None,
}
)
conn.close()
return results
@@ -243,14 +258,16 @@ class KnowledgeGraph:
results = []
for row in conn.execute(query, params).fetchall():
results.append({
"subject": row[10],
"predicate": pred,
"object": row[11],
"valid_from": row[4],
"valid_to": row[5],
"current": row[5] is None,
})
results.append(
{
"subject": row[10],
"predicate": pred,
"object": row[11],
"valid_from": row[4],
"valid_to": row[5],
"current": row[5] is None,
}
)
conn.close()
return results
@@ -259,14 +276,17 @@ class KnowledgeGraph:
conn = self._conn()
if entity_name:
eid = self._entity_id(entity_name)
rows = conn.execute("""
rows = conn.execute(
"""
SELECT t.*, s.name as sub_name, o.name as obj_name
FROM triples t
JOIN entities s ON t.subject = s.id
JOIN entities o ON t.object = o.id
WHERE (t.subject = ? OR t.object = ?)
ORDER BY t.valid_from ASC NULLS LAST
""", (eid, eid)).fetchall()
""",
(eid, eid),
).fetchall()
else:
rows = conn.execute("""
SELECT t.*, s.name as sub_name, o.name as obj_name
@@ -278,14 +298,17 @@ class KnowledgeGraph:
""").fetchall()
conn.close()
return [{
"subject": r[10],
"predicate": r[2],
"object": r[11],
"valid_from": r[4],
"valid_to": r[5],
"current": r[5] is None,
} for r in rows]
return [
{
"subject": r[10],
"predicate": r[2],
"object": r[11],
"valid_from": r[4],
"valid_to": r[5],
"current": r[5] is None,
}
for r in rows
]
# ── Stats ─────────────────────────────────────────────────────────────
@@ -295,9 +318,12 @@ class KnowledgeGraph:
triples = conn.execute("SELECT COUNT(*) FROM triples").fetchone()[0]
current = conn.execute("SELECT COUNT(*) FROM triples WHERE valid_to IS NULL").fetchone()[0]
expired = triples - current
predicates = [r[0] for r in conn.execute(
"SELECT DISTINCT predicate FROM triples ORDER BY predicate"
).fetchall()]
predicates = [
r[0]
for r in conn.execute(
"SELECT DISTINCT predicate FROM triples ORDER BY predicate"
).fetchall()
]
conn.close()
return {
"entities": entities,
@@ -317,16 +343,21 @@ class KnowledgeGraph:
for key, facts in entity_facts.items():
name = facts.get("full_name", key.capitalize())
etype = facts.get("type", "person")
self.add_entity(name, etype, {
"gender": facts.get("gender", ""),
"birthday": facts.get("birthday", ""),
})
self.add_entity(
name,
etype,
{
"gender": facts.get("gender", ""),
"birthday": facts.get("birthday", ""),
},
)
# Relationships
parent = facts.get("parent")
if parent:
self.add_triple(name, "child_of", parent.capitalize(),
valid_from=facts.get("birthday"))
self.add_triple(
name, "child_of", parent.capitalize(), valid_from=facts.get("birthday")
)
partner = facts.get("partner")
if partner:
@@ -334,8 +365,12 @@ class KnowledgeGraph:
relationship = facts.get("relationship", "")
if relationship == "daughter":
self.add_triple(name, "is_child_of", facts.get("parent", "").capitalize() or name,
valid_from=facts.get("birthday"))
self.add_triple(
name,
"is_child_of",
facts.get("parent", "").capitalize() or name,
valid_from=facts.get("birthday"),
)
elif relationship == "husband":
self.add_triple(name, "is_partner_of", facts.get("partner", name).capitalize())
elif relationship == "brother":
@@ -346,5 +381,4 @@ class KnowledgeGraph:
# Interests
for interest in facts.get("interests", []):
self.add_triple(name, "loves", interest.capitalize(),
valid_from="2025-01-01")
self.add_triple(name, "loves", interest.capitalize(), valid_from="2025-01-01")
+94 -40
View File
@@ -26,12 +26,12 @@ from datetime import datetime
from .config import MempalaceConfig
from .searcher import search_memories
from .palace_graph import traverse, find_tunnels, graph_stats
import chromadb
from .knowledge_graph import KnowledgeGraph
_kg = KnowledgeGraph()
import chromadb
logging.basicConfig(level=logging.INFO, format="%(message)s", stream=sys.stderr)
logger = logging.getLogger("mempalace_mcp")
@@ -312,19 +312,24 @@ def tool_kg_query(entity: str, as_of: str = None, direction: str = "both"):
return {"entity": entity, "as_of": as_of, "facts": results, "count": len(results)}
def tool_kg_add(subject: str, predicate: str, object: str,
valid_from: str = None, source_closet: str = None):
def tool_kg_add(
subject: str, predicate: str, object: str, valid_from: str = None, source_closet: str = None
):
"""Add a relationship to the knowledge graph."""
triple_id = _kg.add_triple(subject, predicate, object,
valid_from=valid_from, source_closet=source_closet)
return {"success": True, "triple_id": triple_id,
"fact": f"{subject}{predicate}{object}"}
triple_id = _kg.add_triple(
subject, predicate, object, valid_from=valid_from, source_closet=source_closet
)
return {"success": True, "triple_id": triple_id, "fact": f"{subject}{predicate}{object}"}
def tool_kg_invalidate(subject: str, predicate: str, object: str, ended: str = None):
"""Mark a fact as no longer true (set end date)."""
_kg.invalidate(subject, predicate, object, ended=ended)
return {"success": True, "fact": f"{subject}{predicate}{object}", "ended": ended or "today"}
return {
"success": True,
"fact": f"{subject}{predicate}{object}",
"ended": ended or "today",
}
def tool_kg_timeline(entity: str = None):
@@ -362,16 +367,18 @@ def tool_diary_write(agent_name: str, entry: str, topic: str = "general"):
col.add(
ids=[entry_id],
documents=[entry],
metadatas=[{
"wing": wing,
"room": room,
"hall": "hall_diary",
"topic": topic,
"type": "diary_entry",
"agent": agent_name,
"filed_at": now.isoformat(),
"date": now.strftime("%Y-%m-%d"),
}],
metadatas=[
{
"wing": wing,
"room": room,
"hall": "hall_diary",
"topic": topic,
"type": "diary_entry",
"agent": agent_name,
"filed_at": now.isoformat(),
"date": now.strftime("%Y-%m-%d"),
}
],
)
logger.info(f"Diary entry: {entry_id}{wing}/diary/{topic}")
return {
@@ -407,12 +414,14 @@ def tool_diary_read(agent_name: str, last_n: int = 10):
# Combine and sort by timestamp
entries = []
for doc, meta in zip(results["documents"], results["metadatas"]):
entries.append({
"date": meta.get("date", ""),
"timestamp": meta.get("filed_at", ""),
"topic": meta.get("topic", ""),
"content": doc,
})
entries.append(
{
"date": meta.get("date", ""),
"timestamp": meta.get("filed_at", ""),
"topic": meta.get("topic", ""),
"content": doc,
}
)
entries.sort(key=lambda x: x["timestamp"], reverse=True)
entries = entries[:last_n]
@@ -465,9 +474,18 @@ TOOLS = {
"input_schema": {
"type": "object",
"properties": {
"entity": {"type": "string", "description": "Entity to query (e.g. 'Max', 'MyProject', 'Alice')"},
"as_of": {"type": "string", "description": "Date filter — only facts valid at this date (YYYY-MM-DD, optional)"},
"direction": {"type": "string", "description": "outgoing (entity→?), incoming (?→entity), or both (default: both)"},
"entity": {
"type": "string",
"description": "Entity to query (e.g. 'Max', 'MyProject', 'Alice')",
},
"as_of": {
"type": "string",
"description": "Date filter — only facts valid at this date (YYYY-MM-DD, optional)",
},
"direction": {
"type": "string",
"description": "outgoing (entity→?), incoming (?→entity), or both (default: both)",
},
},
"required": ["entity"],
},
@@ -479,10 +497,19 @@ TOOLS = {
"type": "object",
"properties": {
"subject": {"type": "string", "description": "The entity doing/being something"},
"predicate": {"type": "string", "description": "The relationship type (e.g. 'loves', 'works_on', 'daughter_of')"},
"predicate": {
"type": "string",
"description": "The relationship type (e.g. 'loves', 'works_on', 'daughter_of')",
},
"object": {"type": "string", "description": "The entity being connected to"},
"valid_from": {"type": "string", "description": "When this became true (YYYY-MM-DD, optional)"},
"source_closet": {"type": "string", "description": "Closet ID where this fact appears (optional)"},
"valid_from": {
"type": "string",
"description": "When this became true (YYYY-MM-DD, optional)",
},
"source_closet": {
"type": "string",
"description": "Closet ID where this fact appears (optional)",
},
},
"required": ["subject", "predicate", "object"],
},
@@ -496,7 +523,10 @@ TOOLS = {
"subject": {"type": "string", "description": "Entity"},
"predicate": {"type": "string", "description": "Relationship"},
"object": {"type": "string", "description": "Connected entity"},
"ended": {"type": "string", "description": "When it stopped being true (YYYY-MM-DD, default: today)"},
"ended": {
"type": "string",
"description": "When it stopped being true (YYYY-MM-DD, default: today)",
},
},
"required": ["subject", "predicate", "object"],
},
@@ -507,7 +537,10 @@ TOOLS = {
"input_schema": {
"type": "object",
"properties": {
"entity": {"type": "string", "description": "Entity to get timeline for (optional — omit for full timeline)"},
"entity": {
"type": "string",
"description": "Entity to get timeline for (optional — omit for full timeline)",
},
},
},
"handler": tool_kg_timeline,
@@ -522,8 +555,14 @@ TOOLS = {
"input_schema": {
"type": "object",
"properties": {
"start_room": {"type": "string", "description": "Room to start from (e.g. 'chromadb-setup', 'riley-school')"},
"max_hops": {"type": "integer", "description": "How many connections to follow (default: 2)"},
"start_room": {
"type": "string",
"description": "Room to start from (e.g. 'chromadb-setup', 'riley-school')",
},
"max_hops": {
"type": "integer",
"description": "How many connections to follow (default: 2)",
},
},
"required": ["start_room"],
},
@@ -611,9 +650,18 @@ TOOLS = {
"input_schema": {
"type": "object",
"properties": {
"agent_name": {"type": "string", "description": "Your name — each agent gets their own diary wing"},
"entry": {"type": "string", "description": "Your diary entry in AAAK format — compressed, entity-coded, emotion-marked"},
"topic": {"type": "string", "description": "Topic tag (optional, default: general)"},
"agent_name": {
"type": "string",
"description": "Your name — each agent gets their own diary wing",
},
"entry": {
"type": "string",
"description": "Your diary entry in AAAK format — compressed, entity-coded, emotion-marked",
},
"topic": {
"type": "string",
"description": "Topic tag (optional, default: general)",
},
},
"required": ["agent_name", "entry"],
},
@@ -624,8 +672,14 @@ TOOLS = {
"input_schema": {
"type": "object",
"properties": {
"agent_name": {"type": "string", "description": "Your name — each agent gets their own diary wing"},
"last_n": {"type": "integer", "description": "Number of recent entries to read (default: 10)"},
"agent_name": {
"type": "string",
"description": "Your name — each agent gets their own diary wing",
},
"last_n": {
"type": "integer",
"description": "Number of recent entries to read (default: 10)",
},
},
"required": ["agent_name"],
},
+30 -21
View File
@@ -263,7 +263,9 @@ def _warn_ambiguous(people: list) -> list:
# ─────────────────────────────────────────────────────────────────────────────
def _generate_aaak_bootstrap(people: list, projects: list, wings: list, mode: str, config_dir: Path = None):
def _generate_aaak_bootstrap(
people: list, projects: list, wings: list, mode: str, config_dir: Path = None
):
"""
Generate AAAK entity registry + critical facts bootstrap from onboarding data.
These files teach the AI about the user's world from session one.
@@ -292,7 +294,6 @@ def _generate_aaak_bootstrap(people: list, projects: list, wings: list, mode: st
name = p["name"]
code = entity_codes[name]
rel = p.get("relationship", "")
ctx = p.get("context", "")
registry_lines.append(f" {code}={name} ({rel})" if rel else f" {code}={name}")
if projects:
@@ -301,13 +302,15 @@ def _generate_aaak_bootstrap(people: list, projects: list, wings: list, mode: st
code = proj[:4].upper()
registry_lines.append(f" {code}={proj}")
registry_lines.extend([
"",
"## AAAK Quick Reference",
" Symbols: ♡=love ★=importance ⚠=warning →=relationship |=separator",
" Structure: KEY:value | GROUP(details) | entity.attribute",
" Read naturally — expand codes, treat *markers* as emotional context.",
])
registry_lines.extend(
[
"",
"## AAAK Quick Reference",
" Symbols: ♡=love ★=importance ⚠=warning →=relationship |=separator",
" Structure: KEY:value | GROUP(details) | entity.attribute",
" Read naturally — expand codes, treat *markers* as emotional context.",
]
)
(mempalace_dir / "aaak_entities.md").write_text("\n".join(registry_lines))
@@ -325,7 +328,9 @@ def _generate_aaak_bootstrap(people: list, projects: list, wings: list, mode: st
for p in personal_people:
code = entity_codes[p["name"]]
rel = p.get("relationship", "")
facts_lines.append(f"- **{p['name']}** ({code}) — {rel}" if rel else f"- **{p['name']}** ({code})")
facts_lines.append(
f"- **{p['name']}** ({code}) — {rel}" if rel else f"- **{p['name']}** ({code})"
)
facts_lines.append("")
if work_people:
@@ -333,7 +338,9 @@ def _generate_aaak_bootstrap(people: list, projects: list, wings: list, mode: st
for p in work_people:
code = entity_codes[p["name"]]
rel = p.get("relationship", "")
facts_lines.append(f"- **{p['name']}** ({code}) — {rel}" if rel else f"- **{p['name']}** ({code})")
facts_lines.append(
f"- **{p['name']}** ({code}) — {rel}" if rel else f"- **{p['name']}** ({code})"
)
facts_lines.append("")
if projects:
@@ -342,13 +349,15 @@ def _generate_aaak_bootstrap(people: list, projects: list, wings: list, mode: st
facts_lines.append(f"- **{proj}**")
facts_lines.append("")
facts_lines.extend([
"## Palace",
f"Wings: {', '.join(wings)}",
f"Mode: {mode}",
"",
"*This file will be enriched by palace_facts.py after mining.*",
])
facts_lines.extend(
[
"## Palace",
f"Wings: {', '.join(wings)}",
f"Mode: {mode}",
"",
"*This file will be enriched by palace_facts.py after mining.*",
]
)
(mempalace_dir / "critical_facts.md").write_text("\n".join(facts_lines))
@@ -433,9 +442,9 @@ def run_onboarding(
print(f" {registry.summary()}")
print(f"\n Wings: {', '.join(wings)}")
print(f"\n Registry saved to: {registry._path}")
print(f"\n AAAK entity registry: ~/.mempalace/aaak_entities.md")
print(f" Critical facts bootstrap: ~/.mempalace/critical_facts.md")
print(f"\n Your AI will know your world from the first session.")
print("\n AAAK entity registry: ~/.mempalace/aaak_entities.md")
print(" Critical facts bootstrap: ~/.mempalace/critical_facts.md")
print("\n Your AI will know your world from the first session.")
print()
return registry
+42 -31
View File
@@ -71,15 +71,17 @@ def build_graph(col=None, config=None):
wings = sorted(data["wings"])
if len(wings) >= 2:
for i, wa in enumerate(wings):
for wb in wings[i + 1:]:
for wb in wings[i + 1 :]:
for hall in data["halls"]:
edges.append({
"room": room,
"wing_a": wa,
"wing_b": wb,
"hall": hall,
"count": data["count"],
})
edges.append(
{
"room": room,
"wing_a": wa,
"wing_b": wb,
"hall": hall,
"count": data["count"],
}
)
# Convert sets to lists for JSON serialization
nodes = {}
@@ -104,17 +106,22 @@ def traverse(start_room: str, col=None, config=None, max_hops: int = 2):
nodes, edges = build_graph(col, config)
if start_room not in nodes:
return {"error": f"Room '{start_room}' not found", "suggestions": _fuzzy_match(start_room, nodes)}
return {
"error": f"Room '{start_room}' not found",
"suggestions": _fuzzy_match(start_room, nodes),
}
start = nodes[start_room]
visited = {start_room}
results = [{
"room": start_room,
"wings": start["wings"],
"halls": start["halls"],
"count": start["count"],
"hop": 0,
}]
results = [
{
"room": start_room,
"wings": start["wings"],
"halls": start["halls"],
"count": start["count"],
"hop": 0,
}
]
# BFS traversal
frontier = [(start_room, 0)]
@@ -133,14 +140,16 @@ def traverse(start_room: str, col=None, config=None, max_hops: int = 2):
shared_wings = current_wings & set(data["wings"])
if shared_wings:
visited.add(room)
results.append({
"room": room,
"wings": data["wings"],
"halls": data["halls"],
"count": data["count"],
"hop": depth + 1,
"connected_via": sorted(shared_wings),
})
results.append(
{
"room": room,
"wings": data["wings"],
"halls": data["halls"],
"count": data["count"],
"hop": depth + 1,
"connected_via": sorted(shared_wings),
}
)
if depth + 1 < max_hops:
frontier.append((room, depth + 1))
@@ -167,13 +176,15 @@ def find_tunnels(wing_a: str = None, wing_b: str = None, col=None, config=None):
if wing_b and wing_b not in wings:
continue
tunnels.append({
"room": room,
"wings": wings,
"halls": data["halls"],
"count": data["count"],
"recent": data["dates"][-1] if data["dates"] else "",
})
tunnels.append(
{
"room": room,
"wings": wings,
"halls": data["halls"],
"count": data["count"],
"recent": data["dates"][-1] if data["dates"] else "",
}
)
tunnels.sort(key=lambda x: -x["count"])
return tunnels[:50]
+53 -31
View File
@@ -26,16 +26,16 @@ import argparse
import json
import os
import re
import sys
from pathlib import Path
HOME = Path.home()
LUMI_DIR = Path(os.environ.get("MEMPALACE_SOURCE_DIR", str(HOME / "Desktop/transcripts")))
HOME = Path.home()
LUMI_DIR = Path(os.environ.get("MEMPALACE_SOURCE_DIR", str(HOME / "Desktop/transcripts")))
# People we know about (for name detection in content)
# Loaded from ~/.mempalace/known_names.json if it exists, otherwise generic fallback.
_KNOWN_NAMES_PATH = HOME / ".mempalace" / "known_names.json"
def _load_known_people() -> list:
"""Load known names from config file, falling back to a generic list."""
if _KNOWN_NAMES_PATH.exists():
@@ -49,6 +49,7 @@ def _load_known_people() -> list:
# Generic fallback — override by creating ~/.mempalace/known_names.json
return ["Alice", "Ben", "Riley", "Max", "Sam", "Devon", "Jordan"]
KNOWN_PEOPLE = _load_known_people()
@@ -69,7 +70,7 @@ def is_true_session_start(lines, idx):
True session start: 'Claude Code v' header NOT followed by 'Ctrl+E'/'previous messages'
within the next 6 lines (those are context restores, not new sessions).
"""
nearby = "".join(lines[idx:idx + 6])
nearby = "".join(lines[idx : idx + 6])
return "Ctrl+E" not in nearby and "previous messages" not in nearby
@@ -87,13 +88,20 @@ def extract_timestamp(lines):
Find the first timestamp line: ⏺ H:MM AM/PM Weekday, Month DD, YYYY
Returns (datetime_str, iso_str) or (None, None).
"""
ts_pattern = re.compile(
r"\s+(\d{1,2}:\d{2}\s+[AP]M)\s+\w+,\s+(\w+)\s+(\d{1,2}),\s+(\d{4})"
)
ts_pattern = re.compile(r"\s+(\d{1,2}:\d{2}\s+[AP]M)\s+\w+,\s+(\w+)\s+(\d{1,2}),\s+(\d{4})")
months = {
"January": "01", "February": "02", "March": "03", "April": "04",
"May": "05", "June": "06", "July": "07", "August": "08",
"September": "09", "October": "10", "November": "11", "December": "12",
"January": "01",
"February": "02",
"March": "03",
"April": "04",
"May": "05",
"June": "06",
"July": "07",
"August": "08",
"September": "09",
"October": "10",
"November": "11",
"December": "12",
}
for line in lines[:50]:
m = ts_pattern.search(line)
@@ -177,16 +185,16 @@ def split_file(filepath, output_dir, dry_run=False):
continue # Skip tiny fragments
ts_human, ts_iso = extract_timestamp(chunk)
people = extract_people(chunk)
subject = extract_subject(chunk)
people = extract_people(chunk)
subject = extract_subject(chunk)
# Build filename: SOURCESTEM__DATE_TIME_People_subject.txt
# Source stem prefix prevents collisions when multiple mega-files
# produce sessions with the same timestamp/people/subject.
ts_part = ts_human or f"part{i+1:02d}"
ts_part = ts_human or f"part{i + 1:02d}"
people_part = "-".join(people[:3]) if people else "unknown"
src_stem = re.sub(r"[^\w-]", "_", path.stem)[:40]
name = f"{src_stem}__{ts_part}_{people_part}_{subject}.txt"
src_stem = re.sub(r"[^\w-]", "_", path.stem)[:40]
name = f"{src_stem}__{ts_part}_{people_part}_{subject}.txt"
# Sanitize
name = re.sub(r"[^\w\.\-]", "_", name)
name = re.sub(r"_+", "_", name)
@@ -194,7 +202,7 @@ def split_file(filepath, output_dir, dry_run=False):
out_path = out_dir / name
if dry_run:
print(f" [{i+1}/{len(boundaries)-1}] {name} ({len(chunk)} lines)")
print(f" [{i + 1}/{len(boundaries) - 1}] {name} ({len(chunk)} lines)")
else:
out_path.write_text("".join(chunk))
print(f"{name} ({len(chunk)} lines)")
@@ -208,19 +216,33 @@ def main():
parser = argparse.ArgumentParser(
description="Split concatenated transcript mega-files into per-session files"
)
parser.add_argument("--source", type=str, default=None,
help="Source directory (default: MEMPALACE_SOURCE_DIR or ~/Desktop/transcripts)")
parser.add_argument("--output-dir", type=str, default=None,
help="Output directory (default: same as source)")
parser.add_argument("--min-sessions", type=int, default=2,
help="Only split files with at least N sessions (default: 2)")
parser.add_argument("--dry-run", action="store_true",
help="Show what would happen without writing files")
parser.add_argument("--file", type=str, default=None,
help="Split a single specific file instead of scanning dir")
parser.add_argument(
"--source",
type=str,
default=None,
help="Source directory (default: MEMPALACE_SOURCE_DIR or ~/Desktop/transcripts)",
)
parser.add_argument(
"--output-dir", type=str, default=None, help="Output directory (default: same as source)"
)
parser.add_argument(
"--min-sessions",
type=int,
default=2,
help="Only split files with at least N sessions (default: 2)",
)
parser.add_argument(
"--dry-run", action="store_true", help="Show what would happen without writing files"
)
parser.add_argument(
"--file",
type=str,
default=None,
help="Split a single specific file instead of scanning dir",
)
args = parser.parse_args()
src_dir = Path(args.source) if args.source else LUMI_DIR
src_dir = Path(args.source) if args.source else LUMI_DIR
output_dir = args.output_dir or None # None = same dir as file
if args.file:
@@ -239,13 +261,13 @@ def main():
print(f"No mega-files found in {src_dir} (min {args.min_sessions} sessions).")
return
print(f"\n{'='*60}")
print(f"\n{'=' * 60}")
print(f" Mega-file splitter — {'DRY RUN' if args.dry_run else 'SPLITTING'}")
print(f"{'='*60}")
print(f"{'=' * 60}")
print(f" Source: {src_dir}")
print(f" Output: {output_dir or 'same dir as source'}")
print(f" Mega-files: {len(mega_files)}")
print(f"{''*60}\n")
print(f"{'' * 60}\n")
total_written = 0
for f, n_sessions in mega_files:
@@ -260,7 +282,7 @@ def main():
else:
print()
print(f"{''*60}")
print(f"{'' * 60}")
if args.dry_run:
print(f" DRY RUN — would create {total_written} files from {len(mega_files)} mega-files")
else: