58eca5075a
Addresses findings from security audit (ref #401): inconsistent sanitization across MCP tools, unfiltered argument dispatch allowing audit trail spoofing, SQLite concurrency issues, WAL permission race condition, sensitive content in logs, and internal error leakage to MCP callers. Co-authored-by: Israel Domínguez <isra@MacBook-Pro-de-Israel.local> Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
402 lines
16 KiB
Python
402 lines
16 KiB
Python
"""
|
|
knowledge_graph.py — Temporal Entity-Relationship Graph for MemPalace
|
|
=====================================================================
|
|
|
|
Real knowledge graph with:
|
|
- Entity nodes (people, projects, tools, concepts)
|
|
- Typed relationship edges (daughter_of, does, loves, works_on, etc.)
|
|
- Temporal validity (valid_from → valid_to — knows WHEN facts are true)
|
|
- Closet references (links back to the verbatim memory)
|
|
|
|
Storage: SQLite (local, no dependencies, no subscriptions)
|
|
Query: entity-first traversal with time filtering
|
|
|
|
This is what competes with Zep's temporal knowledge graph.
|
|
Zep uses Neo4j in the cloud ($25/mo+). We use SQLite locally (free).
|
|
|
|
Usage:
|
|
from mempalace.knowledge_graph import KnowledgeGraph
|
|
|
|
kg = KnowledgeGraph()
|
|
kg.add_triple("Max", "child_of", "Alice", valid_from="2015-04-01")
|
|
kg.add_triple("Max", "does", "swimming", valid_from="2025-01-01")
|
|
kg.add_triple("Max", "loves", "chess", valid_from="2025-10-01")
|
|
|
|
# Query: everything about Max
|
|
kg.query_entity("Max")
|
|
|
|
# Query: what was true about Max in January 2026?
|
|
kg.query_entity("Max", as_of="2026-01-15")
|
|
|
|
# Query: who is connected to Alice?
|
|
kg.query_entity("Alice", direction="both")
|
|
|
|
# Invalidate: Max's sports injury resolved
|
|
kg.invalidate("Max", "has_issue", "sports_injury", ended="2026-02-15")
|
|
"""
|
|
|
|
import hashlib
|
|
import json
|
|
import os
|
|
import sqlite3
|
|
import threading
|
|
from datetime import date, datetime
|
|
from pathlib import Path
|
|
|
|
|
|
DEFAULT_KG_PATH = os.path.expanduser("~/.mempalace/knowledge_graph.sqlite3")
|
|
|
|
|
|
class KnowledgeGraph:
|
|
def __init__(self, db_path: str = None):
|
|
self.db_path = db_path or DEFAULT_KG_PATH
|
|
Path(self.db_path).parent.mkdir(parents=True, exist_ok=True)
|
|
self._connection = None
|
|
self._lock = threading.Lock()
|
|
self._init_db()
|
|
|
|
def _init_db(self):
|
|
conn = self._conn()
|
|
conn.executescript("""
|
|
PRAGMA journal_mode=WAL;
|
|
|
|
CREATE TABLE IF NOT EXISTS entities (
|
|
id TEXT PRIMARY KEY,
|
|
name TEXT NOT NULL,
|
|
type TEXT DEFAULT 'unknown',
|
|
properties TEXT DEFAULT '{}',
|
|
created_at TEXT DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS triples (
|
|
id TEXT PRIMARY KEY,
|
|
subject TEXT NOT NULL,
|
|
predicate TEXT NOT NULL,
|
|
object TEXT NOT NULL,
|
|
valid_from TEXT,
|
|
valid_to TEXT,
|
|
confidence REAL DEFAULT 1.0,
|
|
source_closet TEXT,
|
|
source_file TEXT,
|
|
extracted_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
|
FOREIGN KEY (subject) REFERENCES entities(id),
|
|
FOREIGN KEY (object) REFERENCES entities(id)
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_triples_subject ON triples(subject);
|
|
CREATE INDEX IF NOT EXISTS idx_triples_object ON triples(object);
|
|
CREATE INDEX IF NOT EXISTS idx_triples_predicate ON triples(predicate);
|
|
CREATE INDEX IF NOT EXISTS idx_triples_valid ON triples(valid_from, valid_to);
|
|
""")
|
|
conn.commit()
|
|
|
|
def _conn(self):
|
|
if self._connection is None:
|
|
self._connection = sqlite3.connect(self.db_path, timeout=10, check_same_thread=False)
|
|
self._connection.execute("PRAGMA journal_mode=WAL")
|
|
self._connection.row_factory = sqlite3.Row
|
|
return self._connection
|
|
|
|
def close(self):
|
|
"""Close the database connection."""
|
|
if self._connection is not None:
|
|
self._connection.close()
|
|
self._connection = None
|
|
|
|
def _entity_id(self, name: str) -> str:
|
|
return name.lower().replace(" ", "_").replace("'", "")
|
|
|
|
# ── Write operations ──────────────────────────────────────────────────
|
|
|
|
def add_entity(self, name: str, entity_type: str = "unknown", properties: dict = None):
|
|
"""Add or update an entity node."""
|
|
eid = self._entity_id(name)
|
|
props = json.dumps(properties or {})
|
|
with self._lock:
|
|
conn = self._conn()
|
|
with conn:
|
|
conn.execute(
|
|
"INSERT OR REPLACE INTO entities (id, name, type, properties) VALUES (?, ?, ?, ?)",
|
|
(eid, name, entity_type, props),
|
|
)
|
|
return eid
|
|
|
|
def add_triple(
|
|
self,
|
|
subject: str,
|
|
predicate: str,
|
|
obj: str,
|
|
valid_from: str = None,
|
|
valid_to: str = None,
|
|
confidence: float = 1.0,
|
|
source_closet: str = None,
|
|
source_file: str = None,
|
|
):
|
|
"""
|
|
Add a relationship triple: subject → predicate → object.
|
|
|
|
Examples:
|
|
add_triple("Max", "child_of", "Alice", valid_from="2015-04-01")
|
|
add_triple("Max", "does", "swimming", valid_from="2025-01-01")
|
|
add_triple("Alice", "worried_about", "Max injury", valid_from="2026-01", valid_to="2026-02")
|
|
"""
|
|
sub_id = self._entity_id(subject)
|
|
obj_id = self._entity_id(obj)
|
|
pred = predicate.lower().replace(" ", "_")
|
|
|
|
# Auto-create entities if they don't exist
|
|
with self._lock:
|
|
conn = self._conn()
|
|
with conn:
|
|
conn.execute(
|
|
"INSERT OR IGNORE INTO entities (id, name) VALUES (?, ?)", (sub_id, subject)
|
|
)
|
|
conn.execute(
|
|
"INSERT OR IGNORE INTO entities (id, name) VALUES (?, ?)", (obj_id, obj)
|
|
)
|
|
|
|
# Check for existing identical triple
|
|
existing = conn.execute(
|
|
"SELECT id FROM triples WHERE subject=? AND predicate=? AND object=? AND valid_to IS NULL",
|
|
(sub_id, pred, obj_id),
|
|
).fetchone()
|
|
|
|
if existing:
|
|
return existing["id"] # Already exists and still valid
|
|
|
|
triple_id = f"t_{sub_id}_{pred}_{obj_id}_{hashlib.sha256(f'{valid_from}{datetime.now().isoformat()}'.encode()).hexdigest()[:12]}"
|
|
|
|
conn.execute(
|
|
"""INSERT INTO triples (id, subject, predicate, object, valid_from, valid_to, confidence, source_closet, source_file)
|
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
|
(
|
|
triple_id,
|
|
sub_id,
|
|
pred,
|
|
obj_id,
|
|
valid_from,
|
|
valid_to,
|
|
confidence,
|
|
source_closet,
|
|
source_file,
|
|
),
|
|
)
|
|
return triple_id
|
|
|
|
def invalidate(self, subject: str, predicate: str, obj: str, ended: str = None):
|
|
"""Mark a relationship as no longer valid (set valid_to date)."""
|
|
sub_id = self._entity_id(subject)
|
|
obj_id = self._entity_id(obj)
|
|
pred = predicate.lower().replace(" ", "_")
|
|
ended = ended or date.today().isoformat()
|
|
|
|
with self._lock:
|
|
conn = self._conn()
|
|
with conn:
|
|
conn.execute(
|
|
"UPDATE triples SET valid_to=? WHERE subject=? AND predicate=? AND object=? AND valid_to IS NULL",
|
|
(ended, sub_id, pred, obj_id),
|
|
)
|
|
|
|
# ── Query operations ──────────────────────────────────────────────────
|
|
|
|
def query_entity(self, name: str, as_of: str = None, direction: str = "outgoing"):
|
|
"""
|
|
Get all relationships for an entity.
|
|
|
|
direction: "outgoing" (entity → ?), "incoming" (? → entity), "both"
|
|
as_of: date string — only return facts valid at that time
|
|
"""
|
|
eid = self._entity_id(name)
|
|
|
|
results = []
|
|
with self._lock:
|
|
conn = self._conn()
|
|
|
|
if direction in ("outgoing", "both"):
|
|
query = "SELECT t.*, e.name as obj_name FROM triples t JOIN entities e ON t.object = e.id WHERE t.subject = ?"
|
|
params = [eid]
|
|
if as_of:
|
|
query += " AND (t.valid_from IS NULL OR t.valid_from <= ?) AND (t.valid_to IS NULL OR t.valid_to >= ?)"
|
|
params.extend([as_of, as_of])
|
|
for row in conn.execute(query, params).fetchall():
|
|
results.append(
|
|
{
|
|
"direction": "outgoing",
|
|
"subject": name,
|
|
"predicate": row["predicate"],
|
|
"object": row["obj_name"],
|
|
"valid_from": row["valid_from"],
|
|
"valid_to": row["valid_to"],
|
|
"confidence": row["confidence"],
|
|
"source_closet": row["source_closet"],
|
|
"current": row["valid_to"] is None,
|
|
}
|
|
)
|
|
|
|
if direction in ("incoming", "both"):
|
|
query = "SELECT t.*, e.name as sub_name FROM triples t JOIN entities e ON t.subject = e.id WHERE t.object = ?"
|
|
params = [eid]
|
|
if as_of:
|
|
query += " AND (t.valid_from IS NULL OR t.valid_from <= ?) AND (t.valid_to IS NULL OR t.valid_to >= ?)"
|
|
params.extend([as_of, as_of])
|
|
for row in conn.execute(query, params).fetchall():
|
|
results.append(
|
|
{
|
|
"direction": "incoming",
|
|
"subject": row["sub_name"],
|
|
"predicate": row["predicate"],
|
|
"object": name,
|
|
"valid_from": row["valid_from"],
|
|
"valid_to": row["valid_to"],
|
|
"confidence": row["confidence"],
|
|
"source_closet": row["source_closet"],
|
|
"current": row["valid_to"] is None,
|
|
}
|
|
)
|
|
|
|
return results
|
|
|
|
def query_relationship(self, predicate: str, as_of: str = None):
|
|
"""Get all triples with a given relationship type."""
|
|
pred = predicate.lower().replace(" ", "_")
|
|
conn = self._conn()
|
|
query = """
|
|
SELECT t.*, s.name as sub_name, o.name as obj_name
|
|
FROM triples t
|
|
JOIN entities s ON t.subject = s.id
|
|
JOIN entities o ON t.object = o.id
|
|
WHERE t.predicate = ?
|
|
"""
|
|
params = [pred]
|
|
if as_of:
|
|
query += " AND (t.valid_from IS NULL OR t.valid_from <= ?) AND (t.valid_to IS NULL OR t.valid_to >= ?)"
|
|
params.extend([as_of, as_of])
|
|
|
|
results = []
|
|
for row in conn.execute(query, params).fetchall():
|
|
results.append(
|
|
{
|
|
"subject": row["sub_name"],
|
|
"predicate": pred,
|
|
"object": row["obj_name"],
|
|
"valid_from": row["valid_from"],
|
|
"valid_to": row["valid_to"],
|
|
"current": row["valid_to"] is None,
|
|
}
|
|
)
|
|
return results
|
|
|
|
def timeline(self, entity_name: str = None):
|
|
"""Get all facts in chronological order, optionally filtered by entity."""
|
|
conn = self._conn()
|
|
if entity_name:
|
|
eid = self._entity_id(entity_name)
|
|
rows = conn.execute(
|
|
"""
|
|
SELECT t.*, s.name as sub_name, o.name as obj_name
|
|
FROM triples t
|
|
JOIN entities s ON t.subject = s.id
|
|
JOIN entities o ON t.object = o.id
|
|
WHERE (t.subject = ? OR t.object = ?)
|
|
ORDER BY t.valid_from ASC NULLS LAST
|
|
LIMIT 100
|
|
""",
|
|
(eid, eid),
|
|
).fetchall()
|
|
else:
|
|
rows = conn.execute("""
|
|
SELECT t.*, s.name as sub_name, o.name as obj_name
|
|
FROM triples t
|
|
JOIN entities s ON t.subject = s.id
|
|
JOIN entities o ON t.object = o.id
|
|
ORDER BY t.valid_from ASC NULLS LAST
|
|
LIMIT 100
|
|
""").fetchall()
|
|
|
|
return [
|
|
{
|
|
"subject": r["sub_name"],
|
|
"predicate": r["predicate"],
|
|
"object": r["obj_name"],
|
|
"valid_from": r["valid_from"],
|
|
"valid_to": r["valid_to"],
|
|
"current": r["valid_to"] is None,
|
|
}
|
|
for r in rows
|
|
]
|
|
|
|
# ── Stats ─────────────────────────────────────────────────────────────
|
|
|
|
def stats(self):
|
|
conn = self._conn()
|
|
entities = conn.execute("SELECT COUNT(*) as cnt FROM entities").fetchone()["cnt"]
|
|
triples = conn.execute("SELECT COUNT(*) as cnt FROM triples").fetchone()["cnt"]
|
|
current = conn.execute(
|
|
"SELECT COUNT(*) as cnt FROM triples WHERE valid_to IS NULL"
|
|
).fetchone()["cnt"]
|
|
expired = triples - current
|
|
predicates = [
|
|
r["predicate"]
|
|
for r in conn.execute(
|
|
"SELECT DISTINCT predicate FROM triples ORDER BY predicate"
|
|
).fetchall()
|
|
]
|
|
return {
|
|
"entities": entities,
|
|
"triples": triples,
|
|
"current_facts": current,
|
|
"expired_facts": expired,
|
|
"relationship_types": predicates,
|
|
}
|
|
|
|
# ── Seed from known facts ─────────────────────────────────────────────
|
|
|
|
def seed_from_entity_facts(self, entity_facts: dict):
|
|
"""
|
|
Seed the knowledge graph from fact_checker.py ENTITY_FACTS.
|
|
This bootstraps the graph with known ground truth.
|
|
"""
|
|
for key, facts in entity_facts.items():
|
|
name = facts.get("full_name", key.capitalize())
|
|
etype = facts.get("type", "person")
|
|
self.add_entity(
|
|
name,
|
|
etype,
|
|
{
|
|
"gender": facts.get("gender", ""),
|
|
"birthday": facts.get("birthday", ""),
|
|
},
|
|
)
|
|
|
|
# Relationships
|
|
parent = facts.get("parent")
|
|
if parent:
|
|
self.add_triple(
|
|
name, "child_of", parent.capitalize(), valid_from=facts.get("birthday")
|
|
)
|
|
|
|
partner = facts.get("partner")
|
|
if partner:
|
|
self.add_triple(name, "married_to", partner.capitalize())
|
|
|
|
relationship = facts.get("relationship", "")
|
|
if relationship == "daughter":
|
|
self.add_triple(
|
|
name,
|
|
"is_child_of",
|
|
facts.get("parent", "").capitalize() or name,
|
|
valid_from=facts.get("birthday"),
|
|
)
|
|
elif relationship == "husband":
|
|
self.add_triple(name, "is_partner_of", facts.get("partner", name).capitalize())
|
|
elif relationship == "brother":
|
|
self.add_triple(name, "is_sibling_of", facts.get("sibling", name).capitalize())
|
|
elif relationship == "dog":
|
|
self.add_triple(name, "is_pet_of", facts.get("owner", name).capitalize())
|
|
self.add_entity(name, "animal")
|
|
|
|
# Interests
|
|
for interest in facts.get("interests", []):
|
|
self.add_triple(name, "loves", interest.capitalize(), valid_from="2025-01-01")
|