perf: graph cache with write-invalidation in build_graph()

build_graph() scans every drawer's metadata in 1000-item batches on
every call — O(n) per graph build with no caching. At 50K+ drawers
this costs several seconds per MCP tool call (traverse, find_tunnels,
graph_stats all call build_graph on every invocation).

Add a module-level cache (nodes + edges + timestamp) with a 60-second
TTL. Cache is invalidated via invalidate_graph_cache(), exported for
write operations to call. Tests updated with setup_method cache resets
and two new tests verifying cache hit and invalidation behaviour.
This commit is contained in:
jp
2026-04-11 19:44:49 -07:00
parent d4c942417a
commit 84e2aa16e4
2 changed files with 64 additions and 0 deletions
+27
View File
@@ -18,6 +18,7 @@ No external graph DB needed — built from ChromaDB metadata.
import hashlib import hashlib
import json import json
import os import os
import time
from collections import Counter, defaultdict from collections import Counter, defaultdict
from datetime import datetime, timezone from datetime import datetime, timezone
@@ -25,6 +26,20 @@ from .config import MempalaceConfig
from .palace import get_collection as _get_palace_collection from .palace import get_collection as _get_palace_collection
from .palace import mine_lock from .palace import mine_lock
# Module-level graph cache — mirrors _metadata_cache pattern in mcp_server.py.
# build_graph() stores its last result here and returns it while it is fresh;
# invalidate_graph_cache() resets all three values.
_graph_cache_nodes = None  # cached nodes dict; None means nothing is cached
_graph_cache_edges = None  # cached edges list, stored together with the nodes
_graph_cache_time = 0.0  # time.time() value recorded when the cache was filled
_GRAPH_CACHE_TTL = 60.0  # seconds — graph changes less often than metadata
def invalidate_graph_cache():
    """Drop the cached graph so the next build_graph() call rescans.

    Resets the cached nodes, edges and timestamp to their initial empty
    values. Meant to be called by write paths (e.g. in mcp_server.py)
    after drawers change.
    """
    global _graph_cache_nodes, _graph_cache_edges, _graph_cache_time
    _graph_cache_time = 0.0
    _graph_cache_nodes = _graph_cache_edges = None
def _get_collection(config=None): def _get_collection(config=None):
config = config or MempalaceConfig() config = config or MempalaceConfig()
@@ -42,10 +57,18 @@ def build_graph(col=None, config=None):
""" """
Build the palace graph from ChromaDB metadata. Build the palace graph from ChromaDB metadata.
Returns cached result if fresh (within TTL). Cache is invalidated
on writes via invalidate_graph_cache().
Returns: Returns:
nodes: dict of {room: {wings: set, halls: set, count: int}} nodes: dict of {room: {wings: set, halls: set, count: int}}
edges: list of {room, wing_a, wing_b, hall} — one per tunnel crossing edges: list of {room, wing_a, wing_b, hall} — one per tunnel crossing
""" """
global _graph_cache_nodes, _graph_cache_edges, _graph_cache_time
now = time.time()
if _graph_cache_nodes is not None and (now - _graph_cache_time) < _GRAPH_CACHE_TTL:
return _graph_cache_nodes, _graph_cache_edges
if col is None: if col is None:
col = _get_collection(config) col = _get_collection(config)
if not col: if not col:
@@ -101,6 +124,10 @@ def build_graph(col=None, config=None):
"dates": sorted(data["dates"])[-5:] if data["dates"] else [], "dates": sorted(data["dates"])[-5:] if data["dates"] else [],
} }
_graph_cache_nodes = nodes
_graph_cache_edges = edges
_graph_cache_time = time.time()
return nodes, edges return nodes, edges
+37
View File
@@ -30,6 +30,7 @@ with patch.dict("sys.modules", {"chromadb": MagicMock()}):
build_graph, build_graph,
find_tunnels, find_tunnels,
graph_stats, graph_stats,
invalidate_graph_cache,
traverse, traverse,
) )
@@ -38,6 +39,9 @@ with patch.dict("sys.modules", {"chromadb": MagicMock()}):
class TestBuildGraph: class TestBuildGraph:
def setup_method(self):
    """Reset the module-level graph cache before each test.

    build_graph() returns its cached result regardless of which collection
    is passed, so a graph cached by an earlier test would otherwise be
    returned here.
    """
    invalidate_graph_cache()
def test_empty_collection(self): def test_empty_collection(self):
col = _make_fake_collection([]) col = _make_fake_collection([])
nodes, edges = build_graph(col=col) nodes, edges = build_graph(col=col)
@@ -114,11 +118,38 @@ class TestBuildGraph:
nodes, _ = build_graph(col=col) nodes, _ = build_graph(col=col)
assert len(nodes["busy"]["dates"]) <= 5 assert len(nodes["busy"]["dates"]) <= 5
def test_cache_returns_same_result(self):
    """Second call within TTL returns the cached graph, even for a different collection."""
    col = _make_fake_collection(
        [{"room": "auth", "wing": "wing_code", "hall": "security", "date": "2026-01-01"}]
    )
    nodes1, edges1 = build_graph(col=col)
    # Guard against a vacuous pass: if the first build produced an empty
    # graph, the equality checks below would hold even with no caching,
    # because the second collection is empty too.
    assert nodes1, "first build should produce a non-empty graph to cache"

    # Second call with a *different* (empty) collection — a fresh scan would
    # yield an empty graph, so matching results prove the cache was used.
    col2 = _make_fake_collection([])
    nodes2, edges2 = build_graph(col=col2)
    assert nodes1 == nodes2
    assert edges1 == edges2
def test_invalidate_clears_cache(self):
    """invalidate_graph_cache() forces a fresh scan on next call."""
    col = _make_fake_collection(
        [{"room": "auth", "wing": "wing_code", "hall": "security", "date": "2026-01-01"}]
    )
    cached_nodes, _ = build_graph(col=col)
    # Guard against a vacuous pass: unless something non-empty was cached,
    # the empty-graph assertions below would hold even if invalidation
    # did nothing.
    assert cached_nodes, "first build should populate the cache with a non-empty graph"

    invalidate_graph_cache()

    col_empty = _make_fake_collection([])
    nodes, edges = build_graph(col=col_empty)
    assert nodes == {}
    assert edges == []
# --- traverse --- # --- traverse ---
class TestTraverse: class TestTraverse:
def setup_method(self):
    """Reset the module-level graph cache before each test.

    build_graph() returns its cached result regardless of which collection
    is passed, so a graph cached by an earlier test would otherwise be
    returned here.
    """
    invalidate_graph_cache()
def _build_col(self): def _build_col(self):
return _make_fake_collection( return _make_fake_collection(
[ [
@@ -156,6 +187,9 @@ class TestTraverse:
class TestFindTunnels: class TestFindTunnels:
def setup_method(self):
    """Reset the module-level graph cache before each test.

    build_graph() returns its cached result regardless of which collection
    is passed, so a graph cached by an earlier test would otherwise be
    returned here.
    """
    invalidate_graph_cache()
def _build_tunnel_col(self): def _build_tunnel_col(self):
return _make_fake_collection( return _make_fake_collection(
[ [
@@ -192,6 +226,9 @@ class TestFindTunnels:
class TestGraphStats: class TestGraphStats:
def setup_method(self):
    """Reset the module-level graph cache before each test.

    build_graph() returns its cached result regardless of which collection
    is passed, so a graph cached by an earlier test would otherwise be
    returned here.
    """
    invalidate_graph_cache()
def test_empty_graph(self): def test_empty_graph(self):
col = _make_fake_collection([]) col = _make_fake_collection([])
stats = graph_stats(col=col) stats = graph_stats(col=col)