fix: batch ChromaDB reads to avoid SQLite variable limit

Calling col.get() without a limit makes ChromaDB generate a
SELECT ... WHERE id IN (...) that lists every document ID, which exceeds
SQLite's ~999 bound-variable limit once a palace has more than ~1000
drawers.

This breaks both `mempalace compress` and `mempalace wake-up` on large
palaces. Reproduced on a 13880-file codebase (242K+ drawers).

Fix: paginate reads in batches of 500 using ChromaDB's offset/limit
parameters in both Layer1.generate() and cmd_compress().
This commit is contained in:
Maurice Wen
2026-04-07 21:40:12 +08:00
parent 1782628b8a
commit 0e77981dec
2 changed files with 45 additions and 25 deletions
+24 -13
View File
@@ -177,20 +177,31 @@ def cmd_compress(args):
print(" Run: mempalace init <dir> then mempalace mine <dir>") print(" Run: mempalace init <dir> then mempalace mine <dir>")
sys.exit(1) sys.exit(1)
# Query drawers in the wing # Query drawers in batches to avoid SQLite variable limit (~999)
where = {"wing": args.wing} if args.wing else None where = {"wing": args.wing} if args.wing else None
try: _BATCH = 500
kwargs = {"include": ["documents", "metadatas"]} docs, metas, ids = [], [], []
if where: offset = 0
kwargs["where"] = where while True:
results = col.get(**kwargs) try:
except Exception as e: kwargs = {"include": ["documents", "metadatas"], "limit": _BATCH, "offset": offset}
print(f"\n Error reading drawers: {e}") if where:
sys.exit(1) kwargs["where"] = where
batch = col.get(**kwargs)
docs = results["documents"] except Exception as e:
metas = results["metadatas"] if not docs:
ids = results["ids"] print(f"\n Error reading drawers: {e}")
sys.exit(1)
break
batch_docs = batch.get("documents", [])
if not batch_docs:
break
docs.extend(batch_docs)
metas.extend(batch.get("metadatas", []))
ids.extend(batch.get("ids", []))
offset += len(batch_docs)
if len(batch_docs) < _BATCH:
break
if not docs: if not docs:
wing_label = f" in wing '{args.wing}'" if args.wing else "" wing_label = f" in wing '{args.wing}'" if args.wing else ""
+21 -12
View File
@@ -96,18 +96,27 @@ class Layer1:
except Exception: except Exception:
return "## L1 — No palace found. Run: mempalace mine <dir>" return "## L1 — No palace found. Run: mempalace mine <dir>"
# Fetch all drawers (with optional wing filter) # Fetch all drawers in batches to avoid SQLite variable limit (~999)
kwargs = {"include": ["documents", "metadatas"]} _BATCH = 500
if self.wing: docs, metas = [], []
kwargs["where"] = {"wing": self.wing} offset = 0
while True:
try: kwargs = {"include": ["documents", "metadatas"], "limit": _BATCH, "offset": offset}
results = col.get(**kwargs) if self.wing:
except Exception: kwargs["where"] = {"wing": self.wing}
return "## L1 — No drawers found." try:
batch = col.get(**kwargs)
docs = results.get("documents", []) except Exception:
metas = results.get("metadatas", []) break
batch_docs = batch.get("documents", [])
batch_metas = batch.get("metadatas", [])
if not batch_docs:
break
docs.extend(batch_docs)
metas.extend(batch_metas)
offset += len(batch_docs)
if len(batch_docs) < _BATCH:
break
if not docs: if not docs:
return "## L1 — No memories yet." return "## L1 — No memories yet."