Merge pull request #1185 from MemPalace/perf/batched-upsert-gpu

perf(mining): batch per-chunk upserts + optional GPU acceleration
This commit is contained in:
Igor Lins e Silva
2026-04-24 20:34:28 -03:00
committed by GitHub
12 changed files with 995 additions and 66 deletions
+301
View File
@@ -0,0 +1,301 @@
"""Mining throughput benchmark: per-chunk vs batched upsert, CPU vs GPU.
Compares the legacy per-chunk ``add_drawer`` loop against the batched
``collection.upsert`` path introduced in the "batched upsert + GPU" PR.
Runs both paths on an identical seeded synthetic corpus, reports
wall-clock time + drawers/sec, and prints a markdown table suitable
for pasting into a PR description.
Usage
-----
# CPU (uses whichever onnxruntime build is installed — CPU-only unless
# onnxruntime-gpu is present):
uv run python benchmarks/mine_bench.py
# GPU (NVIDIA):
uv venv /tmp/gpu && source /tmp/gpu/bin/activate
uv pip install -e '.[gpu]' 'nvidia-cudnn-cu12>=9,<10' \\
'nvidia-cuda-runtime-cu12' 'nvidia-cublas-cu12'
export LD_LIBRARY_PATH=$(python -c "import nvidia.cudnn, os; \\
print(os.path.dirname(nvidia.cudnn.__file__)+'/lib')"):$LD_LIBRARY_PATH
MEMPALACE_EMBEDDING_DEVICE=cuda python benchmarks/mine_bench.py
Flags
-----
--device cpu|cuda|coreml|dml|auto Override MEMPALACE_EMBEDDING_DEVICE
--scenarios small,medium,large Which scenarios to run
--seed 42 RNG seed for reproducibility
"""
from __future__ import annotations
import argparse
import hashlib
import os
import random
import shutil
import string
import sys
import tempfile
import time
from datetime import datetime
from pathlib import Path
def build_corpus(dest: Path, n_files: int, paragraphs_per_file: int, seed: int) -> None:
    """Generate a seeded synthetic corpus of markdown files under ``dest``.

    Writes ``n_files`` files named ``doc_NNN.md``; each holds
    ``paragraphs_per_file`` paragraphs of 12 random lowercase words
    (3-10 letters each) separated by blank lines. A ``mempalace.yaml``
    declaring wing ``bench`` with a single ``general`` room is written
    alongside them. The same ``seed`` always yields the same corpus.

    Args:
        dest: Directory to create (parents included) and populate.
        n_files: Number of markdown documents to write.
        paragraphs_per_file: Paragraphs per document.
        seed: Seed for the private ``random.Random`` instance.
    """
    rng = random.Random(seed)
    dest.mkdir(parents=True, exist_ok=True)
    for i in range(n_files):
        paragraphs = []
        for _ in range(paragraphs_per_file):
            words = [
                "".join(rng.choices(string.ascii_lowercase, k=rng.randint(3, 10)))
                for _ in range(12)
            ]
            paragraphs.append(" ".join(words))
        # Explicit encoding so the corpus bytes do not depend on the locale.
        (dest / f"doc_{i:03d}.md").write_text("\n\n".join(paragraphs), encoding="utf-8")
    # The room's fields must be indented deeper than the list dash,
    # otherwise the document is not valid YAML.
    (dest / "mempalace.yaml").write_text(
        "wing: bench\n"
        "rooms:\n"
        "  - name: general\n"
        "    description: all\n"
        "    keywords: [general]\n",
        encoding="utf-8",
    )
def _process_file_unbatched(filepath, project_path, collection, wing, rooms, agent, closets_col):
    """Mine one file through the legacy one-upsert-per-chunk path.

    Mirrors the loop shape the miner had before batching so the benchmark
    compares like with like: the only intended difference from the batched
    path is that every chunk goes through its own ``miner.add_drawer`` call.

    Returns:
        ``(drawers_added, room)``. ``(0, "general")`` is returned when the
        file was already mined, cannot be read, or is shorter than
        ``miner.MIN_CHUNK_SIZE`` after stripping.
    """
    from mempalace import miner
    from mempalace.palace import (
        build_closet_lines,
        file_already_mined,
        mine_lock,
        purge_file_closets,
        upsert_closet_lines,
    )

    src = str(filepath)
    nothing_mined = (0, "general")

    if file_already_mined(collection, src, check_mtime=True):
        return nothing_mined
    try:
        raw_text = filepath.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return nothing_mined
    text = raw_text.strip()
    if len(text) < miner.MIN_CHUNK_SIZE:
        return nothing_mined

    room = miner.detect_room(filepath, text, rooms, project_path)
    chunks = miner.chunk_text(text, src)

    def _short_hash(payload):
        # 24 hex chars of SHA-256, the suffix used in drawer and closet ids.
        return hashlib.sha256(payload.encode()).hexdigest()[:24]

    with mine_lock(src):
        # Re-check under the lock: the file may have been mined in the meantime.
        if file_already_mined(collection, src, check_mtime=True):
            return 0, room

        # Best-effort removal of drawers from a previous mine of this file.
        try:
            collection.delete(where={"source_file": src})
        except Exception:
            pass

        drawers_added = 0
        for piece in chunks:
            miner.add_drawer(
                collection=collection,
                wing=wing,
                room=room,
                content=piece["content"],
                source_file=src,
                chunk_index=piece["chunk_index"],
                agent=agent,
            )
            drawers_added += 1

        if not closets_col or drawers_added <= 0:
            return drawers_added, room

        id_prefix = f"drawer_{wing}_{room}_"
        drawer_ids = [id_prefix + _short_hash(src + str(piece["chunk_index"])) for piece in chunks]
        closet_lines = build_closet_lines(src, drawer_ids, text, wing, room)
        closet_id_base = f"closet_{wing}_{room}_" + _short_hash(src)
        closet_meta = {
            "wing": wing,
            "room": room,
            "source_file": src,
            "drawer_count": drawers_added,
            "filed_at": datetime.now().isoformat(),
            "normalize_version": miner.NORMALIZE_VERSION,
        }
        purge_file_closets(closets_col, src)
        upsert_closet_lines(closets_col, closet_id_base, closet_lines, closet_meta)

    return drawers_added, room
def mine_once(project_dir: str, palace_path: str, batched: bool) -> tuple[int, float]:
    """Mine every scanned file of ``project_dir`` once and time the loop.

    Args:
        project_dir: Project directory containing ``mempalace.yaml``.
        palace_path: Palace location handed to the collection getters.
        batched: ``True`` uses ``miner.process_file`` (new path); ``False``
            uses the per-chunk ``_process_file_unbatched`` (old path).

    Returns:
        ``(total_drawers, elapsed_seconds)``. Only the per-file loop is
        timed; config loading, scanning and collection opening are not.
    """
    from mempalace import miner
    from mempalace.miner import load_config, scan_project
    from mempalace.palace import get_closets_collection, get_collection

    resolved_project = Path(project_dir).resolve()
    cfg = load_config(project_dir)
    wing = cfg["wing"]
    rooms = cfg.get("rooms", [])
    files = scan_project(project_dir)
    drawers_col = get_collection(palace_path)
    closets_col = get_closets_collection(palace_path)

    if batched:

        def mine_file(path):
            return miner.process_file(
                filepath=path,
                project_path=resolved_project,
                collection=drawers_col,
                wing=wing,
                rooms=rooms,
                agent="bench",
                dry_run=False,
                closets_col=closets_col,
            )

    else:

        def mine_file(path):
            return _process_file_unbatched(
                path, resolved_project, drawers_col, wing, rooms, "bench", closets_col
            )

    drawer_total = 0
    started = time.perf_counter()
    for path in files:
        added, _room = mine_file(path)
        drawer_total += added
    return drawer_total, time.perf_counter() - started
def _reset_backend_caches() -> None:
    """Empty the default backend's client and freshness caches.

    Called before each timed run so that neither mode benefits from a
    client the other mode already opened in this process.
    """
    from mempalace.palace import _DEFAULT_BACKEND as backend

    for cache in (backend._clients, backend._freshness):
        cache.clear()
def run_scenario(label: str, n_files: int, paragraphs_per_file: int, seed: int) -> dict:
    """Benchmark one corpus shape under the unbatched and then the batched path.

    Each mode gets its own temporary directory with a freshly generated
    corpus (same seed, so identical content) and an empty palace; the
    directory is removed afterwards even if mining raises.

    Returns:
        A dict with the scenario label and sizes, the batched drawer count,
        and time / rate for both modes plus the unbatched-over-batched
        time ratio under ``"speedup"``.
    """
    print(f"\n=== {label}: {n_files} files × {paragraphs_per_file} paragraphs ===")

    measurements = {}
    for mode in ("unbatched", "batched"):
        workdir = Path(tempfile.mkdtemp(prefix=f"mp_{mode}_"))
        try:
            project_dir = workdir / "proj"
            palace_dir = workdir / "palace"
            build_corpus(project_dir, n_files, paragraphs_per_file, seed=seed)
            _reset_backend_caches()
            use_batched = mode == "batched"
            count, elapsed = mine_once(str(project_dir), str(palace_dir), batched=use_batched)
            per_sec = count / elapsed if elapsed > 0 else 0.0
            measurements[mode] = (count, elapsed, per_sec)
            print(f" {mode:10} {count:5} drawers in {elapsed:6.2f}s → {per_sec:7.1f} drawers/sec")
        finally:
            shutil.rmtree(workdir, ignore_errors=True)

    _, old_time, old_rate = measurements["unbatched"]
    new_count, new_time, new_rate = measurements["batched"]
    ratio = old_time / new_time if new_time > 0 else 0.0
    print(f" speedup: {ratio:.2f}× ({old_time:.2f}s → {new_time:.2f}s)")

    summary = {"label": label, "n_files": n_files, "paragraphs": paragraphs_per_file}
    summary["drawers"] = new_count
    summary["unbatched_time"] = old_time
    summary["unbatched_rate"] = old_rate
    summary["batched_time"] = new_time
    summary["batched_rate"] = new_rate
    summary["speedup"] = ratio
    return summary
# Scenario key (as accepted by ``--scenarios``) -> (table label, number of
# files, paragraphs per file). ``main`` unpacks each tuple in this order
# and forwards it to ``run_scenario``.
SCENARIOS = {
"small": ("Small files (~50 paragraphs)", 10, 50),
"medium": ("Medium files (~200 paragraphs)", 20, 200),
"large": ("Large files (~500 paragraphs)", 10, 500),
}
def _env_summary(device_label: str) -> list[str]:
"""Short hardware + version lines included with the printed table."""
import platform
try:
import chromadb
chromadb_v = chromadb.__version__
except Exception:
chromadb_v = "?"
try:
import onnxruntime as ort
ort_v = ort.__version__
providers = ",".join(p.replace("ExecutionProvider", "") for p in ort.get_available_providers())
except Exception:
ort_v = "?"
providers = "?"
return [
f"device: **{device_label}** (onnxruntime {ort_v}, providers={providers})",
f"chromadb {chromadb_v} · python {sys.version.split()[0]} · {platform.platform()}",
]
def main() -> None:
    """Parse CLI flags, run the selected scenarios and print a markdown table.

    Scenario names are validated before anything expensive happens, so a
    typo exits with status 2 immediately instead of after the model
    warm-up and any earlier scenarios have already run.
    """
    # ``__doc__`` is None under ``python -OO``; fall back to an empty description.
    parser = argparse.ArgumentParser(description=(__doc__ or "").split("\n\n", 1)[0])
    parser.add_argument(
        "--device",
        default=None,
        help="Override MEMPALACE_EMBEDDING_DEVICE (cpu|cuda|coreml|dml|auto)",
    )
    parser.add_argument(
        "--scenarios",
        default="small,medium,large",
        help="Comma-separated scenario names (default: all)",
    )
    parser.add_argument("--seed", type=int, default=42)
    args = parser.parse_args()

    picked = [s.strip() for s in args.scenarios.split(",") if s.strip()]
    for key in picked:
        if key not in SCENARIOS:
            print(f"Unknown scenario {key!r}; choices: {sorted(SCENARIOS)}", file=sys.stderr)
            sys.exit(2)

    # The override must be in the environment before mempalace reads its config.
    if args.device:
        os.environ["MEMPALACE_EMBEDDING_DEVICE"] = args.device

    from mempalace.embedding import describe_device, get_embedding_function

    device_label = describe_device()
    print(f"Warming up ONNX model on device={device_label}...")
    # Run one embedding call up front so model loading is not charged to
    # the first timed scenario.
    ef = get_embedding_function()
    ef(["warmup sentence one", "warmup sentence two"])

    results = []
    for key in picked:
        label, n_files, paras = SCENARIOS[key]
        results.append(run_scenario(label, n_files, paras, args.seed))

    print("\n\n## Mining benchmark\n")
    for line in _env_summary(device_label):
        # Two trailing spaces are a markdown hard line break; with a single
        # space the summary lines run together when pasted into a PR.
        print(line + "  ")
    print()
    print("| Scenario | Files | Drawers | Per-chunk (old) | Batched (new) | Speedup |")
    print("| --- | ---: | ---: | ---: | ---: | ---: |")
    for r in results:
        print(
            f"| {r['label']} | {r['n_files']} | {r['drawers']} | "
            f"{r['unbatched_time']:.2f}s · {r['unbatched_rate']:.0f} drw/s | "
            f"{r['batched_time']:.2f}s · {r['batched_rate']:.0f} drw/s | "
            f"**{r['speedup']:.2f}×** |"
        )
if __name__ == "__main__":
main()
+25 -3
View File
@@ -405,6 +405,23 @@ class ChromaBackend(BaseBackend):
self._freshness: dict[str, tuple[int, float]] = {}
self._closed = False
@staticmethod
def _resolve_embedding_function():
    """Build the embedding function for the configured ``embedding_device``.

    The result is meant to be passed explicitly to both ``get_collection``
    and ``get_or_create_collection``: ChromaDB 1.x does not persist the EF
    with the collection, so a reader that leaves it out silently gets the
    library default and its queries will not match the writer's vectors.

    Returns:
        The embedding function, or ``None`` if building it raised (the
        failure is logged and the caller falls back to chromadb's default).
    """
    embedding_fn = None
    try:
        from ..embedding import get_embedding_function

        embedding_fn = get_embedding_function()
    except Exception:
        logger.exception("Failed to build embedding function; using chromadb default")
    return embedding_fn
# ------------------------------------------------------------------
# Internal helpers
# ------------------------------------------------------------------
@@ -532,12 +549,15 @@ class ChromaBackend(BaseBackend):
if options and isinstance(options, dict):
hnsw_space = options.get("hnsw_space", hnsw_space)
ef = self._resolve_embedding_function()
ef_kwargs = {"embedding_function": ef} if ef is not None else {}
if create:
collection = client.get_or_create_collection(
collection_name, metadata={"hnsw:space": hnsw_space}
collection_name, metadata={"hnsw:space": hnsw_space}, **ef_kwargs
)
else:
collection = client.get_collection(collection_name)
collection = client.get_collection(collection_name, **ef_kwargs)
return ChromaCollection(collection)
def close_palace(self, palace) -> None:
@@ -578,8 +598,10 @@ class ChromaBackend(BaseBackend):
self, palace_path: str, collection_name: str, hnsw_space: str = "cosine"
) -> ChromaCollection:
"""Create (not get-or-create) ``collection_name`` with the given HNSW space."""
ef = self._resolve_embedding_function()
ef_kwargs = {"embedding_function": ef} if ef is not None else {}
collection = self._client(palace_path).create_collection(
collection_name, metadata={"hnsw:space": hnsw_space}
collection_name, metadata={"hnsw:space": hnsw_space}, **ef_kwargs
)
return ChromaCollection(collection)
+17
View File
@@ -236,6 +236,23 @@ class MempalaceConfig:
pass
return normalized
@property
def embedding_device(self):
    """Hardware device for the ONNX embedding model.

    Values: ``"auto"`` (default), ``"cpu"``, ``"cuda"``, ``"coreml"``,
    ``"dml"``. Read from env ``MEMPALACE_EMBEDDING_DEVICE`` first, then
    ``embedding_device`` in ``config.json``, then ``"auto"``. The value is
    stripped and lower-cased; a blank env var is ignored, and a blank or
    ``null`` config entry yields ``"auto"``.

    ``auto`` resolves to the first available accelerator at runtime via
    :mod:`mempalace.embedding`; requesting an unavailable accelerator
    logs a warning and falls back to CPU.
    """
    env_val = (os.environ.get("MEMPALACE_EMBEDDING_DEVICE") or "").strip().lower()
    if env_val:
        return env_val
    file_val = self._file_config.get("embedding_device")
    if file_val is None:
        # An explicit JSON null must not turn into the literal device "none".
        return "auto"
    return str(file_val).strip().lower() or "auto"
@property
def hook_silent_save(self):
"""Whether the stop hook saves directly (True) or blocks for MCP calls (False)."""
+35 -22
View File
@@ -55,6 +55,7 @@ CONVO_EXTENSIONS = {
MIN_CHUNK_SIZE = 30
CHUNK_SIZE = 800 # chars per drawer — align with miner.py
DRAWER_UPSERT_BATCH_SIZE = 1000
MAX_FILE_SIZE = 500 * 1024 * 1024 # 500 MB — skip files larger than this.
# Matches miner.py at 500 MB. Long Claude Code sessions, multi-year
# ChatGPT exports, and lifetime Slack dumps routinely exceed 10 MB; the
@@ -332,31 +333,43 @@ def _file_chunks_locked(collection, source_file, chunks, wing, room, agent, extr
except Exception:
pass
for chunk in chunks:
chunk_room = chunk.get("memory_type", room) if extract_mode == "general" else room
if extract_mode == "general":
room_counts_delta[chunk_room] += 1
drawer_id = f"drawer_{wing}_{chunk_room}_{hashlib.sha256((source_file + str(chunk['chunk_index'])).encode()).hexdigest()[:24]}"
# Batch chunks into bounded upserts so large transcripts keep most of
# the embedding speedup without one huge Chroma/SQLite request. Keep
# one filed_at per source file so all transcript drawers share an
# ingest timestamp.
filed_at = datetime.now().isoformat()
for batch_start in range(0, len(chunks), DRAWER_UPSERT_BATCH_SIZE):
batch_docs: list = []
batch_ids: list = []
batch_metas: list = []
for chunk in chunks[batch_start : batch_start + DRAWER_UPSERT_BATCH_SIZE]:
chunk_room = chunk.get("memory_type", room) if extract_mode == "general" else room
if extract_mode == "general":
room_counts_delta[chunk_room] += 1
drawer_id = f"drawer_{wing}_{chunk_room}_{hashlib.sha256((source_file + str(chunk['chunk_index'])).encode()).hexdigest()[:24]}"
batch_docs.append(chunk["content"])
batch_ids.append(drawer_id)
batch_metas.append(
{
"wing": wing,
"room": chunk_room,
"hall": _detect_hall_cached(chunk["content"]),
"source_file": source_file,
"chunk_index": chunk["chunk_index"],
"added_by": agent,
"filed_at": filed_at,
"ingest_mode": "convos",
"extract_mode": extract_mode,
"normalize_version": NORMALIZE_VERSION,
}
)
try:
collection.upsert(
documents=[chunk["content"]],
ids=[drawer_id],
metadatas=[
{
"wing": wing,
"room": chunk_room,
"hall": _detect_hall_cached(chunk["content"]),
"source_file": source_file,
"chunk_index": chunk["chunk_index"],
"added_by": agent,
"filed_at": datetime.now().isoformat(),
"ingest_mode": "convos",
"extract_mode": extract_mode,
"normalize_version": NORMALIZE_VERSION,
}
],
documents=batch_docs,
ids=batch_ids,
metadatas=batch_metas,
)
drawers_added += 1
drawers_added += len(batch_docs)
except Exception as e:
if "already exists" not in str(e).lower():
raise
+155
View File
@@ -0,0 +1,155 @@
"""Embedding function factory with hardware acceleration.
Returns a ChromaDB-compatible embedding function bound to a user-selected
ONNX Runtime execution provider. The same ``all-MiniLM-L6-v2`` model and
384-dim vectors ChromaDB ships by default are reused, so switching device
does not invalidate existing palaces.
Supported devices (env ``MEMPALACE_EMBEDDING_DEVICE`` or ``embedding_device``
in ``~/.mempalace/config.json``):
* ``auto`` — prefer CUDA ▸ CoreML ▸ DirectML, fall back to CPU
* ``cpu`` — force CPU (the historical default)
* ``cuda`` — NVIDIA GPU via ``onnxruntime-gpu`` (``pip install mempalace[gpu]``)
* ``coreml`` — Apple Neural Engine (macOS)
* ``dml`` — DirectML (Windows / AMD / Intel GPUs)
Requesting an unavailable accelerator emits a warning and falls back to CPU
rather than hard-failing — mining must still work on a laptop without CUDA.
"""
from __future__ import annotations
import logging
from typing import Optional
logger = logging.getLogger(__name__)
# Device name -> ONNX Runtime execution providers in preference order.
# Every accelerator entry lists CPUExecutionProvider as its second choice.
_PROVIDER_MAP = {
"cpu": ["CPUExecutionProvider"],
"cuda": ["CUDAExecutionProvider", "CPUExecutionProvider"],
"coreml": ["CoreMLExecutionProvider", "CPUExecutionProvider"],
"dml": ["DmlExecutionProvider", "CPUExecutionProvider"],
}
# Device name -> pip extra named in the "not installed" warning.
_DEVICE_EXTRA = {
"cuda": "mempalace[gpu]",
"coreml": "mempalace[coreml]",
"dml": "mempalace[dml]",
}
# (provider, device name) pairs tried in this order when the device is "auto".
_AUTO_ORDER = [
("CUDAExecutionProvider", "cuda"),
("CoreMLExecutionProvider", "coreml"),
("DmlExecutionProvider", "dml"),
]
# Embedding functions already built, keyed by the tuple of resolved providers.
_EF_CACHE: dict = {}
# Device names that have already triggered a fallback warning.
_WARNED: set = set()
def _resolve_providers(device: str) -> tuple[list, str]:
    """Return ``(provider_list, effective_device)`` for ``device``.

    ``device`` is normalized (``None``/empty becomes ``"auto"``, then
    stripped and lower-cased). Falls back to CPU, with a one-shot warning
    per device name, when the device is unknown or its execution provider
    is not reported by the installed ``onnxruntime``. If ``onnxruntime``
    cannot be imported at all, CPU is returned without a warning.

    The returned list is always a fresh object, never one of the
    ``_PROVIDER_MAP`` values, so a consumer that edits the list in place
    cannot change what later calls resolve to.
    """
    device = (device or "auto").strip().lower()
    try:
        import onnxruntime as ort

        available = set(ort.get_available_providers())
    except ImportError:
        return (["CPUExecutionProvider"], "cpu")

    if device == "auto":
        for provider, name in _AUTO_ORDER:
            if provider in available:
                return ([provider, "CPUExecutionProvider"], name)
        return (["CPUExecutionProvider"], "cpu")

    requested = _PROVIDER_MAP.get(device)
    if requested is None:
        if device not in _WARNED:
            logger.warning("Unknown embedding_device %r — falling back to cpu", device)
            _WARNED.add(device)
        return (["CPUExecutionProvider"], "cpu")

    preferred = requested[0]
    if preferred == "CPUExecutionProvider":
        # Copy: the caller hands this list to the embedding function.
        return (list(requested), "cpu")

    if preferred not in available:
        if device not in _WARNED:
            extra = _DEVICE_EXTRA.get(device, "the matching mempalace extra for your device")
            logger.warning(
                "embedding_device=%r requested but %s is not installed — "
                "falling back to CPU. Install %s.",
                device,
                preferred,
                extra,
            )
            _WARNED.add(device)
        return (["CPUExecutionProvider"], "cpu")

    # Copy for the same reason as above: keep _PROVIDER_MAP immutable in practice.
    return (list(requested), device)
def _build_ef_class():
    """Return an ``ONNXMiniLM_L6_V2`` subclass that reports its name as ``"default"``.

    ChromaDB 1.5 persists the EF identity on the collection and rejects
    reads that pass a differently-named EF (``onnx_mini_lm_l6_v2`` vs
    ``default``). Model and vectors are identical; only the ``name()`` tag
    differs. Reporting ``"default"`` lets a single class serve palaces
    created with ``DefaultEmbeddingFunction`` as well as palaces created
    here, while still accepting GPU-capable ``preferred_providers``.

    chromadb is imported inside the function rather than at module import.
    """
    from chromadb.utils.embedding_functions import ONNXMiniLM_L6_V2 as _BaseEF

    class _MempalaceONNX(_BaseEF):
        @staticmethod
        def name() -> str:
            return "default"

    return _MempalaceONNX
def get_embedding_function(device: Optional[str] = None):
    """Return the embedding function for ``device``, building it at most once.

    With ``device=None`` the device comes from
    :class:`MempalaceConfig.embedding_device`. Instances are cached by the
    resolved provider tuple, so different device names that resolve to the
    same providers share one instance and the model loads once per process.
    """
    if device is None:
        from .config import MempalaceConfig

        device = MempalaceConfig().embedding_device

    providers, effective = _resolve_providers(device)
    cache_key = tuple(providers)

    instance = _EF_CACHE.get(cache_key)
    if instance is None:
        instance = _build_ef_class()(preferred_providers=providers)
        _EF_CACHE[cache_key] = instance
        logger.info(
            "Embedding function initialized (device=%s providers=%s)", effective, providers
        )
    return instance
def describe_device(device: Optional[str] = None) -> str:
    """Return the effective device name that ``device`` resolves to.

    With ``device=None`` the configured ``embedding_device`` is used. The
    miner CLI prints this in its header so users can tell whether GPU
    acceleration was actually selected.
    """
    if device is None:
        from .config import MempalaceConfig

        device = MempalaceConfig().embedding_device

    return _resolve_providers(device)[1]
+89 -40
View File
@@ -14,6 +14,7 @@ import fnmatch
from pathlib import Path
from datetime import datetime
from collections import defaultdict
from typing import Optional
from .palace import (
NORMALIZE_VERSION,
@@ -64,6 +65,7 @@ SKIP_FILENAMES = {
CHUNK_SIZE = 800 # chars per drawer
CHUNK_OVERLAP = 100 # overlap between chunks
MIN_CHUNK_SIZE = 50 # skip tiny chunks
DRAWER_UPSERT_BATCH_SIZE = 1000
MAX_FILE_SIZE = 500 * 1024 * 1024 # 500 MB — skip files larger than this.
# Long Claude Code sessions and large transcript exports routinely exceed
# 10 MB. The cap exists as a defensive rail against pathological binary
@@ -633,40 +635,62 @@ def _extract_entities_for_metadata(content: str) -> str:
return ";".join(capped)
def _build_drawer_metadata(
    wing: str,
    room: str,
    source_file: str,
    chunk_index: int,
    agent: str,
    content: str,
    source_mtime: Optional[float],
) -> dict:
    """Assemble the metadata dict for a single drawer; performs no upsert.

    Factored out of ``add_drawer`` so ``process_file`` can collect the
    metadata for many chunks and send them in one ``collection.upsert``,
    giving one embedding forward pass per batch instead of per chunk.

    ``source_mtime`` is stored only when it is not ``None``, and
    ``entities`` only when entity extraction returns something truthy.
    """
    meta = dict(
        wing=wing,
        room=room,
        source_file=source_file,
        chunk_index=chunk_index,
        added_by=agent,
        filed_at=datetime.now().isoformat(),
        normalize_version=NORMALIZE_VERSION,
    )
    if source_mtime is not None:
        meta["source_mtime"] = source_mtime

    meta["hall"] = detect_hall(content)

    entity_tags = _extract_entities_for_metadata(content)
    if entity_tags:
        meta["entities"] = entity_tags
    return meta
def add_drawer(
collection, wing: str, room: str, content: str, source_file: str, chunk_index: int, agent: str
):
"""Add one drawer to the palace."""
"""Add one drawer to the palace.
Kept for backward compatibility with external callers. In-tree the
miner uses ``_build_drawer_metadata`` + a batched ``collection.upsert``
to amortize the embedding model's forward-pass cost across chunks.
"""
drawer_id = f"drawer_{wing}_{room}_{hashlib.sha256((source_file + str(chunk_index)).encode()).hexdigest()[:24]}"
try:
metadata = {
"wing": wing,
"room": room,
"source_file": source_file,
"chunk_index": chunk_index,
"added_by": agent,
"filed_at": datetime.now().isoformat(),
"normalize_version": NORMALIZE_VERSION,
}
# Store file mtime so we can detect modifications later.
try:
metadata["source_mtime"] = os.path.getmtime(source_file)
except OSError:
pass
# Tag with hall for graph connectivity within wings
metadata["hall"] = detect_hall(content)
# Tag with entity names for filterable search
entities = _extract_entities_for_metadata(content)
if entities:
metadata["entities"] = entities
collection.upsert(
documents=[content],
ids=[drawer_id],
metadatas=[metadata],
)
return True
except Exception:
raise
source_mtime = os.path.getmtime(source_file)
except OSError:
source_mtime = None
metadata = _build_drawer_metadata(
wing, room, source_file, chunk_index, agent, content, source_mtime
)
collection.upsert(
documents=[content],
ids=[drawer_id],
metadatas=[metadata],
)
return True
# =============================================================================
@@ -725,19 +749,41 @@ def process_file(
except Exception:
pass
# Batch chunks into bounded upserts so the embedding model sees many
# chunks per forward pass without building one huge Chroma/SQLite
# request for pathological files. A bad chunk can fail its sub-batch;
# that is the deliberate trade-off for amortizing embedding overhead.
try:
source_mtime = os.path.getmtime(source_file)
except OSError:
source_mtime = None
drawers_added = 0
for chunk in chunks:
added = add_drawer(
collection=collection,
wing=wing,
room=room,
content=chunk["content"],
source_file=source_file,
chunk_index=chunk["chunk_index"],
agent=agent,
for batch_start in range(0, len(chunks), DRAWER_UPSERT_BATCH_SIZE):
batch_docs: list = []
batch_ids: list = []
batch_metas: list = []
for chunk in chunks[batch_start : batch_start + DRAWER_UPSERT_BATCH_SIZE]:
drawer_id = f"drawer_{wing}_{room}_{hashlib.sha256((source_file + str(chunk['chunk_index'])).encode()).hexdigest()[:24]}"
batch_docs.append(chunk["content"])
batch_ids.append(drawer_id)
batch_metas.append(
_build_drawer_metadata(
wing,
room,
source_file,
chunk["chunk_index"],
agent,
chunk["content"],
source_mtime,
)
)
collection.upsert(
documents=batch_docs,
ids=batch_ids,
metadatas=batch_metas,
)
if added:
drawers_added += 1
drawers_added += len(batch_docs)
# Build closet — the searchable index pointing to these drawers.
# Purge first: a re-mine (mtime change or normalize_version bump) must
@@ -868,6 +914,8 @@ def mine(
if limit > 0:
files = files[:limit]
from .embedding import describe_device
print(f"\n{'=' * 55}")
print(" MemPalace Mine")
print(f"{'=' * 55}")
@@ -875,6 +923,7 @@ def mine(
print(f" Rooms: {', '.join(r['name'] for r in rooms)}")
print(f" Files: {len(files)}")
print(f" Palace: {palace_path}")
print(f" Device: {describe_device()}")
if dry_run:
print(" DRY RUN — nothing will be filed")
if not respect_gitignore:
+8
View File
@@ -53,6 +53,14 @@ chroma = "mempalace.backends.chroma:ChromaBackend"
[project.optional-dependencies]
dev = ["pytest>=7.0", "pytest-cov>=4.0", "ruff>=0.4.0", "psutil>=5.9"]
spellcheck = ["autocorrect>=2.0"]
# Hardware acceleration for the ONNX embedding model. Install exactly one:
# pip install mempalace[gpu] — NVIDIA CUDA
# pip install mempalace[dml] — DirectML (Windows AMD/Intel/NVIDIA)
# pip install mempalace[coreml] — macOS Neural Engine
# After install, set MEMPALACE_EMBEDDING_DEVICE=cuda|dml|coreml (or "auto").
gpu = ["onnxruntime-gpu>=1.16"]
dml = ["onnxruntime-directml>=1.16"]
coreml = ["onnxruntime>=1.16"]
[dependency-groups]
dev = ["pytest>=7.0", "pytest-cov>=4.0", "ruff>=0.4.0", "psutil>=5.9"]
+24
View File
@@ -20,6 +20,30 @@ def test_config_from_file():
assert cfg.palace_path == "/custom/palace"
def test_embedding_device_defaults_to_auto(tmp_path, monkeypatch):
    """With no env override and no config file, the device is ``auto``."""
    monkeypatch.delenv("MEMPALACE_EMBEDDING_DEVICE", raising=False)
    # tmp_path is cleaned up by pytest; a bare mkdtemp() directory would leak.
    cfg = MempalaceConfig(config_dir=str(tmp_path))
    assert cfg.embedding_device == "auto"
def test_embedding_device_from_config_is_normalized(tmp_path, monkeypatch):
    """A padded, upper-case config value comes back stripped and lower-cased."""
    monkeypatch.delenv("MEMPALACE_EMBEDDING_DEVICE", raising=False)
    config_file = tmp_path / "config.json"
    config_file.write_text(json.dumps({"embedding_device": " CUDA "}))

    device = MempalaceConfig(config_dir=str(tmp_path)).embedding_device

    assert device == "cuda"
def test_embedding_device_env_overrides_config(tmp_path, monkeypatch):
    """The env var wins over ``config.json`` and is normalized the same way."""
    config_file = tmp_path / "config.json"
    config_file.write_text(json.dumps({"embedding_device": "cpu"}))
    monkeypatch.setenv("MEMPALACE_EMBEDDING_DEVICE", " CoreML ")

    device = MempalaceConfig(config_dir=str(tmp_path)).embedding_device

    assert device == "coreml"
def test_env_override():
raw = "/env/palace"
os.environ["MEMPALACE_PALACE_PATH"] = raw
+36
View File
@@ -1,6 +1,9 @@
"""Unit tests for convo_miner pure functions (no chromadb needed)."""
import contextlib
from mempalace.convo_miner import (
_file_chunks_locked,
chunk_exchanges,
detect_convo_room,
scan_convos,
@@ -111,3 +114,36 @@ class TestScanConvos:
def test_scan_empty_dir(self, tmp_path):
files = scan_convos(str(tmp_path))
assert files == []
class TestFileChunksLocked:
    """``_file_chunks_locked`` must split its upserts by ``DRAWER_UPSERT_BATCH_SIZE``."""

    def test_uses_bounded_upsert_batches(self, monkeypatch):
        import mempalace.convo_miner as convo_miner

        class RecordingCollection:
            """Collection stand-in that records the size of every upsert."""

            def __init__(self):
                self.batch_sizes = []

            def delete(self, *args, **kwargs):
                pass

            def upsert(self, documents, ids, metadatas):
                self.batch_sizes.append(len(documents))

        def never_mined(collection, source_file):
            return False

        def no_lock(source_file):
            return contextlib.nullcontext()

        def fixed_hall(content):
            return "conversations"

        # Five chunks with a batch size of two should give batches of 2, 2 and 1.
        chunks = [{"content": f"chunk {i} " * 20, "chunk_index": i} for i in range(5)]
        col = RecordingCollection()
        monkeypatch.setattr(convo_miner, "DRAWER_UPSERT_BATCH_SIZE", 2)
        monkeypatch.setattr(convo_miner, "file_already_mined", never_mined)
        monkeypatch.setattr(convo_miner, "mine_lock", no_lock)
        monkeypatch.setattr(convo_miner, "_detect_hall_cached", fixed_hall)

        drawers, room_counts, skipped = _file_chunks_locked(
            col, "chat.txt", chunks, "wing", "general", "agent", "exchange"
        )

        assert drawers == 5
        assert dict(room_counts) == {}
        assert skipped is False
        assert col.batch_sizes == [2, 2, 1]
+98
View File
@@ -0,0 +1,98 @@
import pytest
import mempalace.embedding as embedding
@pytest.fixture(autouse=True)
def isolate_embedding_state(monkeypatch):
    """Give every test an empty EF cache and an empty warned-devices set."""
    for attr_name, fresh_value in (("_EF_CACHE", {}), ("_WARNED", set())):
        monkeypatch.setattr(embedding, attr_name, fresh_value)
def test_auto_picks_cuda(monkeypatch):
    """``auto`` resolves to CUDA (with CPU fallback) when CUDA is available."""
    installed = ["CUDAExecutionProvider", "CPUExecutionProvider"]
    monkeypatch.setattr("onnxruntime.get_available_providers", lambda: list(installed))

    expected = (["CUDAExecutionProvider", "CPUExecutionProvider"], "cuda")

    assert embedding._resolve_providers("auto") == expected
def test_auto_falls_to_cpu(monkeypatch):
    """``auto`` resolves to CPU when no accelerator provider is available."""
    monkeypatch.setattr("onnxruntime.get_available_providers", lambda: ["CPUExecutionProvider"])

    resolved = embedding._resolve_providers("auto")

    assert resolved == (["CPUExecutionProvider"], "cpu")
def test_cuda_missing_warns_with_gpu_extra(monkeypatch, caplog):
    """Requesting CUDA without its provider falls back to CPU and names the gpu extra."""
    monkeypatch.setattr("onnxruntime.get_available_providers", lambda: ["CPUExecutionProvider"])

    resolved = embedding._resolve_providers("cuda")

    assert resolved == (["CPUExecutionProvider"], "cpu")
    assert "mempalace[gpu]" in caplog.text
def test_coreml_missing_warns_with_coreml_extra(monkeypatch, caplog):
    """Requesting CoreML without its provider falls back to CPU and names the coreml extra."""
    monkeypatch.setattr("onnxruntime.get_available_providers", lambda: ["CPUExecutionProvider"])

    resolved = embedding._resolve_providers("coreml")

    assert resolved == (["CPUExecutionProvider"], "cpu")
    assert "mempalace[coreml]" in caplog.text
def test_dml_missing_warns_with_dml_extra(monkeypatch, caplog):
    """Requesting DirectML without its provider falls back to CPU and names the dml extra."""
    monkeypatch.setattr("onnxruntime.get_available_providers", lambda: ["CPUExecutionProvider"])

    resolved = embedding._resolve_providers("dml")

    assert resolved == (["CPUExecutionProvider"], "cpu")
    assert "mempalace[dml]" in caplog.text
def test_unknown_device_warns_once(monkeypatch, caplog):
    """An unknown device name falls back to CPU and is warned about only once."""
    monkeypatch.setattr("onnxruntime.get_available_providers", lambda: ["CPUExecutionProvider"])
    cpu_fallback = (["CPUExecutionProvider"], "cpu")

    # Resolve the same bogus name twice; only the first call may log.
    for _ in range(2):
        assert embedding._resolve_providers("bogus") == cpu_fallback

    assert caplog.text.count("Unknown embedding_device") == 1
def test_onnxruntime_import_error_falls_back_to_cpu(monkeypatch):
    """If ``import onnxruntime`` raises ImportError, resolution returns CPU."""
    import builtins

    original_import = builtins.__import__

    def import_without_onnxruntime(name, *args, **kwargs):
        # Let every other import through untouched.
        if name != "onnxruntime":
            return original_import(name, *args, **kwargs)
        raise ImportError("missing")

    monkeypatch.setattr(builtins, "__import__", import_without_onnxruntime)

    resolved = embedding._resolve_providers("cuda")

    assert resolved == (["CPUExecutionProvider"], "cpu")
def test_get_embedding_function_caches_by_resolved_provider_tuple(monkeypatch):
    """Two device names that resolve to the same providers share one EF instance."""

    class DummyEF:
        def __init__(self, preferred_providers):
            self.preferred_providers = preferred_providers

    def always_cpu(device):
        return (["CPUExecutionProvider"], "cpu")

    monkeypatch.setattr(embedding, "_build_ef_class", lambda: DummyEF)
    monkeypatch.setattr(embedding, "_resolve_providers", always_cpu)

    from_cpu = embedding.get_embedding_function("cpu")
    from_auto = embedding.get_embedding_function("auto")

    assert from_cpu is from_auto
    assert from_cpu.preferred_providers == ["CPUExecutionProvider"]
def test_describe_device_uses_resolved_effective_device(monkeypatch):
    """``describe_device`` reports the resolved device, not the requested one."""

    def resolves_to_cuda(device):
        return (["CUDAExecutionProvider", "CPUExecutionProvider"], "cuda")

    monkeypatch.setattr(embedding, "_resolve_providers", resolves_to_cuda)

    assert embedding.describe_device("auto") == "cuda"
+40
View File
@@ -383,6 +383,46 @@ def test_status_handles_none_metadata_without_crash(tmp_path, capsys):
assert "WING: proj" in out
def test_process_file_uses_bounded_upsert_batches(tmp_path, monkeypatch):
    """``process_file`` must split its upserts by ``DRAWER_UPSERT_BATCH_SIZE``."""
    from mempalace import miner

    class RecordingCollection:
        """Collection stand-in that records the size of every upsert."""

        def __init__(self):
            self.batch_sizes = []

        def get(self, *args, **kwargs):
            return {"ids": []}

        def delete(self, *args, **kwargs):
            pass

        def upsert(self, documents, ids, metadatas):
            self.batch_sizes.append(len(documents))

    def fixed_chunks(content, source_file):
        return chunks

    def fixed_hall(content):
        return "code"

    def no_entities(content):
        return ""

    source = tmp_path / "src.py"
    source.write_text("print('hello')\n" * 20, encoding="utf-8")
    # Five chunks with a batch size of two should give batches of 2, 2 and 1.
    chunks = [{"content": f"chunk {i} " * 20, "chunk_index": i} for i in range(5)]
    col = RecordingCollection()
    monkeypatch.setattr(miner, "DRAWER_UPSERT_BATCH_SIZE", 2)
    monkeypatch.setattr(miner, "chunk_text", fixed_chunks)
    monkeypatch.setattr(miner, "detect_hall", fixed_hall)
    monkeypatch.setattr(miner, "_extract_entities_for_metadata", no_entities)

    room_specs = [{"name": "general", "description": "General"}]
    drawers, room = miner.process_file(
        source,
        tmp_path,
        col,
        "wing",
        room_specs,
        "agent",
        False,
    )

    assert drawers == 5
    assert room == "general"
    assert col.batch_sizes == [2, 2, 1]
# ── normalize_version schema gate ───────────────────────────────────────
#
# When the normalization pipeline changes shape (e.g., strip_noise lands),
Generated
+167 -1
View File
@@ -1178,6 +1178,11 @@ dependencies = [
]
[package.optional-dependencies]
coreml = [
{ name = "onnxruntime", version = "1.20.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
{ name = "onnxruntime", version = "1.24.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" },
{ name = "onnxruntime", version = "1.24.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
]
dev = [
{ name = "psutil" },
{ name = "pytest", version = "8.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
@@ -1185,6 +1190,16 @@ dev = [
{ name = "pytest-cov" },
{ name = "ruff" },
]
dml = [
{ name = "onnxruntime-directml", version = "1.20.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
{ name = "onnxruntime-directml", version = "1.24.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" },
{ name = "onnxruntime-directml", version = "1.24.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
]
gpu = [
{ name = "onnxruntime-gpu", version = "1.20.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
{ name = "onnxruntime-gpu", version = "1.24.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" },
{ name = "onnxruntime-gpu", version = "1.25.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
]
spellcheck = [
{ name = "autocorrect" },
]
@@ -1202,6 +1217,9 @@ dev = [
requires-dist = [
{ name = "autocorrect", marker = "extra == 'spellcheck'", specifier = ">=2.0" },
{ name = "chromadb", specifier = ">=1.5.4,<2" },
{ name = "onnxruntime", marker = "extra == 'coreml'", specifier = ">=1.16" },
{ name = "onnxruntime-directml", marker = "extra == 'dml'", specifier = ">=1.16" },
{ name = "onnxruntime-gpu", marker = "extra == 'gpu'", specifier = ">=1.16" },
{ name = "psutil", marker = "extra == 'dev'", specifier = ">=5.9" },
{ name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0" },
{ name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.0" },
@@ -1209,7 +1227,7 @@ requires-dist = [
{ name = "ruff", marker = "extra == 'dev'", specifier = ">=0.4.0" },
{ name = "tomli", marker = "python_full_version < '3.11'", specifier = ">=2.0.0" },
]
provides-extras = ["dev", "spellcheck"]
provides-extras = ["dev", "spellcheck", "gpu", "dml", "coreml"]
[package.metadata.requires-dev]
dev = [
@@ -1815,6 +1833,154 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/6c/1d/1666dc64e78d8587d168fec4e3b7922b92eb286a2ddeebcf6acb55c7dc82/onnxruntime-1.24.4-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e1cc6a518255f012134bc791975a6294806be9a3b20c4a54cca25194c90cf731", size = 17247021, upload-time = "2026-03-17T22:04:52.377Z" },
]
[[package]]
name = "onnxruntime-directml"
version = "1.20.1"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"python_full_version < '3.10'",
]
dependencies = [
{ name = "coloredlogs", marker = "python_full_version < '3.10'" },
{ name = "flatbuffers", marker = "python_full_version < '3.10'" },
{ name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
{ name = "packaging", marker = "python_full_version < '3.10'" },
{ name = "protobuf", marker = "python_full_version < '3.10'" },
{ name = "sympy", marker = "python_full_version < '3.10'" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/3c/4f/f433239b05304aa9af0217da20508abbbcec1dcd58ee821e3dab8939ecfe/onnxruntime_directml-1.20.1-cp310-cp310-win_amd64.whl", hash = "sha256:d4aa43694799559fb5570fdf0e96a154d4b4d0bb9b73c3e81744eb7fe0c0de8d", size = 22760521, upload-time = "2024-11-21T00:49:40.179Z" },
{ url = "https://files.pythonhosted.org/packages/df/5f/16337318bd99d2d837cbb2e91e8a12b0915cb80d7c1ae8f80ca2f5d47a09/onnxruntime_directml-1.20.1-cp311-cp311-win_amd64.whl", hash = "sha256:c7861057ad4caa64186c910efb3b54c1f575cd0e64732509c9bd927d2d20187b", size = 22762384, upload-time = "2024-11-21T00:49:44.01Z" },
{ url = "https://files.pythonhosted.org/packages/8f/50/4599c6573bd71cc0c80820c63dea599a0b489ce874f93a5e021ca20a9e1f/onnxruntime_directml-1.20.1-cp312-cp312-win_amd64.whl", hash = "sha256:4b9a9f8349d68eef947fc692b3572e7a6490cb95effb151ace1a6ffc15884940", size = 22764330, upload-time = "2024-11-21T00:49:47.264Z" },
{ url = "https://files.pythonhosted.org/packages/60/40/7d8489d9101b4aa7bae29227075ce31bc5764cbe87b78c995fdb296e3eff/onnxruntime_directml-1.20.1-cp313-cp313-win_amd64.whl", hash = "sha256:86a8c4b69e377bb18ed2a18aaf2337baa83a57ff87a97224d027e546dfa99fde", size = 22764517, upload-time = "2024-11-21T00:49:50.213Z" },
]
[[package]]
name = "onnxruntime-directml"
version = "1.24.3"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"python_full_version == '3.10.*'",
]
dependencies = [
{ name = "flatbuffers", marker = "python_full_version == '3.10.*'" },
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" },
{ name = "packaging", marker = "python_full_version == '3.10.*'" },
{ name = "protobuf", marker = "python_full_version == '3.10.*'" },
{ name = "sympy", marker = "python_full_version == '3.10.*'" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/ed/65/36ce5a5e79fb5d7b4d7636bc6e6c4024f3ff0571789e8eedb7149bb7c538/onnxruntime_directml-1.24.3-cp311-cp311-win_amd64.whl", hash = "sha256:442fecea5d52df315b6cecfbcbb44aff6681880b6bbf23546a6c00125fec66f1", size = 25106769, upload-time = "2026-03-05T16:27:07.495Z" },
{ url = "https://files.pythonhosted.org/packages/05/40/c948c0ee42b7b6297dd45956092f5a53a6954610c3911a5847c7555b4930/onnxruntime_directml-1.24.3-cp312-cp312-win_amd64.whl", hash = "sha256:d889010e6ed2f30026522308173d295bcfdaf6f28d1df6054c748ffa750a7ad5", size = 25114531, upload-time = "2026-03-05T16:27:11.256Z" },
{ url = "https://files.pythonhosted.org/packages/56/f0/9de329f39a66142aab4c1d9a48edc0e432de27c6ba09e8039e0dc51885e7/onnxruntime_directml-1.24.3-cp313-cp313-win_amd64.whl", hash = "sha256:f684adcb29dd48ee172b52fcf1d19a1da1a67a051384ac3418b36d200d0d105c", size = 25114902, upload-time = "2026-03-05T16:27:13.925Z" },
{ url = "https://files.pythonhosted.org/packages/fe/7a/8b3014ca4065a32bd6672221bf4cb0b5b9a726d28a9caafdb86a076a5981/onnxruntime_directml-1.24.3-cp314-cp314-win_amd64.whl", hash = "sha256:42b17de7030445e75a7e83a4a317f9c655ed2dd7045fe79a7a21dce7b60103b6", size = 25570589, upload-time = "2026-03-05T16:27:17.278Z" },
]
[[package]]
name = "onnxruntime-directml"
version = "1.24.4"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"python_full_version >= '3.14'",
"python_full_version == '3.13.*'",
"python_full_version >= '3.11' and python_full_version < '3.13'",
]
dependencies = [
{ name = "flatbuffers", marker = "python_full_version >= '3.11'" },
{ name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
{ name = "packaging", marker = "python_full_version >= '3.11'" },
{ name = "protobuf", marker = "python_full_version >= '3.11'" },
{ name = "sympy", marker = "python_full_version >= '3.11'" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/be/90/99566dc6398028e7691a5b12720fd85f757a0901818b84599d28abb3f085/onnxruntime_directml-1.24.4-cp311-cp311-win_amd64.whl", hash = "sha256:96642a787e5a6f33bf043521c0f06eb1eb663f6b830e5862a2026d03f9c90543", size = 25106000, upload-time = "2026-03-17T21:47:15.438Z" },
{ url = "https://files.pythonhosted.org/packages/88/ea/33814eb0ec96775eda4c1d30b0d86e91d7d2cd0d84c66d3915aef0e06fa3/onnxruntime_directml-1.24.4-cp312-cp312-win_amd64.whl", hash = "sha256:f2ecb68b7b7b259d2ef3112ae760149f9b5a1e7c0fbb73d539da6250a648a614", size = 25111930, upload-time = "2026-03-17T21:47:18.419Z" },
{ url = "https://files.pythonhosted.org/packages/60/53/2bd2696fac19cf8ca55496a0bcfe431f3aff9579eabbb0e231dc238acf6f/onnxruntime_directml-1.24.4-cp313-cp313-win_amd64.whl", hash = "sha256:2f1031cb2281e5b27cca9efe0b9399317c7286e4d226f7a79d4ab79bbd94d19e", size = 25112253, upload-time = "2026-03-17T21:47:22.043Z" },
{ url = "https://files.pythonhosted.org/packages/b7/04/816932a3ade867a687e406716ca76e0774c6b921545b45818e3ebfcc54ce/onnxruntime_directml-1.24.4-cp314-cp314-win_amd64.whl", hash = "sha256:51d86bb949488e572b00422f344990a4a81d982416d73b6c0e4ced2bcd423d19", size = 25571098, upload-time = "2026-03-17T21:47:25.461Z" },
]
[[package]]
name = "onnxruntime-gpu"
version = "1.20.2"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"python_full_version < '3.10'",
]
dependencies = [
{ name = "coloredlogs", marker = "python_full_version < '3.10'" },
{ name = "flatbuffers", marker = "python_full_version < '3.10'" },
{ name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
{ name = "packaging", marker = "python_full_version < '3.10'" },
{ name = "protobuf", marker = "python_full_version < '3.10'" },
{ name = "sympy", marker = "python_full_version < '3.10'" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/04/ad/4e5534dcaafe36f596792ebd0049177f7f0b7afa0f696505974ed1d6f72c/onnxruntime_gpu-1.20.2-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dfba508f110ec062dedfd3032e6eee8cde325026e9d7c5792884e8b9d4ebb9c3", size = 291522233, upload-time = "2025-03-07T05:46:08.901Z" },
{ url = "https://files.pythonhosted.org/packages/a5/2a/8afc5aee996fd33fb816bc3067fdbde96a2a7520d4c275fa502f3aef7e54/onnxruntime_gpu-1.20.2-cp310-cp310-win_amd64.whl", hash = "sha256:75a7557292b2741e63fb73236ee84faa08075cead52d9a8d302a67036fc64f16", size = 279696089, upload-time = "2025-03-07T05:39:24.924Z" },
{ url = "https://files.pythonhosted.org/packages/5e/53/9341b875b0ed29953485b43713e94b335a449c3770fed67dddb3c9b84af0/onnxruntime_gpu-1.20.2-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85057c7006457bee14fc2a57417b7e4f396f10d9c1b08b11aae08ac2b825eeda", size = 291518407, upload-time = "2025-03-07T05:46:22.943Z" },
{ url = "https://files.pythonhosted.org/packages/0b/7a/0999993ceae7bf191d5d63a4e1b2208596763d8e586aa7dc5cc091f960c0/onnxruntime_gpu-1.20.2-cp311-cp311-win_amd64.whl", hash = "sha256:d0eafd873e4336949c89e6c7429a68e7e1d0233d9cb363e9780ca76c3c6f865c", size = 279697437, upload-time = "2025-03-07T05:39:38.418Z" },
{ url = "https://files.pythonhosted.org/packages/5b/db/c1fcdf45cad147d3b3609cf66a1c6083b54382f58a41d7fc526cd5909090/onnxruntime_gpu-1.20.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:aa66d2e6de13fe6f4d1554b1c219bd2e4778b540ed9d3dc62957c95a8af43d66", size = 291510804, upload-time = "2025-03-07T05:46:36.178Z" },
{ url = "https://files.pythonhosted.org/packages/27/67/4f979650557738a8b148dd7e0b82522d20ffcfb2c3964141c861a61e82c7/onnxruntime_gpu-1.20.2-cp312-cp312-win_amd64.whl", hash = "sha256:564a6a1187b208012f57c3bb3723ba65f6bc5cddff6e6b917ac96865768b39f5", size = 279699596, upload-time = "2025-03-07T05:39:50.858Z" },
{ url = "https://files.pythonhosted.org/packages/48/a4/60f0cf16b24f05d123f90525408a705741fa92e0c38ab122cdf1d239e3fe/onnxruntime_gpu-1.20.2-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6af5b30b9b0e729d3ca1dfff493a39771f143cfc22af1d77d487022033cae284", size = 291511859, upload-time = "2025-03-07T05:46:49.302Z" },
{ url = "https://files.pythonhosted.org/packages/ab/a2/0eb7a3fa417adc7af0be73b0ea35f1f0d6f92e3722eb6468e36dfe2e762d/onnxruntime_gpu-1.20.2-cp313-cp313-win_amd64.whl", hash = "sha256:6ffe5108d2dbd96a9a40bf76573219e04b67d0330aa93ca5114f1478185ade19", size = 279697061, upload-time = "2025-03-07T05:40:03.559Z" },
{ url = "https://files.pythonhosted.org/packages/4e/de/6c692ac8604a451011a2a01e35e94f84bea8775ef97f6830985bbe8de172/onnxruntime_gpu-1.20.2-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:407e5b7a21d656aac6f994d2e329f5577eb3d7f98b63aa1e49e71a702ffa1da1", size = 291502464, upload-time = "2025-03-07T05:47:03.191Z" },
]
[[package]]
name = "onnxruntime-gpu"
version = "1.24.3"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"python_full_version == '3.10.*'",
]
dependencies = [
{ name = "flatbuffers", marker = "python_full_version == '3.10.*'" },
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" },
{ name = "packaging", marker = "python_full_version == '3.10.*'" },
{ name = "protobuf", marker = "python_full_version == '3.10.*'" },
{ name = "sympy", marker = "python_full_version == '3.10.*'" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/28/f4/c8050f3f4916ab6c75432724f0ba51c1548dc1c3d66d40c0f8a9611e370f/onnxruntime_gpu-1.24.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ac922633819e1cdc81c9b3a28b5e37d788805307bbaa708a01a3d7150e345625", size = 252750845, upload-time = "2026-03-05T16:35:33.604Z" },
{ url = "https://files.pythonhosted.org/packages/07/b7/81e8936354651915192a362a1718253c6d03da6b902a95237aa392b1d260/onnxruntime_gpu-1.24.3-cp311-cp311-win_amd64.whl", hash = "sha256:0fe6ece3042db149f36f4991cbebd19a690b7ffd82af89450a261b47f4704a37", size = 207192429, upload-time = "2026-03-05T16:39:57.015Z" },
{ url = "https://files.pythonhosted.org/packages/24/fa/58ceca812214c9c1a286407c376e42e0b7de3e2c6e14b61cdf3caf6d6d9c/onnxruntime_gpu-1.24.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:537bdd6d95006a9200ae81f2e73ba9e621e723fdf0deb5901e2e62fb2cccf876", size = 252756089, upload-time = "2026-03-05T16:35:46.004Z" },
{ url = "https://files.pythonhosted.org/packages/3c/07/2f36920b513bd8939e25591153e37d9cfda94115bd119f2874da0750fce2/onnxruntime_gpu-1.24.3-cp312-cp312-win_amd64.whl", hash = "sha256:d72065b3ab5fdaef74d8b6b8f39b7ce20d89731610e3e63cb40e997d3dce177e", size = 207197001, upload-time = "2026-03-05T16:40:05.691Z" },
{ url = "https://files.pythonhosted.org/packages/49/57/9e6206dac76e08f028d2ae95f2ab1b3a7c3317fb6c0374a530aad48dab5c/onnxruntime_gpu-1.24.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3242a70010934e5bb0aeaa9dde4c25c6c2da577b55c6308c0caa828ba3b7be23", size = 252753349, upload-time = "2026-03-05T16:35:58.09Z" },
{ url = "https://files.pythonhosted.org/packages/4e/ae/f0be395602c13a3a8d22fa6632133550a64536c58bc3623abbba5d0a575e/onnxruntime_gpu-1.24.3-cp313-cp313-win_amd64.whl", hash = "sha256:a423b164dbc26cb7f8736367b11698c2a7294748d3c144c39542ecac28d225c9", size = 207197331, upload-time = "2026-03-05T16:40:14.944Z" },
{ url = "https://files.pythonhosted.org/packages/b4/af/a64c9789769d8d7fabc6d35dcce2f2897b2d9e0fe113044efc2903f7cd07/onnxruntime_gpu-1.24.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9696d54974a1313ef0d87f4cbd04f9abfd13839194638d52bb5967a15615341d", size = 252762923, upload-time = "2026-03-05T16:36:10.043Z" },
{ url = "https://files.pythonhosted.org/packages/c1/bb/1cf7dffac2fb01e8de9f0882438165f7543f0aab57f86d1f587e6faa8528/onnxruntime_gpu-1.24.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b8ca744f40b33380bc9136988213e574c927d2b919ed42149977e006b138f74f", size = 252754914, upload-time = "2026-03-05T16:36:30.739Z" },
{ url = "https://files.pythonhosted.org/packages/cf/39/3949d56103bd9cd9381de59b060f9bce8dc2c7363f465bf207ebd0c7a5d0/onnxruntime_gpu-1.24.3-cp314-cp314-win_amd64.whl", hash = "sha256:c60c44e2b388720e6670a948b52626f3d089e960ef7da66e4fa6b2b33a11116f", size = 209599131, upload-time = "2026-03-05T16:40:24.074Z" },
{ url = "https://files.pythonhosted.org/packages/f3/60/51bfbcf2d0540dbfa426a73a9b80046b71a63de7303d16c0f2682c8edfd2/onnxruntime_gpu-1.24.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:29048407a2398361d93de5537c2d2079d79d720337a0743d4a2cc28db981e776", size = 252764115, upload-time = "2026-03-05T16:36:44.681Z" },
]
[[package]]
name = "onnxruntime-gpu"
version = "1.25.0"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"python_full_version >= '3.14'",
"python_full_version == '3.13.*'",
"python_full_version >= '3.11' and python_full_version < '3.13'",
]
dependencies = [
{ name = "flatbuffers", marker = "python_full_version >= '3.11'" },
{ name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
{ name = "packaging", marker = "python_full_version >= '3.11'" },
{ name = "protobuf", marker = "python_full_version >= '3.11'" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/2d/7e/f58f8fc505a876b31fd2a34c1eb8f9863b75bf1589c3297c8efd48b93151/onnxruntime_gpu-1.25.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e8625bb31ee2d88524414e7458cc604f4f958f323ef8832cc00882f6cd42b9a1", size = 270337732, upload-time = "2026-04-22T17:27:59.993Z" },
{ url = "https://files.pythonhosted.org/packages/55/5d/2561b3aa667d87a4ae9cd01c5a565955aab5a3d44a6076f723beb9cdde0a/onnxruntime_gpu-1.25.0-cp311-cp311-win_amd64.whl", hash = "sha256:2e702159a025aa5c69f0b747adf9a451e0c9e4b20120163a918c8459d3171b87", size = 220845585, upload-time = "2026-04-22T17:20:38.939Z" },
{ url = "https://files.pythonhosted.org/packages/1d/6d/2c13d3eff74caa9e59820a044a75becd34e9cbeeaf7617ad7679cdb1fdb7/onnxruntime_gpu-1.25.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2f0c36c63c8b0eb4091f2567067f480f66f0aedc189eb009545c98ce7e919056", size = 270342429, upload-time = "2026-04-22T17:28:10.526Z" },
{ url = "https://files.pythonhosted.org/packages/8c/2e/9fc303ae59d4caeb85ec3cea6881b7de8ca1d2a07140fade39913cd7ff10/onnxruntime_gpu-1.25.0-cp312-cp312-win_amd64.whl", hash = "sha256:61178cc4d84f59861714554531e01cccbd33ddf13cc0e87a3adea13b24d297ce", size = 220847708, upload-time = "2026-04-22T17:20:47.993Z" },
{ url = "https://files.pythonhosted.org/packages/f5/15/e63fe7b1abad6884bed07e9bb333e9f0ea48fbb8cbc1ea4a67ee6019d5d0/onnxruntime_gpu-1.25.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e462eb13ee9955117baec4f518916c1e7cb1a96001114105632bc6d454c6aee6", size = 270342324, upload-time = "2026-04-22T17:28:21.142Z" },
{ url = "https://files.pythonhosted.org/packages/21/10/b3533243d062b589d4b1f3ae26584af332c5cde618e7f6f5ff6fabbfd5f2/onnxruntime_gpu-1.25.0-cp313-cp313-win_amd64.whl", hash = "sha256:9a3682158e5e911385252eb95d6332b6f525972746c582e10f8a78213b39e624", size = 220848188, upload-time = "2026-04-22T17:20:56.946Z" },
{ url = "https://files.pythonhosted.org/packages/35/6c/d7706dd1d0eaafdba44d5c89f8d952de41e425a1b0cbd3ecfa60f918c249/onnxruntime_gpu-1.25.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8514b92c5929c953850090d823d018770cba2a971efab5f8f69a3c4280cdc632", size = 270364210, upload-time = "2026-04-22T17:28:33.568Z" },
{ url = "https://files.pythonhosted.org/packages/37/01/9f1b16ea857e3a4b5e82a2d70b52ea46a0083569f737d840f74a1b86818f/onnxruntime_gpu-1.25.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ffe9df4016b061ec3a5565a4fc08cdb86808cd8b9c255c42301066c0c24a81b5", size = 270345126, upload-time = "2026-04-22T17:28:44.416Z" },
{ url = "https://files.pythonhosted.org/packages/56/c8/aae22f3c9cea9160d8d969734a1927720fcb4d4ad4abe269c407c1d2b63c/onnxruntime_gpu-1.25.0-cp314-cp314-win_amd64.whl", hash = "sha256:2173b71631208177fe704ce2d92eac3acbf758285327247ea40a31a9f0bcc073", size = 223385369, upload-time = "2026-04-22T17:21:06.026Z" },
{ url = "https://files.pythonhosted.org/packages/ed/0a/79fba6a1a32803a2bf8b99187e0ea5d5d69ffe0c5c0f469bde232ceb8327/onnxruntime_gpu-1.25.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8576c721c600cc669717a2ae49af30fdfff230480099653adc7b79d58a240852", size = 270364130, upload-time = "2026-04-22T17:28:54.708Z" },
]
[[package]]
name = "opentelemetry-api"
version = "1.40.0"