Merge pull request #1185 from MemPalace/perf/batched-upsert-gpu

perf(mining): batch per-chunk upserts + optional GPU acceleration
2026-04-24 20:34:28 -03:00
parent 7a757916b3 031512438e
commit ed2ba726c9
12 changed files with 995 additions and 66 deletions
@@ -0,0 +1,301 @@
+"""Mining throughput benchmark: per-chunk vs batched upsert, CPU vs GPU.
+
+Compares the legacy per-chunk ``add_drawer`` loop against the batched
+``collection.upsert`` path introduced in the "batched upsert + GPU" PR.
+Runs both paths on an identical seeded synthetic corpus, reports
+wall-clock time + drawers/sec, and prints a markdown table suitable
+for pasting into a PR description.
+
+Usage
+-----
+
+    # CPU (whatever onnxruntime is installed — CPU if you don't have
+    # onnxruntime-gpu):
+    uv run python benchmarks/mine_bench.py
+
+    # GPU (NVIDIA):
+    uv venv /tmp/gpu && source /tmp/gpu/bin/activate
+    uv pip install -e '.[gpu]' 'nvidia-cudnn-cu12>=9,<10' \\
+        'nvidia-cuda-runtime-cu12' 'nvidia-cublas-cu12'
+    export LD_LIBRARY_PATH=$(python -c "import nvidia.cudnn, os; \\
+        print(os.path.dirname(nvidia.cudnn.__file__)+'/lib')"):$LD_LIBRARY_PATH
+    MEMPALACE_EMBEDDING_DEVICE=cuda python benchmarks/mine_bench.py
+
+Flags
+-----
+
+    --device cpu|cuda|coreml|dml|auto   Override MEMPALACE_EMBEDDING_DEVICE
+    --scenarios small,medium,large      Which scenarios to run
+    --seed 42                           RNG seed for reproducibility
+"""
+
+from __future__ import annotations
+
+import argparse
+import hashlib
+import os
+import random
+import shutil
+import string
+import sys
+import tempfile
+import time
+from datetime import datetime
+from pathlib import Path
+
+
+def build_corpus(dest: Path, n_files: int, paragraphs_per_file: int, seed: int) -> None:
+    """Write a deterministic synthetic corpus plus its ``mempalace.yaml``.
+
+    Creates ``dest`` if needed, then ``n_files`` markdown documents named
+    ``doc_000.md``, ``doc_001.md``, ... Each document holds
+    ``paragraphs_per_file`` paragraphs of 12 random lowercase words
+    (3-10 letters each), separated by blank lines. The same ``seed``
+    always yields the same corpus.
+    """
+    rng = random.Random(seed)
+    alphabet = string.ascii_lowercase
+
+    def random_word() -> str:
+        # Draw the length first, then the letters, to keep the RNG stream stable.
+        length = rng.randint(3, 10)
+        return "".join(rng.choices(alphabet, k=length))
+
+    def random_paragraph() -> str:
+        return " ".join([random_word() for _ in range(12)])
+
+    dest.mkdir(parents=True, exist_ok=True)
+    for index in range(n_files):
+        body = "\n\n".join([random_paragraph() for _ in range(paragraphs_per_file)])
+        target = dest / f"doc_{index:03d}.md"
+        target.write_text(body)
+
+    project_config = (
+        "wing: bench\n"
+        "rooms:\n"
+        "  - name: general\n"
+        "    description: all\n"
+        "    keywords: [general]\n"
+    )
+    (dest / "mempalace.yaml").write_text(project_config)
+
+
+def _process_file_unbatched(filepath, project_path, collection, wing, rooms, agent, closets_col):
+    """Legacy per-chunk upsert path (pre-batching).
+
+    Reproduces the exact loop shape the miner used before this PR so the
+    comparison is apples-to-apples; only the upsert granularity differs.
+
+    Returns a ``(drawers_added, room)`` tuple. Files that are already
+    mined, unreadable, or shorter than ``miner.MIN_CHUNK_SIZE`` after
+    stripping short-circuit with zero drawers.
+    """
+    # Imported lazily so the module can be imported (e.g. for --help)
+    # without loading mempalace.
+    from mempalace import miner
+    from mempalace.palace import (
+        build_closet_lines,
+        file_already_mined,
+        mine_lock,
+        purge_file_closets,
+        upsert_closet_lines,
+    )
+
+    source_file = str(filepath)
+    # Cheap pre-check outside the lock; repeated under the lock below.
+    if file_already_mined(collection, source_file, check_mtime=True):
+        return 0, "general"
+    try:
+        content = filepath.read_text(encoding="utf-8", errors="replace")
+    except OSError:
+        return 0, "general"
+    content = content.strip()
+    if len(content) < miner.MIN_CHUNK_SIZE:
+        return 0, "general"
+    room = miner.detect_room(filepath, content, rooms, project_path)
+    chunks = miner.chunk_text(content, source_file)
+
+    with mine_lock(source_file):
+        # Re-check after acquiring the lock: the file may have been mined
+        # between the first check and now.
+        if file_already_mined(collection, source_file, check_mtime=True):
+            return 0, room
+        # Best-effort removal of any previous drawers for this file;
+        # failures are deliberately ignored.
+        try:
+            collection.delete(where={"source_file": source_file})
+        except Exception:
+            pass
+        drawers_added = 0
+        # One add_drawer call (and therefore one upsert) per chunk: this is
+        # the behaviour being benchmarked against the batched path.
+        for chunk in chunks:
+            miner.add_drawer(
+                collection=collection,
+                wing=wing,
+                room=room,
+                content=chunk["content"],
+                source_file=source_file,
+                chunk_index=chunk["chunk_index"],
+                agent=agent,
+            )
+            drawers_added += 1
+        if closets_col and drawers_added > 0:
+            # Drawer IDs: "drawer_<wing>_<room>_" plus the first 24 hex chars of
+            # sha256(source_file + chunk_index).
+            drawer_ids = [
+                f"drawer_{wing}_{room}_"
+                f"{hashlib.sha256((source_file + str(c['chunk_index'])).encode()).hexdigest()[:24]}"
+                for c in chunks
+            ]
+            closet_lines = build_closet_lines(source_file, drawer_ids, content, wing, room)
+            closet_id_base = (
+                f"closet_{wing}_{room}_"
+                f"{hashlib.sha256(source_file.encode()).hexdigest()[:24]}"
+            )
+            closet_meta = {
+                "wing": wing,
+                "room": room,
+                "source_file": source_file,
+                "drawer_count": drawers_added,
+                "filed_at": datetime.now().isoformat(),
+                "normalize_version": miner.NORMALIZE_VERSION,
+            }
+            # Purge closets for this file before writing the new ones.
+            purge_file_closets(closets_col, source_file)
+            upsert_closet_lines(closets_col, closet_id_base, closet_lines, closet_meta)
+    return drawers_added, room
+
+
+def mine_once(project_dir: str, palace_path: str, batched: bool) -> tuple[int, float]:
+    """Mine a project dir with either the batched (new) or per-chunk (old) path.
+
+    ``batched=True`` routes every file through ``miner.process_file``;
+    ``batched=False`` uses ``_process_file_unbatched``.
+
+    Returns ``(total_drawers, elapsed_seconds)``. The timer covers only the
+    per-file processing loop: config loading, the project scan and opening
+    the collections all happen before it starts.
+    """
+    from mempalace import miner
+    from mempalace.miner import load_config, scan_project
+    from mempalace.palace import get_closets_collection, get_collection
+
+    project_path = Path(project_dir).resolve()
+    config = load_config(project_dir)
+    wing = config["wing"]
+    rooms = config.get("rooms", [])
+    files = scan_project(project_dir)
+    collection = get_collection(palace_path)
+    closets = get_closets_collection(palace_path)
+
+    total = 0
+    # Start timing here so setup cost is excluded from both code paths.
+    t0 = time.perf_counter()
+    for filepath in files:
+        if batched:
+            drawers, _ = miner.process_file(
+                filepath=filepath,
+                project_path=project_path,
+                collection=collection,
+                wing=wing,
+                rooms=rooms,
+                agent="bench",
+                dry_run=False,
+                closets_col=closets,
+            )
+        else:
+            drawers, _ = _process_file_unbatched(
+                filepath, project_path, collection, wing, rooms, "bench", closets
+            )
+        total += drawers
+    return total, time.perf_counter() - t0
+
+
+def _reset_backend_caches() -> None:
+    """Empty the default backend's client and freshness caches.
+
+    Called before every run so that neither code path benefits from a
+    client left open by the previous one.
+    """
+    from mempalace.palace import _DEFAULT_BACKEND as backend
+
+    for cache_attr in ("_clients", "_freshness"):
+        getattr(backend, cache_attr).clear()
+
+
+def run_scenario(label: str, n_files: int, paragraphs_per_file: int, seed: int) -> dict:
+    """Benchmark one corpus shape with the per-chunk and batched paths.
+
+    Each mode gets its own temporary directory holding a freshly built
+    corpus and an empty palace; the directory is removed afterwards even
+    if mining fails. Progress lines are printed as the runs complete.
+
+    Returns a dict with the scenario description, the batched drawer
+    count, per-mode time and rate, and the unbatched/batched time ratio.
+    """
+    print(f"\n=== {label}: {n_files} files × {paragraphs_per_file} paragraphs ===")
+    measured = {}
+    for mode in ("unbatched", "batched"):
+        workdir = Path(tempfile.mkdtemp(prefix=f"mp_{mode}_"))
+        try:
+            project_dir = workdir / "proj"
+            palace_dir = workdir / "palace"
+            build_corpus(project_dir, n_files, paragraphs_per_file, seed=seed)
+            _reset_backend_caches()
+            use_batched = mode == "batched"
+            drawers, elapsed = mine_once(str(project_dir), str(palace_dir), batched=use_batched)
+            if elapsed > 0:
+                rate = drawers / elapsed
+            else:
+                rate = 0.0
+            measured[mode] = (drawers, elapsed, rate)
+            print(f"  {mode:10} {drawers:5} drawers in {elapsed:6.2f}s  →  {rate:7.1f} drawers/sec")
+        finally:
+            shutil.rmtree(workdir, ignore_errors=True)
+
+    _, slow_time, slow_rate = measured["unbatched"]
+    fast_drawers, fast_time, fast_rate = measured["batched"]
+    if fast_time > 0:
+        speedup = slow_time / fast_time
+    else:
+        speedup = 0.0
+    print(f"  speedup:   {speedup:.2f}× ({slow_time:.2f}s → {fast_time:.2f}s)")
+
+    summary = {
+        "label": label,
+        "n_files": n_files,
+        "paragraphs": paragraphs_per_file,
+        "drawers": fast_drawers,
+        "unbatched_time": slow_time,
+        "unbatched_rate": slow_rate,
+        "batched_time": fast_time,
+        "batched_rate": fast_rate,
+        "speedup": speedup,
+    }
+    return summary
+
+
+# Scenario key -> (human-readable label, number of files, paragraphs per file).
+# The key is what ``--scenarios`` accepts; the label appears in the results table.
+SCENARIOS = {
+    "small":  ("Small files (~50 paragraphs)",  10, 50),
+    "medium": ("Medium files (~200 paragraphs)", 20, 200),
+    "large":  ("Large files (~500 paragraphs)",  10, 500),
+}
+
+
+def _env_summary(device_label: str) -> list[str]:
+    """Describe the runtime environment as two markdown-ready lines.
+
+    The first line names the device together with the onnxruntime version
+    and its available providers; the second gives the chromadb, Python and
+    platform versions. Anything that cannot be determined is shown as ``?``.
+    """
+    import platform
+
+    unknown = "?"
+
+    try:
+        import chromadb
+
+        chromadb_v = chromadb.__version__
+    except Exception:
+        chromadb_v = unknown
+
+    try:
+        import onnxruntime as ort
+
+        ort_v = ort.__version__
+        short_names = [
+            name.replace("ExecutionProvider", "") for name in ort.get_available_providers()
+        ]
+        providers = ",".join(short_names)
+    except Exception:
+        # A failure at any step discards both values, not just the failing one.
+        ort_v = unknown
+        providers = unknown
+
+    device_line = f"device: **{device_label}** (onnxruntime {ort_v}, providers={providers})"
+    versions_line = (
+        f"chromadb {chromadb_v} · python {sys.version.split()[0]} · {platform.platform()}"
+    )
+    return [device_line, versions_line]
+
+
+def main() -> None:
+    """Parse CLI flags, run the selected scenarios and print a markdown table.
+
+    ``--device`` overrides ``MEMPALACE_EMBEDDING_DEVICE`` for this process
+    before the embedding module is imported. Scenario names are validated
+    before any model is loaded, so a typo fails immediately with exit
+    status 2 instead of after minutes of benchmarking.
+    """
+    parser = argparse.ArgumentParser(description=__doc__.split("\n\n", 1)[0])
+    parser.add_argument(
+        "--device",
+        default=None,
+        help="Override MEMPALACE_EMBEDDING_DEVICE (cpu|cuda|coreml|dml|auto)",
+    )
+    parser.add_argument(
+        "--scenarios",
+        default="small,medium,large",
+        help="Comma-separated scenario names (default: all)",
+    )
+    parser.add_argument("--seed", type=int, default=42)
+    args = parser.parse_args()
+
+    # Fail fast: reject unknown scenario names before the model warm-up and
+    # before any earlier scenario has run (its results would be thrown away).
+    picked = [s.strip() for s in args.scenarios.split(",") if s.strip()]
+    for key in picked:
+        if key not in SCENARIOS:
+            print(f"Unknown scenario {key!r}; choices: {sorted(SCENARIOS)}", file=sys.stderr)
+            sys.exit(2)
+
+    if args.device:
+        os.environ["MEMPALACE_EMBEDDING_DEVICE"] = args.device
+
+    # Imported only after the environment override above is in place.
+    from mempalace.embedding import describe_device, get_embedding_function
+
+    device_label = describe_device()
+    print(f"Warming up ONNX model on device={device_label}...")
+    # Run one embedding call up front so model-load cost is not charged to
+    # whichever code path happens to run first.
+    ef = get_embedding_function()
+    ef(["warmup sentence one", "warmup sentence two"])
+
+    results = []
+    for key in picked:
+        label, n_files, paras = SCENARIOS[key]
+        results.append(run_scenario(label, n_files, paras, args.seed))
+
+    print("\n\n## Mining benchmark\n")
+    for line in _env_summary(device_label):
+        # Two trailing spaces force a markdown line break.
+        print(line + "  ")
+    print()
+    print("| Scenario | Files | Drawers | Per-chunk (old) | Batched (new) | Speedup |")
+    print("| --- | ---: | ---: | ---: | ---: | ---: |")
+    for r in results:
+        print(
+            f"| {r['label']} | {r['n_files']} | {r['drawers']} | "
+            f"{r['unbatched_time']:.2f}s · {r['unbatched_rate']:.0f} drw/s | "
+            f"{r['batched_time']:.2f}s · {r['batched_rate']:.0f} drw/s | "
+            f"**{r['speedup']:.2f}×** |"
+        )
+
+
+if __name__ == "__main__":
+    main()
@@ -405,6 +405,23 @@ class ChromaBackend(BaseBackend):
        self._freshness: dict[str, tuple[int, float]] = {}
        self._closed = False

+    @staticmethod
+    def _resolve_embedding_function():
+        """Build the embedding function for the configured ``embedding_device``.
+
+        The EF has to be passed explicitly to both ``get_collection`` and
+        ``get_or_create_collection``: ChromaDB 1.x does not persist it with
+        the collection, so a reader that leaves it out silently gets the
+        library default and its queries won't match the writer's vectors.
+
+        Returns ``None`` (after logging the traceback) if the EF cannot be
+        built, in which case callers fall back to chromadb's default.
+        """
+        try:
+            from ..embedding import get_embedding_function as build_ef
+
+            resolved = build_ef()
+        except Exception:
+            logger.exception("Failed to build embedding function; using chromadb default")
+            resolved = None
+        return resolved
+
+
    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
@@ -532,12 +549,15 @@ class ChromaBackend(BaseBackend):
        if options and isinstance(options, dict):
            hnsw_space = options.get("hnsw_space", hnsw_space)

+        ef = self._resolve_embedding_function()
+        ef_kwargs = {"embedding_function": ef} if ef is not None else {}
+
        if create:
            collection = client.get_or_create_collection(
-                collection_name, metadata={"hnsw:space": hnsw_space}
+                collection_name, metadata={"hnsw:space": hnsw_space}, **ef_kwargs
            )
        else:
-            collection = client.get_collection(collection_name)
+            collection = client.get_collection(collection_name, **ef_kwargs)
        return ChromaCollection(collection)

    def close_palace(self, palace) -> None:
@@ -578,8 +598,10 @@ class ChromaBackend(BaseBackend):
        self, palace_path: str, collection_name: str, hnsw_space: str = "cosine"
    ) -> ChromaCollection:
        """Create (not get-or-create) ``collection_name`` with the given HNSW space."""
+        ef = self._resolve_embedding_function()
+        ef_kwargs = {"embedding_function": ef} if ef is not None else {}
        collection = self._client(palace_path).create_collection(
-            collection_name, metadata={"hnsw:space": hnsw_space}
+            collection_name, metadata={"hnsw:space": hnsw_space}, **ef_kwargs
        )
        return ChromaCollection(collection)

@@ -236,6 +236,23 @@ class MempalaceConfig:
            pass
        return normalized

+    @property
+    def embedding_device(self):
+        """Hardware device for the ONNX embedding model.
+
+        Values: ``"auto"`` (default), ``"cpu"``, ``"cuda"``, ``"coreml"``,
+        ``"dml"``. Read from env ``MEMPALACE_EMBEDDING_DEVICE`` first, then
+        ``embedding_device`` in ``config.json``, then ``"auto"``.
+
+        The result is always a stripped, lower-cased, non-empty string. A
+        blank or whitespace-only environment variable is treated as unset,
+        and a missing, ``null`` or blank config value yields ``"auto"``.
+
+        ``auto`` resolves to the first available accelerator at runtime via
+        :mod:`mempalace.embedding`; requesting an unavailable accelerator
+        logs a warning and falls back to CPU.
+        """
+        # Normalize before testing truthiness so "   " does not shadow the
+        # config file with an empty device name.
+        env_val = (os.environ.get("MEMPALACE_EMBEDDING_DEVICE") or "").strip().lower()
+        if env_val:
+            return env_val
+        file_val = self._file_config.get("embedding_device")
+        if file_val is None:
+            # JSON null would otherwise stringify to the bogus device "none".
+            return "auto"
+        return str(file_val).strip().lower() or "auto"
+
+
    @property
    def hook_silent_save(self):
        """Whether the stop hook saves directly (True) or blocks for MCP calls (False)."""
@@ -55,6 +55,7 @@ CONVO_EXTENSIONS = {

 MIN_CHUNK_SIZE = 30
 CHUNK_SIZE = 800  # chars per drawer — align with miner.py
+DRAWER_UPSERT_BATCH_SIZE = 1000
 MAX_FILE_SIZE = 500 * 1024 * 1024  # 500 MB — skip files larger than this.
 # Matches miner.py at 500 MB. Long Claude Code sessions, multi-year
 # ChatGPT exports, and lifetime Slack dumps routinely exceed 10 MB; the
@@ -332,31 +333,43 @@ def _file_chunks_locked(collection, source_file, chunks, wing, room, agent, extr
        except Exception:
            pass

-        for chunk in chunks:
-            chunk_room = chunk.get("memory_type", room) if extract_mode == "general" else room
-            if extract_mode == "general":
-                room_counts_delta[chunk_room] += 1
-            drawer_id = f"drawer_{wing}_{chunk_room}_{hashlib.sha256((source_file + str(chunk['chunk_index'])).encode()).hexdigest()[:24]}"
+        # Batch chunks into bounded upserts so large transcripts keep most of
+        # the embedding speedup without one huge Chroma/SQLite request. Keep
+        # one filed_at per source file so all transcript drawers share an
+        # ingest timestamp.
+        filed_at = datetime.now().isoformat()
+        for batch_start in range(0, len(chunks), DRAWER_UPSERT_BATCH_SIZE):
+            batch_docs: list = []
+            batch_ids: list = []
+            batch_metas: list = []
+            for chunk in chunks[batch_start : batch_start + DRAWER_UPSERT_BATCH_SIZE]:
+                chunk_room = chunk.get("memory_type", room) if extract_mode == "general" else room
+                if extract_mode == "general":
+                    room_counts_delta[chunk_room] += 1
+                drawer_id = f"drawer_{wing}_{chunk_room}_{hashlib.sha256((source_file + str(chunk['chunk_index'])).encode()).hexdigest()[:24]}"
+                batch_docs.append(chunk["content"])
+                batch_ids.append(drawer_id)
+                batch_metas.append(
+                    {
+                        "wing": wing,
+                        "room": chunk_room,
+                        "hall": _detect_hall_cached(chunk["content"]),
+                        "source_file": source_file,
+                        "chunk_index": chunk["chunk_index"],
+                        "added_by": agent,
+                        "filed_at": filed_at,
+                        "ingest_mode": "convos",
+                        "extract_mode": extract_mode,
+                        "normalize_version": NORMALIZE_VERSION,
+                    }
+                )
            try:
                collection.upsert(
-                    documents=[chunk["content"]],
-                    ids=[drawer_id],
-                    metadatas=[
-                        {
-                            "wing": wing,
-                            "room": chunk_room,
-                            "hall": _detect_hall_cached(chunk["content"]),
-                            "source_file": source_file,
-                            "chunk_index": chunk["chunk_index"],
-                            "added_by": agent,
-                            "filed_at": datetime.now().isoformat(),
-                            "ingest_mode": "convos",
-                            "extract_mode": extract_mode,
-                            "normalize_version": NORMALIZE_VERSION,
-                        }
-                    ],
+                    documents=batch_docs,
+                    ids=batch_ids,
+                    metadatas=batch_metas,
                )
-                drawers_added += 1
+                drawers_added += len(batch_docs)
            except Exception as e:
                if "already exists" not in str(e).lower():
                    raise
@@ -0,0 +1,155 @@
+"""Embedding function factory with hardware acceleration.
+
+Returns a ChromaDB-compatible embedding function bound to a user-selected
+ONNX Runtime execution provider. The same ``all-MiniLM-L6-v2`` model and
+384-dim vectors ChromaDB ships by default are reused, so switching device
+does not invalidate existing palaces.
+
+Supported devices (env ``MEMPALACE_EMBEDDING_DEVICE`` or ``embedding_device``
+in ``~/.mempalace/config.json``):
+
+* ``auto`` — prefer CUDA ▸ CoreML ▸ DirectML, fall back to CPU
+* ``cpu`` — force CPU (the historical default)
+* ``cuda`` — NVIDIA GPU via ``onnxruntime-gpu`` (``pip install mempalace[gpu]``)
+* ``coreml`` — Apple Neural Engine (macOS)
+* ``dml`` — DirectML (Windows / AMD / Intel GPUs)
+
+Requesting an unavailable accelerator emits a warning and falls back to CPU
+rather than hard-failing — mining must still work on a laptop without CUDA.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+# Device name -> ONNX Runtime provider list, most preferred first. Every
+# accelerator entry ends with the CPU provider as a fallback.
+_PROVIDER_MAP = {
+    "cpu": ["CPUExecutionProvider"],
+    "cuda": ["CUDAExecutionProvider", "CPUExecutionProvider"],
+    "coreml": ["CoreMLExecutionProvider", "CPUExecutionProvider"],
+    "dml": ["DmlExecutionProvider", "CPUExecutionProvider"],
+}
+
+# Device name -> pip extra to suggest when its provider is missing.
+_DEVICE_EXTRA = {
+    "cuda": "mempalace[gpu]",
+    "coreml": "mempalace[coreml]",
+    "dml": "mempalace[dml]",
+}
+
+# (provider, device name) pairs probed in order when device is "auto".
+_AUTO_ORDER = [
+    ("CUDAExecutionProvider", "cuda"),
+    ("CoreMLExecutionProvider", "coreml"),
+    ("DmlExecutionProvider", "dml"),
+]
+
+# Embedding functions keyed by resolved provider tuple.
+_EF_CACHE: dict = {}
+# Device names already warned about, so each warning is logged once.
+_WARNED: set = set()
+
+
+def _resolve_providers(device: str) -> tuple[list, str]:
+    """Return ``(provider_list, effective_device)`` for ``device``.
+
+    ``device`` is stripped and lower-cased; an empty or ``None`` value
+    means ``"auto"``. Falls back to CPU (with a one-shot warning) when the
+    device name is unknown or the requested accelerator is not compiled
+    into the installed ``onnxruntime``. If ``onnxruntime`` cannot be
+    imported at all, CPU is returned without a warning.
+
+    The returned list is always a fresh object. It is handed to the
+    embedding function as ``preferred_providers``, so returning the lists
+    stored in ``_PROVIDER_MAP`` directly would let any in-place change by
+    the consumer corrupt the table for every later call.
+    """
+    device = (device or "auto").strip().lower()
+
+    try:
+        import onnxruntime as ort
+
+        available = set(ort.get_available_providers())
+    except ImportError:
+        return (["CPUExecutionProvider"], "cpu")
+
+    if device == "auto":
+        for provider, name in _AUTO_ORDER:
+            if provider in available:
+                return ([provider, "CPUExecutionProvider"], name)
+        return (["CPUExecutionProvider"], "cpu")
+
+    requested = _PROVIDER_MAP.get(device)
+    if requested is None:
+        if device not in _WARNED:
+            logger.warning("Unknown embedding_device %r — falling back to cpu", device)
+            _WARNED.add(device)
+        return (["CPUExecutionProvider"], "cpu")
+
+    preferred = requested[0]
+    if preferred == "CPUExecutionProvider":
+        # Copy: never hand out the module-level list itself.
+        return (list(requested), "cpu")
+
+    if preferred not in available:
+        if device not in _WARNED:
+            extra = _DEVICE_EXTRA.get(device, "the matching mempalace extra for your device")
+            logger.warning(
+                "embedding_device=%r requested but %s is not installed — "
+                "falling back to CPU. Install %s.",
+                device,
+                preferred,
+                extra,
+            )
+            _WARNED.add(device)
+        return (["CPUExecutionProvider"], "cpu")
+
+    # Copy: never hand out the module-level list itself.
+    return (list(requested), device)
+
+
+def _build_ef_class():
+    """Subclass ``ONNXMiniLM_L6_V2`` with name ``"default"``.
+
+    Why the rename: ChromaDB 1.5 persists the EF identity on the collection
+    and rejects reads that pass a differently-named EF (``onnx_mini_lm_l6_v2``
+    vs ``default``). The vectors and model are identical — only the
+    ``name()`` tag differs — so spoofing the name lets one EF class serve
+    palaces created with ``DefaultEmbeddingFunction`` *and* palaces we
+    create ourselves, with the same GPU-capable ``preferred_providers``.
+
+    Returns the subclass itself, not an instance. A new class object is
+    created on every call.
+    """
+    # Imported here rather than at module top, so importing this module
+    # does not itself import chromadb.
+    from chromadb.utils.embedding_functions import ONNXMiniLM_L6_V2
+
+    class _MempalaceONNX(ONNXMiniLM_L6_V2):
+        @staticmethod
+        def name() -> str:
+            # Report the same identity as chromadb's DefaultEmbeddingFunction.
+            return "default"
+
+    return _MempalaceONNX
+
+
+def get_embedding_function(device: Optional[str] = None):
+    """Return the process-wide embedding function for ``device``.
+
+    When ``device`` is ``None`` the value comes from
+    :class:`MempalaceConfig.embedding_device`. Instances are cached by
+    resolved provider list, so two device names that resolve to the same
+    providers share one loaded model.
+    """
+    if device is None:
+        from .config import MempalaceConfig
+
+        device = MempalaceConfig().embedding_device
+
+    providers, effective = _resolve_providers(device)
+    key = tuple(providers)
+
+    ef = _EF_CACHE.get(key)
+    if ef is None:
+        # Cache miss: build the function once and remember it.
+        ef = _build_ef_class()(preferred_providers=providers)
+        _EF_CACHE[key] = ef
+        logger.info("Embedding function initialized (device=%s providers=%s)", effective, providers)
+    return ef
+
+
+def describe_device(device: Optional[str] = None) -> str:
+    """Return the effective device name that ``device`` resolves to.
+
+    ``device=None`` reads :class:`MempalaceConfig.embedding_device`. The
+    miner CLI header prints this so users can tell whether GPU
+    acceleration actually engaged.
+    """
+    if device is None:
+        from .config import MempalaceConfig
+
+        device = MempalaceConfig().embedding_device
+    resolved = _resolve_providers(device)
+    return resolved[1]
@@ -14,6 +14,7 @@ import fnmatch
 from pathlib import Path
 from datetime import datetime
 from collections import defaultdict
+from typing import Optional

 from .palace import (
    NORMALIZE_VERSION,
@@ -64,6 +65,7 @@ SKIP_FILENAMES = {
 CHUNK_SIZE = 800  # chars per drawer
 CHUNK_OVERLAP = 100  # overlap between chunks
 MIN_CHUNK_SIZE = 50  # skip tiny chunks
+DRAWER_UPSERT_BATCH_SIZE = 1000
 MAX_FILE_SIZE = 500 * 1024 * 1024  # 500 MB — skip files larger than this.
 # Long Claude Code sessions and large transcript exports routinely exceed
 # 10 MB. The cap exists as a defensive rail against pathological binary
@@ -633,40 +635,62 @@ def _extract_entities_for_metadata(content: str) -> str:
    return ";".join(capped)


+def _build_drawer_metadata(
+    wing: str,
+    room: str,
+    source_file: str,
+    chunk_index: int,
+    agent: str,
+    content: str,
+    source_mtime: Optional[float],
+) -> dict:
+    """Assemble the metadata for a single drawer; performs no upsert.
+
+    Factored out of ``add_drawer`` so ``process_file`` can collect the
+    metadata for many chunks and send them in one ``collection.upsert``.
+
+    ``source_mtime`` is stored only when it is not ``None`` and
+    ``entities`` only when extraction finds any; ``hall`` is always set.
+    """
+    core = {
+        "wing": wing,
+        "room": room,
+        "source_file": source_file,
+        "chunk_index": chunk_index,
+        "added_by": agent,
+        "filed_at": datetime.now().isoformat(),
+        "normalize_version": NORMALIZE_VERSION,
+    }
+    mtime_part = {} if source_mtime is None else {"source_mtime": source_mtime}
+    hall_part = {"hall": detect_hall(content)}
+    entity_names = _extract_entities_for_metadata(content)
+    entities_part = {"entities": entity_names} if entity_names else {}
+    # Merge in the same key order a step-by-step build would produce.
+    return {**core, **mtime_part, **hall_part, **entities_part}
+
+
 def add_drawer(
    collection, wing: str, room: str, content: str, source_file: str, chunk_index: int, agent: str
 ):
-    """Add one drawer to the palace."""
+    """Add one drawer to the palace.
+
+    Kept for backward compatibility with external callers. In-tree the
+    miner uses ``_build_drawer_metadata`` + a batched ``collection.upsert``
+    to amortize the embedding model's forward-pass cost across chunks.
+    """
    drawer_id = f"drawer_{wing}_{room}_{hashlib.sha256((source_file + str(chunk_index)).encode()).hexdigest()[:24]}"
    try:
-        metadata = {
-            "wing": wing,
-            "room": room,
-            "source_file": source_file,
-            "chunk_index": chunk_index,
-            "added_by": agent,
-            "filed_at": datetime.now().isoformat(),
-            "normalize_version": NORMALIZE_VERSION,
-        }
-        # Store file mtime so we can detect modifications later.
-        try:
-            metadata["source_mtime"] = os.path.getmtime(source_file)
-        except OSError:
-            pass
-        # Tag with hall for graph connectivity within wings
-        metadata["hall"] = detect_hall(content)
-        # Tag with entity names for filterable search
-        entities = _extract_entities_for_metadata(content)
-        if entities:
-            metadata["entities"] = entities
-        collection.upsert(
-            documents=[content],
-            ids=[drawer_id],
-            metadatas=[metadata],
-        )
-        return True
-    except Exception:
-        raise
+        source_mtime = os.path.getmtime(source_file)
+    except OSError:
+        source_mtime = None
+    metadata = _build_drawer_metadata(
+        wing, room, source_file, chunk_index, agent, content, source_mtime
+    )
+    collection.upsert(
+        documents=[content],
+        ids=[drawer_id],
+        metadatas=[metadata],
+    )
+    return True


 # =============================================================================
@@ -725,19 +749,41 @@ def process_file(
        except Exception:
            pass

+        # Batch chunks into bounded upserts so the embedding model sees many
+        # chunks per forward pass without building one huge Chroma/SQLite
+        # request for pathological files. A bad chunk can fail its sub-batch;
+        # that is the deliberate trade-off for amortizing embedding overhead.
+        try:
+            source_mtime = os.path.getmtime(source_file)
+        except OSError:
+            source_mtime = None
+
        drawers_added = 0
-        for chunk in chunks:
-            added = add_drawer(
-                collection=collection,
-                wing=wing,
-                room=room,
-                content=chunk["content"],
-                source_file=source_file,
-                chunk_index=chunk["chunk_index"],
-                agent=agent,
+        for batch_start in range(0, len(chunks), DRAWER_UPSERT_BATCH_SIZE):
+            batch_docs: list = []
+            batch_ids: list = []
+            batch_metas: list = []
+            for chunk in chunks[batch_start : batch_start + DRAWER_UPSERT_BATCH_SIZE]:
+                drawer_id = f"drawer_{wing}_{room}_{hashlib.sha256((source_file + str(chunk['chunk_index'])).encode()).hexdigest()[:24]}"
+                batch_docs.append(chunk["content"])
+                batch_ids.append(drawer_id)
+                batch_metas.append(
+                    _build_drawer_metadata(
+                        wing,
+                        room,
+                        source_file,
+                        chunk["chunk_index"],
+                        agent,
+                        chunk["content"],
+                        source_mtime,
+                    )
+                )
+            collection.upsert(
+                documents=batch_docs,
+                ids=batch_ids,
+                metadatas=batch_metas,
            )
-            if added:
-                drawers_added += 1
+            drawers_added += len(batch_docs)

        # Build closet — the searchable index pointing to these drawers.
        # Purge first: a re-mine (mtime change or normalize_version bump) must
@@ -868,6 +914,8 @@ def mine(
    if limit > 0:
        files = files[:limit]

+    from .embedding import describe_device
+
    print(f"\n{'=' * 55}")
    print("  MemPalace Mine")
    print(f"{'=' * 55}")
@@ -875,6 +923,7 @@ def mine(
    print(f"  Rooms:   {', '.join(r['name'] for r in rooms)}")
    print(f"  Files:   {len(files)}")
    print(f"  Palace:  {palace_path}")
+    print(f"  Device:  {describe_device()}")
    if dry_run:
        print("  DRY RUN — nothing will be filed")
    if not respect_gitignore:
@@ -53,6 +53,14 @@ chroma = "mempalace.backends.chroma:ChromaBackend"
 [project.optional-dependencies]
 dev = ["pytest>=7.0", "pytest-cov>=4.0", "ruff>=0.4.0", "psutil>=5.9"]
 spellcheck = ["autocorrect>=2.0"]
+# Hardware acceleration for the ONNX embedding model. Install exactly one:
+#   pip install mempalace[gpu]       — NVIDIA CUDA
+#   pip install mempalace[dml]       — DirectML (Windows AMD/Intel/NVIDIA)
+#   pip install mempalace[coreml]    — macOS Neural Engine
+# After install, set MEMPALACE_EMBEDDING_DEVICE=cuda|dml|coreml (or "auto").
+gpu = ["onnxruntime-gpu>=1.16"]
+dml = ["onnxruntime-directml>=1.16"]
+coreml = ["onnxruntime>=1.16"]

 [dependency-groups]
 dev = ["pytest>=7.0", "pytest-cov>=4.0", "ruff>=0.4.0", "psutil>=5.9"]
@@ -20,6 +20,30 @@ def test_config_from_file():
    assert cfg.palace_path == "/custom/palace"


+def test_embedding_device_defaults_to_auto(tmp_path, monkeypatch):
+    """With no env var and no config entry the device is ``"auto"``.
+
+    Uses pytest's ``tmp_path`` for the empty config directory, so nothing
+    is left behind on disk after the run.
+    """
+    monkeypatch.delenv("MEMPALACE_EMBEDDING_DEVICE", raising=False)
+    cfg = MempalaceConfig(config_dir=str(tmp_path))
+    assert cfg.embedding_device == "auto"
+
+
+def test_embedding_device_from_config_is_normalized(tmp_path, monkeypatch):
+    """A padded, upper-case config value comes back stripped and lower-cased."""
+    monkeypatch.delenv("MEMPALACE_EMBEDDING_DEVICE", raising=False)
+    config_file = tmp_path / "config.json"
+    config_file.write_text(json.dumps({"embedding_device": "  CUDA  "}))
+
+    device = MempalaceConfig(config_dir=str(tmp_path)).embedding_device
+    assert device == "cuda"
+
+
+def test_embedding_device_env_overrides_config(tmp_path, monkeypatch):
+    """The environment variable wins over ``config.json`` and is normalized."""
+    config_file = tmp_path / "config.json"
+    config_file.write_text(json.dumps({"embedding_device": "cpu"}))
+    monkeypatch.setenv("MEMPALACE_EMBEDDING_DEVICE", "  CoreML  ")
+
+    device = MempalaceConfig(config_dir=str(tmp_path)).embedding_device
+    assert device == "coreml"
+
+
 def test_env_override():
    raw = "/env/palace"
    os.environ["MEMPALACE_PALACE_PATH"] = raw
@@ -1,6 +1,9 @@
 """Unit tests for convo_miner pure functions (no chromadb needed)."""

+import contextlib
+
 from mempalace.convo_miner import (
+    _file_chunks_locked,
    chunk_exchanges,
    detect_convo_room,
    scan_convos,
@@ -111,3 +114,36 @@ class TestScanConvos:
    def test_scan_empty_dir(self, tmp_path):
        files = scan_convos(str(tmp_path))
        assert files == []
+
+
+class TestFileChunksLocked:
+    def test_uses_bounded_upsert_batches(self, monkeypatch):
+        import mempalace.convo_miner as convo_miner
+
+        class FakeCol:
+            def __init__(self):
+                self.batch_sizes = []
+
+            def delete(self, *args, **kwargs):
+                pass
+
+            def upsert(self, documents, ids, metadatas):
+                self.batch_sizes.append(len(documents))
+
+        chunks = [{"content": f"chunk {i} " * 20, "chunk_index": i} for i in range(5)]
+        col = FakeCol()
+        monkeypatch.setattr(convo_miner, "DRAWER_UPSERT_BATCH_SIZE", 2)
+        monkeypatch.setattr(
+            convo_miner, "file_already_mined", lambda collection, source_file: False
+        )
+        monkeypatch.setattr(convo_miner, "mine_lock", lambda source_file: contextlib.nullcontext())
+        monkeypatch.setattr(convo_miner, "_detect_hall_cached", lambda content: "conversations")
+
+        drawers, room_counts, skipped = _file_chunks_locked(
+            col, "chat.txt", chunks, "wing", "general", "agent", "exchange"
+        )
+
+        assert drawers == 5
+        assert dict(room_counts) == {}
+        assert skipped is False
+        assert col.batch_sizes == [2, 2, 1]
@@ -0,0 +1,98 @@
+import pytest
+
+import mempalace.embedding as embedding
+
+
+@pytest.fixture(autouse=True)
+def isolate_embedding_state(monkeypatch):
+    monkeypatch.setattr(embedding, "_EF_CACHE", {})
+    monkeypatch.setattr(embedding, "_WARNED", set())
+
+
+def test_auto_picks_cuda(monkeypatch):
+    monkeypatch.setattr(
+        "onnxruntime.get_available_providers",
+        lambda: ["CUDAExecutionProvider", "CPUExecutionProvider"],
+    )
+
+    assert embedding._resolve_providers("auto") == (
+        ["CUDAExecutionProvider", "CPUExecutionProvider"],
+        "cuda",
+    )
+
+
+def test_auto_falls_to_cpu(monkeypatch):
+    monkeypatch.setattr("onnxruntime.get_available_providers", lambda: ["CPUExecutionProvider"])
+
+    assert embedding._resolve_providers("auto") == (["CPUExecutionProvider"], "cpu")
+
+
+def test_cuda_missing_warns_with_gpu_extra(monkeypatch, caplog):
+    monkeypatch.setattr("onnxruntime.get_available_providers", lambda: ["CPUExecutionProvider"])
+
+    assert embedding._resolve_providers("cuda") == (["CPUExecutionProvider"], "cpu")
+    assert "mempalace[gpu]" in caplog.text
+
+
+def test_coreml_missing_warns_with_coreml_extra(monkeypatch, caplog):
+    monkeypatch.setattr("onnxruntime.get_available_providers", lambda: ["CPUExecutionProvider"])
+
+    assert embedding._resolve_providers("coreml") == (["CPUExecutionProvider"], "cpu")
+    assert "mempalace[coreml]" in caplog.text
+
+
+def test_dml_missing_warns_with_dml_extra(monkeypatch, caplog):
+    monkeypatch.setattr("onnxruntime.get_available_providers", lambda: ["CPUExecutionProvider"])
+
+    assert embedding._resolve_providers("dml") == (["CPUExecutionProvider"], "cpu")
+    assert "mempalace[dml]" in caplog.text
+
+
+def test_unknown_device_warns_once(monkeypatch, caplog):
+    monkeypatch.setattr("onnxruntime.get_available_providers", lambda: ["CPUExecutionProvider"])
+
+    assert embedding._resolve_providers("bogus") == (["CPUExecutionProvider"], "cpu")
+    assert embedding._resolve_providers("bogus") == (["CPUExecutionProvider"], "cpu")
+    assert caplog.text.count("Unknown embedding_device") == 1
+
+
+def test_onnxruntime_import_error_falls_back_to_cpu(monkeypatch):
+    import builtins
+
+    real_import = builtins.__import__
+
+    def fake_import(name, *args, **kwargs):
+        if name == "onnxruntime":
+            raise ImportError("missing")
+        return real_import(name, *args, **kwargs)
+
+    monkeypatch.setattr(builtins, "__import__", fake_import)
+
+    assert embedding._resolve_providers("cuda") == (["CPUExecutionProvider"], "cpu")
+
+
+def test_get_embedding_function_caches_by_resolved_provider_tuple(monkeypatch):
+    class DummyEF:
+        def __init__(self, preferred_providers):
+            self.preferred_providers = preferred_providers
+
+    monkeypatch.setattr(embedding, "_build_ef_class", lambda: DummyEF)
+    monkeypatch.setattr(
+        embedding, "_resolve_providers", lambda device: (["CPUExecutionProvider"], "cpu")
+    )
+
+    first = embedding.get_embedding_function("cpu")
+    second = embedding.get_embedding_function("auto")
+
+    assert first is second
+    assert first.preferred_providers == ["CPUExecutionProvider"]
+
+
+def test_describe_device_uses_resolved_effective_device(monkeypatch):
+    monkeypatch.setattr(
+        embedding,
+        "_resolve_providers",
+        lambda device: (["CUDAExecutionProvider", "CPUExecutionProvider"], "cuda"),
+    )
+
+    assert embedding.describe_device("auto") == "cuda"
@@ -383,6 +383,46 @@ def test_status_handles_none_metadata_without_crash(tmp_path, capsys):
    assert "WING: proj" in out


+def test_process_file_uses_bounded_upsert_batches(tmp_path, monkeypatch):
+    from mempalace import miner
+
+    class FakeCol:
+        def __init__(self):
+            self.batch_sizes = []
+
+        def get(self, *args, **kwargs):
+            return {"ids": []}
+
+        def delete(self, *args, **kwargs):
+            pass
+
+        def upsert(self, documents, ids, metadatas):
+            self.batch_sizes.append(len(documents))
+
+    source = tmp_path / "src.py"
+    source.write_text("print('hello')\n" * 20, encoding="utf-8")
+    chunks = [{"content": f"chunk {i} " * 20, "chunk_index": i} for i in range(5)]
+    col = FakeCol()
+    monkeypatch.setattr(miner, "DRAWER_UPSERT_BATCH_SIZE", 2)
+    monkeypatch.setattr(miner, "chunk_text", lambda content, source_file: chunks)
+    monkeypatch.setattr(miner, "detect_hall", lambda content: "code")
+    monkeypatch.setattr(miner, "_extract_entities_for_metadata", lambda content: "")
+
+    drawers, room = miner.process_file(
+        source,
+        tmp_path,
+        col,
+        "wing",
+        [{"name": "general", "description": "General"}],
+        "agent",
+        False,
+    )
+
+    assert drawers == 5
+    assert room == "general"
+    assert col.batch_sizes == [2, 2, 1]
+
+
 # ── normalize_version schema gate ───────────────────────────────────────
 #
 # When the normalization pipeline changes shape (e.g., strip_noise lands),
@@ -1178,6 +1178,11 @@ dependencies = [
 ]

 [package.optional-dependencies]
+coreml = [
+    { name = "onnxruntime", version = "1.20.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
+    { name = "onnxruntime", version = "1.24.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" },
+    { name = "onnxruntime", version = "1.24.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
+]
 dev = [
    { name = "psutil" },
    { name = "pytest", version = "8.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
@@ -1185,6 +1190,16 @@ dev = [
    { name = "pytest-cov" },
    { name = "ruff" },
 ]
+dml = [
+    { name = "onnxruntime-directml", version = "1.20.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
+    { name = "onnxruntime-directml", version = "1.24.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" },
+    { name = "onnxruntime-directml", version = "1.24.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
+]
+gpu = [
+    { name = "onnxruntime-gpu", version = "1.20.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
+    { name = "onnxruntime-gpu", version = "1.24.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" },
+    { name = "onnxruntime-gpu", version = "1.25.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
+]
 spellcheck = [
    { name = "autocorrect" },
 ]
@@ -1202,6 +1217,9 @@ dev = [
 requires-dist = [
    { name = "autocorrect", marker = "extra == 'spellcheck'", specifier = ">=2.0" },
    { name = "chromadb", specifier = ">=1.5.4,<2" },
+    { name = "onnxruntime", marker = "extra == 'coreml'", specifier = ">=1.16" },
+    { name = "onnxruntime-directml", marker = "extra == 'dml'", specifier = ">=1.16" },
+    { name = "onnxruntime-gpu", marker = "extra == 'gpu'", specifier = ">=1.16" },
    { name = "psutil", marker = "extra == 'dev'", specifier = ">=5.9" },
    { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0" },
    { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.0" },
@@ -1209,7 +1227,7 @@ requires-dist = [
    { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.4.0" },
    { name = "tomli", marker = "python_full_version < '3.11'", specifier = ">=2.0.0" },
 ]
-provides-extras = ["dev", "spellcheck"]
+provides-extras = ["dev", "spellcheck", "gpu", "dml", "coreml"]

 [package.metadata.requires-dev]
 dev = [
@@ -1815,6 +1833,154 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/6c/1d/1666dc64e78d8587d168fec4e3b7922b92eb286a2ddeebcf6acb55c7dc82/onnxruntime-1.24.4-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e1cc6a518255f012134bc791975a6294806be9a3b20c4a54cca25194c90cf731", size = 17247021, upload-time = "2026-03-17T22:04:52.377Z" },
 ]

+[[package]]
+name = "onnxruntime-directml"
+version = "1.20.1"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version < '3.10'",
+]
+dependencies = [
+    { name = "coloredlogs", marker = "python_full_version < '3.10'" },
+    { name = "flatbuffers", marker = "python_full_version < '3.10'" },
+    { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
+    { name = "packaging", marker = "python_full_version < '3.10'" },
+    { name = "protobuf", marker = "python_full_version < '3.10'" },
+    { name = "sympy", marker = "python_full_version < '3.10'" },
+]
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3c/4f/f433239b05304aa9af0217da20508abbbcec1dcd58ee821e3dab8939ecfe/onnxruntime_directml-1.20.1-cp310-cp310-win_amd64.whl", hash = "sha256:d4aa43694799559fb5570fdf0e96a154d4b4d0bb9b73c3e81744eb7fe0c0de8d", size = 22760521, upload-time = "2024-11-21T00:49:40.179Z" },
+    { url = "https://files.pythonhosted.org/packages/df/5f/16337318bd99d2d837cbb2e91e8a12b0915cb80d7c1ae8f80ca2f5d47a09/onnxruntime_directml-1.20.1-cp311-cp311-win_amd64.whl", hash = "sha256:c7861057ad4caa64186c910efb3b54c1f575cd0e64732509c9bd927d2d20187b", size = 22762384, upload-time = "2024-11-21T00:49:44.01Z" },
+    { url = "https://files.pythonhosted.org/packages/8f/50/4599c6573bd71cc0c80820c63dea599a0b489ce874f93a5e021ca20a9e1f/onnxruntime_directml-1.20.1-cp312-cp312-win_amd64.whl", hash = "sha256:4b9a9f8349d68eef947fc692b3572e7a6490cb95effb151ace1a6ffc15884940", size = 22764330, upload-time = "2024-11-21T00:49:47.264Z" },
+    { url = "https://files.pythonhosted.org/packages/60/40/7d8489d9101b4aa7bae29227075ce31bc5764cbe87b78c995fdb296e3eff/onnxruntime_directml-1.20.1-cp313-cp313-win_amd64.whl", hash = "sha256:86a8c4b69e377bb18ed2a18aaf2337baa83a57ff87a97224d027e546dfa99fde", size = 22764517, upload-time = "2024-11-21T00:49:50.213Z" },
+]
+
+[[package]]
+name = "onnxruntime-directml"
+version = "1.24.3"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version == '3.10.*'",
+]
+dependencies = [
+    { name = "flatbuffers", marker = "python_full_version == '3.10.*'" },
+    { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" },
+    { name = "packaging", marker = "python_full_version == '3.10.*'" },
+    { name = "protobuf", marker = "python_full_version == '3.10.*'" },
+    { name = "sympy", marker = "python_full_version == '3.10.*'" },
+]
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ed/65/36ce5a5e79fb5d7b4d7636bc6e6c4024f3ff0571789e8eedb7149bb7c538/onnxruntime_directml-1.24.3-cp311-cp311-win_amd64.whl", hash = "sha256:442fecea5d52df315b6cecfbcbb44aff6681880b6bbf23546a6c00125fec66f1", size = 25106769, upload-time = "2026-03-05T16:27:07.495Z" },
+    { url = "https://files.pythonhosted.org/packages/05/40/c948c0ee42b7b6297dd45956092f5a53a6954610c3911a5847c7555b4930/onnxruntime_directml-1.24.3-cp312-cp312-win_amd64.whl", hash = "sha256:d889010e6ed2f30026522308173d295bcfdaf6f28d1df6054c748ffa750a7ad5", size = 25114531, upload-time = "2026-03-05T16:27:11.256Z" },
+    { url = "https://files.pythonhosted.org/packages/56/f0/9de329f39a66142aab4c1d9a48edc0e432de27c6ba09e8039e0dc51885e7/onnxruntime_directml-1.24.3-cp313-cp313-win_amd64.whl", hash = "sha256:f684adcb29dd48ee172b52fcf1d19a1da1a67a051384ac3418b36d200d0d105c", size = 25114902, upload-time = "2026-03-05T16:27:13.925Z" },
+    { url = "https://files.pythonhosted.org/packages/fe/7a/8b3014ca4065a32bd6672221bf4cb0b5b9a726d28a9caafdb86a076a5981/onnxruntime_directml-1.24.3-cp314-cp314-win_amd64.whl", hash = "sha256:42b17de7030445e75a7e83a4a317f9c655ed2dd7045fe79a7a21dce7b60103b6", size = 25570589, upload-time = "2026-03-05T16:27:17.278Z" },
+]
+
+[[package]]
+name = "onnxruntime-directml"
+version = "1.24.4"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version >= '3.14'",
+    "python_full_version == '3.13.*'",
+    "python_full_version >= '3.11' and python_full_version < '3.13'",
+]
+dependencies = [
+    { name = "flatbuffers", marker = "python_full_version >= '3.11'" },
+    { name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
+    { name = "packaging", marker = "python_full_version >= '3.11'" },
+    { name = "protobuf", marker = "python_full_version >= '3.11'" },
+    { name = "sympy", marker = "python_full_version >= '3.11'" },
+]
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/be/90/99566dc6398028e7691a5b12720fd85f757a0901818b84599d28abb3f085/onnxruntime_directml-1.24.4-cp311-cp311-win_amd64.whl", hash = "sha256:96642a787e5a6f33bf043521c0f06eb1eb663f6b830e5862a2026d03f9c90543", size = 25106000, upload-time = "2026-03-17T21:47:15.438Z" },
+    { url = "https://files.pythonhosted.org/packages/88/ea/33814eb0ec96775eda4c1d30b0d86e91d7d2cd0d84c66d3915aef0e06fa3/onnxruntime_directml-1.24.4-cp312-cp312-win_amd64.whl", hash = "sha256:f2ecb68b7b7b259d2ef3112ae760149f9b5a1e7c0fbb73d539da6250a648a614", size = 25111930, upload-time = "2026-03-17T21:47:18.419Z" },
+    { url = "https://files.pythonhosted.org/packages/60/53/2bd2696fac19cf8ca55496a0bcfe431f3aff9579eabbb0e231dc238acf6f/onnxruntime_directml-1.24.4-cp313-cp313-win_amd64.whl", hash = "sha256:2f1031cb2281e5b27cca9efe0b9399317c7286e4d226f7a79d4ab79bbd94d19e", size = 25112253, upload-time = "2026-03-17T21:47:22.043Z" },
+    { url = "https://files.pythonhosted.org/packages/b7/04/816932a3ade867a687e406716ca76e0774c6b921545b45818e3ebfcc54ce/onnxruntime_directml-1.24.4-cp314-cp314-win_amd64.whl", hash = "sha256:51d86bb949488e572b00422f344990a4a81d982416d73b6c0e4ced2bcd423d19", size = 25571098, upload-time = "2026-03-17T21:47:25.461Z" },
+]
+
+[[package]]
+name = "onnxruntime-gpu"
+version = "1.20.2"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version < '3.10'",
+]
+dependencies = [
+    { name = "coloredlogs", marker = "python_full_version < '3.10'" },
+    { name = "flatbuffers", marker = "python_full_version < '3.10'" },
+    { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
+    { name = "packaging", marker = "python_full_version < '3.10'" },
+    { name = "protobuf", marker = "python_full_version < '3.10'" },
+    { name = "sympy", marker = "python_full_version < '3.10'" },
+]
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/04/ad/4e5534dcaafe36f596792ebd0049177f7f0b7afa0f696505974ed1d6f72c/onnxruntime_gpu-1.20.2-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dfba508f110ec062dedfd3032e6eee8cde325026e9d7c5792884e8b9d4ebb9c3", size = 291522233, upload-time = "2025-03-07T05:46:08.901Z" },
+    { url = "https://files.pythonhosted.org/packages/a5/2a/8afc5aee996fd33fb816bc3067fdbde96a2a7520d4c275fa502f3aef7e54/onnxruntime_gpu-1.20.2-cp310-cp310-win_amd64.whl", hash = "sha256:75a7557292b2741e63fb73236ee84faa08075cead52d9a8d302a67036fc64f16", size = 279696089, upload-time = "2025-03-07T05:39:24.924Z" },
+    { url = "https://files.pythonhosted.org/packages/5e/53/9341b875b0ed29953485b43713e94b335a449c3770fed67dddb3c9b84af0/onnxruntime_gpu-1.20.2-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85057c7006457bee14fc2a57417b7e4f396f10d9c1b08b11aae08ac2b825eeda", size = 291518407, upload-time = "2025-03-07T05:46:22.943Z" },
+    { url = "https://files.pythonhosted.org/packages/0b/7a/0999993ceae7bf191d5d63a4e1b2208596763d8e586aa7dc5cc091f960c0/onnxruntime_gpu-1.20.2-cp311-cp311-win_amd64.whl", hash = "sha256:d0eafd873e4336949c89e6c7429a68e7e1d0233d9cb363e9780ca76c3c6f865c", size = 279697437, upload-time = "2025-03-07T05:39:38.418Z" },
+    { url = "https://files.pythonhosted.org/packages/5b/db/c1fcdf45cad147d3b3609cf66a1c6083b54382f58a41d7fc526cd5909090/onnxruntime_gpu-1.20.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:aa66d2e6de13fe6f4d1554b1c219bd2e4778b540ed9d3dc62957c95a8af43d66", size = 291510804, upload-time = "2025-03-07T05:46:36.178Z" },
+    { url = "https://files.pythonhosted.org/packages/27/67/4f979650557738a8b148dd7e0b82522d20ffcfb2c3964141c861a61e82c7/onnxruntime_gpu-1.20.2-cp312-cp312-win_amd64.whl", hash = "sha256:564a6a1187b208012f57c3bb3723ba65f6bc5cddff6e6b917ac96865768b39f5", size = 279699596, upload-time = "2025-03-07T05:39:50.858Z" },
+    { url = "https://files.pythonhosted.org/packages/48/a4/60f0cf16b24f05d123f90525408a705741fa92e0c38ab122cdf1d239e3fe/onnxruntime_gpu-1.20.2-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6af5b30b9b0e729d3ca1dfff493a39771f143cfc22af1d77d487022033cae284", size = 291511859, upload-time = "2025-03-07T05:46:49.302Z" },
+    { url = "https://files.pythonhosted.org/packages/ab/a2/0eb7a3fa417adc7af0be73b0ea35f1f0d6f92e3722eb6468e36dfe2e762d/onnxruntime_gpu-1.20.2-cp313-cp313-win_amd64.whl", hash = "sha256:6ffe5108d2dbd96a9a40bf76573219e04b67d0330aa93ca5114f1478185ade19", size = 279697061, upload-time = "2025-03-07T05:40:03.559Z" },
+    { url = "https://files.pythonhosted.org/packages/4e/de/6c692ac8604a451011a2a01e35e94f84bea8775ef97f6830985bbe8de172/onnxruntime_gpu-1.20.2-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:407e5b7a21d656aac6f994d2e329f5577eb3d7f98b63aa1e49e71a702ffa1da1", size = 291502464, upload-time = "2025-03-07T05:47:03.191Z" },
+]
+
+[[package]]
+name = "onnxruntime-gpu"
+version = "1.24.3"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version == '3.10.*'",
+]
+dependencies = [
+    { name = "flatbuffers", marker = "python_full_version == '3.10.*'" },
+    { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" },
+    { name = "packaging", marker = "python_full_version == '3.10.*'" },
+    { name = "protobuf", marker = "python_full_version == '3.10.*'" },
+    { name = "sympy", marker = "python_full_version == '3.10.*'" },
+]
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/28/f4/c8050f3f4916ab6c75432724f0ba51c1548dc1c3d66d40c0f8a9611e370f/onnxruntime_gpu-1.24.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ac922633819e1cdc81c9b3a28b5e37d788805307bbaa708a01a3d7150e345625", size = 252750845, upload-time = "2026-03-05T16:35:33.604Z" },
+    { url = "https://files.pythonhosted.org/packages/07/b7/81e8936354651915192a362a1718253c6d03da6b902a95237aa392b1d260/onnxruntime_gpu-1.24.3-cp311-cp311-win_amd64.whl", hash = "sha256:0fe6ece3042db149f36f4991cbebd19a690b7ffd82af89450a261b47f4704a37", size = 207192429, upload-time = "2026-03-05T16:39:57.015Z" },
+    { url = "https://files.pythonhosted.org/packages/24/fa/58ceca812214c9c1a286407c376e42e0b7de3e2c6e14b61cdf3caf6d6d9c/onnxruntime_gpu-1.24.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:537bdd6d95006a9200ae81f2e73ba9e621e723fdf0deb5901e2e62fb2cccf876", size = 252756089, upload-time = "2026-03-05T16:35:46.004Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/07/2f36920b513bd8939e25591153e37d9cfda94115bd119f2874da0750fce2/onnxruntime_gpu-1.24.3-cp312-cp312-win_amd64.whl", hash = "sha256:d72065b3ab5fdaef74d8b6b8f39b7ce20d89731610e3e63cb40e997d3dce177e", size = 207197001, upload-time = "2026-03-05T16:40:05.691Z" },
+    { url = "https://files.pythonhosted.org/packages/49/57/9e6206dac76e08f028d2ae95f2ab1b3a7c3317fb6c0374a530aad48dab5c/onnxruntime_gpu-1.24.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3242a70010934e5bb0aeaa9dde4c25c6c2da577b55c6308c0caa828ba3b7be23", size = 252753349, upload-time = "2026-03-05T16:35:58.09Z" },
+    { url = "https://files.pythonhosted.org/packages/4e/ae/f0be395602c13a3a8d22fa6632133550a64536c58bc3623abbba5d0a575e/onnxruntime_gpu-1.24.3-cp313-cp313-win_amd64.whl", hash = "sha256:a423b164dbc26cb7f8736367b11698c2a7294748d3c144c39542ecac28d225c9", size = 207197331, upload-time = "2026-03-05T16:40:14.944Z" },
+    { url = "https://files.pythonhosted.org/packages/b4/af/a64c9789769d8d7fabc6d35dcce2f2897b2d9e0fe113044efc2903f7cd07/onnxruntime_gpu-1.24.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9696d54974a1313ef0d87f4cbd04f9abfd13839194638d52bb5967a15615341d", size = 252762923, upload-time = "2026-03-05T16:36:10.043Z" },
+    { url = "https://files.pythonhosted.org/packages/c1/bb/1cf7dffac2fb01e8de9f0882438165f7543f0aab57f86d1f587e6faa8528/onnxruntime_gpu-1.24.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b8ca744f40b33380bc9136988213e574c927d2b919ed42149977e006b138f74f", size = 252754914, upload-time = "2026-03-05T16:36:30.739Z" },
+    { url = "https://files.pythonhosted.org/packages/cf/39/3949d56103bd9cd9381de59b060f9bce8dc2c7363f465bf207ebd0c7a5d0/onnxruntime_gpu-1.24.3-cp314-cp314-win_amd64.whl", hash = "sha256:c60c44e2b388720e6670a948b52626f3d089e960ef7da66e4fa6b2b33a11116f", size = 209599131, upload-time = "2026-03-05T16:40:24.074Z" },
+    { url = "https://files.pythonhosted.org/packages/f3/60/51bfbcf2d0540dbfa426a73a9b80046b71a63de7303d16c0f2682c8edfd2/onnxruntime_gpu-1.24.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:29048407a2398361d93de5537c2d2079d79d720337a0743d4a2cc28db981e776", size = 252764115, upload-time = "2026-03-05T16:36:44.681Z" },
+]
+
+[[package]]
+name = "onnxruntime-gpu"
+version = "1.25.0"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version >= '3.14'",
+    "python_full_version == '3.13.*'",
+    "python_full_version >= '3.11' and python_full_version < '3.13'",
+]
+dependencies = [
+    { name = "flatbuffers", marker = "python_full_version >= '3.11'" },
+    { name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
+    { name = "packaging", marker = "python_full_version >= '3.11'" },
+    { name = "protobuf", marker = "python_full_version >= '3.11'" },
+]
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/2d/7e/f58f8fc505a876b31fd2a34c1eb8f9863b75bf1589c3297c8efd48b93151/onnxruntime_gpu-1.25.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e8625bb31ee2d88524414e7458cc604f4f958f323ef8832cc00882f6cd42b9a1", size = 270337732, upload-time = "2026-04-22T17:27:59.993Z" },
+    { url = "https://files.pythonhosted.org/packages/55/5d/2561b3aa667d87a4ae9cd01c5a565955aab5a3d44a6076f723beb9cdde0a/onnxruntime_gpu-1.25.0-cp311-cp311-win_amd64.whl", hash = "sha256:2e702159a025aa5c69f0b747adf9a451e0c9e4b20120163a918c8459d3171b87", size = 220845585, upload-time = "2026-04-22T17:20:38.939Z" },
+    { url = "https://files.pythonhosted.org/packages/1d/6d/2c13d3eff74caa9e59820a044a75becd34e9cbeeaf7617ad7679cdb1fdb7/onnxruntime_gpu-1.25.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2f0c36c63c8b0eb4091f2567067f480f66f0aedc189eb009545c98ce7e919056", size = 270342429, upload-time = "2026-04-22T17:28:10.526Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/2e/9fc303ae59d4caeb85ec3cea6881b7de8ca1d2a07140fade39913cd7ff10/onnxruntime_gpu-1.25.0-cp312-cp312-win_amd64.whl", hash = "sha256:61178cc4d84f59861714554531e01cccbd33ddf13cc0e87a3adea13b24d297ce", size = 220847708, upload-time = "2026-04-22T17:20:47.993Z" },
+    { url = "https://files.pythonhosted.org/packages/f5/15/e63fe7b1abad6884bed07e9bb333e9f0ea48fbb8cbc1ea4a67ee6019d5d0/onnxruntime_gpu-1.25.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e462eb13ee9955117baec4f518916c1e7cb1a96001114105632bc6d454c6aee6", size = 270342324, upload-time = "2026-04-22T17:28:21.142Z" },
+    { url = "https://files.pythonhosted.org/packages/21/10/b3533243d062b589d4b1f3ae26584af332c5cde618e7f6f5ff6fabbfd5f2/onnxruntime_gpu-1.25.0-cp313-cp313-win_amd64.whl", hash = "sha256:9a3682158e5e911385252eb95d6332b6f525972746c582e10f8a78213b39e624", size = 220848188, upload-time = "2026-04-22T17:20:56.946Z" },
+    { url = "https://files.pythonhosted.org/packages/35/6c/d7706dd1d0eaafdba44d5c89f8d952de41e425a1b0cbd3ecfa60f918c249/onnxruntime_gpu-1.25.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8514b92c5929c953850090d823d018770cba2a971efab5f8f69a3c4280cdc632", size = 270364210, upload-time = "2026-04-22T17:28:33.568Z" },
+    { url = "https://files.pythonhosted.org/packages/37/01/9f1b16ea857e3a4b5e82a2d70b52ea46a0083569f737d840f74a1b86818f/onnxruntime_gpu-1.25.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ffe9df4016b061ec3a5565a4fc08cdb86808cd8b9c255c42301066c0c24a81b5", size = 270345126, upload-time = "2026-04-22T17:28:44.416Z" },
+    { url = "https://files.pythonhosted.org/packages/56/c8/aae22f3c9cea9160d8d969734a1927720fcb4d4ad4abe269c407c1d2b63c/onnxruntime_gpu-1.25.0-cp314-cp314-win_amd64.whl", hash = "sha256:2173b71631208177fe704ce2d92eac3acbf758285327247ea40a31a9f0bcc073", size = 223385369, upload-time = "2026-04-22T17:21:06.026Z" },
+    { url = "https://files.pythonhosted.org/packages/ed/0a/79fba6a1a32803a2bf8b99187e0ea5d5d69ffe0c5c0f469bde232ceb8327/onnxruntime_gpu-1.25.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8576c721c600cc669717a2ae49af30fdfff230480099653adc7b79d58a240852", size = 270364130, upload-time = "2026-04-22T17:28:54.708Z" },
+]
+
 [[package]]
 name = "opentelemetry-api"
 version = "1.40.0"