diff --git a/benchmarks/mine_bench.py b/benchmarks/mine_bench.py
new file mode 100644
index 0000000..43b08ed
--- /dev/null
+++ b/benchmarks/mine_bench.py
@@ -0,0 +1,301 @@
+"""Mining throughput benchmark: per-chunk vs batched upsert, CPU vs GPU.
+
+Compares the legacy per-chunk ``add_drawer`` loop against the batched
+``collection.upsert`` path introduced in the "batched upsert + GPU" PR.
+Runs both paths on an identical seeded synthetic corpus, reports
+wall-clock time + drawers/sec, and prints a markdown table suitable
+for pasting into a PR description.
+
+Usage
+-----
+
+    # CPU (whatever onnxruntime is installed — CPU if you don't have
+    # onnxruntime-gpu):
+    uv run python benchmarks/mine_bench.py
+
+    # GPU (NVIDIA):
+    uv venv /tmp/gpu && source /tmp/gpu/bin/activate
+    uv pip install -e '.[gpu]' 'nvidia-cudnn-cu12>=9,<10' \\
+        'nvidia-cuda-runtime-cu12' 'nvidia-cublas-cu12'
+    export LD_LIBRARY_PATH=$(python -c "import nvidia.cudnn, os; \\
+        print(os.path.dirname(nvidia.cudnn.__file__)+'/lib')"):$LD_LIBRARY_PATH
+    MEMPALACE_EMBEDDING_DEVICE=cuda python benchmarks/mine_bench.py
+
+Flags
+-----
+
+    --device cpu|cuda|coreml|dml|auto   Override MEMPALACE_EMBEDDING_DEVICE
+    --scenarios small,medium,large      Which scenarios to run
+    --seed 42                           RNG seed for reproducibility
+"""
+
+from __future__ import annotations
+
+import argparse
+import hashlib
+import os
+import random
+import shutil
+import string
+import sys
+import tempfile
+import time
+from datetime import datetime
+from pathlib import Path
+
+
+def build_corpus(dest: Path, n_files: int, paragraphs_per_file: int, seed: int) -> None:
+    """Write a seeded synthetic corpus (markdown files plus config) under ``dest``."""
+    rng = random.Random(seed)
+    letters = string.ascii_lowercase
+
+    def _paragraph() -> str:
+        # Each of the 12 words draws its length (3-10) first, then its letters.
+        return " ".join("".join(rng.choices(letters, k=rng.randint(3, 10))) for _ in range(12))
+
+    dest.mkdir(parents=True, exist_ok=True)
+    for file_no in range(n_files):
+        body = "\n\n".join(_paragraph() for _ in range(paragraphs_per_file))
+        (dest / f"doc_{file_no:03d}.md").write_text(body)
+    (dest / "mempalace.yaml").write_text(
+        "wing: bench\n"
+        "rooms:\n"
+        "  - name: general\n"
+        "    description: all\n"
+        "    keywords: [general]\n"
+    )
+
+
+def _process_file_unbatched(filepath, project_path, collection, wing, rooms, agent, closets_col):
+    """Legacy per-chunk upsert path (pre-batching): one ``add_drawer`` per chunk.
+
+    Reproduces the loop shape the miner used before this PR so only the upsert
+    granularity differs. Returns ``(drawers_added, room)``; skips yield ``0``.
+    """
+    from mempalace import miner
+    from mempalace.palace import (
+        build_closet_lines,
+        file_already_mined,
+        mine_lock,
+        purge_file_closets,
+        upsert_closet_lines,
+    )
+
+    source_file = str(filepath)
+    if file_already_mined(collection, source_file, check_mtime=True):
+        return 0, "general"
+    try:
+        content = filepath.read_text(encoding="utf-8", errors="replace")
+    except OSError:
+        return 0, "general"
+    content = content.strip()
+    if len(content) < miner.MIN_CHUNK_SIZE:
+        return 0, "general"
+    room = miner.detect_room(filepath, content, rooms, project_path)
+    chunks = miner.chunk_text(content, source_file)
+    # Repeat the already-mined check once the per-file lock is held.
+    with mine_lock(source_file):
+        if file_already_mined(collection, source_file, check_mtime=True):
+            return 0, room
+        try:
+            collection.delete(where={"source_file": source_file})
+        except Exception:
+            pass
+        drawers_added = 0
+        for chunk in chunks:
+            miner.add_drawer(
+                collection=collection,
+                wing=wing,
+                room=room,
+                content=chunk["content"],
+                source_file=source_file,
+                chunk_index=chunk["chunk_index"],
+                agent=agent,
+            )
+            drawers_added += 1
+        if closets_col and drawers_added > 0:
+            drawer_ids = [
+                f"drawer_{wing}_{room}_"
+                f"{hashlib.sha256((source_file + str(c['chunk_index'])).encode()).hexdigest()[:24]}"
+                for c in chunks
+            ]
+            closet_lines = build_closet_lines(source_file, drawer_ids, content, wing, room)
+            closet_id_base = (
+                f"closet_{wing}_{room}_"
+                f"{hashlib.sha256(source_file.encode()).hexdigest()[:24]}"
+            )
+            closet_meta = {
+                "wing": wing,
+                "room": room,
+                "source_file": source_file,
+                "drawer_count": drawers_added,
+                "filed_at": datetime.now().isoformat(),
+                "normalize_version": miner.NORMALIZE_VERSION,
+            }
+            purge_file_closets(closets_col, source_file)
+            upsert_closet_lines(closets_col, closet_id_base, closet_lines, closet_meta)
+    return drawers_added, room
+
+
+def mine_once(project_dir: str, palace_path: str, batched: bool) -> tuple[int, float]:
+    """Mine ``project_dir`` into ``palace_path`` and time the per-file loop.
+
+    ``batched=True`` drives ``miner.process_file``; otherwise the legacy
+    ``_process_file_unbatched`` loop runs. Config loading, file scanning and
+    collection opening happen before the timer starts. Returns
+    ``(total_drawers, elapsed_seconds)``.
+    """
+    from mempalace import miner
+    from mempalace.miner import load_config, scan_project
+    from mempalace.palace import get_closets_collection, get_collection
+
+    project_path = Path(project_dir).resolve()
+    config = load_config(project_dir)
+    wing, rooms = config["wing"], config.get("rooms", [])
+    files = scan_project(project_dir)
+    collection = get_collection(palace_path)
+    closets = get_closets_collection(palace_path)
+    # Keyword arguments shared by every ``miner.process_file`` call.
+    shared = dict(project_path=project_path, collection=collection, wing=wing, rooms=rooms)
+    shared.update(agent="bench", dry_run=False, closets_col=closets)
+
+    total = 0
+    started = time.perf_counter()
+    for filepath in files:
+        if batched:
+            outcome = miner.process_file(filepath=filepath, **shared)
+        else:
+            outcome = _process_file_unbatched(
+                filepath, project_path, collection, wing, rooms, "bench", closets
+            )
+        drawers, _room = outcome
+        total += drawers
+    return total, time.perf_counter() - started
+
+
+def _reset_backend_caches() -> None:
+    """Clear the default backend's caches so every run starts from a cold open."""
+    from mempalace.palace import _DEFAULT_BACKEND as backend
+
+    for cache in (backend._clients, backend._freshness):
+        cache.clear()
+
+
+def run_scenario(label: str, n_files: int, paragraphs_per_file: int, seed: int) -> dict:
+    """Benchmark one scenario with both mining paths and return a summary dict."""
+    print(f"\n=== {label}: {n_files} files × {paragraphs_per_file} paragraphs ===")
+    timings = {}
+    for mode in ("unbatched", "batched"):
+        # Each mode gets its own temp dir, a rebuilt corpus and cleared caches.
+        workdir = Path(tempfile.mkdtemp(prefix=f"mp_{mode}_"))
+        try:
+            proj, palace = workdir / "proj", workdir / "palace"
+            build_corpus(proj, n_files, paragraphs_per_file, seed=seed)
+            _reset_backend_caches()
+            drawers, dt = mine_once(str(proj), str(palace), batched=(mode == "batched"))
+            rate = drawers / dt if dt > 0 else 0.0
+            timings[mode] = (drawers, dt, rate)
+            print(f"  {mode:10} {drawers:5} drawers in {dt:6.2f}s  →  {rate:7.1f} drawers/sec")
+        finally:
+            shutil.rmtree(workdir, ignore_errors=True)
+
+    # The drawer count in the summary is taken from the batched run.
+    (_, t_u, r_u), (d_b, t_b, r_b) = timings["unbatched"], timings["batched"]
+    speedup = t_u / t_b if t_b > 0 else 0.0
+    print(f"  speedup:   {speedup:.2f}× ({t_u:.2f}s → {t_b:.2f}s)")
+    return dict(
+        label=label,
+        n_files=n_files,
+        paragraphs=paragraphs_per_file,
+        drawers=d_b,
+        unbatched_time=t_u,
+        unbatched_rate=r_u,
+        batched_time=t_b,
+        batched_rate=r_b,
+        speedup=speedup,
+    )
+
+
+SCENARIOS = {  # key -> (table label, number of files, paragraphs per file)
+    "small":  ("Small files (~50 paragraphs)",  10, 50),
+    "medium": ("Medium files (~200 paragraphs)", 20, 200),
+    "large":  ("Large files (~500 paragraphs)",  10, 500),
+}
+
+
+def _env_summary(device_label: str) -> list[str]:
+    """Return two markdown lines describing the device and library versions.
+
+    A library that cannot be imported or queried is reported as ``?``.
+    """
+    import platform
+
+    chromadb_v = ort_v = providers = "?"
+    try:
+        import chromadb
+        chromadb_v = chromadb.__version__
+    except Exception:
+        pass  # keep the "?" placeholder
+    try:
+        import onnxruntime as ort
+        names = [p.replace("ExecutionProvider", "") for p in ort.get_available_providers()]
+        ort_v, providers = ort.__version__, ",".join(names)
+    except Exception:
+        pass  # keep the "?" placeholders
+
+    device_line = f"device: **{device_label}** (onnxruntime {ort_v}, providers={providers})"
+    libs_line = f"chromadb {chromadb_v} · python {sys.version.split()[0]} · {platform.platform()}"
+    return [device_line, libs_line]
+
+
+def main() -> None:
+    """Parse CLI flags, run the selected scenarios and print a markdown table."""
+    parser = argparse.ArgumentParser(description=(__doc__ or "").split("\n\n", 1)[0])
+    parser.add_argument(
+        "--device",
+        default=None,
+        help="Override MEMPALACE_EMBEDDING_DEVICE (cpu|cuda|coreml|dml|auto)",
+    )
+    parser.add_argument(
+        "--scenarios",
+        default="small,medium,large",
+        help="Comma-separated scenario names (default: all)",
+    )
+    parser.add_argument("--seed", type=int, default=42)
+    args = parser.parse_args()
+
+    # Reject unknown names before the model warm-up and before any scenario
+    # runs, so a typo cannot discard minutes of completed benchmark work.
+    picked = [s.strip() for s in args.scenarios.split(",") if s.strip()]
+    for key in picked:
+        if key not in SCENARIOS:
+            print(f"Unknown scenario {key!r}; choices: {sorted(SCENARIOS)}", file=sys.stderr)
+            sys.exit(2)
+
+    if args.device:
+        os.environ["MEMPALACE_EMBEDDING_DEVICE"] = args.device
+
+    from mempalace.embedding import describe_device, get_embedding_function
+
+    device_label = describe_device()
+    print(f"Warming up ONNX model on device={device_label}...")
+    get_embedding_function()(["warmup sentence one", "warmup sentence two"])
+    results = [run_scenario(*SCENARIOS[key], args.seed) for key in picked]
+
+    print("\n\n## Mining benchmark\n")
+    for line in _env_summary(device_label):
+        print(line + "  ")
+    print()
+    print("| Scenario | Files | Drawers | Per-chunk (old) | Batched (new) | Speedup |")
+    print("| --- | ---: | ---: | ---: | ---: | ---: |")
+    for r in results:
+        print(
+            f"| {r['label']} | {r['n_files']} | {r['drawers']} | "
+            f"{r['unbatched_time']:.2f}s · {r['unbatched_rate']:.0f} drw/s | "
+            f"{r['batched_time']:.2f}s · {r['batched_rate']:.0f} drw/s | "
+            f"**{r['speedup']:.2f}×** |"
+        )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/mempalace/backends/chroma.py b/mempalace/backends/chroma.py
index 3a0d2c3..8ba2279 100644
--- a/mempalace/backends/chroma.py
+++ b/mempalace/backends/chroma.py
@@ -405,6 +405,23 @@ class ChromaBackend(BaseBackend):
         self._freshness: dict[str, tuple[int, float]] = {}
         self._closed = False
 
+    @staticmethod
+    def _resolve_embedding_function():
+        """Build the embedding function for the user's ``embedding_device``.
+
+        The result is meant to be passed explicitly to ``get_collection``,
+        ``get_or_create_collection`` and ``create_collection`` so readers and
+        writers embed with the same function. Any failure is logged with its
+        traceback and ``None`` is returned, in which case callers omit the
+        argument and ChromaDB applies its own default.
+        """
+        try:
+            from .. import embedding as _embedding
+            return _embedding.get_embedding_function()
+        except Exception:
+            logger.exception("Failed to build embedding function; using chromadb default")
+            return None
+
     # ------------------------------------------------------------------
     # Internal helpers
     # ------------------------------------------------------------------
@@ -532,12 +549,15 @@ class ChromaBackend(BaseBackend):
         if options and isinstance(options, dict):
             hnsw_space = options.get("hnsw_space", hnsw_space)
 
+        ef = self._resolve_embedding_function()
+        ef_kwargs = {"embedding_function": ef} if ef is not None else {}
+
         if create:
             collection = client.get_or_create_collection(
-                collection_name, metadata={"hnsw:space": hnsw_space}
+                collection_name, metadata={"hnsw:space": hnsw_space}, **ef_kwargs
             )
         else:
-            collection = client.get_collection(collection_name)
+            collection = client.get_collection(collection_name, **ef_kwargs)
         return ChromaCollection(collection)
 
     def close_palace(self, palace) -> None:
@@ -578,8 +598,10 @@ class ChromaBackend(BaseBackend):
         self, palace_path: str, collection_name: str, hnsw_space: str = "cosine"
     ) -> ChromaCollection:
         """Create (not get-or-create) ``collection_name`` with the given HNSW space."""
+        ef = self._resolve_embedding_function()
+        ef_kwargs = {"embedding_function": ef} if ef is not None else {}
         collection = self._client(palace_path).create_collection(
-            collection_name, metadata={"hnsw:space": hnsw_space}
+            collection_name, metadata={"hnsw:space": hnsw_space}, **ef_kwargs
         )
         return ChromaCollection(collection)
 
diff --git a/mempalace/config.py b/mempalace/config.py
index 616334e..86aa90f 100644
--- a/mempalace/config.py
+++ b/mempalace/config.py
@@ -236,6 +236,23 @@ class MempalaceConfig:
             pass
         return normalized
 
+    @property
+    def embedding_device(self):
+        """Hardware device for the ONNX embedding model.
+
+        Values: ``"auto"`` (default), ``"cpu"``, ``"cuda"``, ``"coreml"``,
+        ``"dml"``. Read from env ``MEMPALACE_EMBEDDING_DEVICE`` first, then
+        ``embedding_device`` in ``config.json``, then ``"auto"``; a blank or
+        null value at one level falls through to the next. The value is
+        stripped and lower-cased but not validated here; resolution to real
+        providers (and CPU fallback) happens in :mod:`mempalace.embedding`.
+        """
+        env_val = os.environ.get("MEMPALACE_EMBEDDING_DEVICE", "").strip().lower()
+        if env_val:
+            return env_val
+        file_val = str(self._file_config.get("embedding_device") or "auto").strip().lower()
+        return file_val or "auto"
+
     @property
     def hook_silent_save(self):
         """Whether the stop hook saves directly (True) or blocks for MCP calls (False)."""
diff --git a/mempalace/convo_miner.py b/mempalace/convo_miner.py
index 521d8fb..76891f9 100644
--- a/mempalace/convo_miner.py
+++ b/mempalace/convo_miner.py
@@ -332,31 +332,44 @@ def _file_chunks_locked(collection, source_file, chunks, wing, room, agent, extr
         except Exception:
             pass
 
+        # Batch the whole file into one upsert so the embedding model runs
+        # a single forward pass for all chunks — dramatically faster than
+        # one call per chunk, especially on GPU where per-call overhead
+        # dominates over the actual matmul.
+        batch_docs: list = []
+        batch_ids: list = []
+        batch_metas: list = []
+        filed_at = datetime.now().isoformat()
         for chunk in chunks:
             chunk_room = chunk.get("memory_type", room) if extract_mode == "general" else room
             if extract_mode == "general":
                 room_counts_delta[chunk_room] += 1
             drawer_id = f"drawer_{wing}_{chunk_room}_{hashlib.sha256((source_file + str(chunk['chunk_index'])).encode()).hexdigest()[:24]}"
+            batch_docs.append(chunk["content"])
+            batch_ids.append(drawer_id)
+            batch_metas.append(
+                {
+                    "wing": wing,
+                    "room": chunk_room,
+                    "hall": _detect_hall_cached(chunk["content"]),
+                    "source_file": source_file,
+                    "chunk_index": chunk["chunk_index"],
+                    "added_by": agent,
+                    "filed_at": filed_at,
+                    "ingest_mode": "convos",
+                    "extract_mode": extract_mode,
+                    "normalize_version": NORMALIZE_VERSION,
+                }
+            )
+
+        if batch_docs:
             try:
                 collection.upsert(
-                    documents=[chunk["content"]],
-                    ids=[drawer_id],
-                    metadatas=[
-                        {
-                            "wing": wing,
-                            "room": chunk_room,
-                            "hall": _detect_hall_cached(chunk["content"]),
-                            "source_file": source_file,
-                            "chunk_index": chunk["chunk_index"],
-                            "added_by": agent,
-                            "filed_at": datetime.now().isoformat(),
-                            "ingest_mode": "convos",
-                            "extract_mode": extract_mode,
-                            "normalize_version": NORMALIZE_VERSION,
-                        }
-                    ],
+                    documents=batch_docs,
+                    ids=batch_ids,
+                    metadatas=batch_metas,
                 )
-                drawers_added += 1
+                drawers_added = len(batch_docs)
             except Exception as e:
                 if "already exists" not in str(e).lower():
                     raise
diff --git a/mempalace/embedding.py b/mempalace/embedding.py
new file mode 100644
index 0000000..139ded7
--- /dev/null
+++ b/mempalace/embedding.py
@@ -0,0 +1,147 @@
+"""Embedding function factory with hardware acceleration.
+
+Returns a ChromaDB-compatible embedding function bound to a user-selected
+ONNX Runtime execution provider. The same ``all-MiniLM-L6-v2`` model and
+384-dim vectors ChromaDB ships by default are reused, so switching device
+does not invalidate existing palaces.
+
+Supported devices (env ``MEMPALACE_EMBEDDING_DEVICE`` or ``embedding_device``
+in ``~/.mempalace/config.json``):
+
+* ``auto`` — prefer CUDA ▸ CoreML ▸ DirectML, fall back to CPU
+* ``cpu`` — force CPU (the historical default)
+* ``cuda`` — NVIDIA GPU via ``onnxruntime-gpu`` (``pip install mempalace[gpu]``)
+* ``coreml`` — Apple Neural Engine (macOS)
+* ``dml`` — DirectML (Windows / AMD / Intel GPUs)
+
+Requesting an unavailable accelerator emits a warning and falls back to CPU
+rather than hard-failing — mining must still work on a laptop without CUDA.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+_PROVIDER_MAP = {  # device name -> ONNX Runtime providers, most preferred first
+    "cpu": ["CPUExecutionProvider"],
+    "cuda": ["CUDAExecutionProvider", "CPUExecutionProvider"],
+    "coreml": ["CoreMLExecutionProvider", "CPUExecutionProvider"],
+    "dml": ["DmlExecutionProvider", "CPUExecutionProvider"],
+}
+
+_AUTO_ORDER = [  # (provider, device name) pairs tried in order for "auto"
+    ("CUDAExecutionProvider", "cuda"),
+    ("CoreMLExecutionProvider", "coreml"),
+    ("DmlExecutionProvider", "dml"),
+]
+
+_EF_CACHE: dict = {}  # tuple(provider list) -> embedding function instance
+_WARNED: set = set()  # device names that already triggered a fallback warning
+
+
+def _resolve_providers(device: str) -> tuple[list, str]:
+    """Return ``(provider_list, effective_device)`` for ``device``.
+
+    ``auto`` picks the first ``_AUTO_ORDER`` provider that the installed
+    ``onnxruntime`` reports as available. A missing ``onnxruntime``, an
+    unknown device name, or an unavailable accelerator all resolve to CPU;
+    the latter two log a warning once per device name. Returned lists are
+    fresh copies, so callers cannot mutate ``_PROVIDER_MAP``.
+    """
+    device = (device or "auto").strip().lower()
+    cpu_only = (["CPUExecutionProvider"], "cpu")
+    try:
+        import onnxruntime as ort
+
+        available = set(ort.get_available_providers())
+    except ImportError:
+        return cpu_only
+
+    if device == "auto":
+        for provider, name in _AUTO_ORDER:
+            if provider in available:
+                return ([provider, "CPUExecutionProvider"], name)
+        return cpu_only
+
+    requested = _PROVIDER_MAP.get(device)
+    if requested is not None:
+        if requested[0] == "CPUExecutionProvider":
+            return (list(requested), "cpu")
+        if requested[0] in available:
+            return (list(requested), device)
+
+    if device not in _WARNED:
+        _WARNED.add(device)
+        if requested is None:
+            logger.warning("Unknown embedding_device %r — falling back to cpu", device)
+        else:
+            # Extras are named after the device, except CUDA which is ``gpu``.
+            extra = "gpu" if device == "cuda" else device
+            logger.warning(
+                "embedding_device=%r requested but %s is not installed — "
+                "falling back to CPU. Install mempalace[%s] to enable it.",
+                device, requested[0], extra,
+            )
+    return cpu_only
+
+
+def _build_ef_class():
+    """Return an ``ONNXMiniLM_L6_V2`` subclass whose ``name()`` is ``"default"``.
+
+    Rationale for the override: ChromaDB 1.5 records the embedding-function
+    name on each collection and refuses reads made with a differently named
+    one (``onnx_mini_lm_l6_v2`` vs ``default``). Reporting ``"default"``
+    keeps palaces created with ``DefaultEmbeddingFunction`` readable while
+    still accepting ``preferred_providers`` for GPU execution. The model and
+    vectors are unchanged — only the tag differs.
+    """
+    from chromadb.utils import embedding_functions as _efs
+
+    class _MempalaceONNX(_efs.ONNXMiniLM_L6_V2):
+        @staticmethod
+        def name() -> str:
+            return "default"
+
+    return _MempalaceONNX
+
+
+def get_embedding_function(device: Optional[str] = None):
+    """Return the shared embedding function for ``device``.
+
+    With ``device=None`` the value is read from
+    ``MempalaceConfig().embedding_device``. Instances are cached per resolved
+    provider list, so the model for a given list is built once per process.
+    """
+    if device is None:
+        from .config import MempalaceConfig
+
+        device = MempalaceConfig().embedding_device
+
+    providers, effective = _resolve_providers(device)
+    key = tuple(providers)
+    ef = _EF_CACHE.get(key)
+    if ef is None:
+        # First request for this provider list: build the instance and keep it.
+        ef = _build_ef_class()(preferred_providers=providers)
+        _EF_CACHE[key] = ef
+        message = "Embedding function initialized (device=%s providers=%s)"
+        logger.info(message, effective, providers)
+
+    return ef
+
+
+def describe_device(device: Optional[str] = None) -> str:
+    """Return the effective device name that ``device`` resolves to.
+
+    ``None`` means "read ``MempalaceConfig().embedding_device``". The miner
+    CLI header prints this so users can see whether acceleration engaged.
+    """
+    if device is None:
+        from .config import MempalaceConfig
+
+        device = MempalaceConfig().embedding_device
+    _providers, effective = _resolve_providers(device)
+    return effective
diff --git a/mempalace/miner.py b/mempalace/miner.py
index 9e8ff5e..dc7cc70 100644
--- a/mempalace/miner.py
+++ b/mempalace/miner.py
@@ -14,6 +14,7 @@ import fnmatch
 from pathlib import Path
 from datetime import datetime
 from collections import defaultdict
+from typing import Optional
 
 from .palace import (
     NORMALIZE_VERSION,
@@ -633,40 +634,62 @@ def _extract_entities_for_metadata(content: str) -> str:
     return ";".join(capped)
 
 
+def _build_drawer_metadata(
+    wing: str,
+    room: str,
+    source_file: str,
+    chunk_index: int,
+    agent: str,
+    content: str,
+    source_mtime: Optional[float],
+) -> dict:
+    """Assemble one drawer's metadata dict; nothing is written to the palace.
+
+    ``source_mtime`` is included only when it is not ``None`` and ``entities``
+    only when non-empty. Kept separate from ``add_drawer`` so a caller can
+    collect metadata for many chunks and send them in one ``collection.upsert``.
+    """
+    metadata = dict(
+        wing=wing,
+        room=room,
+        source_file=source_file,
+        chunk_index=chunk_index,
+        added_by=agent,
+        filed_at=datetime.now().isoformat(),
+        normalize_version=NORMALIZE_VERSION,
+    )
+    if source_mtime is not None:
+        metadata["source_mtime"] = source_mtime
+    metadata["hall"] = detect_hall(content)
+    found_entities = _extract_entities_for_metadata(content)
+    if found_entities:
+        metadata["entities"] = found_entities
+    return metadata
+
+
 def add_drawer(
     collection, wing: str, room: str, content: str, source_file: str, chunk_index: int, agent: str
 ):
-    """Add one drawer to the palace."""
+    """Add one drawer to the palace.
+
+    Kept for backward compatibility with external callers. In-tree the
+    miner uses ``_build_drawer_metadata`` + a batched ``collection.upsert``
+    to amortize the embedding model's forward-pass cost across chunks.
+    """
     drawer_id = f"drawer_{wing}_{room}_{hashlib.sha256((source_file + str(chunk_index)).encode()).hexdigest()[:24]}"
     try:
-        metadata = {
-            "wing": wing,
-            "room": room,
-            "source_file": source_file,
-            "chunk_index": chunk_index,
-            "added_by": agent,
-            "filed_at": datetime.now().isoformat(),
-            "normalize_version": NORMALIZE_VERSION,
-        }
-        # Store file mtime so we can detect modifications later.
-        try:
-            metadata["source_mtime"] = os.path.getmtime(source_file)
-        except OSError:
-            pass
-        # Tag with hall for graph connectivity within wings
-        metadata["hall"] = detect_hall(content)
-        # Tag with entity names for filterable search
-        entities = _extract_entities_for_metadata(content)
-        if entities:
-            metadata["entities"] = entities
-        collection.upsert(
-            documents=[content],
-            ids=[drawer_id],
-            metadatas=[metadata],
-        )
-        return True
-    except Exception:
-        raise
+        source_mtime = os.path.getmtime(source_file)
+    except OSError:
+        source_mtime = None
+    metadata = _build_drawer_metadata(
+        wing, room, source_file, chunk_index, agent, content, source_mtime
+    )
+    collection.upsert(
+        documents=[content],
+        ids=[drawer_id],
+        metadatas=[metadata],
+    )
+    return True
 
 
 # =============================================================================
@@ -725,19 +748,42 @@ def process_file(
         except Exception:
             pass
 
-        drawers_added = 0
+        # Batch all chunks of this file into a single upsert so the embedding
+        # model runs one forward pass over the whole file instead of N passes
+        # of one chunk each. On CPU this is typically a 10-30x speedup; on
+        # GPU the speedup is larger because per-call overhead dominates.
+        try:
+            source_mtime = os.path.getmtime(source_file)
+        except OSError:
+            source_mtime = None
+
+        batch_docs: list = []
+        batch_ids: list = []
+        batch_metas: list = []
         for chunk in chunks:
-            added = add_drawer(
-                collection=collection,
-                wing=wing,
-                room=room,
-                content=chunk["content"],
-                source_file=source_file,
-                chunk_index=chunk["chunk_index"],
-                agent=agent,
+            drawer_id = f"drawer_{wing}_{room}_{hashlib.sha256((source_file + str(chunk['chunk_index'])).encode()).hexdigest()[:24]}"
+            batch_docs.append(chunk["content"])
+            batch_ids.append(drawer_id)
+            batch_metas.append(
+                _build_drawer_metadata(
+                    wing,
+                    room,
+                    source_file,
+                    chunk["chunk_index"],
+                    agent,
+                    chunk["content"],
+                    source_mtime,
+                )
             )
-            if added:
-                drawers_added += 1
+
+        drawers_added = 0
+        if batch_docs:
+            collection.upsert(
+                documents=batch_docs,
+                ids=batch_ids,
+                metadatas=batch_metas,
+            )
+            drawers_added = len(batch_docs)
 
         # Build closet — the searchable index pointing to these drawers.
         # Purge first: a re-mine (mtime change or normalize_version bump) must
@@ -868,6 +914,8 @@ def mine(
     if limit > 0:
         files = files[:limit]
 
+    from .embedding import describe_device
+
     print(f"\n{'=' * 55}")
     print("  MemPalace Mine")
     print(f"{'=' * 55}")
@@ -875,6 +923,7 @@ def mine(
     print(f"  Rooms:   {', '.join(r['name'] for r in rooms)}")
     print(f"  Files:   {len(files)}")
     print(f"  Palace:  {palace_path}")
+    print(f"  Device:  {describe_device()}")
     if dry_run:
         print("  DRY RUN — nothing will be filed")
     if not respect_gitignore:
diff --git a/pyproject.toml b/pyproject.toml
index 617c067..18228d7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -53,6 +53,14 @@ chroma = "mempalace.backends.chroma:ChromaBackend"
 [project.optional-dependencies]
 dev = ["pytest>=7.0", "pytest-cov>=4.0", "ruff>=0.4.0", "psutil>=5.9"]
 spellcheck = ["autocorrect>=2.0"]
+# Hardware acceleration for the ONNX embedding model. Install exactly one:
+#   pip install mempalace[gpu]       — NVIDIA CUDA
+#   pip install mempalace[dml]       — DirectML (Windows AMD/Intel/NVIDIA)
+#   pip install mempalace[coreml]    — macOS Neural Engine
+# After install, set MEMPALACE_EMBEDDING_DEVICE=cuda|dml|coreml (or "auto").
+gpu = ["onnxruntime-gpu>=1.16"]
+dml = ["onnxruntime-directml>=1.16"]
+coreml = ["onnxruntime>=1.16"]
 
 [dependency-groups]
 dev = ["pytest>=7.0", "pytest-cov>=4.0", "ruff>=0.4.0", "psutil>=5.9"]
diff --git a/uv.lock b/uv.lock
index f102d43..ef1a706 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1178,6 +1178,11 @@ dependencies = [
 ]
 
 [package.optional-dependencies]
+coreml = [
+    { name = "onnxruntime", version = "1.20.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
+    { name = "onnxruntime", version = "1.24.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" },
+    { name = "onnxruntime", version = "1.24.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
+]
 dev = [
     { name = "psutil" },
     { name = "pytest", version = "8.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
@@ -1185,6 +1190,16 @@ dev = [
     { name = "pytest-cov" },
     { name = "ruff" },
 ]
+dml = [
+    { name = "onnxruntime-directml", version = "1.20.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
+    { name = "onnxruntime-directml", version = "1.24.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" },
+    { name = "onnxruntime-directml", version = "1.24.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
+]
+gpu = [
+    { name = "onnxruntime-gpu", version = "1.20.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
+    { name = "onnxruntime-gpu", version = "1.24.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" },
+    { name = "onnxruntime-gpu", version = "1.25.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
+]
 spellcheck = [
     { name = "autocorrect" },
 ]
@@ -1202,6 +1217,9 @@ dev = [
 requires-dist = [
     { name = "autocorrect", marker = "extra == 'spellcheck'", specifier = ">=2.0" },
     { name = "chromadb", specifier = ">=1.5.4,<2" },
+    { name = "onnxruntime", marker = "extra == 'coreml'", specifier = ">=1.16" },
+    { name = "onnxruntime-directml", marker = "extra == 'dml'", specifier = ">=1.16" },
+    { name = "onnxruntime-gpu", marker = "extra == 'gpu'", specifier = ">=1.16" },
     { name = "psutil", marker = "extra == 'dev'", specifier = ">=5.9" },
     { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0" },
     { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.0" },
@@ -1209,7 +1227,7 @@ requires-dist = [
     { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.4.0" },
     { name = "tomli", marker = "python_full_version < '3.11'", specifier = ">=2.0.0" },
 ]
-provides-extras = ["dev", "spellcheck"]
+provides-extras = ["dev", "spellcheck", "gpu", "dml", "coreml"]
 
 [package.metadata.requires-dev]
 dev = [
@@ -1815,6 +1833,154 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/6c/1d/1666dc64e78d8587d168fec4e3b7922b92eb286a2ddeebcf6acb55c7dc82/onnxruntime-1.24.4-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e1cc6a518255f012134bc791975a6294806be9a3b20c4a54cca25194c90cf731", size = 17247021, upload-time = "2026-03-17T22:04:52.377Z" },
 ]
 
+[[package]]
+name = "onnxruntime-directml"
+version = "1.20.1"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version < '3.10'",
+]
+dependencies = [
+    { name = "coloredlogs", marker = "python_full_version < '3.10'" },
+    { name = "flatbuffers", marker = "python_full_version < '3.10'" },
+    { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
+    { name = "packaging", marker = "python_full_version < '3.10'" },
+    { name = "protobuf", marker = "python_full_version < '3.10'" },
+    { name = "sympy", marker = "python_full_version < '3.10'" },
+]
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3c/4f/f433239b05304aa9af0217da20508abbbcec1dcd58ee821e3dab8939ecfe/onnxruntime_directml-1.20.1-cp310-cp310-win_amd64.whl", hash = "sha256:d4aa43694799559fb5570fdf0e96a154d4b4d0bb9b73c3e81744eb7fe0c0de8d", size = 22760521, upload-time = "2024-11-21T00:49:40.179Z" },
+    { url = "https://files.pythonhosted.org/packages/df/5f/16337318bd99d2d837cbb2e91e8a12b0915cb80d7c1ae8f80ca2f5d47a09/onnxruntime_directml-1.20.1-cp311-cp311-win_amd64.whl", hash = "sha256:c7861057ad4caa64186c910efb3b54c1f575cd0e64732509c9bd927d2d20187b", size = 22762384, upload-time = "2024-11-21T00:49:44.01Z" },
+    { url = "https://files.pythonhosted.org/packages/8f/50/4599c6573bd71cc0c80820c63dea599a0b489ce874f93a5e021ca20a9e1f/onnxruntime_directml-1.20.1-cp312-cp312-win_amd64.whl", hash = "sha256:4b9a9f8349d68eef947fc692b3572e7a6490cb95effb151ace1a6ffc15884940", size = 22764330, upload-time = "2024-11-21T00:49:47.264Z" },
+    { url = "https://files.pythonhosted.org/packages/60/40/7d8489d9101b4aa7bae29227075ce31bc5764cbe87b78c995fdb296e3eff/onnxruntime_directml-1.20.1-cp313-cp313-win_amd64.whl", hash = "sha256:86a8c4b69e377bb18ed2a18aaf2337baa83a57ff87a97224d027e546dfa99fde", size = 22764517, upload-time = "2024-11-21T00:49:50.213Z" },
+]
+
+[[package]]
+name = "onnxruntime-directml"
+version = "1.24.3"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version == '3.10.*'",
+]
+dependencies = [
+    { name = "flatbuffers", marker = "python_full_version == '3.10.*'" },
+    { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" },
+    { name = "packaging", marker = "python_full_version == '3.10.*'" },
+    { name = "protobuf", marker = "python_full_version == '3.10.*'" },
+    { name = "sympy", marker = "python_full_version == '3.10.*'" },
+]
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ed/65/36ce5a5e79fb5d7b4d7636bc6e6c4024f3ff0571789e8eedb7149bb7c538/onnxruntime_directml-1.24.3-cp311-cp311-win_amd64.whl", hash = "sha256:442fecea5d52df315b6cecfbcbb44aff6681880b6bbf23546a6c00125fec66f1", size = 25106769, upload-time = "2026-03-05T16:27:07.495Z" },
+    { url = "https://files.pythonhosted.org/packages/05/40/c948c0ee42b7b6297dd45956092f5a53a6954610c3911a5847c7555b4930/onnxruntime_directml-1.24.3-cp312-cp312-win_amd64.whl", hash = "sha256:d889010e6ed2f30026522308173d295bcfdaf6f28d1df6054c748ffa750a7ad5", size = 25114531, upload-time = "2026-03-05T16:27:11.256Z" },
+    { url = "https://files.pythonhosted.org/packages/56/f0/9de329f39a66142aab4c1d9a48edc0e432de27c6ba09e8039e0dc51885e7/onnxruntime_directml-1.24.3-cp313-cp313-win_amd64.whl", hash = "sha256:f684adcb29dd48ee172b52fcf1d19a1da1a67a051384ac3418b36d200d0d105c", size = 25114902, upload-time = "2026-03-05T16:27:13.925Z" },
+    { url = "https://files.pythonhosted.org/packages/fe/7a/8b3014ca4065a32bd6672221bf4cb0b5b9a726d28a9caafdb86a076a5981/onnxruntime_directml-1.24.3-cp314-cp314-win_amd64.whl", hash = "sha256:42b17de7030445e75a7e83a4a317f9c655ed2dd7045fe79a7a21dce7b60103b6", size = 25570589, upload-time = "2026-03-05T16:27:17.278Z" },
+]
+
+[[package]]
+name = "onnxruntime-directml"
+version = "1.24.4"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version >= '3.14'",
+    "python_full_version == '3.13.*'",
+    "python_full_version >= '3.11' and python_full_version < '3.13'",
+]
+dependencies = [
+    { name = "flatbuffers", marker = "python_full_version >= '3.11'" },
+    { name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
+    { name = "packaging", marker = "python_full_version >= '3.11'" },
+    { name = "protobuf", marker = "python_full_version >= '3.11'" },
+    { name = "sympy", marker = "python_full_version >= '3.11'" },
+]
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/be/90/99566dc6398028e7691a5b12720fd85f757a0901818b84599d28abb3f085/onnxruntime_directml-1.24.4-cp311-cp311-win_amd64.whl", hash = "sha256:96642a787e5a6f33bf043521c0f06eb1eb663f6b830e5862a2026d03f9c90543", size = 25106000, upload-time = "2026-03-17T21:47:15.438Z" },
+    { url = "https://files.pythonhosted.org/packages/88/ea/33814eb0ec96775eda4c1d30b0d86e91d7d2cd0d84c66d3915aef0e06fa3/onnxruntime_directml-1.24.4-cp312-cp312-win_amd64.whl", hash = "sha256:f2ecb68b7b7b259d2ef3112ae760149f9b5a1e7c0fbb73d539da6250a648a614", size = 25111930, upload-time = "2026-03-17T21:47:18.419Z" },
+    { url = "https://files.pythonhosted.org/packages/60/53/2bd2696fac19cf8ca55496a0bcfe431f3aff9579eabbb0e231dc238acf6f/onnxruntime_directml-1.24.4-cp313-cp313-win_amd64.whl", hash = "sha256:2f1031cb2281e5b27cca9efe0b9399317c7286e4d226f7a79d4ab79bbd94d19e", size = 25112253, upload-time = "2026-03-17T21:47:22.043Z" },
+    { url = "https://files.pythonhosted.org/packages/b7/04/816932a3ade867a687e406716ca76e0774c6b921545b45818e3ebfcc54ce/onnxruntime_directml-1.24.4-cp314-cp314-win_amd64.whl", hash = "sha256:51d86bb949488e572b00422f344990a4a81d982416d73b6c0e4ced2bcd423d19", size = 25571098, upload-time = "2026-03-17T21:47:25.461Z" },
+]
+
+[[package]]
+name = "onnxruntime-gpu"
+version = "1.20.2"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version < '3.10'",
+]
+dependencies = [
+    { name = "coloredlogs", marker = "python_full_version < '3.10'" },
+    { name = "flatbuffers", marker = "python_full_version < '3.10'" },
+    { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
+    { name = "packaging", marker = "python_full_version < '3.10'" },
+    { name = "protobuf", marker = "python_full_version < '3.10'" },
+    { name = "sympy", marker = "python_full_version < '3.10'" },
+]
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/04/ad/4e5534dcaafe36f596792ebd0049177f7f0b7afa0f696505974ed1d6f72c/onnxruntime_gpu-1.20.2-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dfba508f110ec062dedfd3032e6eee8cde325026e9d7c5792884e8b9d4ebb9c3", size = 291522233, upload-time = "2025-03-07T05:46:08.901Z" },
+    { url = "https://files.pythonhosted.org/packages/a5/2a/8afc5aee996fd33fb816bc3067fdbde96a2a7520d4c275fa502f3aef7e54/onnxruntime_gpu-1.20.2-cp310-cp310-win_amd64.whl", hash = "sha256:75a7557292b2741e63fb73236ee84faa08075cead52d9a8d302a67036fc64f16", size = 279696089, upload-time = "2025-03-07T05:39:24.924Z" },
+    { url = "https://files.pythonhosted.org/packages/5e/53/9341b875b0ed29953485b43713e94b335a449c3770fed67dddb3c9b84af0/onnxruntime_gpu-1.20.2-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85057c7006457bee14fc2a57417b7e4f396f10d9c1b08b11aae08ac2b825eeda", size = 291518407, upload-time = "2025-03-07T05:46:22.943Z" },
+    { url = "https://files.pythonhosted.org/packages/0b/7a/0999993ceae7bf191d5d63a4e1b2208596763d8e586aa7dc5cc091f960c0/onnxruntime_gpu-1.20.2-cp311-cp311-win_amd64.whl", hash = "sha256:d0eafd873e4336949c89e6c7429a68e7e1d0233d9cb363e9780ca76c3c6f865c", size = 279697437, upload-time = "2025-03-07T05:39:38.418Z" },
+    { url = "https://files.pythonhosted.org/packages/5b/db/c1fcdf45cad147d3b3609cf66a1c6083b54382f58a41d7fc526cd5909090/onnxruntime_gpu-1.20.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:aa66d2e6de13fe6f4d1554b1c219bd2e4778b540ed9d3dc62957c95a8af43d66", size = 291510804, upload-time = "2025-03-07T05:46:36.178Z" },
+    { url = "https://files.pythonhosted.org/packages/27/67/4f979650557738a8b148dd7e0b82522d20ffcfb2c3964141c861a61e82c7/onnxruntime_gpu-1.20.2-cp312-cp312-win_amd64.whl", hash = "sha256:564a6a1187b208012f57c3bb3723ba65f6bc5cddff6e6b917ac96865768b39f5", size = 279699596, upload-time = "2025-03-07T05:39:50.858Z" },
+    { url = "https://files.pythonhosted.org/packages/48/a4/60f0cf16b24f05d123f90525408a705741fa92e0c38ab122cdf1d239e3fe/onnxruntime_gpu-1.20.2-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6af5b30b9b0e729d3ca1dfff493a39771f143cfc22af1d77d487022033cae284", size = 291511859, upload-time = "2025-03-07T05:46:49.302Z" },
+    { url = "https://files.pythonhosted.org/packages/ab/a2/0eb7a3fa417adc7af0be73b0ea35f1f0d6f92e3722eb6468e36dfe2e762d/onnxruntime_gpu-1.20.2-cp313-cp313-win_amd64.whl", hash = "sha256:6ffe5108d2dbd96a9a40bf76573219e04b67d0330aa93ca5114f1478185ade19", size = 279697061, upload-time = "2025-03-07T05:40:03.559Z" },
+    { url = "https://files.pythonhosted.org/packages/4e/de/6c692ac8604a451011a2a01e35e94f84bea8775ef97f6830985bbe8de172/onnxruntime_gpu-1.20.2-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:407e5b7a21d656aac6f994d2e329f5577eb3d7f98b63aa1e49e71a702ffa1da1", size = 291502464, upload-time = "2025-03-07T05:47:03.191Z" },
+]
+
+[[package]]
+name = "onnxruntime-gpu"
+version = "1.24.3"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version == '3.10.*'",
+]
+dependencies = [
+    { name = "flatbuffers", marker = "python_full_version == '3.10.*'" },
+    { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" },
+    { name = "packaging", marker = "python_full_version == '3.10.*'" },
+    { name = "protobuf", marker = "python_full_version == '3.10.*'" },
+    { name = "sympy", marker = "python_full_version == '3.10.*'" },
+]
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/28/f4/c8050f3f4916ab6c75432724f0ba51c1548dc1c3d66d40c0f8a9611e370f/onnxruntime_gpu-1.24.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ac922633819e1cdc81c9b3a28b5e37d788805307bbaa708a01a3d7150e345625", size = 252750845, upload-time = "2026-03-05T16:35:33.604Z" },
+    { url = "https://files.pythonhosted.org/packages/07/b7/81e8936354651915192a362a1718253c6d03da6b902a95237aa392b1d260/onnxruntime_gpu-1.24.3-cp311-cp311-win_amd64.whl", hash = "sha256:0fe6ece3042db149f36f4991cbebd19a690b7ffd82af89450a261b47f4704a37", size = 207192429, upload-time = "2026-03-05T16:39:57.015Z" },
+    { url = "https://files.pythonhosted.org/packages/24/fa/58ceca812214c9c1a286407c376e42e0b7de3e2c6e14b61cdf3caf6d6d9c/onnxruntime_gpu-1.24.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:537bdd6d95006a9200ae81f2e73ba9e621e723fdf0deb5901e2e62fb2cccf876", size = 252756089, upload-time = "2026-03-05T16:35:46.004Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/07/2f36920b513bd8939e25591153e37d9cfda94115bd119f2874da0750fce2/onnxruntime_gpu-1.24.3-cp312-cp312-win_amd64.whl", hash = "sha256:d72065b3ab5fdaef74d8b6b8f39b7ce20d89731610e3e63cb40e997d3dce177e", size = 207197001, upload-time = "2026-03-05T16:40:05.691Z" },
+    { url = "https://files.pythonhosted.org/packages/49/57/9e6206dac76e08f028d2ae95f2ab1b3a7c3317fb6c0374a530aad48dab5c/onnxruntime_gpu-1.24.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3242a70010934e5bb0aeaa9dde4c25c6c2da577b55c6308c0caa828ba3b7be23", size = 252753349, upload-time = "2026-03-05T16:35:58.09Z" },
+    { url = "https://files.pythonhosted.org/packages/4e/ae/f0be395602c13a3a8d22fa6632133550a64536c58bc3623abbba5d0a575e/onnxruntime_gpu-1.24.3-cp313-cp313-win_amd64.whl", hash = "sha256:a423b164dbc26cb7f8736367b11698c2a7294748d3c144c39542ecac28d225c9", size = 207197331, upload-time = "2026-03-05T16:40:14.944Z" },
+    { url = "https://files.pythonhosted.org/packages/b4/af/a64c9789769d8d7fabc6d35dcce2f2897b2d9e0fe113044efc2903f7cd07/onnxruntime_gpu-1.24.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9696d54974a1313ef0d87f4cbd04f9abfd13839194638d52bb5967a15615341d", size = 252762923, upload-time = "2026-03-05T16:36:10.043Z" },
+    { url = "https://files.pythonhosted.org/packages/c1/bb/1cf7dffac2fb01e8de9f0882438165f7543f0aab57f86d1f587e6faa8528/onnxruntime_gpu-1.24.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b8ca744f40b33380bc9136988213e574c927d2b919ed42149977e006b138f74f", size = 252754914, upload-time = "2026-03-05T16:36:30.739Z" },
+    { url = "https://files.pythonhosted.org/packages/cf/39/3949d56103bd9cd9381de59b060f9bce8dc2c7363f465bf207ebd0c7a5d0/onnxruntime_gpu-1.24.3-cp314-cp314-win_amd64.whl", hash = "sha256:c60c44e2b388720e6670a948b52626f3d089e960ef7da66e4fa6b2b33a11116f", size = 209599131, upload-time = "2026-03-05T16:40:24.074Z" },
+    { url = "https://files.pythonhosted.org/packages/f3/60/51bfbcf2d0540dbfa426a73a9b80046b71a63de7303d16c0f2682c8edfd2/onnxruntime_gpu-1.24.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:29048407a2398361d93de5537c2d2079d79d720337a0743d4a2cc28db981e776", size = 252764115, upload-time = "2026-03-05T16:36:44.681Z" },
+]
+
+[[package]]
+name = "onnxruntime-gpu"
+version = "1.25.0"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version >= '3.14'",
+    "python_full_version == '3.13.*'",
+    "python_full_version >= '3.11' and python_full_version < '3.13'",
+]
+dependencies = [
+    { name = "flatbuffers", marker = "python_full_version >= '3.11'" },
+    { name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
+    { name = "packaging", marker = "python_full_version >= '3.11'" },
+    { name = "protobuf", marker = "python_full_version >= '3.11'" },
+]
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/2d/7e/f58f8fc505a876b31fd2a34c1eb8f9863b75bf1589c3297c8efd48b93151/onnxruntime_gpu-1.25.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e8625bb31ee2d88524414e7458cc604f4f958f323ef8832cc00882f6cd42b9a1", size = 270337732, upload-time = "2026-04-22T17:27:59.993Z" },
+    { url = "https://files.pythonhosted.org/packages/55/5d/2561b3aa667d87a4ae9cd01c5a565955aab5a3d44a6076f723beb9cdde0a/onnxruntime_gpu-1.25.0-cp311-cp311-win_amd64.whl", hash = "sha256:2e702159a025aa5c69f0b747adf9a451e0c9e4b20120163a918c8459d3171b87", size = 220845585, upload-time = "2026-04-22T17:20:38.939Z" },
+    { url = "https://files.pythonhosted.org/packages/1d/6d/2c13d3eff74caa9e59820a044a75becd34e9cbeeaf7617ad7679cdb1fdb7/onnxruntime_gpu-1.25.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2f0c36c63c8b0eb4091f2567067f480f66f0aedc189eb009545c98ce7e919056", size = 270342429, upload-time = "2026-04-22T17:28:10.526Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/2e/9fc303ae59d4caeb85ec3cea6881b7de8ca1d2a07140fade39913cd7ff10/onnxruntime_gpu-1.25.0-cp312-cp312-win_amd64.whl", hash = "sha256:61178cc4d84f59861714554531e01cccbd33ddf13cc0e87a3adea13b24d297ce", size = 220847708, upload-time = "2026-04-22T17:20:47.993Z" },
+    { url = "https://files.pythonhosted.org/packages/f5/15/e63fe7b1abad6884bed07e9bb333e9f0ea48fbb8cbc1ea4a67ee6019d5d0/onnxruntime_gpu-1.25.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e462eb13ee9955117baec4f518916c1e7cb1a96001114105632bc6d454c6aee6", size = 270342324, upload-time = "2026-04-22T17:28:21.142Z" },
+    { url = "https://files.pythonhosted.org/packages/21/10/b3533243d062b589d4b1f3ae26584af332c5cde618e7f6f5ff6fabbfd5f2/onnxruntime_gpu-1.25.0-cp313-cp313-win_amd64.whl", hash = "sha256:9a3682158e5e911385252eb95d6332b6f525972746c582e10f8a78213b39e624", size = 220848188, upload-time = "2026-04-22T17:20:56.946Z" },
+    { url = "https://files.pythonhosted.org/packages/35/6c/d7706dd1d0eaafdba44d5c89f8d952de41e425a1b0cbd3ecfa60f918c249/onnxruntime_gpu-1.25.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8514b92c5929c953850090d823d018770cba2a971efab5f8f69a3c4280cdc632", size = 270364210, upload-time = "2026-04-22T17:28:33.568Z" },
+    { url = "https://files.pythonhosted.org/packages/37/01/9f1b16ea857e3a4b5e82a2d70b52ea46a0083569f737d840f74a1b86818f/onnxruntime_gpu-1.25.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ffe9df4016b061ec3a5565a4fc08cdb86808cd8b9c255c42301066c0c24a81b5", size = 270345126, upload-time = "2026-04-22T17:28:44.416Z" },
+    { url = "https://files.pythonhosted.org/packages/56/c8/aae22f3c9cea9160d8d969734a1927720fcb4d4ad4abe269c407c1d2b63c/onnxruntime_gpu-1.25.0-cp314-cp314-win_amd64.whl", hash = "sha256:2173b71631208177fe704ce2d92eac3acbf758285327247ea40a31a9f0bcc073", size = 223385369, upload-time = "2026-04-22T17:21:06.026Z" },
+    { url = "https://files.pythonhosted.org/packages/ed/0a/79fba6a1a32803a2bf8b99187e0ea5d5d69ffe0c5c0f469bde232ceb8327/onnxruntime_gpu-1.25.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8576c721c600cc669717a2ae49af30fdfff230480099653adc7b79d58a240852", size = 270364130, upload-time = "2026-04-22T17:28:54.708Z" },
+]
+
 [[package]]
 name = "opentelemetry-api"
 version = "1.40.0"