phase 0

2026-06-17 16:03:26 -05:00
parent fa1e9b68c7
commit c1abe36822
99 changed files with 1562887 additions and 0 deletions
@@ -0,0 +1 @@
+# step-processor modules package
@@ -0,0 +1,275 @@
+"""
+bom.py — BOM extraction from STEP assembly tree.
+
+Primary: build123d assembly traversal.
+Fallback: STEP ISO 10303-21 text parser for PRODUCT entities.
+Always produces a complete DataFrame; saved as MPM-branded Excel (.xlsx).
+"""
+import logging
+import math
+import re
+from collections import Counter
+from pathlib import Path
+
+import pandas as pd
+
+from .loader import StepModel
+
+logger = logging.getLogger("step_processor.bom")
+
+# Internal BOM schema: extract_bom() builds its DataFrame with exactly these
+# columns, in this order.
+BOM_COLUMNS = [
+    "part_number", "part_name_original", "part_name_english",
+    "quantity", "level", "parent",
+    "bbox_x_mm", "bbox_y_mm", "bbox_z_mm", "notes"
+]
+
+# ── Excel output — MPM brand palette (hex, no #) ─────────────────────────────
+_MPM_DARK_SHADE     = "232022"   # header background + body text
+_MPM_LIGHT_SHADE    = "F5F1EC"   # header text
+_MPM_WARM_OFF_WHITE = "FAF7F2"   # alternating row tint
+_MPM_MIDDLE_GOLD    = "DCBB4F"   # accent border under header row
+
+# Column rename + reorder for stakeholder-facing Excel output.
+# Internal processing always uses BOM_COLUMNS names.
+_XLSX_RENAME = {
+    "part_name_english":  "part_description",
+    "part_name_original": "part_name_supplier",
+}
+# Left-to-right column order of the worksheet (uses the renamed column keys).
+_XLSX_ORDER = [
+    "part_number", "part_description", "quantity", "level", "parent",
+    "bbox_x_mm", "bbox_y_mm", "bbox_z_mm", "notes", "part_name_supplier",
+]
+# Header-row caption per column key; save_bom_xlsx falls back to the key itself
+# when a column has no entry here.
+_XLSX_HEADERS = {
+    "part_number":        "Part #",
+    "part_description":   "Part Description",
+    "quantity":           "Qty",
+    "level":              "Level",
+    "parent":             "Parent",
+    "bbox_x_mm":          "X (mm)",
+    "bbox_y_mm":          "Y (mm)",
+    "bbox_z_mm":          "Z (mm)",
+    "notes":              "Notes",
+    "part_name_supplier": "Supplier Part Name",
+}
+# Worksheet column widths per column key; save_bom_xlsx uses 15 for any key
+# missing from this table.
+_XLSX_WIDTHS = {
+    "part_number": 12, "part_description": 40, "quantity": 8,
+    "level": 7, "parent": 22, "bbox_x_mm": 11, "bbox_y_mm": 11,
+    "bbox_z_mm": 11, "notes": 34, "part_name_supplier": 40,
+}
+
+
+def _safe(v):
+    """Map a missing value (None or a float NaN) to None; pass anything else through.
+
+    Used when writing worksheet cells so that missing values become None
+    instead of NaN.
+    """
+    is_nan = isinstance(v, float) and math.isnan(v)
+    return None if (v is None or is_nan) else v
+
+
+def extract_bom(model: StepModel) -> pd.DataFrame:
+    """Build the BOM table for a loaded model, using BOM_COLUMNS as its schema.
+
+    Sources are tried in this order:
+
+    1. STEP text parser (_bom_from_step_text) — reads the file itself with
+       encoding detection, so CJK part names come through intact. It is the
+       primary source for part_name_original.
+    2. OCC assembly walk (_bom_from_parts) — used only when the text parser
+       yields nothing and the model was loaded through build123d. OCC's STEP
+       reader mis-decodes GBK labels, so CJK names from this path may be
+       garbled even though the structure is usable.
+    3. A single synthetic row named after the file stem when neither source
+       produced any part.
+
+    For build123d-backed models the frame is then passed through
+    _enrich_bboxes to fill in bounding-box dimensions (best effort).
+    """
+    # Preferred source: encoding-aware parse of the STEP text.
+    records = _bom_from_step_text(model.path)
+
+    via_build123d = model.backend == "build123d"
+    if not records and via_build123d and model.parts:
+        # Structure is intact on this path, but CJK names may be garbled.
+        logger.debug("STEP text parser empty — falling back to OCC assembly walk")
+        records = _bom_from_parts(model.parts)
+
+    if not records:
+        logger.info("No assembly structure — treating as single part")
+        file_stem = model.path.stem
+        records = [{
+            "part_number": "001",
+            "part_name_original": file_stem,
+            "part_name_english": file_stem,
+            "quantity": 1,
+            "level": 0,
+            "parent": "",
+            "bbox_x_mm": None,
+            "bbox_y_mm": None,
+            "bbox_z_mm": None,
+            "notes": "single-body file",
+        }]
+
+    bom = pd.DataFrame(records, columns=BOM_COLUMNS)
+    if via_build123d:
+        bom = _enrich_bboxes(model, bom)
+    logger.info(f"BOM extracted: {len(bom)} parts")
+    return bom
+
+
+def _bom_from_parts(parts: list) -> list:
+    """Collapse a flat part list into one BOM row per distinct part name.
+
+    Each entry in *parts* is a dict with a "name" key and optional "level" and
+    "parent" keys. The row for a name takes level/parent from the first entry
+    carrying that name; quantity is the number of entries sharing the name.
+    Rows are numbered "001", "002", … in first-appearance order.
+    """
+    occurrences = Counter(entry["name"] for entry in parts)
+
+    # dicts keep insertion order, so this records the first entry per name
+    # in the order names first appear.
+    first_entry = {}
+    for entry in parts:
+        first_entry.setdefault(entry["name"], entry)
+
+    return [
+        {
+            "part_number": f"{position:03d}",
+            "part_name_original": label,
+            "part_name_english": label,
+            "quantity": occurrences[label],
+            "level": entry.get("level", 0),
+            "parent": entry.get("parent", ""),
+            "bbox_x_mm": None,
+            "bbox_y_mm": None,
+            "bbox_z_mm": None,
+            "notes": "",
+        }
+        for position, (label, entry) in enumerate(first_entry.items(), 1)
+    ]
+
+
+def _read_step_text(step_path: Path) -> str:
+    """Read a STEP file as text, guessing between UTF-8, GBK and latin-1.
+
+    STEP files from Chinese manufacturers embed raw GBK bytes in name strings.
+    The file is read from disk once and the bytes are decoded in this order:
+
+    1. UTF-8 (strict). Accepted unless the decoded text contains U+FFFD.
+       A strict decode never *produces* U+FFFD, so that check only fires when
+       the file literally contains the replacement character.
+    2. GBK (strict) — covers Chinese CAD exports.
+    3. latin-1 — cannot fail; non-latin names come out as mojibake but the
+       file is still parseable.
+
+    Line endings are normalised to "\\n", matching what text-mode reading
+    (universal newlines) returns.
+
+    Raises:
+        OSError: if the file cannot be read.
+    """
+    raw = step_path.read_bytes()  # one disk read shared by all decode attempts
+
+    text = None
+    for enc in ("utf-8", "gbk"):
+        try:
+            candidate = raw.decode(enc)
+        except UnicodeDecodeError:
+            continue
+        if enc == "utf-8" and "\ufffd" in candidate:
+            # File already carries replacement characters; try GBK instead.
+            # NOTE(review): decoding genuine UTF-8 as GBK can yield worse
+            # mojibake — kept for parity with the previous behaviour.
+            continue
+        text = candidate
+        break
+
+    if text is None:
+        text = raw.decode("latin-1")  # every byte maps to a code point
+
+    # Mirror the newline translation Path.read_text() performs.
+    return text.replace("\r\n", "\n").replace("\r", "\n")
+
+
+def _bom_from_step_text(step_path: Path) -> list:
+    """Parse PRODUCT entities straight from the STEP (ISO 10303-21) text.
+
+    The first string argument of each ``#n = PRODUCT('…', …)`` entity is taken
+    as the part name. Names are de-duplicated in first-appearance order; the
+    quantity of a row is the number of PRODUCT entities sharing that name.
+    Empty names and the literal "NONE" (any case) are ignored.
+
+    Part 21 strings escape an apostrophe by doubling it (``''``); the pattern
+    accepts that escape and the name is un-escaped, so names containing an
+    apostrophe are no longer cut off at it.
+
+    Returns:
+        A list of row dicts keyed like BOM_COLUMNS, or [] if the file cannot
+        be read or contains no usable PRODUCT entity.
+    """
+    try:
+        text = _read_step_text(step_path)
+    except Exception as e:
+        logger.warning(f"Could not read STEP text: {e}")
+        return []
+
+    # `PRODUCT\s*\(` cannot match PRODUCT_DEFINITION(...) and friends because
+    # the underscore is neither whitespace nor "(".
+    pattern = re.compile(
+        r"#\d+\s*=\s*PRODUCT\s*\(\s*'((?:[^']|'')*)'", re.IGNORECASE
+    )
+    seen = {}
+    for match in pattern.finditer(text):
+        name = match.group(1).replace("''", "'").strip()
+        if not name or name.upper() == "NONE":
+            continue
+        existing = seen.get(name)
+        if existing is not None:
+            existing["quantity"] += 1
+            continue
+        seen[name] = {
+            "part_number": f"{len(seen)+1:03d}",
+            "part_name_original": name, "part_name_english": name,
+            "quantity": 1, "level": 0, "parent": "",
+            "bbox_x_mm": None, "bbox_y_mm": None, "bbox_z_mm": None,
+            "notes": "parsed from STEP text",
+        }
+    rows = list(seen.values())
+    if rows:
+        logger.info(f"STEP text parser found {len(rows)} unique part names")
+    return rows
+
+
+def _enrich_bboxes(model: StepModel, df: pd.DataFrame) -> pd.DataFrame:
+    """Fill bbox_x_mm / bbox_y_mm / bbox_z_mm from build123d geometry. Best-effort.
+
+    * One-row BOM: the row gets the bounding box of the whole shape.
+    * Otherwise: the i-th direct child of ``model.shape`` is written to row i,
+      stopping at whichever runs out first.
+
+    The frame is modified in place and also returned. No exception escapes:
+    failures are logged at debug level and the affected cells keep their
+    previous value.
+
+    NOTE(review): rows and children are paired purely by position. Rows that
+    came from the STEP text parser are ordered by first PRODUCT appearance,
+    which is not shown to match the child order — confirm before relying on
+    per-part dimensions.
+    """
+    def _write_dims(row_idx, box):
+        # Store the box extents rounded to 0.01 mm.
+        size = box.size
+        df.at[row_idx, "bbox_x_mm"] = round(size.X, 2)
+        df.at[row_idx, "bbox_y_mm"] = round(size.Y, 2)
+        df.at[row_idx, "bbox_z_mm"] = round(size.Z, 2)
+
+    try:
+        bb = model.shape.bounding_box()
+        if len(df) == 1:
+            _write_dims(0, bb)
+        else:
+            children = getattr(model.shape, "children", []) or []
+            for i, child in enumerate(children):
+                if i >= len(df):
+                    break
+                try:
+                    _write_dims(i, child.bounding_box())
+                except Exception as e:
+                    # Keep going: one bad child must not block the others.
+                    logger.debug(f"bbox for child {i} skipped: {e}")
+    except Exception as e:
+        logger.debug(f"bbox enrichment skipped: {e}")
+    return df
+
+
+def save_bom_csv(df: pd.DataFrame, step_path: Path) -> Path:
+    """Write the BOM as ``<stem>_bom.csv`` beside the STEP file (legacy fallback).
+
+    The DataFrame index is not written. Returns the path of the CSV file.
+    """
+    target = step_path.parent / f"{step_path.stem}_bom.csv"
+    df.to_csv(target, index=False)
+    logger.info(f"BOM CSV → {target.name}")
+    return target
+
+
+def save_bom_xlsx(df: pd.DataFrame, step_path: Path) -> Path:
+    """Write the BOM as an MPM-branded workbook ``<stem>_bom.xlsx`` beside the STEP file.
+
+    Differences from the internal schema (BOM_COLUMNS):
+      part_name_english  → "Part Description"   (second column)
+      part_name_original → "Supplier Part Name" (last column)
+    Columns listed in _XLSX_ORDER but absent from *df* are written as blanks.
+
+    If openpyxl cannot be imported, the BOM is written as CSV instead and the
+    CSV path is returned.
+    """
+    out_path = step_path.parent / f"{step_path.stem}_bom.xlsx"
+    try:
+        from openpyxl import Workbook
+        from openpyxl.styles import Alignment, Border, Font, PatternFill, Side
+        from openpyxl.utils import get_column_letter
+    except ImportError:
+        logger.warning("openpyxl not installed — falling back to CSV")
+        return save_bom_csv(df, step_path)
+
+    # Display frame: renamed columns, padded with blanks, in sheet order.
+    disp = df.rename(columns=_XLSX_RENAME).copy()
+    absent = [col for col in _XLSX_ORDER if col not in disp.columns]
+    for col in absent:
+        disp[col] = None
+    disp = disp[_XLSX_ORDER]
+
+    wb = Workbook()
+    ws = wb.active
+    ws.title = "Bill of Materials"
+
+    # Header row: dark band, light bold text, gold rule underneath.
+    header_style = {
+        "font": Font(name="Montserrat", bold=True, color=_MPM_LIGHT_SHADE, size=10),
+        "fill": PatternFill("solid", fgColor=_MPM_DARK_SHADE),
+        "alignment": Alignment(horizontal="center", vertical="center", wrap_text=True),
+        "border": Border(bottom=Side(style="medium", color=_MPM_MIDDLE_GOLD)),
+    }
+    for col_idx, key in enumerate(_XLSX_ORDER, 1):
+        head = ws.cell(row=1, column=col_idx, value=_XLSX_HEADERS.get(key, key))
+        for attr, style in header_style.items():
+            setattr(head, attr, style)
+        ws.column_dimensions[get_column_letter(col_idx)].width = _XLSX_WIDTHS.get(key, 15)
+    ws.row_dimensions[1].height = 28
+
+    # Body rows: even sheet rows get the warm tint, odd rows stay white.
+    body_font = Font(name="Open Sans", size=10, color=_MPM_DARK_SHADE)
+    body_align = Alignment(horizontal="left", vertical="center")
+    tinted = PatternFill("solid", fgColor=_MPM_WARM_OFF_WHITE)
+    plain = PatternFill("solid", fgColor="FFFFFF")
+    for row_idx, (_, record) in enumerate(disp.iterrows(), 2):
+        row_fill = tinted if row_idx % 2 == 0 else plain
+        for col_idx, key in enumerate(_XLSX_ORDER, 1):
+            body = ws.cell(row=row_idx, column=col_idx, value=_safe(record[key]))
+            body.font = body_font
+            body.fill = row_fill
+            body.alignment = body_align
+
+    ws.freeze_panes = "A2"  # keep the header visible while scrolling
+    wb.save(str(out_path))
+    logger.info(f"BOM XLSX → {out_path.name}")
+    return out_path
@@ -0,0 +1,187 @@
+"""
+loader.py — STEP file loading with build123d primary and FreeCAD fallback.
+
+Returns a StepModel dataclass used by all other modules.
+FreeCAD fallback invokes the signed app bundle Python to avoid
+Gatekeeper issues on macOS 15 Sequoia.
+"""
+
+import json
+import logging
+import subprocess
+import tempfile
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Optional
+
+logger = logging.getLogger("step_processor.loader")
+
+# Absolute paths inside the macOS FreeCAD.app bundle, used by the FreeCAD fallback.
+# FREECAD_PYTHON runs the generated loader script; FREECAD_LIB is prepended to
+# that script's sys.path so `import FreeCAD, Part` resolves.
+# NOTE(review): FREECAD_CMD is not referenced in the visible part of this module.
+FREECAD_PYTHON = "/Applications/FreeCAD.app/Contents/Resources/bin/python"
+FREECAD_LIB    = "/Applications/FreeCAD.app/Contents/Resources/lib"
+FREECAD_CMD    = "/Applications/FreeCAD.app/Contents/Resources/bin/freecadcmd"
+
+
+@dataclass
+class StepModel:
+    """Unified model object returned by load_step(). Used by all modules."""
+    # Geometry handle: the object returned by build123d's import_step(), or a
+    # _FreeCADShapeProxy when the FreeCAD fallback was used.
+    shape: Any
+    backend: str            # "build123d" | "freecad"
+    # Resolved absolute path of the STEP file that was loaded.
+    path: Path
+    # Flat list of {"name", "level", "parent"} dicts describing the assembly.
+    parts: list = field(default_factory=list)
+    # Number of faces reported by the backend (0 if it could not be counted).
+    face_count: int = 0
+    # Backend-specific extras; the FreeCAD loader stores {"bbox": {...}} here.
+    metadata: dict = field(default_factory=dict)
+
+
+def load_step(filepath) -> Optional["StepModel"]:
+    """Load a STEP file, preferring build123d and falling back to FreeCAD.
+
+    *filepath* is expanded (``~``) and resolved to an absolute path first.
+
+    Returns:
+        A StepModel, or None when the file does not exist or the FreeCAD
+        fallback could not load it either.
+    """
+    step_path = Path(filepath).expanduser().resolve()
+    if not step_path.exists():
+        logger.error(f"File not found: {step_path}")
+        return None
+
+    try:
+        return _load_via_build123d(step_path)
+    except Exception as exc:
+        # Any build123d problem — missing package or a failed import of the
+        # file — routes to FreeCAD; only the warning text differs.
+        if isinstance(exc, ImportError):
+            logger.warning("build123d not available — falling back to FreeCAD")
+        else:
+            logger.warning(f"build123d failed ({type(exc).__name__}: {exc}) — falling back to FreeCAD")
+        return _load_via_freecad(step_path)
+
+
+def _load_via_build123d(step_path: Path) -> "StepModel":
+    """Import the STEP file with build123d and wrap the result in a StepModel.
+
+    Raises whatever build123d raises (including ImportError when the package
+    is missing); the caller decides on the fallback. A failure while counting
+    faces is tolerated and reported as a face count of 0.
+    """
+    from build123d import import_step
+
+    logger.info(f"[build123d] Loading: {step_path.name}")
+    shape = import_step(str(step_path))
+
+    try:
+        n_faces = sum(1 for _ in shape.faces())
+    except Exception:
+        n_faces = 0
+
+    part_list = _extract_parts_build123d(shape)
+    logger.info(f"[build123d] Loaded: {step_path.name} | {n_faces} faces | {len(part_list)} parts")
+    return StepModel(
+        shape=shape,
+        backend="build123d",
+        path=step_path,
+        parts=part_list,
+        face_count=n_faces,
+    )
+
+
+def _fix_gbk_mojibake(s: str) -> str:
+    """
+    Undo GBK-read-as-latin-1 mojibake in a part label.
+
+    Chinese CAD tools embed raw GBK bytes in STEP PRODUCT names. When those
+    bytes are interpreted as latin-1, every byte becomes one code point below
+    256. Encoding the string back to latin-1 restores the original bytes,
+    which are then decoded as GBK.
+
+    The input is returned unchanged when it is empty, pure ASCII, contains a
+    character outside latin-1 (so it was not produced by that mis-decoding),
+    or its bytes are not valid GBK.
+    """
+    if not s or s.isascii():
+        return s  # nothing that could be mojibake
+    try:
+        original_bytes = s.encode("latin-1")
+        return original_bytes.decode("gbk")
+    except (UnicodeEncodeError, UnicodeDecodeError):
+        return s  # not GBK mojibake — keep as-is
+
+
+def _extract_parts_build123d(shape) -> list:
+    """Walk the build123d shape tree and list every named node once.
+
+    Returns a flat, depth-first list of dicts with keys:
+      "name"   – the node's ``label`` (or ``name``), passed through
+                 _fix_gbk_mojibake; children without either get "Part_<level>"
+      "level"  – depth of the node, 0 for direct children of *shape*
+      "parent" – name of the enclosing node ("" at the top level)
+
+    If *shape* itself has no children, it is reported as a single level-0
+    part, provided it carries a label/name.
+
+    Each child is recorded exactly once, by its parent. Previously a labelled
+    leaf was recorded a second time by the recursive call (one level deeper,
+    with itself as parent), which doubled quantities downstream.
+    """
+    parts = []
+
+    def _children_of(node):
+        # Child nodes of *node*, or an empty sequence if unavailable.
+        try:
+            return (node.children if hasattr(node, "children") else []) or []
+        except Exception:
+            return []
+
+    def _walk(node, level, parent_name):
+        # Record the children of *node*, then descend into each of them.
+        for child in _children_of(node):
+            raw = (getattr(child, "label", "") or
+                   getattr(child, "name", "") or f"Part_{level}")
+            name = _fix_gbk_mojibake(raw)
+            parts.append({"name": name, "level": level, "parent": parent_name})
+            _walk(child, level + 1, name)
+
+    if _children_of(shape):
+        _walk(shape, 0, "")
+    else:
+        # Single-body file: the root is the only candidate part.
+        raw = (getattr(shape, "label", "") or
+               getattr(shape, "name", "") or "")
+        if raw:
+            parts.append({"name": _fix_gbk_mojibake(raw), "level": 0, "parent": ""})
+    return parts
+
+
+def _load_via_freecad(step_path: Path) -> Optional["StepModel"]:
+    """Load the STEP file by running a small script under FreeCAD's bundled Python.
+
+    A temporary script is generated that reads the file with ``Part.read`` and
+    prints one JSON line containing the face count, a synthetic part list
+    ("Part_0", "Part_1", … one per sub-shape) and the bounding box. That line
+    is parsed and turned into a StepModel whose ``shape`` is a
+    _FreeCADShapeProxy.
+
+    The script is written as UTF-8 explicitly: it embeds ``repr(path)``, which
+    can contain non-ASCII characters, and Python reads source files as UTF-8
+    by default regardless of the locale the parent process runs under.
+
+    Returns:
+        A StepModel, or None if FreeCAD is not installed at FREECAD_PYTHON,
+        the subprocess times out (120 s), prints no JSON, or reports failure.
+        The temporary script is removed in every case.
+    """
+    if not Path(FREECAD_PYTHON).exists():
+        logger.error(f"FreeCAD Python not found at {FREECAD_PYTHON}. Install FreeCAD.app.")
+        return None
+    logger.info(f"[FreeCAD] Loading: {step_path.name}")
+    # repr() yields a valid Python literal, so paths are embedded safely.
+    script = f"""
+import sys, json
+sys.path.insert(0, {repr(FREECAD_LIB)})
+import FreeCAD, Part
+try:
+    shape = Part.read({repr(str(step_path))})
+    bb = shape.BoundBox
+    sub = shape.SubShapes if hasattr(shape, 'SubShapes') else []
+    parts = [{{"name": f"Part_{{i}}", "level": 1, "parent": "root"}}
+             for i in range(len(sub))]
+    print(json.dumps({{"ok": True, "face_count": len(shape.Faces),
+        "parts": parts,
+        "bbox": {{"XMin": bb.XMin, "XMax": bb.XMax,
+                 "YMin": bb.YMin, "YMax": bb.YMax,
+                 "ZMin": bb.ZMin, "ZMax": bb.ZMax}}}}))
+except Exception as e:
+    print(json.dumps({{"ok": False, "error": str(e)}}))
+"""
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False,
+                                     encoding="utf-8") as f:
+        f.write(script)
+        script_path = f.name
+
+    try:
+        proc = subprocess.run([FREECAD_PYTHON, script_path],
+                              capture_output=True, text=True, timeout=120)
+        # FreeCAD may print banner lines; the payload is the first line that
+        # looks like a JSON object.
+        json_line = next((line.strip() for line in proc.stdout.splitlines()
+                          if line.strip().startswith("{")), None)
+        if not json_line:
+            logger.error(f"[FreeCAD] No JSON output. stderr: {proc.stderr[:300]}")
+            return None
+        data = json.loads(json_line)
+        if not data.get("ok"):
+            logger.error(f"[FreeCAD] Load failed: {data.get('error')}")
+            return None
+        proxy = _FreeCADShapeProxy(data["bbox"], data["face_count"])
+        logger.info(f"[FreeCAD] Loaded: {step_path.name} | {data['face_count']} faces")
+        return StepModel(shape=proxy, backend="freecad", path=step_path,
+                         parts=data.get("parts", []),
+                         face_count=data["face_count"],
+                         metadata={"bbox": data["bbox"]})
+    except subprocess.TimeoutExpired:
+        logger.error("[FreeCAD] Load timed out after 120s")
+        return None
+    except Exception as e:
+        logger.error(f"[FreeCAD] Unexpected error: {e}")
+        return None
+    finally:
+        Path(script_path).unlink(missing_ok=True)
+
+
+class _FreeCADShapeProxy:
+    """Stand-in for a shape, built from data the FreeCAD subprocess reported.
+
+    Exposes ``BoundBox`` (a _BoundBox), ``face_count``, a ``Faces`` list of
+    that length filled with None, and a ``faces()`` generator yielding one
+    placeholder object per face. No geometry is available.
+    """
+    def __init__(self, bbox_dict: dict, face_count: int):
+        self.BoundBox = _BoundBox(bbox_dict)
+        self.face_count = face_count
+        self.Faces = [None for _ in range(face_count)]
+
+    def faces(self):
+        """Yield ``face_count`` opaque placeholders (only useful for counting)."""
+        remaining = self.face_count
+        while remaining > 0:
+            yield object()
+            remaining -= 1
+
+
+class _BoundBox:
+    """Axis-aligned bounds holder with XMin … ZMax attributes.
+
+    Values are copied from the given dict; any missing key defaults to 0.
+    """
+    def __init__(self, d: dict):
+        for key in ("XMin", "XMax", "YMin", "YMax", "ZMin", "ZMax"):
+            setattr(self, key, d.get(key, 0))
@@ -0,0 +1,335 @@
+"""
+query_engine.py — Natural language geometric query handler.
+
+Supports both single-query (--query "...") and interactive REPL (--repl).
+REPL keeps the model in memory between queries for speed.
+All output is formatted ASCII tables.
+
+Supported query types (see SKILL.md for full reference):
+  bounding box          overall model extents
+  face count            total faces by type
+  all parts             full assembly listing
+  list all holes        all cylindrical through-features
+  list all mounting holes   cylinders dia < 15mm, axis ⊥ primary face
+  holes diameter N      filter by diameter
+  wall thickness        min distance between opposing parallel faces
+  largest face          largest planar face area
+  help                  list supported queries
+  exit / quit           exit REPL
+"""
+import logging
+import re
+import textwrap
+from typing import Optional
+
+from .loader import StepModel
+
+logger = logging.getLogger("step_processor.query")
+
+# Regex patterns for query routing. All are case-insensitive and used with
+# .search() on the stripped query text.
+_BBOX_RE      = re.compile(r"\b(bounding.?box|extents|dimensions|overall.?size)\b", re.I)
+_FACECOUNT_RE = re.compile(r"\b(face.?count|how many faces|number of faces)\b", re.I)
+# Alternatives are grouped so the word boundaries apply to every one of them
+# (ungrouped, \b only bound the first and last alternative).
+_PARTS_RE     = re.compile(r"\b(?:all.?parts|part.?list|assembly|components)\b", re.I)
+_HOLES_RE     = re.compile(r"\bholes?\b", re.I)
+_MOUNTING_RE  = re.compile(r"\bmounting\b", re.I)
+# Group 1 is always a float()-parseable number; the "mm" unit is optional so
+# that "holes diameter 4.2" filters just like "holes diameter 4.2mm".
+_DIA_RE       = re.compile(r"diameter\s+(\d+(?:\.\d+)?|\.\d+)(?:\s*mm)?", re.I)
+_WALL_RE      = re.compile(r"\bwall.?thickness\b", re.I)
+_LARGEST_RE   = re.compile(r"\blargest.?face\b", re.I)
+_HELP_RE      = re.compile(r"\bhelp\b", re.I)
+# Exit only when the whole input is the command, so a query that merely
+# contains the word "exit", "quit" or "q" is not treated as a request to leave.
+_EXIT_RE      = re.compile(r"^\s*(exit|quit|q)\s*$", re.I)
+
+
+def run_query(model: StepModel, query: str) -> str:
+    """Route a free-text query to its handler and return the formatted answer.
+
+    Patterns are tried in a fixed priority order (exit, help, bounding box,
+    face count, parts, holes, wall thickness, largest face); the first match
+    wins.
+
+    Returns:
+        The handler's text output; the sentinel string "EXIT" for an exit
+        request; or a "not recognized" message when nothing matches. A
+        diameter value that cannot be parsed as a number yields an error
+        message instead of raising.
+    """
+    q = query.strip()
+    if _EXIT_RE.search(q):
+        return "EXIT"
+    if _HELP_RE.search(q):
+        return _help_text()
+
+    # Routes that outrank the hole query, highest priority first.
+    for pattern, handler in ((_BBOX_RE, _query_bounding_box),
+                             (_FACECOUNT_RE, _query_face_count),
+                             (_PARTS_RE, _query_all_parts)):
+        if pattern.search(q):
+            return handler(model)
+
+    if _HOLES_RE.search(q):
+        dia_filter = None
+        dia_match = _DIA_RE.search(q)
+        if dia_match:
+            try:
+                dia_filter = float(dia_match.group(1))
+            except ValueError:
+                # e.g. "diameter 1.2.3mm" — report it rather than crash the REPL
+                return (f"Could not parse diameter value: '{dia_match.group(1)}'\n"
+                        f"Type 'help' to see supported queries.")
+        mounting_only = bool(_MOUNTING_RE.search(q))
+        return _query_holes(model, mounting_only=mounting_only, dia_filter=dia_filter)
+
+    for pattern, handler in ((_WALL_RE, _query_wall_thickness),
+                             (_LARGEST_RE, _query_largest_face)):
+        if pattern.search(q):
+            return handler(model)
+
+    return (f"Query not recognized: '{q}'\n"
+            f"Type 'help' to see supported queries.")
+
+
+def repl(model: StepModel, step_path):
+    """Run the interactive query loop on an already loaded model.
+
+    Reads lines from stdin, skips blank input, and prints each answer
+    followed by an empty line. Returns when a query resolves to "EXIT" or on
+    EOF / Ctrl-C (after printing a newline). *step_path* is only used for the
+    banner.
+    """
+    print(f"\nSTEP Query REPL — {step_path.name}")
+    print("Type 'help' for supported queries, 'exit' to quit.\n")
+    while True:
+        try:
+            line = input("> ").strip()
+        except (EOFError, KeyboardInterrupt):
+            print()
+            return
+        if line:
+            answer = run_query(model, line)
+            if answer == "EXIT":
+                return
+            print(answer)
+            print()
+
+# ── Query implementations ─────────────────────────────────────────────────────
+
+def _query_bounding_box(model: StepModel) -> str:
+    """Report the overall X/Y/Z extents of the model in mm and inches.
+
+    build123d models are measured through ``shape.bounding_box().size``.
+    Models loaded through the FreeCAD fallback carry a proxy whose bounds
+    live in ``shape.BoundBox`` (XMin … ZMax); the extents are the max–min
+    differences. Reading ``shape.bounding_box`` on the proxy, as before,
+    always failed because the proxy has no such attribute.
+
+    Any error is reported as text rather than raised.
+    """
+    try:
+        if model.backend == "build123d":
+            bb = model.shape.bounding_box()
+            x = round(bb.size.X, 2)
+            y = round(bb.size.Y, 2)
+            z = round(bb.size.Z, 2)
+        else:
+            bb = model.shape.BoundBox
+            x = round(bb.XMax - bb.XMin, 2)
+            y = round(bb.YMax - bb.YMin, 2)
+            z = round(bb.ZMax - bb.ZMin, 2)
+        return _table(
+            f"BOUNDING BOX — {model.path.name}",
+            ["Axis", "Dimension"],
+            [["Width (X)",  f"{x} mm ({x/25.4:.3f} in)"],
+             ["Depth (Y)",  f"{y} mm ({y/25.4:.3f} in)"],
+             ["Height (Z)", f"{z} mm ({z/25.4:.3f} in)"]]
+        )
+    except Exception as e:
+        return f"Bounding box query failed: {e}"
+
+
+def _query_face_count(model: StepModel) -> str:
+    """Count the model's faces, split into planar, cylindrical and other.
+
+    Requires the build123d backend; other backends get an explanatory
+    message. Faces are classified in a single pass, so ``faces()`` may return
+    any iterable. Any error is reported as text rather than raised.
+
+    NOTE(review): faces are classified by comparing ``f.geom_type()`` with
+    "PLANE" / "CYLINDER" — confirm that the installed build123d still exposes
+    geom_type as a method returning those strings.
+    """
+    if model.backend != "build123d":
+        return f"Face count query requires build123d (loaded via {model.backend})"
+    try:
+        planar = cylindrical = total = 0
+        for face in model.shape.faces():
+            total += 1
+            kind = face.geom_type()
+            if kind == "PLANE":
+                planar += 1
+            elif kind == "CYLINDER":
+                cylindrical += 1
+        other = total - planar - cylindrical
+        return _table(
+            f"FACE COUNT — {model.path.name}",
+            ["Type", "Count"],
+            [["Planar", str(planar)],
+             ["Cylindrical", str(cylindrical)],
+             ["Other", str(other)],
+             ["Total", str(total)]]
+        )
+    except Exception as e:
+        return f"Face count failed: {e}"
+
+
+def _query_all_parts(model: StepModel) -> str:
+    """List every entry of ``model.parts`` as a table, followed by a total line.
+
+    Missing keys fall back to "—" (part number, name), 1 (quantity),
+    0 (level) and "" (parent). A model without parts yields a short
+    "single solid body" message instead.
+    """
+    if not model.parts:
+        return (f"No assembly structure found in {model.path.name}.\n"
+                f"File appears to be a single solid body.")
+    listing = [
+        [
+            part.get("part_number", "—"),
+            part.get("name", "—"),
+            str(part.get("quantity", 1)),
+            str(part.get("level", 0)),
+            part.get("parent", ""),
+        ]
+        for part in model.parts
+    ]
+    table = _table(
+        f"ALL PARTS — {model.path.name}",
+        ["#", "Name", "Qty", "Level", "Parent"],
+        listing,
+    )
+    return table + f"\nTotal: {len(model.parts)} parts"
+
+
+def _query_holes(model: StepModel, mounting_only: bool = False,
+                 dia_filter: Optional[float] = None) -> str:
+    """Format the holes found by _find_holes as a table plus a diameter summary.
+
+    Args:
+        mounting_only: passed through to _find_holes; also switches the
+            heading to "MOUNTING HOLES".
+        dia_filter: passed through to _find_holes; echoed in the "nothing
+            found" message.
+
+    At most 50 holes are listed; the summary (count per diameter rounded to
+    0.1 mm) always covers all of them. Requires the build123d backend. Any
+    error is reported as text rather than raised.
+    """
+    if model.backend != "build123d":
+        return f"Hole query requires build123d (loaded via {model.backend})"
+    try:
+        holes = _find_holes(model, mounting_only=mounting_only, dia_filter=dia_filter)
+        if not holes:
+            label = "mounting holes" if mounting_only else "holes"
+            qualifier = f" ≈{dia_filter}mm" if dia_filter else ""
+            return f"No {label}{qualifier} found in {model.path.name}."
+
+        from collections import Counter
+        per_diameter = Counter(round(hole["dia"], 1) for hole in holes)
+
+        row_limit = 50
+        shown = holes[:row_limit]
+        listing = [
+            [
+                str(number),
+                f"{hole['dia']:.2f} mm",
+                f"{hole['depth']:.2f} mm" if hole["depth"] else "—",
+                f"({hole['x']:.1f}, {hole['y']:.1f}, {hole['z']:.1f})",
+            ]
+            for number, hole in enumerate(shown, 1)
+        ]
+        heading = "MOUNTING HOLES" if mounting_only else "ALL HOLES"
+        rule = "─" * 30
+        pieces = [
+            _table(
+                f"{heading} — {model.path.name}",
+                ["#", "Diameter", "Depth", "Position (x,y,z)"],
+                listing,
+            ),
+            f"\nShowing {len(shown)} of {len(holes)} unique hole locations",
+            "\n\nDIAMETER SUMMARY",
+            "\n" + rule,
+        ]
+        pieces.extend(
+            f"\n  {dia:.1f} mm  ×{count}"
+            for dia, count in sorted(per_diameter.items())
+        )
+        pieces.append("\n" + rule)
+        return "".join(pieces)
+    except Exception as e:
+        return f"Hole query failed: {e}"
+
+
+def _find_holes(model: StepModel, mounting_only: bool, dia_filter):
+    """Collect de-duplicated cylindrical faces of a build123d model.
+
+    Every face whose surface type is a cylinder is a candidate. Candidates
+    are dropped when *mounting_only* is set and the diameter exceeds 15 mm,
+    or when *dia_filter* is given and the diameter differs from it by more
+    than 0.5 mm.
+
+    De-duplication key: (diameter rounded to 0.1 mm, cylinder location
+    rounded to whole mm on each axis). The first face seen for a key wins, so
+    several faces belonging to the same cylinder collapse into one entry.
+
+    Returns:
+        A list of dicts with "dia", "depth" (largest bounding-box extent of
+        the face), "x", "y", "z". Faces that raise during inspection are
+        skipped; if the face list itself cannot be obtained, [] is returned.
+    """
+    from OCP.BRepAdaptor import BRepAdaptor_Surface
+    from OCP.GeomAbs import GeomAbs_Cylinder
+
+    try:
+        faces = model.shape.faces()
+    except Exception:
+        return []
+
+    unique = {}  # de-dup key → first matching hole
+    for face in faces:
+        try:
+            surface = BRepAdaptor_Surface(face.wrapped)
+            if surface.GetType() != GeomAbs_Cylinder:
+                continue
+            cylinder = surface.Cylinder()
+            dia = round(cylinder.Radius() * 2, 2)
+            if (mounting_only and dia > 15.0) or (
+                    dia_filter and abs(dia - dia_filter) > 0.5):
+                continue
+            origin = cylinder.Location()
+            px, py, pz = origin.X(), origin.Y(), origin.Z()
+            # 1 mm position grid + 0.1 mm diameter bucket
+            key = (round(dia, 1), round(px), round(py), round(pz))
+            if key in unique:
+                continue
+            extent = face.bounding_box().size
+            unique[key] = {
+                "dia": dia,
+                "depth": round(max(extent.X, extent.Y, extent.Z), 2),
+                "x": round(px, 1),
+                "y": round(py, 1),
+                "z": round(pz, 1),
+            }
+        except Exception:
+            continue
+    return list(unique.values())
+
+
+def _query_wall_thickness(model: StepModel) -> str:
+    """Estimate the minimum wall thickness from pairs of opposing planar faces.
+
+    Heuristic: for every pair of planar faces whose normals are anti-parallel
+    (dot product below -0.99), the distance between the face centres measured
+    along the normal is a candidate; the smallest candidate above 0.01 mm is
+    reported. Faces pointing the *same* way (e.g. two steps of a staircase
+    profile) are not opposing walls and are no longer paired.
+
+    Requires the build123d backend. Cost is quadratic in the number of planar
+    faces. Any error is reported as text rather than raised.
+    """
+    if model.backend != "build123d":
+        return f"Wall thickness query requires build123d (loaded via {model.backend})"
+    try:
+        faces = model.shape.faces()
+        planar = [f for f in faces if f.geom_type() == "PLANE"]
+        if len(planar) < 2:
+            return "Insufficient planar faces to determine wall thickness."
+        min_t = None
+        for i, f1 in enumerate(planar):
+            n1 = f1.normal_at()
+            c1 = f1.center()
+            for f2 in planar[i+1:]:
+                # Opposing faces only: normals must point in opposite directions.
+                if n1.dot(f2.normal_at()) >= -0.99:
+                    continue
+                dist = round(abs((c1 - f2.center()).dot(n1)), 3)
+                # Ignore coincident planes (distance ~0).
+                if dist > 0.01 and (min_t is None or dist < min_t):
+                    min_t = dist
+        if min_t is None:
+            return "Could not determine wall thickness from available faces."
+        return _table(
+            f"WALL THICKNESS — {model.path.name}",
+            ["Measurement", "Value"],
+            [["Minimum wall thickness",
+              f"{min_t} mm ({min_t/25.4:.3f} in)"]]
+        )
+    except Exception as e:
+        return f"Wall thickness query failed: {e}"
+
+
+def _query_largest_face(model: StepModel) -> str:
+    """Report area and in-plane size of the largest planar face.
+
+    "Width" and "Height" are the two largest extents of the face's bounding
+    box. The third extent of a planar, axis-aligned face is ~0, so always
+    reading the Z extent (as before) reported a height of 0 for any face
+    lying in the XY plane.
+
+    Requires the build123d backend. Any error is reported as text rather
+    than raised.
+    """
+    if model.backend != "build123d":
+        return f"Largest face query requires build123d (loaded via {model.backend})"
+    try:
+        faces = model.shape.faces()
+        planar = [(f, f.area()) for f in faces if f.geom_type() == "PLANE"]
+        if not planar:
+            return "No planar faces found."
+        largest, area = max(planar, key=lambda pair: pair[1])
+        size = largest.bounding_box().size
+        # Largest two extents = the dimensions within the face's own plane.
+        width, height = sorted((size.X, size.Y, size.Z), reverse=True)[:2]
+        return _table(
+            f"LARGEST PLANAR FACE — {model.path.name}",
+            ["Property", "Value"],
+            [["Area",    f"{round(area, 2)} mm²"],
+             ["Width",   f"{round(width, 2)} mm"],
+             ["Height",  f"{round(height, 2)} mm"]]
+        )
+    except Exception as e:
+        return f"Largest face query failed: {e}"
+
+
+# ── Formatting helpers ─────────────────────────────────────────────────────────
+
+def _table(title: str, headers: list, rows: list) -> str:
+    """Render a titled plain-text table with left-aligned, space-padded columns.
+
+    Layout: title, rule, header line, rule, one line per row, rule. Each
+    column is as wide as its longest cell (or header); columns are separated
+    by two spaces. Cells are converted with str(). The rule is
+    ``sum(widths) + 3 * len(headers) - 1`` characters long.
+    """
+    widths = [len(h) for h in headers]
+    for row in rows:
+        for idx, cell in enumerate(row):
+            cell_len = len(str(cell))
+            if cell_len > widths[idx]:
+                widths[idx] = cell_len
+
+    rule = "─" * (sum(widths) + 3 * len(headers) - 1)
+    header_line = "  ".join(h.ljust(w) for h, w in zip(headers, widths))
+    body = [
+        "  ".join(str(cell).ljust(w) for cell, w in zip(row, widths))
+        for row in rows
+    ]
+    return "\n".join([title, rule, header_line, rule, *body, rule])
+
+
+def _help_text() -> str:
+    """Return the static help screen listing every supported query."""
+    screen = """\
+    SUPPORTED QUERIES
+    ─────────────────────────────────────────────────────────────
+    bounding box            Overall model extents (W×D×H in mm)
+    face count              Faces by type (planar, cylindrical, other)
+    all parts               Full assembly listing with quantities
+    list all holes          All cylindrical through-features
+    list all mounting holes Holes smaller than 15mm diameter
+    holes diameter 4.2mm    Filter holes by specific diameter
+    wall thickness          Minimum wall thickness estimate
+    largest face            Largest planar face area
+    help                    Show this message
+    exit                    Exit the REPL
+    ─────────────────────────────────────────────────────────────
+    Tip: geometry queries require build123d backend.
+    If the file loaded via FreeCAD fallback, only bounding box
+    and parts list are available."""
+    # The literal is indented to match the code; dedent strips that margin.
+    return textwrap.dedent(screen)
@@ -0,0 +1,321 @@
+"""
+renderer.py — Offscreen PNG thumbnail generation.
+
+Pipeline (with color): build123d → GLTF export → trimesh scene → pyrender → PNG
+Pipeline (fallback):   build123d → per-solid STL → colored trimesh scene → pyrender → PNG
+
+Coordinate convention AFTER STEP→GLTF export→trimesh load:
+  trimesh applies a Z-up→Y-up GLTF convention that swaps STEP's Y and Z axes:
+    X = width  (~248mm for MR16) — left/right
+    Y = depth  (~41mm  for MR16) — front/back; Y_min=screen face, Y_max=back panel
+    Z = height (~459mm for MR16) — tall axis;  Z_min=top end,    Z_max=bottom end
+  → "front" camera sits at -Y (screen side) looking toward +Y to see the LCD face.
+  → World "up" vector is (0,0,-1) — negative Z = top of display in image.
+
+6 default views: front, rear, left, right, iso_left, iso_right
+(top and bottom are also defined in VIEW_CAMERAS and can be requested explicitly)
+"""
+import logging
+import tempfile
+from pathlib import Path
+
+import numpy as np
+
+from .loader import StepModel
+
+logger = logging.getLogger("step_processor.renderer")
+
+# Camera direction vectors: where the camera is PLACED relative to model center.
+# Camera always looks toward center from direction * distance.
+# Vectors need not be unit length; the renderer normalizes them before use.
+#
+# Trimesh world axes after GLTF load (STEP Y and Z are swapped by GLTF Y-up conv):
+#   X = width  — left/right
+#   Y = depth  — Y_min = screen face (LCD), Y_max = back panel (ports)
+#   Z = height — Z_min = TOP end of display, Z_max = BOTTOM end of display
+#
+# "front": camera at -Y (screen side) looks toward +Y → sees LCD face.
+# "rear":  camera at +Y (back side) looks toward -Y  → sees port panel.
+# "left":  camera at -X looks toward +X               → sees left edge.
+# "right": camera at +X looks toward -X               → sees right edge.
+# iso views: -Y component keeps camera on screen side; -Z = toward top end.
+VIEW_CAMERAS = {
+    "front":     ( 0, -1,  0),    # LCD screen face  (Y_min side)
+    "rear":      ( 0,  1,  0),    # back panel/ports (Y_max side)
+    "left":      (-1,  0,  0),    # left edge        (X_min side)
+    "right":     ( 1,  0,  0),    # right edge       (X_max side)
+    "top":       ( 0,  0, -1),    # top end          (Z_min side)
+    "bottom":    ( 0,  0,  1),    # bottom end       (Z_max side)
+    "iso_left":  (-1, -1, -0.5),  # front-left-above: screen + left edge + top
+    "iso_right": ( 1, -1, -0.5),  # front-right-above: screen + right edge + top
+}
+# Views rendered when the caller does not pass an explicit list.
+# "top" and "bottom" exist in VIEW_CAMERAS but are not rendered by default.
+DEFAULT_VIEWS  = ["front", "rear", "left", "right", "iso_left", "iso_right"]
+
+# Color palette for per-part coloring when GLTF has no embedded colors.
+# 20 distinct RGBA colors; every entry is fully opaque (alpha=255).
+PART_COLORS = [
+    [180, 180, 185, 255],  # light steel
+    [ 70, 130, 180, 255],  # steel blue
+    [205, 133,  63, 255],  # peru / bronze
+    [ 60, 179, 113, 255],  # medium sea green
+    [188, 143, 143, 255],  # rosy brown
+    [100, 149, 237, 255],  # cornflower blue
+    [255, 160,  50, 255],  # dark orange
+    [147, 112, 219, 255],  # medium purple
+    [ 46, 139,  87, 255],  # sea green
+    [205,  92,  92, 255],  # indian red
+    [135, 206, 235, 255],  # sky blue
+    [244, 164,  96, 255],  # sandy brown
+    [106,  90, 205, 255],  # slate blue
+    [ 32, 178, 170, 255],  # light sea green
+    [220,  20,  60, 255],  # crimson
+    [218, 165,  32, 255],  # goldenrod
+    [ 72,  61, 139, 255],  # dark slate blue
+    [143, 188, 143, 255],  # dark sea green
+    [255,  99,  71, 255],  # tomato
+    [176, 196, 222, 255],  # light steel blue
+]
+# Default output image size in pixels.
+DEFAULT_WIDTH  = 1024
+DEFAULT_HEIGHT = 768
+
+
+def render_views(model: StepModel, step_path: Path,
+                 views=None, width=DEFAULT_WIDTH, height=DEFAULT_HEIGHT) -> dict:
+    """Render one PNG per requested view next to the STEP file.
+
+    Args:
+        model: Loaded model handed to ``_get_mesh``.
+        step_path: Source STEP path; its parent directory and stem determine
+            the output names (``{stem}_{view}.png``).
+        views: View names to render; falls back to ``DEFAULT_VIEWS`` when
+            empty or None.
+        width: Image width in pixels.
+        height: Image height in pixels.
+
+    Returns:
+        Mapping of view name → written PNG path.  Unknown views and views
+        whose render raised are logged and left out.
+    """
+    rendered = {}
+    mesh = _get_mesh(model, step_path)
+    if mesh is None:
+        logger.warning("Could not obtain mesh — thumbnails skipped")
+        return rendered
+
+    requested = views if views else DEFAULT_VIEWS
+    target_dir = step_path.parent
+    base_name = step_path.stem
+    for view_name in requested:
+        camera_dir = VIEW_CAMERAS.get(view_name)
+        if camera_dir is None:
+            logger.warning(f"Unknown view '{view_name}' — skipping")
+            continue
+        out_path = target_dir / f"{base_name}_{view_name}.png"
+        try:
+            _render_single_view(mesh, camera_dir, out_path, width, height)
+            rendered[view_name] = out_path
+            logger.info(f"Rendered: {out_path.name}")
+        except Exception as e:
+            # One failed view must not prevent the remaining views.
+            logger.warning(f"Render failed for '{view_name}': {e}")
+    return rendered
+
+
+def _get_mesh(model: StepModel, step_path: Path):
+    """Get a single assembled trimesh.Trimesh from the model.
+
+    Returns a single concatenated mesh (not a Scene) so the camera
+    distance and bounds calculations are correct and parts don't explode.
+
+    Priority:
+      1. build123d → GLTF → scene.dump() → concatenate with transforms applied
+         (preserves per-part colors if embedded in STEP)
+      2. build123d → single STL of full assembly (fallback, monochrome but correct)
+      3. FreeCAD path → bounding box box mesh
+
+    Args:
+        model: Loaded model; ``model.backend`` selects the export path and
+            ``model.shape`` is what gets exported.
+        step_path: Not used inside this function.
+
+    Returns:
+        A mesh, or None when trimesh is not installed or the bounding-box
+        fallback also fails.
+    """
+    try:
+        import trimesh
+    except ImportError:
+        logger.warning("trimesh not installed — thumbnails unavailable. pip install trimesh")
+        return None
+
+    if model.backend == "build123d":
+        # ── GLTF path: color-aware, transforms flattened via scene.dump() ─────
+        # The temp file is only used to reserve a path (delete=False); it is
+        # closed immediately and removed in the ``finally`` below.
+        with tempfile.NamedTemporaryFile(suffix=".gltf", delete=False) as tmp:
+            gltf_path = Path(tmp.name)
+        try:
+            from build123d import export_gltf
+            import warnings
+            # Exporter warnings are suppressed for the duration of the export.
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore")
+                export_gltf(model.shape, str(gltf_path))
+            scene = trimesh.load(str(gltf_path))
+
+            if isinstance(scene, trimesh.Scene) and scene.geometry:
+                # scene.dump() applies the full transform graph → parts at correct positions
+                dumped = scene.dump()
+                if dumped:
+                    # Pull baseColorFactor directly from PBR material per mesh.
+                    # .to_color() loses this in some trimesh versions.
+                    materialized = []
+                    for m in dumped:
+                        try:
+                            bc = m.visual.material.baseColorFactor
+                            if bc is not None:
+                                # NOTE(review): the uint8 cast assumes bc is
+                                # already 0–255 RGBA — confirm for the trimesh
+                                # version in use.
+                                m.visual = trimesh.visual.ColorVisuals(
+                                    mesh=m, face_colors=np.array(bc, dtype=np.uint8))
+                            else:
+                                # No base color: use a neutral light gray.
+                                m.visual = trimesh.visual.ColorVisuals(
+                                    mesh=m, face_colors=[185, 190, 195, 255])
+                        except Exception:
+                            # No PBR material available: best-effort conversion;
+                            # on failure the mesh keeps its existing visual.
+                            try:
+                                m.visual = m.visual.to_color()
+                            except Exception:
+                                pass
+                        materialized.append(m)
+
+                    mesh = trimesh.util.concatenate(materialized)
+                    if mesh is not None and len(mesh.faces) > 0:
+                        # Diagnostic only: count distinct RGB values among
+                        # every 1000th face.
+                        n_colors = len(set(
+                            tuple(c) for c in mesh.visual.face_colors[:, :3][::1000]
+                        )) if hasattr(mesh.visual, 'face_colors') else 0
+                        logger.info(f"GLTF: assembled {len(mesh.faces)} faces, "
+                                    f"~{n_colors} distinct colors sampled")
+                        return mesh
+            # Falling out of the ``if`` chain without returning (no Scene,
+            # empty dump, empty mesh) continues to the STL fallback below.
+        except Exception as e:
+            logger.warning(f"GLTF path failed ({e}) — falling back to STL")
+        finally:
+            # Always remove the temp file, including on the early return above.
+            try:
+                gltf_path.unlink()
+            except Exception:
+                pass
+
+        # ── STL fallback: single merged mesh, correct positions, monochrome ───
+        with tempfile.NamedTemporaryFile(suffix=".stl", delete=False) as tmp:
+            stl_path = Path(tmp.name)
+        try:
+            from build123d import export_stl
+            export_stl(model.shape, str(stl_path))
+            mesh = trimesh.load(str(stl_path), force="mesh")
+            if mesh is not None and len(mesh.faces) > 0:
+                mesh.visual.face_colors = [185, 190, 195, 255]
+                logger.info(f"STL fallback: {len(mesh.faces)} faces (uniform color)")
+                return mesh
+        except Exception as e:
+            logger.warning(f"STL fallback failed ({e})")
+        finally:
+            try:
+                stl_path.unlink()
+            except Exception:
+                pass
+
+    # FreeCAD / last resort: bounding box
+    return _bbox_wireframe_mesh(model)
+
+
+def _mesh_has_color(mesh) -> bool:
+    """Report whether the mesh's average face color differs from mid-gray.
+
+    The mean RGB over all faces is compared with (128, 128, 128); the mesh
+    counts as colored when at least one channel of that mean is 25 or more
+    away from 128.  Missing or empty face colors, and any error while
+    reading them, yield False.
+    """
+    try:
+        face_colors = mesh.visual.face_colors
+        if face_colors is None or len(face_colors) == 0:
+            return False
+        # Unpacking enforces exactly three channels; anything else raises and
+        # is reported as "no color" by the handler below.
+        red, green, blue = face_colors[:, :3].mean(axis=0)
+        return any(abs(int(channel) - 128) >= 25 for channel in (red, green, blue))
+    except Exception:
+        return False
+
+
+def _bbox_wireframe_mesh(model: StepModel):
+    """Build a box mesh matching the model's bounding box (last-resort fallback).
+
+    Handles two bounding-box flavours: one exposing ``XMin``/``XMax``-style
+    attributes and one exposing ``size``/``center`` vectors.  Returns the
+    trimesh box translated to the bounding-box center, or None if anything
+    fails (the failure is logged as a warning).
+    """
+    try:
+        import trimesh
+        bb = model.shape.bounding_box()
+        if not hasattr(bb, "XMin"):
+            # build123d BoundBox
+            extents = [bb.size.X, bb.size.Y, bb.size.Z]
+            center = [bb.center.X, bb.center.Y, bb.center.Z]
+        else:
+            # _FreeCADShapeProxy stores a _BoundBox
+            lows = (bb.XMin, bb.YMin, bb.ZMin)
+            highs = (bb.XMax, bb.YMax, bb.ZMax)
+            extents = [hi - lo for lo, hi in zip(lows, highs)]
+            center = [(hi + lo) / 2 for lo, hi in zip(lows, highs)]
+        box = trimesh.creation.box(extents=extents)
+        box.apply_translation(center)
+        logger.debug("Using bbox wireframe mesh for rendering")
+        return box
+    except Exception as e:
+        logger.warning(f"Bbox wireframe mesh failed: {e}")
+        return None
+
+
+def _render_single_view(mesh, camera_direction: tuple, out_path: Path,
+                         width: int, height: int):
+    """Render one view using pyrender offscreen, save to out_path.
+
+    Args:
+        mesh: Mesh to render; its ``bounds`` drive camera placement.
+        camera_direction: Vector from the model center toward the camera
+            position (any non-zero length; normalized here).
+        out_path: Destination image path.
+        width: Viewport width in pixels.
+        height: Viewport height in pixels.
+
+    Raises:
+        ImportError: If pyrender is not installed.
+    """
+    try:
+        import pyrender
+    except ImportError:
+        raise ImportError("pyrender not installed. pip install pyrender")
+
+    # Normalize camera direction
+    direction = np.array(camera_direction, dtype=float)
+    direction = direction / np.linalg.norm(direction)
+
+    # Bounding box of the assembled mesh
+    bounds = mesh.bounds  # shape (2,3): [[xmin,ymin,zmin],[xmax,ymax,zmax]]
+    center = (bounds[0] + bounds[1]) / 2.0
+    diag = np.linalg.norm(bounds[1] - bounds[0])
+
+    # Camera sits at 2.5× diagonal distance from center, looking at center
+    camera_distance = diag * 2.5
+    eye = center + direction * camera_distance
+
+    # World up: (0,0,-1) = negative Z = top of display in trimesh GLTF space.
+    # Fallback to (0,-1,0) for top/bottom end views where direction ≈ ±Z.
+    world_up = np.array([0, 0, -1], dtype=float)
+    if abs(np.dot(direction, world_up)) > 0.9:
+        world_up = np.array([0, -1, 0], dtype=float)
+
+    # Proven camera frame formula (right-handed, same structure as original code):
+    #   right   = cross(world_up, direction)   [world_up × backward]
+    #   cam_up  = cross(direction, right)       [backward × right]
+    #   col2    = direction                     [camera +Z = backward; looks down -Z]
+    right = np.cross(world_up, direction)
+    # Guard against a degenerate cross product (world_up parallel to direction).
+    if np.linalg.norm(right) < 1e-8:
+        right = np.cross(np.array([1, 0, 0], dtype=float), direction)
+    right = right / np.linalg.norm(right)
+    cam_up = np.cross(direction, right)
+    cam_up = cam_up / np.linalg.norm(cam_up)
+
+    # 4×4 camera pose: columns = [right, cam_up, backward, eye]
+    camera_pose = np.eye(4)
+    camera_pose[:3, 0] = right
+    camera_pose[:3, 1] = cam_up
+    camera_pose[:3, 2] = direction   # camera +Z = backward; pyrender looks down -Z
+    camera_pose[:3, 3] = eye
+
+    # Build pyrender scene — white background
+    pr_scene = pyrender.Scene(ambient_light=[0.35, 0.35, 0.35],
+                              bg_color=[255, 255, 255, 255])
+    pr_scene.add(pyrender.Mesh.from_trimesh(mesh, smooth=False))
+
+    # FOV sized so the model fills ~80% of the frame.
+    # diag cancels out here (camera_distance is 2.5 × diag), so yfov is the
+    # same constant for every non-degenerate mesh.
+    yfov = 2.0 * np.arctan((diag * 0.5) / camera_distance) * 1.25
+    camera = pyrender.PerspectiveCamera(yfov=yfov, aspectRatio=width / height)
+    pr_scene.add(camera, pose=camera_pose)
+
+    # Key light from camera position + fill from above-opposite
+    pr_scene.add(pyrender.DirectionalLight(color=np.ones(3), intensity=4.5),
+                 pose=camera_pose)
+    fill_pose = np.eye(4)
+    # Fill light: offset toward the top of the display (-Z).  The sum below
+    # is the zero vector only when direction == (0,0,-1) (the "top" view),
+    # in which case a fixed (0,0,-1) is used instead.
+    fill_dir = -direction + np.array([0, 0, -1], dtype=float)
+    fill_norm = np.linalg.norm(fill_dir)
+    if fill_norm < 1e-8:
+        fill_dir = np.array([0, 0, -1], dtype=float)
+    else:
+        fill_dir = fill_dir / fill_norm
+    fill_eye = center - fill_dir * camera_distance
+    # NOTE(review): only the translation of fill_pose is set; its rotation
+    # stays identity.  Presumably a directional light's orientation comes
+    # from the rotation part, so fill_dir may not actually aim the light —
+    # confirm against pyrender's DirectionalLight semantics.
+    fill_pose[:3, 3] = fill_eye
+    pr_scene.add(pyrender.DirectionalLight(color=np.ones(3), intensity=1.8),
+                 pose=fill_pose)
+
+    # Offscreen render; the renderer is always released, even if render() raises.
+    r = pyrender.OffscreenRenderer(viewport_width=width, viewport_height=height)
+    try:
+        color, _ = r.render(pr_scene)
+    finally:
+        r.delete()
+
+    from PIL import Image
+    Image.fromarray(color).save(str(out_path))
@@ -0,0 +1,103 @@
+"""
+rewriter.py — STEP label rewriter for Chinese→English translation.
+
+Produces {stem}_EN.step — NEVER modifies source file.
+Targets only PRODUCT entity name strings.
+Validates entity count before/after to ensure file integrity.
+"""
+import logging
+import re
+from pathlib import Path
+
+logger = logging.getLogger("step_processor.rewriter")
+
+# Targets both quoted strings in: #N = PRODUCT('id', 'name', 'description', ...)
+# ISO 10303-21 PRODUCT has two name fields; CAD viewers typically display the second.
+# Chinese CAD exports set both to the same Chinese string, so both must be replaced.
+# Groups: (prefix)  (id)  (sep)  (name)  (suffix-quote)
+# Limitation: each field is matched as [^']*, so a field containing an
+# apostrophe (written '' inside a STEP string) is not matched as a whole.
+PRODUCT_PATTERN = re.compile(
+    r"(#\d+\s*=\s*PRODUCT\s*\(\s*')([^']*)(',\s*')([^']*)(')",
+    re.IGNORECASE
+)
+# Matches the start of an entity instance at the beginning of a line:
+# "#<digits> = <TYPE>(".  MULTILINE makes ^ anchor at every line start; the
+# match count is used as a before/after integrity check.
+ENTITY_PATTERN = re.compile(r"^#\d+\s*=\s*\S+\s*\(", re.MULTILINE)
+
+
+def _read_step_for_rewrite(source_path: Path) -> str:
+    """Read a STEP file as text, preferring UTF-8 and falling back to GBK.
+
+    STEP files from Chinese CAD tools may embed raw GBK bytes in PRODUCT name
+    strings.  Such bytes make the strict UTF-8 decode raise, in which case the
+    file is re-read as GBK so the Chinese→English lookup can find the names.
+    A file that decodes as UTF-8 but already contains U+FFFD replacement
+    characters is also retried as GBK.
+
+    Decode order:
+      1. UTF-8 without replacement characters → returned as-is.
+      2. GBK.
+      3. The UTF-8 text from step 1, if that decode succeeded (it is valid
+         text even though it contains U+FFFD).
+      4. latin-1, which accepts any byte sequence.
+
+    Args:
+        source_path: Path of the STEP file to read.
+
+    Returns:
+        The decoded file contents.
+
+    Raises:
+        OSError: If the file cannot be opened or read.
+    """
+    utf8_text = None
+    try:
+        utf8_text = source_path.read_text(encoding='utf-8')
+    except (UnicodeDecodeError, LookupError):
+        pass
+    else:
+        if '\ufffd' not in utf8_text:
+            return utf8_text
+
+    try:
+        return source_path.read_text(encoding='gbk')
+    except (UnicodeDecodeError, LookupError):
+        pass
+
+    # GBK failed: a successful UTF-8 decode beats latin-1 mojibake.
+    if utf8_text is not None:
+        return utf8_text
+    return source_path.read_text(encoding='latin-1', errors='replace')
+
+
+def rewrite_step(source_path: Path, translation_map: dict):
+    """Write an English-labeled copy of a STEP file as ``{stem}_EN.step``.
+
+    The source file is only read.  PRODUCT lines are rewritten one line at a
+    time, and the number of entity lines is compared before and after as an
+    integrity check.
+
+    Args:
+        source_path: STEP file to translate.
+        translation_map: Original label → English label.
+
+    Returns:
+        Path of the written copy, or None when the map is empty, the file
+        cannot be read, no map key occurs in the text, the entity count
+        changed, nothing was replaced, or the write failed.
+    """
+    if not translation_map:
+        logger.info("No translations to apply — _EN.step skipped")
+        return None
+
+    try:
+        source_text = _read_step_for_rewrite(source_path)
+    except Exception as e:
+        logger.error(f"Could not read source STEP: {e}")
+        return None
+
+    # Cheap pre-check: skip the rewrite when no label appears anywhere.
+    label_present = False
+    for label in translation_map:
+        if label in source_text:
+            label_present = True
+            break
+    if not label_present:
+        logger.info("No Chinese labels in STEP text — _EN.step skipped")
+        return None
+
+    rewritten = [
+        _replace_product_names(raw_line, translation_map)
+        for raw_line in source_text.splitlines(keepends=True)
+    ]
+    output_text = "".join(text for text, _ in rewritten)
+    replaced_count = sum(hits for _, hits in rewritten)
+
+    original_count = len(ENTITY_PATTERN.findall(source_text))
+    new_count = len(ENTITY_PATTERN.findall(output_text))
+    if new_count != original_count:
+        logger.error(
+            f"Entity count mismatch: {original_count} → {new_count}. "
+            "Aborting — source file untouched.")
+        return None
+    if replaced_count == 0:
+        logger.info("No PRODUCT entities matched — _EN.step skipped")
+        return None
+
+    out_path = source_path.parent / f"{source_path.stem}_EN.step"
+    try:
+        out_path.write_text(output_text, encoding="utf-8")
+        logger.info(f"_EN.step written: {out_path.name} ({replaced_count} labels replaced)")
+        return out_path
+    except Exception as e:
+        logger.error(f"Failed to write _EN.step: {e}")
+        # Do not leave a partially written copy behind.
+        out_path.unlink(missing_ok=True)
+        return None
+
+
+def _replace_product_names(line: str, translation_map: dict):
+    """Translate the id and name fields of PRODUCT entities found in one line.
+
+    For each PRODUCT match the translation is looked up by the id field first
+    and by the name field second; when one is found, both fields are replaced
+    with it.  Matches without a translation are left untouched.
+
+    Args:
+        line: One line of STEP text.
+        translation_map: Original label → English label.
+
+    Returns:
+        ``(new_line, count)`` where ``count`` is the number of PRODUCT
+        entities rewritten in this line.
+    """
+    count = 0
+
+    def replacer(m):
+        nonlocal count
+        # Try id field first (group 2), fall back to name field (group 4).
+        translated = translation_map.get(m.group(2)) or translation_map.get(m.group(4))
+        if not translated:
+            return m.group(0)
+        count += 1
+        # A bare apostrophe would terminate the STEP string literal early and
+        # corrupt the entity; ISO 10303-21 writes it as two apostrophes.
+        safe = str(translated).replace("'", "''")
+        # Replace both the id field and the name field.
+        return m.group(1) + safe + m.group(3) + safe + m.group(5)
+
+    new_line = PRODUCT_PATTERN.sub(replacer, line)
+    return new_line, count
@@ -0,0 +1,107 @@
+"""
+translator.py — Chinese to English part name translation via Claude API.
+
+Detects CJK unicode range. Batches all names in a single API call per file.
+Flags uncertain translations in the notes column.
+"""
+
+import json
+import logging
+import os
+import re
+
+import pandas as pd
+
+logger = logging.getLogger("step_processor.translator")
+
+# Matches a single character in U+4E00–U+9FFF or U+3400–U+4DBF.
+CJK_PATTERN = re.compile(r'[一-鿿㐀-䶿]')
+
+# System prompt for the translation request.  It asks for a bare JSON object
+# mapping each original name to its English translation.
+# NOTE(review): translate_bom tags a translation as ambiguous when it contains
+# "[?]", but this prompt does not ask the model to emit that marker — confirm
+# whether that is intended.
+SYSTEM_PROMPT = (
+    "You are a mechanical engineering translator specializing in Chinese "
+    "manufacturing CAD files for display and enclosure products. "
+    "Translate the following part names from Chinese to English. "
+    "Preserve technical precision. Use standard hardware/manufacturing terminology. "
+    "Output ONLY a JSON object mapping original Chinese to translated English, nothing else.\n"
+    'Example: {"安装支架": "Mounting Bracket", "螺钉M4": "M4 Screw", "前面板": "Front Panel"}'
+)
+
+
+def has_chinese(text: str) -> bool:
+    """Tell whether ``str(text)`` contains at least one character matched by CJK_PATTERN."""
+    as_text = str(text)
+    return CJK_PATTERN.search(as_text) is not None
+
+
+def translate_bom(df: pd.DataFrame, model_name: str = "") -> pd.DataFrame:
+    """Fill ``part_name_english`` for rows whose original name contains Chinese.
+
+    All distinct Chinese names are sent to the API in one batch.  The frame is
+    modified in place and also returned.  Each affected row gets a tag
+    appended to ``notes``: ``machine-translated``, ``ambiguous-translation``
+    (translation contains ``[?]``), ``translation-missing`` (name absent from
+    the reply) or ``translation-failed`` (the API returned nothing).
+
+    Args:
+        df: BOM frame with ``part_name_original``, ``part_name_english`` and
+            ``notes`` columns.
+        model_name: Optional product name passed to the API call as context.
+
+    Returns:
+        The same DataFrame object.
+    """
+    def _with_tag(existing, tag):
+        # Append to an existing note with "; ", or start a new note.
+        return (existing + "; " if existing else "") + tag
+
+    needs_translation = df["part_name_original"].apply(has_chinese)
+    chinese_names = df.loc[needs_translation, "part_name_original"].unique().tolist()
+    if not chinese_names:
+        logger.info("No Chinese part names detected — translation skipped")
+        return df
+
+    logger.info(f"Translating {len(chinese_names)} Chinese part names...")
+    translation_map = _call_claude_api(chinese_names, model_name)
+    if not translation_map:
+        logger.warning("Translation API returned no results — retaining original names")
+        failed_notes = df.loc[needs_translation, "notes"]
+        df.loc[needs_translation, "notes"] = failed_notes.apply(
+            lambda n: _with_tag(n, "translation-failed"))
+        return df
+
+    for idx, row in df.iterrows():
+        original = row["part_name_original"]
+        if not has_chinese(original):
+            continue
+        translated = translation_map.get(original)
+        if not translated:
+            # Keep the original so the English column is never left empty.
+            df.at[idx, "part_name_english"] = original
+            note_tag = "translation-missing"
+        else:
+            df.at[idx, "part_name_english"] = translated
+            if "[?]" in translated:
+                note_tag = "ambiguous-translation"
+            else:
+                note_tag = "machine-translated"
+        df.at[idx, "notes"] = _with_tag(row["notes"], note_tag)
+    logger.info(f"Translated {needs_translation.sum()} parts")
+    return df
+
+
+def get_translation_map(df: pd.DataFrame) -> dict:
+    """Build an original → English dict from rows where the two names differ.
+
+    When the same original name appears in several rows, the last row wins.
+    """
+    changed = df[df["part_name_original"] != df["part_name_english"]]
+    pairs = zip(changed["part_name_original"], changed["part_name_english"])
+    return dict(pairs)
+
+
+def _call_claude_api(names: list, model_name: str = "") -> dict:
+    """Translate all names with a single batched Claude API call.
+
+    Args:
+        names: Original part names to translate.
+        model_name: Optional product name appended to the request as context.
+
+    Returns:
+        Dict of original → translated name.  An empty dict is returned (and
+        the reason logged) when the API key or the ``anthropic`` package is
+        missing, the request fails, or the reply is not a JSON object.
+        Entries whose value is not a string are dropped.
+    """
+    api_key = os.environ.get("ANTHROPIC_API_KEY")
+    if not api_key:
+        logger.error("ANTHROPIC_API_KEY not set — translation unavailable")
+        return {}
+    try:
+        import anthropic
+    except ImportError:
+        logger.error("anthropic package not installed — pip install anthropic")
+        return {}
+    names_json = json.dumps(names, ensure_ascii=False)
+    user_msg = f"Translate these part names from Chinese to English:\n{names_json}"
+    if model_name:
+        user_msg += f"\n\nContext: Parts from a {model_name} display enclosure assembly."
+    try:
+        client = anthropic.Anthropic(api_key=api_key)
+        response = client.messages.create(
+            model="claude-haiku-4-5-20251001",
+            max_tokens=2048,
+            system=SYSTEM_PROMPT,
+            messages=[{"role": "user", "content": user_msg}],
+        )
+        text = response.content[0].text.strip()
+        # Tolerate prose or code fences around the object: keep only the
+        # outermost {...} span when one is present.
+        json_match = re.search(r'\{.*\}', text, re.DOTALL)
+        if json_match:
+            text = json_match.group(0)
+        parsed = json.loads(text)
+        # Callers use .get() and substring checks on the values, so anything
+        # other than an object with string values would crash them later.
+        if not isinstance(parsed, dict):
+            logger.error(
+                f"Translation API returned {type(parsed).__name__}, expected a JSON object")
+            return {}
+        result = {
+            original: translated
+            for original, translated in parsed.items()
+            if isinstance(translated, str)
+        }
+        logger.info(f"API returned {len(result)} translations")
+        return result
+    except json.JSONDecodeError as e:
+        logger.error(f"Translation API JSON parse error: {e}")
+        return {}
+    except Exception as e:
+        logger.error(f"Translation API error: {type(e).__name__}: {e}")
+        return {}