This commit is contained in:
Jason Stedwell
2026-06-17 16:03:26 -05:00
parent fa1e9b68c7
commit c1abe36822
99 changed files with 1562887 additions and 0 deletions
+1
View File
@@ -0,0 +1 @@
# step-processor modules package
Binary file not shown.
Binary file not shown.
+275
View File
@@ -0,0 +1,275 @@
"""
bom.py — BOM extraction from STEP assembly tree.
Primary: STEP ISO 10303-21 text parser for PRODUCT entities (encoding-aware).
Fallback: build123d assembly traversal, used only when the text parser finds nothing.
Always produces a complete DataFrame; saved as MPM-branded Excel (.xlsx).
"""
import logging
import math
import re
from collections import Counter
from pathlib import Path
import pandas as pd
from .loader import StepModel
logger = logging.getLogger("step_processor.bom")
# Internal column schema: every BOM DataFrame built by extract_bom() uses
# exactly these column names, in this order.
BOM_COLUMNS = [
"part_number", "part_name_original", "part_name_english",
"quantity", "level", "parent",
"bbox_x_mm", "bbox_y_mm", "bbox_z_mm", "notes"
]
# ── Excel output — MPM brand palette (hex, no #) ─────────────────────────────
_MPM_DARK_SHADE = "232022" # header background + body text
_MPM_LIGHT_SHADE = "F5F1EC" # header text
_MPM_WARM_OFF_WHITE = "FAF7F2" # alternating row tint
_MPM_MIDDLE_GOLD = "DCBB4F" # accent border under header row
# Column rename + reorder for stakeholder-facing Excel output.
# Internal processing always uses BOM_COLUMNS names.
# Maps internal column name → workbook column key.
_XLSX_RENAME = {
"part_name_english": "part_description",
"part_name_original": "part_name_supplier",
}
# Left-to-right column order in the workbook (uses the renamed keys).
_XLSX_ORDER = [
"part_number", "part_description", "quantity", "level", "parent",
"bbox_x_mm", "bbox_y_mm", "bbox_z_mm", "notes", "part_name_supplier",
]
# Header-row caption for each workbook column key.
_XLSX_HEADERS = {
"part_number": "Part #",
"part_description": "Part Description",
"quantity": "Qty",
"level": "Level",
"parent": "Parent",
"bbox_x_mm": "X (mm)",
"bbox_y_mm": "Y (mm)",
"bbox_z_mm": "Z (mm)",
"notes": "Notes",
"part_name_supplier": "Supplier Part Name",
}
# Column widths passed to openpyxl's column_dimensions[...].width;
# save_bom_xlsx() falls back to 15 for any key missing here.
_XLSX_WIDTHS = {
"part_number": 12, "part_description": 40, "quantity": 8,
"level": 7, "parent": 22, "bbox_x_mm": 11, "bbox_y_mm": 11,
"bbox_z_mm": 11, "notes": 34, "part_name_supplier": 40,
}
def _safe(v):
"""Convert NaN/None → None so openpyxl writes blank cells."""
if v is None:
return None
try:
if isinstance(v, float) and math.isnan(v):
return None
except Exception:
pass
return v
def extract_bom(model: StepModel) -> pd.DataFrame:
    """Build the BOM DataFrame (columns = BOM_COLUMNS) for a loaded model.

    Sources are tried in this order; the first that yields rows wins:

    1. The STEP text parser (``_bom_from_step_text``). It decodes the file
       with UTF-8/GBK detection, so Chinese part labels come out correctly.
    2. The OCC assembly walk (``_bom_from_parts``), only for build123d models
       that carry ``parts``. Names taken from OCC labels may be garbled for
       CJK files, which is why this is a fallback and not the primary source.
    3. A single placeholder row named after the file stem.

    For build123d models the frame is then passed through ``_enrich_bboxes``.
    """
    records = _bom_from_step_text(model.path)

    if not records and model.backend == "build123d" and model.parts:
        # Text parser found no PRODUCT entities: use the assembly tree instead.
        logger.debug("STEP text parser empty — falling back to OCC assembly walk")
        records = _bom_from_parts(model.parts)

    if not records:
        logger.info("No assembly structure — treating as single part")
        stem = model.path.stem
        placeholder = {
            "part_number": "001",
            "part_name_original": stem,
            "part_name_english": stem,
            "quantity": 1,
            "level": 0,
            "parent": "",
            "bbox_x_mm": None,
            "bbox_y_mm": None,
            "bbox_z_mm": None,
            "notes": "single-body file",
        }
        records = [placeholder]

    df = pd.DataFrame(records, columns=BOM_COLUMNS)
    if model.backend == "build123d":
        df = _enrich_bboxes(model, df)
    logger.info(f"BOM extracted: {len(df)} parts")
    return df
def _bom_from_parts(parts: list) -> list:
name_counts = Counter(p["name"] for p in parts)
seen = set()
rows = []
for i, p in enumerate(parts):
name = p["name"]
if name in seen:
continue
seen.add(name)
rows.append({
"part_number": f"{len(rows)+1:03d}",
"part_name_original": name,
"part_name_english": name,
"quantity": name_counts[name],
"level": p.get("level", 0),
"parent": p.get("parent", ""),
"bbox_x_mm": None, "bbox_y_mm": None, "bbox_z_mm": None,
"notes": "",
})
return rows
def _read_step_text(step_path: Path) -> str:
"""Read STEP file text with CJK-aware encoding detection.
STEP files from Chinese manufacturers embed raw GBK bytes in name strings.
Strategy: try UTF-8 first (correct for modern files); if replacement chars
appear, retry as GBK (covers Chinese CAD exports); fall back to latin-1
which never fails (may contain mojibake, but at least it's readable).
"""
for enc in ('utf-8', 'gbk'):
try:
text = step_path.read_text(encoding=enc)
if enc == 'utf-8' and '' in text:
# Replacement chars detected — GBK bytes can't be UTF-8
continue
return text
except (UnicodeDecodeError, LookupError):
continue
return step_path.read_text(encoding='latin-1', errors='replace')
def _bom_from_step_text(step_path: Path) -> list:
    """Parse STEP ISO 10303-21 PRODUCT entities directly from the file text.

    The first string argument of each ``#n = PRODUCT('...'`` entity is taken
    as the part name. Names are de-duplicated in first-appearance order; the
    quantity is the number of PRODUCT entities that share the name. Empty
    names and the literal ``NONE`` (any case) are skipped.

    Args:
        step_path: Path of the STEP file.

    Returns:
        A list of row dicts keyed by BOM_COLUMNS; empty if the file cannot be
        read or contains no usable PRODUCT names.
    """
    try:
        text = _read_step_text(step_path)
    except Exception as e:
        logger.warning(f"Could not read STEP text: {e}")
        return []

    # Part 21 strings escape an apostrophe by doubling it (''), so the name
    # runs to the first *unpaired* apostrophe. Matching only [^']* would cut
    # such names short at the first quote.
    pattern = re.compile(
        r"#\d+\s*=\s*PRODUCT\s*\(\s*'([^']*(?:''[^']*)*)'", re.IGNORECASE)

    seen = {}
    for match in pattern.finditer(text):
        name = match.group(1).replace("''", "'").strip()
        if not name or name.upper() == "NONE":
            continue
        if name in seen:
            seen[name]["quantity"] += 1
            continue
        seen[name] = {
            "part_number": f"{len(seen)+1:03d}",
            "part_name_original": name, "part_name_english": name,
            "quantity": 1, "level": 0, "parent": "",
            "bbox_x_mm": None, "bbox_y_mm": None, "bbox_z_mm": None,
            "notes": "parsed from STEP text",
        }

    rows = list(seen.values())
    if rows:
        logger.info(f"STEP text parser found {len(rows)} unique part names")
    return rows
def _enrich_bboxes(model: StepModel, df: pd.DataFrame) -> pd.DataFrame:
    """Fill the bbox_*_mm columns from build123d bounding boxes (best-effort).

    A one-row frame receives the bounding box of the whole shape. Otherwise
    the i-th direct child of ``model.shape`` is written to the row labelled
    ``i``, stopping once children or rows run out.
    NOTE(review): this pairs children with rows purely by position — confirm
    the row order actually matches the child order.

    Any failure is logged at debug level and the frame is returned as far as
    it was filled; a failing child is skipped silently.
    """
    def _store(row_label, box):
        # Sizes are rounded to 2 decimals; columns are written X, then Y, then Z.
        df.at[row_label, "bbox_x_mm"] = round(box.size.X, 2)
        df.at[row_label, "bbox_y_mm"] = round(box.size.Y, 2)
        df.at[row_label, "bbox_z_mm"] = round(box.size.Z, 2)

    try:
        whole = model.shape.bounding_box()
        if len(df) == 1:
            _store(0, whole)
            return df
        kids = getattr(model.shape, "children", []) or []
        for row_label, kid in zip(range(len(df)), kids):
            try:
                _store(row_label, kid.bounding_box())
            except Exception:
                pass
    except Exception as e:
        logger.debug(f"bbox enrichment skipped: {e}")
    return df
def save_bom_csv(df: pd.DataFrame, step_path: Path) -> Path:
    """Write the BOM next to the STEP file as ``<stem>_bom.csv`` (legacy fallback).

    The index is not written. Returns the path of the CSV file.
    """
    csv_name = f"{step_path.stem}_bom.csv"
    out_path = step_path.parent / csv_name
    df.to_csv(out_path, index=False)
    logger.info(f"BOM CSV → {out_path.name}")
    return out_path
def save_bom_xlsx(df: pd.DataFrame, step_path: Path) -> Path:
    """Write the BOM as an MPM-branded workbook, ``<stem>_bom.xlsx``.

    Column changes vs. the internal schema (BOM_COLUMNS):
        part_name_english  → Part Description    (column 2)
        part_name_original → Supplier Part Name  (last column)

    Columns listed in _XLSX_ORDER but absent from *df* are written blank.
    If openpyxl cannot be imported, the BOM is written as CSV instead and the
    CSV path is returned.
    """
    try:
        from openpyxl import Workbook
        from openpyxl.styles import Alignment, Border, Font, PatternFill, Side
        from openpyxl.utils import get_column_letter
    except ImportError:
        logger.warning("openpyxl not installed — falling back to CSV")
        return save_bom_csv(df, step_path)

    out_path = step_path.parent / f"{step_path.stem}_bom.xlsx"

    # Display frame: renamed, padded with any missing columns, reordered.
    disp = df.rename(columns=_XLSX_RENAME).copy()
    for missing in (c for c in _XLSX_ORDER if c not in disp.columns):
        disp[missing] = None
    disp = disp[_XLSX_ORDER]

    wb = Workbook()
    ws = wb.active
    ws.title = "Bill of Materials"

    # Header row: dark fill, light bold text, gold rule underneath.
    header_style = {
        "font": Font(name="Montserrat", bold=True, color=_MPM_LIGHT_SHADE, size=10),
        "fill": PatternFill("solid", fgColor=_MPM_DARK_SHADE),
        "alignment": Alignment(horizontal="center", vertical="center", wrap_text=True),
        "border": Border(bottom=Side(style="medium", color=_MPM_MIDDLE_GOLD)),
    }
    for col_idx, key in enumerate(_XLSX_ORDER, start=1):
        head = ws.cell(row=1, column=col_idx, value=_XLSX_HEADERS.get(key, key))
        for attr, style in header_style.items():
            setattr(head, attr, style)
        ws.column_dimensions[get_column_letter(col_idx)].width = _XLSX_WIDTHS.get(key, 15)
    ws.row_dimensions[1].height = 28

    # Data rows: even sheet rows get the warm tint, odd rows stay white.
    body_font = Font(name="Open Sans", size=10, color=_MPM_DARK_SHADE)
    body_align = Alignment(horizontal="left", vertical="center")
    tinted = PatternFill("solid", fgColor=_MPM_WARM_OFF_WHITE)
    plain = PatternFill("solid", fgColor="FFFFFF")
    for sheet_row, (_, record) in enumerate(disp.iterrows(), start=2):
        row_fill = tinted if sheet_row % 2 == 0 else plain
        for col_idx, key in enumerate(_XLSX_ORDER, start=1):
            cell = ws.cell(row=sheet_row, column=col_idx, value=_safe(record[key]))
            cell.font = body_font
            cell.fill = row_fill
            cell.alignment = body_align

    ws.freeze_panes = "A2"  # keep the header row visible while scrolling
    wb.save(str(out_path))
    logger.info(f"BOM XLSX → {out_path.name}")
    return out_path
File diff suppressed because it is too large Load Diff
+187
View File
@@ -0,0 +1,187 @@
"""
loader.py — STEP file loading with build123d primary and FreeCAD fallback.
Returns a StepModel dataclass used by all other modules.
FreeCAD fallback invokes the signed app bundle Python to avoid
Gatekeeper issues on macOS 15 Sequoia.
"""
import json
import logging
import subprocess
import tempfile
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Optional
logger = logging.getLogger("step_processor.loader")
# Locations inside the FreeCAD.app bundle (macOS install path).
# FREECAD_PYTHON runs the generated loader script in _load_via_freecad();
# FREECAD_LIB is put on that script's sys.path so `import FreeCAD` resolves.
FREECAD_PYTHON = "/Applications/FreeCAD.app/Contents/Resources/bin/python"
FREECAD_LIB = "/Applications/FreeCAD.app/Contents/Resources/lib"
# NOTE(review): FREECAD_CMD is not referenced in the visible part of this module.
FREECAD_CMD = "/Applications/FreeCAD.app/Contents/Resources/bin/freecadcmd"
@dataclass
class StepModel:
    """Unified model object returned by load_step(). Used by all modules.

    Built by _load_via_build123d() (backend "build123d") or by
    _load_via_freecad() (backend "freecad").
    """
    # build123d shape from import_step(), or a _FreeCADShapeProxy for FreeCAD loads
    shape: Any
    backend: str  # "build123d" | "freecad"
    path: Path  # resolved path of the STEP file
    # One dict per part with keys "name", "level", "parent"
    parts: list = field(default_factory=list)
    face_count: int = 0
    # The FreeCAD loader stores {"bbox": {...}} here; build123d loads leave it empty
    metadata: dict = field(default_factory=dict)
def load_step(filepath) -> Optional["StepModel"]:
    """Load a STEP file, preferring build123d and falling back to FreeCAD.

    Args:
        filepath: Path (str or Path) of the STEP file; ``~`` is expanded and
            the path is resolved.

    Returns:
        A StepModel, or None when the file does not exist or the FreeCAD
        fallback also fails.
    """
    step_path = Path(filepath).expanduser().resolve()
    if not step_path.exists():
        logger.error(f"File not found: {step_path}")
        return None

    try:
        return _load_via_build123d(step_path)
    except ImportError:
        logger.warning("build123d not available — falling back to FreeCAD")
    except Exception as e:
        logger.warning(f"build123d failed ({type(e).__name__}: {e}) — falling back to FreeCAD")
    # Reached only when build123d was unavailable or raised.
    return _load_via_freecad(step_path)
def _load_via_build123d(step_path: Path) -> "StepModel":
    """Import *step_path* with build123d and wrap the result in a StepModel.

    Raises whatever the build123d import or ``import_step`` raises (including
    ImportError when build123d is missing). A failure while counting faces is
    tolerated and recorded as ``face_count == 0``.
    """
    from build123d import import_step

    logger.info(f"[build123d] Loading: {step_path.name}")
    shape = import_step(str(step_path))

    try:
        n_faces = sum(1 for _ in shape.faces())
    except Exception:
        n_faces = 0

    tree = _extract_parts_build123d(shape)
    logger.info(f"[build123d] Loaded: {step_path.name} | {n_faces} faces | {len(tree)} parts")
    return StepModel(
        shape=shape,
        backend="build123d",
        path=step_path,
        parts=tree,
        face_count=n_faces,
    )
def _fix_gbk_mojibake(s: str) -> str:
"""
Recover Chinese text stored as mojibake in STEP part labels.
STEP files from Chinese CAD tools (SolidWorks CN, etc.) embed raw GBK bytes
in PRODUCT name strings. OpenCASCADE reads STEP strings as latin-1, which
re-interprets those GBK bytes as latin-1 code points — classic mojibake.
Fix: re-encode the string to latin-1 (restoring the original GBK byte
sequence) then decode as GBK to get correct Unicode Chinese characters.
If the string is pure ASCII, or the round-trip fails (already valid Unicode
or a non-GBK extended char), returns the original string unchanged.
"""
if not s or all(ord(c) < 128 for c in s):
return s # pure ASCII: nothing to fix
try:
return s.encode('latin-1').decode('gbk')
except (UnicodeDecodeError, UnicodeEncodeError):
return s # not GBK mojibake — leave original
def _extract_parts_build123d(shape) -> list:
    """Walk the build123d compound tree and list every named node once.

    Each node below the root yields one dict ``{"name", "level", "parent"}``;
    direct children of the root are level 0 with parent ``""``. A node's name
    is its ``label``, else its ``name``, else ``Part_<level>``, passed through
    ``_fix_gbk_mojibake``.

    When the root has no children it is treated as a single part and listed
    itself (level 0), provided it has a label or name.

    Every node is recorded exactly once. Recording a leaf both as a child and
    again when recursing into it would duplicate it with itself as parent and
    double the quantities derived from this list.
    """
    parts = []

    def _children_of(node):
        # Child access is best-effort: any failure counts as "no children".
        try:
            return list(getattr(node, "children", None) or [])
        except Exception:
            return []

    def _raw_label(node, default=""):
        return getattr(node, "label", "") or getattr(node, "name", "") or default

    def _walk(node, level, parent_name):
        for child in _children_of(node):
            name = _fix_gbk_mojibake(_raw_label(child, f"Part_{level}"))
            parts.append({"name": name, "level": level, "parent": parent_name})
            _walk(child, level + 1, name)

    if _children_of(shape):
        _walk(shape, 0, "")
    else:
        # Single-body file: the root itself is the only part.
        raw = _raw_label(shape)
        if raw:
            parts.append({"name": _fix_gbk_mojibake(raw), "level": 0, "parent": ""})
    return parts
def _load_via_freecad(step_path: Path) -> Optional["StepModel"]:
    """Load using FreeCAD app bundle Python via subprocess.

    A small script is written to a temporary file and executed with
    FREECAD_PYTHON. The script reads the STEP file with ``Part.read`` and
    prints one JSON object with the face count, a flat list of placeholder
    part names (``Part_<i>``, one per sub-shape) and the bounding box.

    Returns:
        A StepModel with backend "freecad" whose ``shape`` is a
        _FreeCADShapeProxy, or None when FreeCAD is not installed at the
        expected path, the script prints no JSON line, it reports a failure,
        the 120 s timeout expires, or any other error occurs.
    """
    if not Path(FREECAD_PYTHON).exists():
        logger.error(f"FreeCAD Python not found at {FREECAD_PYTHON}. Install FreeCAD.app.")
        return None
    logger.info(f"[FreeCAD] Loading: {step_path.name}")
    # repr() embeds the paths as quoted Python literals; doubled braces are
    # literal braces in the generated script.
    script = f"""
import sys, json
sys.path.insert(0, {repr(FREECAD_LIB)})
import FreeCAD, Part
try:
    shape = Part.read({repr(str(step_path))})
    bb = shape.BoundBox
    sub = shape.SubShapes if hasattr(shape, 'SubShapes') else []
    parts = [{{"name": f"Part_{{i}}", "level": 1, "parent": "root"}}
             for i in range(len(sub))]
    print(json.dumps({{"ok": True, "face_count": len(shape.Faces),
                      "parts": parts,
                      "bbox": {{"XMin": bb.XMin, "XMax": bb.XMax,
                               "YMin": bb.YMin, "YMax": bb.YMax,
                               "ZMin": bb.ZMin, "ZMax": bb.ZMax}}}}))
except Exception as e:
    print(json.dumps({{"ok": False, "error": str(e)}}))
"""
    # delete=False: the file must outlive this `with` so the subprocess can
    # read it; it is removed in the `finally` below.
    with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f:
        f.write(script)
        script_path = f.name
    try:
        proc = subprocess.run([FREECAD_PYTHON, script_path],
                              capture_output=True, text=True, timeout=120)
        # Take the first stdout line that starts with "{" as the JSON result;
        # other output lines are ignored.
        json_line = next((l.strip() for l in proc.stdout.splitlines()
                          if l.strip().startswith("{")), None)
        if not json_line:
            logger.error(f"[FreeCAD] No JSON output. stderr: {proc.stderr[:300]}")
            return None
        data = json.loads(json_line)
        if not data.get("ok"):
            logger.error(f"[FreeCAD] Load failed: {data.get('error')}")
            return None
        # No geometry crosses the process boundary: the proxy carries only
        # the bounding box and the face count.
        proxy = _FreeCADShapeProxy(data["bbox"], data["face_count"])
        logger.info(f"[FreeCAD] Loaded: {step_path.name} | {data['face_count']} faces")
        return StepModel(shape=proxy, backend="freecad", path=step_path,
                         parts=data.get("parts", []),
                         face_count=data["face_count"],
                         metadata={"bbox": data["bbox"]})
    except subprocess.TimeoutExpired:
        logger.error("[FreeCAD] Load timed out after 120s")
        return None
    except Exception as e:
        logger.error(f"[FreeCAD] Unexpected error: {e}")
        return None
    finally:
        Path(script_path).unlink(missing_ok=True)
class _FreeCADShapeProxy:
"""Proxy carrying FreeCAD geometry data extracted via subprocess."""
def __init__(self, bbox_dict: dict, face_count: int):
self.BoundBox = _BoundBox(bbox_dict)
self.face_count = face_count
self.Faces = [None] * face_count
def faces(self):
for _ in range(self.face_count):
yield object()
class _BoundBox:
def __init__(self, d: dict):
self.XMin = d.get("XMin", 0); self.XMax = d.get("XMax", 0)
self.YMin = d.get("YMin", 0); self.YMax = d.get("YMax", 0)
self.ZMin = d.get("ZMin", 0); self.ZMax = d.get("ZMax", 0)
+335
View File
@@ -0,0 +1,335 @@
"""
query_engine.py — Natural language geometric query handler.
Supports both single-query (--query "...") and interactive REPL (--repl).
REPL keeps the model in memory between queries for speed.
All output is formatted ASCII tables.
Supported query types (see SKILL.md for full reference):
bounding box overall model extents
face count total faces by type
all parts full assembly listing
list all holes all cylindrical through-features
list all mounting holes cylinders with dia ≤ 15 mm (axis orientation is not checked)
holes diameter N filter by diameter
wall thickness min distance between opposing parallel faces
largest face largest planar face area
help list supported queries
exit / quit exit REPL
"""
import logging
import re
import textwrap
from typing import Optional
from .loader import StepModel
logger = logging.getLogger("step_processor.query")
# Regex patterns for query routing
_BBOX_RE = re.compile(r"\b(bounding.?box|extents|dimensions|overall.?size)\b", re.I)
_FACECOUNT_RE = re.compile(r"\b(face.?count|how many faces|number of faces)\b", re.I)
_PARTS_RE = re.compile(r"\ball.?parts|part.?list|assembly|components\b", re.I)
_HOLES_RE = re.compile(r"\bholes?\b", re.I)
_MOUNTING_RE = re.compile(r"\bmounting\b", re.I)
_DIA_RE = re.compile(r"diameter\s+([\d.]+)\s*mm?", re.I)
_WALL_RE = re.compile(r"\bwall.?thickness\b", re.I)
_LARGEST_RE = re.compile(r"\blargest.?face\b", re.I)
_HELP_RE = re.compile(r"\bhelp\b", re.I)
_EXIT_RE = re.compile(r"\b(exit|quit|q)\b", re.I)
def run_query(model: StepModel, query: str) -> str:
    """Route a query string to its handler and return the formatted answer.

    Routes are tested in a fixed order and the first match wins: exit, help,
    bounding box, face count, parts, holes, wall thickness, largest face.
    Returns the sentinel ``"EXIT"`` for exit/quit, and a "not recognized"
    message when nothing matches.
    """
    q = query.strip()
    if _EXIT_RE.search(q):
        return "EXIT"
    if _HELP_RE.search(q):
        return _help_text()

    routes_before_holes = (
        (_BBOX_RE, _query_bounding_box),
        (_FACECOUNT_RE, _query_face_count),
        (_PARTS_RE, _query_all_parts),
    )
    for pattern, handler in routes_before_holes:
        if pattern.search(q):
            return handler(model)

    if _HOLES_RE.search(q):
        # Optional refinements: "mounting" keyword and "diameter N".
        found = _DIA_RE.search(q)
        wanted_dia = None
        if found:
            wanted_dia = float(found.group(1))
        return _query_holes(
            model,
            mounting_only=bool(_MOUNTING_RE.search(q)),
            dia_filter=wanted_dia,
        )

    routes_after_holes = (
        (_WALL_RE, _query_wall_thickness),
        (_LARGEST_RE, _query_largest_face),
    )
    for pattern, handler in routes_after_holes:
        if pattern.search(q):
            return handler(model)

    return (f"Query not recognized: '{q}'\n"
            f"Type 'help' to see supported queries.")
def repl(model: StepModel, step_path):
    """Run the interactive query loop until the user exits.

    Reads queries from stdin, prints each answer followed by a blank line,
    and returns on exit/quit, end-of-input or Ctrl-C. Blank input lines are
    ignored.
    """
    print(f"\nSTEP Query REPL — {step_path.name}")
    print("Type 'help' for supported queries, 'exit' to quit.\n")
    while True:
        try:
            line = input("> ").strip()
        except (EOFError, KeyboardInterrupt):
            print()  # finish the prompt line before leaving
            return
        if line:
            answer = run_query(model, line)
            if answer == "EXIT":
                return
            print(answer)
            print()
# ── Query implementations ─────────────────────────────────────────────────────
def _query_bounding_box(model: StepModel) -> str:
    """Return the overall model extents as a table (mm and inches).

    build123d models use ``shape.bounding_box().size``. Any other backend is
    expected to expose a FreeCAD-style box (``XMin`` … ``ZMax``), read from
    ``shape.BoundBox`` as provided by the FreeCAD shape proxy; a
    ``bounding_box`` attribute or method is accepted as a fallback.

    Any failure is reported as a "Bounding box query failed" message.
    """
    try:
        if model.backend == "build123d":
            size = model.shape.bounding_box().size
            dims = (size.X, size.Y, size.Z)
        else:
            # The FreeCAD proxy defines `BoundBox`; reading only
            # `bounding_box` as an attribute fails on it.
            bb = getattr(model.shape, "BoundBox", None)
            if bb is None:
                bb = model.shape.bounding_box
                if callable(bb):
                    bb = bb()
            dims = (bb.XMax - bb.XMin, bb.YMax - bb.YMin, bb.ZMax - bb.ZMin)
        x, y, z = (round(v, 2) for v in dims)
        return _table(
            f"BOUNDING BOX — {model.path.name}",
            ["Axis", "Dimension"],
            [["Width (X)", f"{x} mm ({x/25.4:.3f} in)"],
             ["Depth (Y)", f"{y} mm ({y/25.4:.3f} in)"],
             ["Height (Z)", f"{z} mm ({z/25.4:.3f} in)"]]
        )
    except Exception as e:
        return f"Bounding box query failed: {e}"
def _query_face_count(model: StepModel) -> str:
    """Return a table counting planar, cylindrical and other faces.

    Only available for the build123d backend; any failure is reported as a
    "Face count failed" message.
    """
    if model.backend != "build123d":
        return f"Face count query requires build123d (loaded via {model.backend})"
    try:
        # Unused below; retained so the behaviour when build123d cannot be
        # imported stays the same.
        from build123d import Compound

        all_faces = model.shape.faces()
        planar = 0
        cylindrical = 0
        for face in all_faces:
            kind = face.geom_type()
            if kind == "PLANE":
                planar += 1
            elif kind == "CYLINDER":
                cylindrical += 1
        total = len(all_faces)
        other = total - planar - cylindrical
        return _table(
            f"FACE COUNT — {model.path.name}",
            ["Type", "Count"],
            [["Planar", str(planar)],
             ["Cylindrical", str(cylindrical)],
             ["Other", str(other)],
             ["Total", str(total)]]
        )
    except Exception as e:
        return f"Face count failed: {e}"
def _query_all_parts(model: StepModel) -> str:
    """Return a table of ``model.parts`` followed by a total line.

    Missing keys fall back to: part_number "", name "", quantity 1, level 0,
    parent "". An empty parts list yields a "single solid body" message.
    """
    if not model.parts:
        return (f"No assembly structure found in {model.path.name}.\n"
                f"File appears to be a single solid body.")
    rows = [
        [
            part.get("part_number", ""),
            part.get("name", ""),
            str(part.get("quantity", 1)),
            str(part.get("level", 0)),
            part.get("parent", ""),
        ]
        for part in model.parts
    ]
    listing = _table(
        f"ALL PARTS — {model.path.name}",
        ["#", "Name", "Qty", "Level", "Parent"],
        rows
    )
    return listing + f"\nTotal: {len(model.parts)} parts"
def _query_holes(model: StepModel, mounting_only: bool = False,
                 dia_filter: Optional[float] = None) -> str:
    """Return a table of holes plus a per-diameter summary.

    Args:
        model: Loaded model; must use the build123d backend.
        mounting_only: Restrict to holes accepted by the mounting filter of
            ``_find_holes``.
        dia_filter: If given, restrict to holes near this diameter (mm).

    Returns:
        A table of at most 50 holes, a "Showing N of M" line and a summary
        counting holes per diameter (rounded to 0.1 mm) — or a message when
        no holes match, the backend is unsupported, or the query fails.
    """
    if model.backend != "build123d":
        return f"Hole query requires build123d (loaded via {model.backend})"
    try:
        holes = _find_holes(model, mounting_only=mounting_only, dia_filter=dia_filter)
        if not holes:
            label = "mounting holes" if mounting_only else "holes"
            # Leading space keeps the qualifier from running into the label.
            qualifier = f" of diameter {dia_filter} mm" if dia_filter else ""
            return f"No {label}{qualifier} found in {model.path.name}."
        header = "MOUNTING HOLES" if mounting_only else "ALL HOLES"
        # Group by diameter bucket for summary view
        from collections import Counter
        dia_counts = Counter(round(h["dia"], 1) for h in holes)
        MAX_ROWS = 50
        display_holes = holes[:MAX_ROWS]
        rows = [
            [
                str(i),
                f"{h['dia']:.2f} mm",
                f"{h['depth']:.2f} mm" if h["depth"] else "",
                f"({h['x']:.1f}, {h['y']:.1f}, {h['z']:.1f})",
            ]
            for i, h in enumerate(display_holes, 1)
        ]
        # Title uses the same "<HEADER> — <file>" form as the other queries.
        table = _table(
            f"{header} — {model.path.name}",
            ["#", "Diameter", "Depth", "Position (x,y,z)"],
            rows
        )
        rule = "─" * 30  # an empty string repeated would draw nothing
        lines = [
            table,
            f"Showing {len(display_holes)} of {len(holes)} unique hole locations",
            "",
            "DIAMETER SUMMARY",
            rule,
        ]
        lines.extend(f" {dia:.1f} mm ×{count}"
                     for dia, count in sorted(dia_counts.items()))
        lines.append(rule)
        return "\n".join(lines)
    except Exception as e:
        return f"Hole query failed: {e}"
def _find_holes(model: StepModel, mounting_only: bool, dia_filter):
    """Collect de-duplicated cylindrical faces of a build123d model.

    A face is kept when its surface is a cylinder and
      * ``mounting_only`` is false or its diameter is at most 15.0 mm, and
      * ``dia_filter`` is falsy or its diameter is within 0.5 mm of it.

    De-duplication key: (diameter rounded to 0.1 mm, cylinder location
    rounded to whole mm per axis). Only the first face per key is kept, so
    several faces of one physical hole collapse into one entry.

    Returns a list of dicts with keys ``dia``, ``depth``, ``x``, ``y``, ``z``.
    ``depth`` is the largest extent of the face's bounding box. Faces that
    raise while being inspected are skipped; if the face list cannot be
    obtained, the result is an empty list.
    """
    from OCP.BRepAdaptor import BRepAdaptor_Surface
    from OCP.GeomAbs import GeomAbs_Cylinder

    try:
        faces = model.shape.faces()
    except Exception:
        return []

    unique = {}  # de-duplication key → first entry seen
    for face in faces:
        try:
            surface = BRepAdaptor_Surface(face.wrapped)
            if surface.GetType() != GeomAbs_Cylinder:
                continue
            cylinder = surface.Cylinder()
            dia = round(cylinder.Radius() * 2, 2)

            too_large = mounting_only and dia > 15.0
            off_target = dia_filter and abs(dia - dia_filter) > 0.5
            if too_large or off_target:
                continue

            origin = cylinder.Location()
            px, py, pz = origin.X(), origin.Y(), origin.Z()
            key = (round(dia, 1), round(px), round(py), round(pz))
            if key in unique:
                continue

            extent = face.bounding_box().size
            unique[key] = {
                "dia": dia,
                "depth": round(max(extent.X, extent.Y, extent.Z), 2),
                "x": round(px, 1),
                "y": round(py, 1),
                "z": round(pz, 1),
            }
        except Exception:
            continue
    return list(unique.values())
def _query_wall_thickness(model: StepModel) -> str:
    """Estimate the minimum wall thickness from pairs of parallel planar faces.

    Heuristic: for every pair of planar faces whose normals are parallel or
    anti-parallel (|n1·n2| > 0.99), take the distance between the face
    centres measured along the first normal, rounded to 0.001 mm. The
    smallest such distance above 0.01 mm is reported. Whether the two faces
    actually face each other or overlap is not checked.

    Only available for the build123d backend.
    """
    if model.backend != "build123d":
        return f"Wall thickness query requires build123d (loaded via {model.backend})"
    try:
        planar = [f for f in model.shape.faces() if f.geom_type() == "PLANE"]
        if len(planar) < 2:
            return "Insufficient planar faces to determine wall thickness."

        min_t = None
        for idx, first in enumerate(planar):
            n1 = first.normal_at()
            for second in planar[idx + 1:]:
                alignment = abs(n1.dot(second.normal_at()))
                if not alignment > 0.99:
                    continue
                gap = round(abs((first.center() - second.center()).dot(n1)), 3)
                if not gap > 0.01:
                    continue  # coplanar faces: no wall between them
                if min_t is None or gap < min_t:
                    min_t = gap

        if min_t is None:
            return "Could not determine wall thickness from available faces."
        return _table(
            f"WALL THICKNESS — {model.path.name}",
            ["Measurement", "Value"],
            [["Minimum wall thickness",
              f"{min_t} mm ({min_t/25.4:.3f} in)"]]
        )
    except Exception as e:
        return f"Wall thickness query failed: {e}"
def _query_largest_face(model: StepModel) -> str:
    """Return area and bounding-box extents of the largest planar face.

    Only available for the build123d backend. "Width" and "Height" are the X
    and Z sizes of the face's bounding box. Any failure is reported as a
    "Largest face query failed" message.
    """
    if model.backend != "build123d":
        return f"Largest face query requires build123d (loaded via {model.backend})"

    def _area_of(face):
        # build123d documents `area` as a property; calling it as a method
        # raises TypeError on the float. Accept either form.
        area = face.area
        return area() if callable(area) else area

    try:
        faces = model.shape.faces()
        planar = [(f, _area_of(f)) for f in faces if f.geom_type() == "PLANE"]
        if not planar:
            return "No planar faces found."
        largest, area = max(planar, key=lambda pair: pair[1])
        bb = largest.bounding_box()
        return _table(
            f"LARGEST PLANAR FACE — {model.path.name}",
            ["Property", "Value"],
            [["Area", f"{round(area, 2)} mm²"],
             ["Width", f"{round(bb.size.X, 2)} mm"],
             ["Height", f"{round(bb.size.Z, 2)} mm"]]
        )
    except Exception as e:
        return f"Largest face query failed: {e}"
# ── Formatting helpers ─────────────────────────────────────────────────────────
def _table(title: str, headers: list, rows: list) -> str:
col_widths = [len(h) for h in headers]
for row in rows:
for i, cell in enumerate(row):
col_widths[i] = max(col_widths[i], len(str(cell)))
sep = "" * (sum(col_widths) + 3 * len(headers) - 1)
lines = [title, sep]
header_line = " ".join(h.ljust(col_widths[i]) for i, h in enumerate(headers))
lines.append(header_line)
lines.append(sep)
for row in rows:
lines.append(" ".join(str(cell).ljust(col_widths[i]) for i, cell in enumerate(row)))
lines.append(sep)
return "\n".join(lines)
def _help_text() -> str:
    """Return the static help listing of supported queries.

    The literal is passed through ``textwrap.dedent``, so the indentation
    shared by all of its lines is removed from the returned text.
    """
    return textwrap.dedent("""\
    SUPPORTED QUERIES
    ─────────────────────────────────────────────────────────────
    bounding box Overall model extents (W×D×H in mm)
    face count Faces by type (planar, cylindrical, other)
    all parts Full assembly listing with quantities
    list all holes All cylindrical through-features
    list all mounting holes Holes smaller than 15mm diameter
    holes diameter 4.2mm Filter holes by specific diameter
    wall thickness Minimum wall thickness estimate
    largest face Largest planar face area
    help Show this message
    exit Exit the REPL
    ─────────────────────────────────────────────────────────────
    Tip: geometry queries require build123d backend.
    If the file loaded via FreeCAD fallback, only bounding box
    and parts list are available.""")
+321
View File
@@ -0,0 +1,321 @@
"""
renderer.py — Offscreen PNG thumbnail generation.
Pipeline (with color): build123d → GLTF export → trimesh scene → pyrender → PNG
Pipeline (fallback): build123d → per-solid STL → colored trimesh scene → pyrender → PNG
Coordinate convention AFTER STEP→GLTF export→trimesh load:
trimesh applies a Z-up→Y-up GLTF convention that swaps STEP's Y and Z axes:
X = width (~248mm for MR16) — left/right
Y = depth (~41mm for MR16) — front/back; Y_min=screen face, Y_max=back panel
Z = height (~459mm for MR16) — tall axis; Z_min=top end, Z_max=bottom end
"front" camera sits at -Y (screen side) looking toward +Y to see the LCD face.
→ World "up" vector is (0,0,-1) — negative Z = top of display in image.
6 standard views: front, rear, left, right, iso_left, iso_right
"""
import logging
import tempfile
from pathlib import Path
import numpy as np
from .loader import StepModel
logger = logging.getLogger("step_processor.renderer")
# Camera direction vectors: where the camera is PLACED relative to model center.
# Camera always looks toward center from direction * distance.
#
# Trimesh world axes after GLTF load (STEP Y and Z are swapped by GLTF Y-up conv):
# X = width — left/right
# Y = depth — Y_min = screen face (LCD), Y_max = back panel (ports)
# Z = height — Z_min = TOP end of display, Z_max = BOTTOM end of display
#
# "front": camera at -Y (screen side) looks toward +Y → sees LCD face.
# "rear": camera at +Y (back side) looks toward -Y → sees port panel.
# "left": camera at -X looks toward +X → sees left edge.
# "right": camera at +X looks toward -X → sees right edge.
# iso views: -Y component keeps camera on screen side; -Z = toward top end.
# View name → direction from the model centre to the camera position.
# Vectors need not be unit length; _render_single_view() normalizes them.
VIEW_CAMERAS = {
"front": ( 0, -1, 0), # LCD screen face (Y_min side)
"rear": ( 0, 1, 0), # back panel/ports (Y_max side)
"left": (-1, 0, 0), # left edge (X_min side)
"right": ( 1, 0, 0), # right edge (X_max side)
"top": ( 0, 0, -1), # top end (Z_min side)
"bottom": ( 0, 0, 1), # bottom end (Z_max side)
"iso_left": (-1, -1, -0.5), # front-left-above: screen + left edge + top
"iso_right": ( 1, -1, -0.5), # front-right-above: screen + right edge + top
}
# Views rendered when the caller passes none. "top" and "bottom" are defined
# above but only rendered on request.
DEFAULT_VIEWS = ["front", "rear", "left", "right", "iso_left", "iso_right"]
# Color palette for per-part coloring when GLTF has no embedded colors
# 20 distinct RGBA colors (alpha=255, i.e. fully opaque)
PART_COLORS = [
[180, 180, 185, 255], # light steel
[ 70, 130, 180, 255], # steel blue
[205, 133, 63, 255], # peru / bronze
[ 60, 179, 113, 255], # medium sea green
[188, 143, 143, 255], # rosy brown
[100, 149, 237, 255], # cornflower blue
[255, 160, 50, 255], # dark orange
[147, 112, 219, 255], # medium purple
[ 46, 139, 87, 255], # sea green
[205, 92, 92, 255], # indian red
[135, 206, 235, 255], # sky blue
[244, 164, 96, 255], # sandy brown
[106, 90, 205, 255], # slate blue
[ 32, 178, 170, 255], # light sea green
[220, 20, 60, 255], # crimson
[218, 165, 32, 255], # goldenrod
[ 72, 61, 139, 255], # dark slate blue
[143, 188, 143, 255], # dark sea green
[255, 99, 71, 255], # tomato
[176, 196, 222, 255], # light steel blue
]
# Default output size of each rendered PNG, in pixels.
DEFAULT_WIDTH = 1024
DEFAULT_HEIGHT = 768
def render_views(model: StepModel, step_path: Path,
                 views=None, width=DEFAULT_WIDTH, height=DEFAULT_HEIGHT) -> dict:
    """Render the requested views to ``<stem>_<view>.png`` next to the STEP file.

    Args:
        model: Loaded model.
        step_path: Path of the STEP file; decides output folder and file stem.
        views: Iterable of view names from VIEW_CAMERAS; DEFAULT_VIEWS when
            omitted or empty.
        width, height: Image size in pixels.

    Returns:
        Dict mapping each successfully rendered view name to its PNG path.
        Unknown views and views whose render raises are logged and left out;
        the dict is empty when no mesh could be obtained.
    """
    requested = views or DEFAULT_VIEWS
    mesh = _get_mesh(model, step_path)
    if mesh is None:
        logger.warning("Could not obtain mesh — thumbnails skipped")
        return {}

    folder, stem = step_path.parent, step_path.stem
    rendered = {}
    for view_name in requested:
        if view_name not in VIEW_CAMERAS:
            logger.warning(f"Unknown view '{view_name}' — skipping")
            continue
        png_path = folder / f"{stem}_{view_name}.png"
        try:
            _render_single_view(mesh, VIEW_CAMERAS[view_name], png_path, width, height)
        except Exception as e:
            logger.warning(f"Render failed for '{view_name}': {e}")
        else:
            rendered[view_name] = png_path
            logger.info(f"Rendered: {png_path.name}")
    return rendered
def _get_mesh(model: StepModel, step_path: Path):
    """Get a single assembled trimesh.Trimesh from the model.

    Always returns one concatenated mesh (not a Scene) so the camera distance
    and bounds calculations are correct and parts don't explode.

    Priority:
      1. build123d → GLTF → scene.dump() → concatenate with transforms applied
         (keeps per-part colors when the material carries a base color)
      2. build123d → single STL of the full assembly (monochrome but correct)
      3. bounding-box box mesh (non-build123d backends, or when 1 and 2 fail)

    Exports are written into a private scratch directory that is removed as a
    whole afterwards. Deleting only the named export file would leave behind
    any companion file the exporter writes next to it (a text ``.gltf`` can
    reference an external ``.bin`` buffer).

    Returns:
        A trimesh.Trimesh, or None when trimesh is not installed or the
        bounding-box fallback fails as well.
    """
    try:
        import trimesh
    except ImportError:
        logger.warning("trimesh not installed — thumbnails unavailable. pip install trimesh")
        return None

    if model.backend == "build123d":
        import shutil

        scratch = Path(tempfile.mkdtemp(prefix="step_render_"))
        try:
            # ── GLTF path: color-aware, transforms flattened via scene.dump() ──
            try:
                from build123d import export_gltf
                import warnings

                gltf_path = scratch / "model.gltf"
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    export_gltf(model.shape, str(gltf_path))
                scene = trimesh.load(str(gltf_path))
                if isinstance(scene, trimesh.Scene) and scene.geometry:
                    # scene.dump() applies the full transform graph → parts at correct positions
                    dumped = scene.dump()
                    if dumped:
                        # Pull baseColorFactor directly from PBR material per mesh.
                        # .to_color() loses this in some trimesh versions.
                        materialized = []
                        for m in dumped:
                            try:
                                bc = m.visual.material.baseColorFactor
                                if bc is not None:
                                    m.visual = trimesh.visual.ColorVisuals(
                                        mesh=m, face_colors=np.array(bc, dtype=np.uint8))
                                else:
                                    m.visual = trimesh.visual.ColorVisuals(
                                        mesh=m, face_colors=[185, 190, 195, 255])
                            except Exception:
                                try:
                                    m.visual = m.visual.to_color()
                                except Exception:
                                    pass
                            materialized.append(m)
                        mesh = trimesh.util.concatenate(materialized)
                        if mesh is not None and len(mesh.faces) > 0:
                            # Sample every 1000th face to estimate color variety (log only).
                            n_colors = len(set(
                                tuple(c) for c in mesh.visual.face_colors[:, :3][::1000]
                            )) if hasattr(mesh.visual, 'face_colors') else 0
                            logger.info(f"GLTF: assembled {len(mesh.faces)} faces, "
                                        f"~{n_colors} distinct colors sampled")
                            return mesh
            except Exception as e:
                logger.warning(f"GLTF path failed ({e}) — falling back to STL")

            # ── STL fallback: single merged mesh, correct positions, monochrome ──
            try:
                from build123d import export_stl

                stl_path = scratch / "model.stl"
                export_stl(model.shape, str(stl_path))
                mesh = trimesh.load(str(stl_path), force="mesh")
                if mesh is not None and len(mesh.faces) > 0:
                    mesh.visual.face_colors = [185, 190, 195, 255]
                    logger.info(f"STL fallback: {len(mesh.faces)} faces (uniform color)")
                    return mesh
            except Exception as e:
                logger.warning(f"STL fallback failed ({e})")
        finally:
            # Best-effort cleanup; a failure here must not discard the mesh.
            shutil.rmtree(scratch, ignore_errors=True)

    # FreeCAD / last resort: bounding box
    return _bbox_wireframe_mesh(model)
def _mesh_has_color(mesh) -> bool:
    """Report whether the mesh's average face color differs from mid-gray.

    The mean RGB over all faces is compared, channel by channel, against
    (128, 128, 128).  The mesh counts as colored as soon as one channel's
    truncated mean lies 25 or more away from 128.  Note that the test is on
    the mean, not on individual faces.

    Returns False when face colors are missing or empty, or when reading them
    raises for any reason.
    """
    neutral = (128, 128, 128)
    tolerance = 25
    try:
        face_colors = mesh.visual.face_colors
        if face_colors is None or len(face_colors) == 0:
            return False
        # Drop the alpha column, then average each remaining channel.
        channel_means = face_colors[:, :3].mean(axis=0)
        for channel in range(3):
            deviation = abs(int(channel_means[channel]) - neutral[channel])
            if deviation >= tolerance:
                return True
        return False
    except Exception:
        return False
def _bbox_wireframe_mesh(model: StepModel):
    """Build a box mesh that matches the model's bounding box.

    Last-resort stand-in used when no real geometry could be meshed.  Despite
    the name, the result is a solid trimesh box, not a wireframe.

    Returns the box mesh, or None if anything fails (the failure is logged
    as a warning).
    """
    try:
        import trimesh

        bbox = model.shape.bounding_box()
        if hasattr(bbox, "XMin"):
            # Bounds object exposing XMin/XMax-style attributes
            # (the _BoundBox kept by _FreeCADShapeProxy).
            lows = (bbox.XMin, bbox.YMin, bbox.ZMin)
            highs = (bbox.XMax, bbox.YMax, bbox.ZMax)
            extents = [hi - lo for lo, hi in zip(lows, highs)]
            center = [(hi + lo) / 2 for lo, hi in zip(lows, highs)]
        else:
            # build123d BoundBox: size/center vectors with X, Y, Z attributes
            size, middle = bbox.size, bbox.center
            extents = [size.X, size.Y, size.Z]
            center = [middle.X, middle.Y, middle.Z]

        # trimesh creates the box centered on the origin; move it into place.
        box = trimesh.creation.box(extents=extents)
        box.apply_translation(center)
        logger.debug("Using bbox wireframe mesh for rendering")
        return box
    except Exception as e:
        logger.warning(f"Bbox wireframe mesh failed: {e}")
        return None
def _render_single_view(mesh, camera_direction: tuple, out_path: Path,
                        width: int, height: int):
    """Render one view of ``mesh`` offscreen with pyrender and save it.

    Args:
        mesh: mesh object; its ``bounds`` are read and it is handed to
            ``pyrender.Mesh.from_trimesh``.
        camera_direction: vector pointing from the model center towards the
            camera.  Need not be unit length, but must be finite and non-zero.
        out_path: destination image path (written with PIL).
        width: viewport width in pixels.
        height: viewport height in pixels.

    Raises:
        ValueError: if ``camera_direction`` is zero/non-finite, or the mesh
            bounding box has zero or undefined extent.
        ImportError: if pyrender is not installed.
    """
    # Validate first: a zero vector cannot be normalized and would otherwise
    # propagate NaNs into the camera pose and produce a garbage image.
    direction = np.array(camera_direction, dtype=float)
    direction_norm = np.linalg.norm(direction)
    if not np.isfinite(direction_norm) or direction_norm < 1e-12:
        raise ValueError(
            f"camera_direction must be a finite, non-zero vector, got {camera_direction!r}")
    direction = direction / direction_norm

    try:
        import pyrender
    except ImportError as exc:
        raise ImportError("pyrender not installed. pip install pyrender") from exc

    # Bounding box of the assembled mesh
    bounds = mesh.bounds  # shape (2,3): [[xmin,ymin,zmin],[xmax,ymax,zmax]]
    center = (bounds[0] + bounds[1]) / 2.0
    diag = np.linalg.norm(bounds[1] - bounds[0])
    if not np.isfinite(diag) or diag <= 0.0:
        # A zero diagonal would place the camera at the center and make the
        # field of view 0/0.
        raise ValueError("mesh has zero or undefined extent; nothing to render")

    # Camera sits at 2.5× diagonal distance from center, looking at center
    camera_distance = diag * 2.5
    eye = center + direction * camera_distance

    # World up: (0,0,-1) = negative Z = top of display in trimesh GLTF space.
    # Fallback to (0,-1,0) for top/bottom end views where direction ≈ ±Z.
    world_up = np.array([0, 0, -1], dtype=float)
    if abs(np.dot(direction, world_up)) > 0.9:
        world_up = np.array([0, -1, 0], dtype=float)

    # Camera frame (right-handed):
    #   right  = cross(world_up, direction)   [world_up × backward]
    #   cam_up = cross(direction, right)      [backward × right]
    #   col2   = direction                    [camera +Z = backward; looks down -Z]
    right = np.cross(world_up, direction)
    if np.linalg.norm(right) < 1e-8:
        right = np.cross(np.array([1, 0, 0], dtype=float), direction)
    right = right / np.linalg.norm(right)
    cam_up = np.cross(direction, right)
    cam_up = cam_up / np.linalg.norm(cam_up)

    # 4×4 camera pose: columns = [right, cam_up, backward, eye]
    camera_pose = np.eye(4)
    camera_pose[:3, 0] = right
    camera_pose[:3, 1] = cam_up
    camera_pose[:3, 2] = direction  # camera +Z = backward; pyrender looks down -Z
    camera_pose[:3, 3] = eye

    # Build pyrender scene — white background
    pr_scene = pyrender.Scene(ambient_light=[0.35, 0.35, 0.35],
                              bg_color=[255, 255, 255, 255])
    pr_scene.add(pyrender.Mesh.from_trimesh(mesh, smooth=False))

    # FOV sized so the model fills ~80% of the frame
    yfov = 2.0 * np.arctan((diag * 0.5) / camera_distance) * 1.25
    camera = pyrender.PerspectiveCamera(yfov=yfov, aspectRatio=width / height)
    pr_scene.add(camera, pose=camera_pose)

    # Key light shares the camera pose, so it shines along the view direction.
    pr_scene.add(pyrender.DirectionalLight(color=np.ones(3), intensity=4.5),
                 pose=camera_pose)

    # Fill light: positioned along -direction offset by (0,0,-1); the offset
    # sum is zero only when direction == (0,0,-1), hence the fallback.
    # NOTE(review): fill_pose keeps an identity rotation and only receives a
    # translation.  pyrender documents directional lights as taking their
    # direction from the node orientation (local -Z) and ignoring position,
    # so fill_eye likely has no effect and this light always points along
    # world -Z.  Left as-is to keep rendered output unchanged — confirm the
    # intended fill direction before orienting the pose.
    fill_pose = np.eye(4)
    fill_dir = -direction + np.array([0, 0, -1], dtype=float)
    fill_norm = np.linalg.norm(fill_dir)
    if fill_norm < 1e-8:
        fill_dir = np.array([0, 0, -1], dtype=float)
    else:
        fill_dir = fill_dir / fill_norm
    fill_eye = center - fill_dir * camera_distance
    fill_pose[:3, 3] = fill_eye
    pr_scene.add(pyrender.DirectionalLight(color=np.ones(3), intensity=1.8),
                 pose=fill_pose)

    # Offscreen render; always release the renderer's GL resources.
    r = pyrender.OffscreenRenderer(viewport_width=width, viewport_height=height)
    try:
        color, _ = r.render(pr_scene)
    finally:
        r.delete()

    from PIL import Image
    Image.fromarray(color).save(str(out_path))
+103
View File
@@ -0,0 +1,103 @@
"""
rewriter.py — STEP label rewriter for Chinese→English translation.
Produces {stem}_EN.step — NEVER modifies source file.
Targets only PRODUCT entity name strings.
Validates entity count before/after to ensure file integrity.
"""
import logging
import re
from pathlib import Path
logger = logging.getLogger("step_processor.rewriter")

# Matches the first two quoted string arguments of a PRODUCT entity instance:
#     #N = PRODUCT('id', 'name', 'description', ...)
# Per the original author's note, CAD viewers typically display the second
# string and Chinese CAD exports set both to the same Chinese text, so both
# are captured for replacement.
# Capture groups:
#   1 - everything up to and including the opening quote of the first string
#   2 - first string (id)
#   3 - closing quote, comma, optional whitespace, opening quote
#   4 - second string (name)
#   5 - closing quote of the second string
# Both string groups are [^']*, so a value containing an apostrophe is not
# captured whole, and the comma must directly follow the first closing quote.
PRODUCT_PATTERN = re.compile(
    r"(#\d+\s*=\s*PRODUCT\s*\(\s*')([^']*)(',\s*')([^']*)(')",
    re.IGNORECASE
)
# Matches the head of an entity instance ("#N = KEYWORD(") at the start of a
# line.  rewrite_step counts these before and after rewriting as an
# integrity check.
ENTITY_PATTERN = re.compile(r"^#\d+\s*=\s*\S+\s*\(", re.MULTILINE)
def _read_step_for_rewrite(source_path: Path) -> str:
    """Read a STEP file as text, trying UTF-8, then GBK, then latin-1.

    STEP files from Chinese CAD tools may embed raw GBK bytes in PRODUCT name
    strings.  Such bytes normally make strict UTF-8 decoding fail, in which
    case GBK is tried next so the Chinese→English lookup can find the names.

    A file that decodes as UTF-8 but already contains U+FFFD replacement
    characters (i.e. it was mangled by an earlier tool) is also retried as
    GBK; if GBK cannot decode it, the UTF-8 text is returned rather than
    falling through to latin-1 mojibake.

    Args:
        source_path: path of the STEP file to read.

    Returns:
        The decoded file content.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    utf8_with_replacements = None
    for enc in ("utf-8", "gbk"):
        try:
            text = source_path.read_text(encoding=enc)
        except (UnicodeDecodeError, LookupError):
            continue
        # Explicit escape instead of a literal replacement character: a literal
        # is easily lost in transit, which turns this into `'' in text`
        # (always true) and makes UTF-8 input unreadable.
        if enc == "utf-8" and "\ufffd" in text:
            utf8_with_replacements = text
            continue  # has replacement chars — retry as GBK
        return text
    if utf8_with_replacements is not None:
        return utf8_with_replacements
    # latin-1 maps every byte to a code point, so this read cannot fail on
    # decoding.
    return source_path.read_text(encoding="latin-1", errors="replace")
def rewrite_step(source_path: Path, translation_map: dict):
    """Produce an English-labeled copy of a STEP file next to the source.

    The source file is only read.  The copy is written as
    ``{stem}_EN.step`` in the same directory, encoded as UTF-8.

    Args:
        source_path: path of the original STEP file.
        translation_map: mapping of original label → English label.

    Returns:
        The output Path, or None when nothing needed rewriting, the entity
        count changed during rewriting, or reading/writing failed.
    """
    if not translation_map:
        logger.info("No translations to apply — _EN.step skipped")
        return None

    try:
        source_text = _read_step_for_rewrite(source_path)
    except Exception as e:
        logger.error(f"Could not read source STEP: {e}")
        return None

    # Cheap pre-check.  Non-string keys (e.g. NaN from a DataFrame) would make
    # `in` raise TypeError, and an empty key is contained in every text.
    if not any(isinstance(orig, str) and orig and orig in source_text
               for orig in translation_map):
        logger.info("No Chinese labels in STEP text — _EN.step skipped")
        return None

    original_count = len(ENTITY_PATTERN.findall(source_text))

    replaced_count = 0
    output_lines = []
    # keepends=True so joining the pieces reproduces every untouched byte of text.
    for line in source_text.splitlines(keepends=True):
        new_line, count = _replace_product_names(line, translation_map)
        replaced_count += count
        output_lines.append(new_line)
    output_text = "".join(output_lines)

    # Integrity check: rewriting labels must not add or remove entities.
    new_count = len(ENTITY_PATTERN.findall(output_text))
    if new_count != original_count:
        logger.error(
            f"Entity count mismatch: {original_count} → {new_count}. "
            "Aborting — source file untouched.")
        return None

    if replaced_count == 0:
        logger.info("No PRODUCT entities matched — _EN.step skipped")
        return None

    out_path = source_path.parent / f"{source_path.stem}_EN.step"
    try:
        out_path.write_text(output_text, encoding="utf-8")
        logger.info(f"_EN.step written: {out_path.name} ({replaced_count} labels replaced)")
        return out_path
    except Exception as e:
        logger.error(f"Failed to write _EN.step: {e}")
        # Best-effort removal of a partial file; a cleanup failure must not
        # escape, since callers expect None on failure.
        try:
            out_path.unlink(missing_ok=True)
        except OSError as cleanup_error:
            logger.debug(f"Could not remove partial _EN.step: {cleanup_error}")
        return None
def _replace_product_names(line: str, translation_map: dict):
    """Replace the id and name strings of PRODUCT entities found in ``line``.

    For each PRODUCT_PATTERN match, the translation is looked up by the id
    string first and by the name string second.  When one is found, BOTH
    strings are replaced with it; otherwise the match is left untouched.

    The translation is escaped for a STEP string literal (apostrophe and
    backslash doubled) so that a label such as "Operator's Panel" cannot
    terminate the string early and corrupt the file.  Non-string or empty map
    values are ignored.

    Args:
        line: text to process (normally a single line of the STEP file).
        translation_map: mapping of original label → English label.

    Returns:
        Tuple ``(new_line, count)`` where ``count`` is the number of PRODUCT
        entities that were rewritten.
    """
    count = 0

    def replacer(m):
        nonlocal count
        # Try id field first (group 2), fall back to name field (group 4).
        translated = translation_map.get(m.group(2)) or translation_map.get(m.group(4))
        if not translated or not isinstance(translated, str):
            return m.group(0)
        count += 1
        # Backslash first, so the doubling does not touch the quote escapes.
        safe = translated.replace("\\", "\\\\").replace("'", "''")
        # Replace both the id field and the name field.
        return m.group(1) + safe + m.group(3) + safe + m.group(5)

    new_line = PRODUCT_PATTERN.sub(replacer, line)
    return new_line, count
+107
View File
@@ -0,0 +1,107 @@
"""
translator.py — Chinese to English part name translation via Claude API.
Detects CJK unicode range. Batches all names in a single API call per file.
Flags uncertain translations in the notes column.
"""
import json
import logging
import os
import re
import pandas as pd
logger = logging.getLogger("step_processor.translator")

# Matches one CJK ideograph.  The two literal ranges appear to be the CJK
# Unified Ideographs block and Extension A (U+4E00–U+9FFF, U+3400–U+4DBF) —
# code points read off the literal characters; confirm.
CJK_PATTERN = re.compile(r'[一-鿿㐀-䶿]')

# System prompt for the translation request: asks for a bare JSON object
# mapping each original Chinese name to its English translation.
# NOTE(review): translate_bom tags a row "ambiguous-translation" when the
# translated text contains "[?]", but nothing in this prompt asks the model
# to emit that marker — confirm whether the instruction was meant to be here.
SYSTEM_PROMPT = (
    "You are a mechanical engineering translator specializing in Chinese "
    "manufacturing CAD files for display and enclosure products. "
    "Translate the following part names from Chinese to English. "
    "Preserve technical precision. Use standard hardware/manufacturing terminology. "
    "Output ONLY a JSON object mapping original Chinese to translated English, nothing else.\n"
    'Example: {"安装支架": "Mounting Bracket", "螺钉M4": "M4 Screw", "前面板": "Front Panel"}'
)
def has_chinese(text: str) -> bool:
    """Tell whether ``text`` contains at least one character matched by CJK_PATTERN.

    The argument is converted with ``str()`` before searching, so non-string
    values are accepted.
    """
    found = CJK_PATTERN.search(str(text))
    return found is not None
def _append_note(existing, tag: str) -> str:
    """Return ``existing`` with ``tag`` appended after "; "; empty, None or NaN notes yield just ``tag``."""
    if isinstance(existing, str):
        return f"{existing}; {tag}" if existing else tag
    if existing is None or pd.isna(existing) or not existing:
        return tag
    return f"{existing}; {tag}"


def translate_bom(df: pd.DataFrame, model_name: str = "") -> pd.DataFrame:
    """Detect Chinese part names and translate them via the Claude API.

    Rows whose ``part_name_original`` contains CJK characters get
    ``part_name_english`` filled in and a tag appended to ``notes``:
    "machine-translated", "ambiguous-translation" (translation contains
    "[?]"), "translation-missing" (no usable entry returned for that name;
    the original name is kept), or "translation-failed" (the API returned
    nothing at all).

    Args:
        df: BOM table with ``part_name_original``, ``part_name_english`` and
            ``notes`` columns.  It is modified in place.  Rows are addressed
            by index label, so labels are assumed to be unique.
        model_name: optional product model name passed to the API as context.

    Returns:
        The same DataFrame object that was passed in.
    """
    needs_translation = df["part_name_original"].apply(has_chinese).astype(bool)
    chinese_names = df.loc[needs_translation, "part_name_original"].unique().tolist()
    if not chinese_names:
        logger.info("No Chinese part names detected — translation skipped")
        return df

    logger.info(f"Translating {len(chinese_names)} Chinese part names...")
    translation_map = _call_claude_api(chinese_names, model_name)
    if not isinstance(translation_map, dict) or not translation_map:
        logger.warning("Translation API returned no results — retaining original names")
        df.loc[needs_translation, "notes"] = (
            df.loc[needs_translation, "notes"].apply(
                lambda n: _append_note(n, "translation-failed")))
        return df

    translated_rows = 0
    # Iterate a filtered copy, so writing back into df during the loop is safe.
    for idx, row in df.loc[needs_translation].iterrows():
        original = row["part_name_original"]
        translated = translation_map.get(original)
        if isinstance(translated, str) and translated.strip():
            df.at[idx, "part_name_english"] = translated
            note_tag = "ambiguous-translation" if "[?]" in translated else "machine-translated"
            translated_rows += 1
        else:
            # Missing, empty or non-string entry: keep the original name.
            df.at[idx, "part_name_english"] = original
            note_tag = "translation-missing"
        df.at[idx, "notes"] = _append_note(row["notes"], note_tag)

    logger.info(f"Translated {translated_rows} parts")
    return df
def get_translation_map(df: pd.DataFrame) -> dict:
    """Return a dict of original → english for all translated rows.

    A row contributes an entry only when both ``part_name_original`` and
    ``part_name_english`` are non-empty strings that differ.  Missing values
    (None/NaN) are skipped: NaN compares unequal to itself and would otherwise
    land in the map as a key or value and break the string operations that
    consume it.  When an original name occurs more than once, the last row
    wins.
    """
    pairs = zip(df["part_name_original"], df["part_name_english"])
    return {
        original: english
        for original, english in pairs
        if isinstance(original, str) and isinstance(english, str)
        and original and english and original != english
    }
def _call_claude_api(names: list, model_name: str = "") -> dict:
    """Translate part names with a single batched Claude API call.

    Args:
        names: original part names to translate.
        model_name: optional product model name, added to the request as
            context (this is not the Claude model id).

    Returns:
        Dict of original → translated containing only string keys with
        non-blank string values.  Empty when there is nothing to translate,
        the API key or the ``anthropic`` package is missing, the request
        fails, or the reply is not a JSON object.
    """
    if not names:
        return {}

    api_key = os.environ.get("ANTHROPIC_API_KEY")
    if not api_key:
        logger.error("ANTHROPIC_API_KEY not set — translation unavailable")
        return {}

    try:
        import anthropic
    except ImportError:
        logger.error("anthropic package not installed — pip install anthropic")
        return {}

    names_json = json.dumps(names, ensure_ascii=False)
    user_msg = f"Translate these part names from Chinese to English:\n{names_json}"
    if model_name:
        user_msg += f"\n\nContext: Parts from a {model_name} display enclosure assembly."

    try:
        client = anthropic.Anthropic(api_key=api_key)
        response = client.messages.create(
            model="claude-haiku-4-5-20251001",
            max_tokens=2048,
            system=SYSTEM_PROMPT,
            messages=[{"role": "user", "content": user_msg}],
        )
        if getattr(response, "stop_reason", None) == "max_tokens":
            # A cut-off reply is the usual cause of the JSON error below.
            logger.warning("Translation response hit max_tokens — output may be truncated")
        text = response.content[0].text.strip()
        # Tolerate prose or code fences around the JSON object.
        json_match = re.search(r'\{.*\}', text, re.DOTALL)
        if json_match:
            text = json_match.group(0)
        result = json.loads(text)
    except json.JSONDecodeError as e:
        logger.error(f"Translation API JSON parse error: {e}")
        return {}
    except Exception as e:
        logger.error(f"Translation API error: {type(e).__name__}: {e}")
        return {}

    if not isinstance(result, dict):
        logger.error(
            f"Translation API returned {type(result).__name__}, expected a JSON object")
        return {}

    # Keep only usable str → str pairs; callers call .get() on this mapping
    # and treat the values as text.
    translations = {
        original: translated
        for original, translated in result.items()
        if isinstance(original, str) and isinstance(translated, str) and translated.strip()
    }
    logger.info(f"API returned {len(translations)} translations")
    return translations