Files
step-parse/skill.src/modules/bom.py
T
Jason Stedwell c1abe36822 phase 0
2026-06-17 16:03:26 -05:00

276 lines
10 KiB
Python

"""
bom.py — BOM extraction from STEP assembly tree.
Primary: build123d assembly traversal.
Fallback: STEP ISO 10303-21 text parser for PRODUCT entities.
Always produces a complete DataFrame; saved as MPM-branded Excel (.xlsx).
"""
import logging
import math
import re
from collections import Counter
from pathlib import Path
import pandas as pd
from .loader import StepModel
logger = logging.getLogger("step_processor.bom")
# Internal BOM schema: the column names (and order) used for the DataFrame
# that extract_bom() builds.
BOM_COLUMNS = [
    "part_number",
    "part_name_original",
    "part_name_english",
    "quantity",
    "level",
    "parent",
    "bbox_x_mm",
    "bbox_y_mm",
    "bbox_z_mm",
    "notes",
]

# ── Excel output — MPM brand palette (hex, no #) ─────────────────────────────
_MPM_DARK_SHADE = "232022"      # header background + body text
_MPM_LIGHT_SHADE = "F5F1EC"     # header text
_MPM_WARM_OFF_WHITE = "FAF7F2"  # alternating row tint
_MPM_MIDDLE_GOLD = "DCBB4F"     # accent border under header row

# Stakeholder-facing Excel output renames and reorders two name columns.
# Internal processing always uses the BOM_COLUMNS names.
_XLSX_RENAME = dict(
    part_name_english="part_description",
    part_name_original="part_name_supplier",
)

# Left-to-right column order of the worksheet (post-rename names).
_XLSX_ORDER = [
    "part_number",
    "part_description",
    "quantity",
    "level",
    "parent",
    "bbox_x_mm",
    "bbox_y_mm",
    "bbox_z_mm",
    "notes",
    "part_name_supplier",
]

# Header caption shown in row 1 for each worksheet column.
_XLSX_HEADERS = dict(
    part_number="Part #",
    part_description="Part Description",
    quantity="Qty",
    level="Level",
    parent="Parent",
    bbox_x_mm="X (mm)",
    bbox_y_mm="Y (mm)",
    bbox_z_mm="Z (mm)",
    notes="Notes",
    part_name_supplier="Supplier Part Name",
)

# Column width applied to each worksheet column.
_XLSX_WIDTHS = dict(
    part_number=12,
    part_description=40,
    quantity=8,
    level=7,
    parent=22,
    bbox_x_mm=11,
    bbox_y_mm=11,
    bbox_z_mm=11,
    notes=34,
    part_name_supplier=40,
)
def _safe(v):
    """Map None and float NaN to None; pass every other value through.

    openpyxl writes None as a blank cell, so missing values end up empty
    in the sheet.
    """
    is_nan = isinstance(v, float) and math.isnan(v)
    return None if (v is None or is_nan) else v
def extract_bom(model: StepModel) -> pd.DataFrame:
    """Build the BOM for a loaded StepModel as a DataFrame with BOM_COLUMNS.

    Source priority
    ---------------
    1. STEP text parser (_bom_from_step_text) — always tried first.  It reads
       the file with GBK/UTF-8 encoding detection, so Chinese CAD part labels
       are decoded correctly.
    2. OCC assembly walk (_bom_from_parts) — only when the text parser yields
       nothing, the backend is build123d and model.parts is non-empty.  OCC's
       STEP reader (used by build123d) maps each 2-byte GBK sequence to an
       incorrect Unicode codepoint that cannot be recovered, which is why it
       is never the first choice for part names.
    3. Single-part row — when neither source produced rows, the file stem is
       used as the name of the one and only part.

    For the build123d backend the bbox_* columns are then filled on a
    best-effort basis by _enrich_bboxes.
    """
    # Primary: STEP text parser — encoding-aware, correct for ASCII and CJK files
    rows = _bom_from_step_text(model.path)

    if not rows:
        if model.backend == "build123d" and model.parts:
            # Fallback: OCC assembly walk (CJK names will be garbled but structure intact)
            logger.debug("STEP text parser empty — falling back to OCC assembly walk")
            rows = _bom_from_parts(model.parts)

    if not rows:
        logger.info("No assembly structure — treating as single part")
        stem = model.path.stem
        single = dict(
            part_number="001",
            part_name_original=stem,
            part_name_english=stem,
            quantity=1,
            level=0,
            parent="",
            bbox_x_mm=None,
            bbox_y_mm=None,
            bbox_z_mm=None,
            notes="single-body file",
        )
        rows = [single]

    df = pd.DataFrame(rows, columns=BOM_COLUMNS)
    if model.backend == "build123d":
        df = _enrich_bboxes(model, df)

    logger.info(f"BOM extracted: {len(df)} parts")
    return df
def _bom_from_parts(parts: list) -> list:
    """Collapse a list of part dicts into one BOM row per distinct name.

    Each entry must have a "name" key; "level" and "parent" are optional
    and default to 0 and "".  Rows appear in order of first occurrence and
    take level/parent from that first occurrence.  "quantity" is the number
    of entries sharing the name, and part numbers are 1-based, zero-padded
    to three digits.
    """
    occurrences = Counter(part["name"] for part in parts)

    # Remember the first dict seen for every name (dicts keep insertion order).
    first_by_name = {}
    for part in parts:
        first_by_name.setdefault(part["name"], part)

    return [
        {
            "part_number": f"{seq:03d}",
            "part_name_original": name,
            "part_name_english": name,
            "quantity": occurrences[name],
            "level": part.get("level", 0),
            "parent": part.get("parent", ""),
            "bbox_x_mm": None, "bbox_y_mm": None, "bbox_z_mm": None,
            "notes": "",
        }
        for seq, (name, part) in enumerate(first_by_name.items(), 1)
    ]
def _read_step_text(step_path: Path) -> str:
    """Read STEP file text with CJK-aware encoding detection.

    STEP files from Chinese manufacturers embed raw GBK bytes in name strings.
    Strategy: try UTF-8 first (correct for modern files); if the decoded text
    contains U+FFFD replacement characters, or decoding fails, retry as GBK
    (covers Chinese CAD exports); finally fall back to latin-1, which never
    fails (may contain mojibake, but at least it's readable).

    Args:
        step_path: Path of the STEP file to read.

    Returns:
        The decoded file contents.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    for enc in ("utf-8", "gbk"):
        try:
            text = step_path.read_text(encoding=enc)
        except (UnicodeDecodeError, LookupError):
            continue
        # The sentinel is written as an escape on purpose: a literal U+FFFD in
        # the source is easily lost in transit, which leaves an empty-string
        # test that is always true and rejects every UTF-8 file.
        if enc == "utf-8" and "\ufffd" in text:
            # Replacement chars detected — treat as mis-transcoded, try GBK
            continue
        return text
    return step_path.read_text(encoding="latin-1", errors="replace")
def _bom_from_step_text(step_path: Path) -> list:
    """Parse STEP ISO 10303-21 PRODUCT entities directly.

    The first string argument of every ``#n = PRODUCT('...'`` entity is taken
    as the part name.  Repeated names are merged into one row whose quantity
    counts the repetitions; empty names and the placeholder "NONE" (any case)
    are skipped.  All rows are emitted at level 0 with no parent.

    Args:
        step_path: Path of the STEP file to parse.

    Returns:
        A list of BOM row dicts in order of first appearance, or an empty
        list when the file cannot be read or holds no usable PRODUCT names.
    """
    try:
        text = _read_step_text(step_path)
    except Exception as e:
        # Best-effort parser: an unreadable file just means "no rows here".
        logger.warning(f"Could not read STEP text: {e}")
        return []

    # ISO 10303-21 encodes an apostrophe inside a string as two apostrophes,
    # so the string body is "any non-quote char, or a doubled quote".
    pattern = re.compile(
        r"#\d+\s*=\s*PRODUCT\s*\(\s*'((?:[^']|'')*)'", re.IGNORECASE
    )
    seen = {}
    for match in pattern.finditer(text):
        # Undo the apostrophe doubling before using the name.
        name = match.group(1).replace("''", "'").strip()
        if not name or name.upper() == "NONE":
            continue
        if name in seen:
            seen[name]["quantity"] += 1
        else:
            seen[name] = {
                "part_number": f"{len(seen)+1:03d}",
                "part_name_original": name, "part_name_english": name,
                "quantity": 1, "level": 0, "parent": "",
                "bbox_x_mm": None, "bbox_y_mm": None, "bbox_z_mm": None,
                "notes": "parsed from STEP text",
            }
    rows = list(seen.values())
    if rows:
        logger.info(f"STEP text parser found {len(rows)} unique part names")
    return rows
def _enrich_bboxes(model: StepModel, df: pd.DataFrame) -> pd.DataFrame:
    """Fill the bbox_*_mm columns of df from build123d bounding boxes.

    Best-effort: a one-row BOM receives the bounding box of the whole shape.
    Otherwise the i-th entry of model.shape.children is written to row label
    i, stopping when either the rows or the children run out.  Sizes are
    rounded to two decimals.  A child whose bounding box fails is skipped
    silently; any other failure is logged at debug level.  df is modified in
    place and also returned.
    """
    axis_columns = (("bbox_x_mm", "X"), ("bbox_y_mm", "Y"), ("bbox_z_mm", "Z"))

    def _store(row_label, box):
        # Write the three rounded extents of `box` into one BOM row.
        for column, axis in axis_columns:
            df.at[row_label, column] = round(getattr(box.size, axis), 2)

    try:
        overall = model.shape.bounding_box()
        if len(df) == 1:
            _store(0, overall)
        else:
            kids = getattr(model.shape, "children", []) or []
            # NOTE(review): pairs children with BOM rows purely by position —
            # confirm the child order really matches the row order.
            for row_label, kid in zip(range(len(df)), kids):
                try:
                    _store(row_label, kid.bounding_box())
                except Exception:
                    # Leave this row's bbox blank and carry on with the rest.
                    pass
    except Exception as e:
        logger.debug(f"bbox enrichment skipped: {e}")
    return df
def save_bom_csv(df: pd.DataFrame, step_path: Path) -> Path:
    """Write the BOM DataFrame as ``<stem>_bom.csv`` beside the STEP file.

    Legacy fallback for the Excel writer.  The index is not written.
    Returns the path of the CSV that was created.
    """
    csv_name = f"{step_path.stem}_bom.csv"
    csv_path = step_path.parent / csv_name
    df.to_csv(csv_path, index=False)
    logger.info(f"BOM CSV → {csv_path.name}")
    return csv_path
def save_bom_xlsx(df: pd.DataFrame, step_path: Path) -> Path:
    """Write the BOM DataFrame as an MPM-branded ``<stem>_bom.xlsx`` workbook.

    Column changes vs internal schema (BOM_COLUMNS):
        part_name_english  → Part Description (column 2)
        part_name_original → Supplier Part Name (last column)

    The sheet has a styled, frozen header row followed by one row per BOM
    entry with alternating row tints.  Falls back to save_bom_csv (and
    returns its path) if openpyxl is unavailable.
    """
    out_path = step_path.parent / f"{step_path.stem}_bom.xlsx"
    try:
        from openpyxl import Workbook
        from openpyxl.styles import Alignment, Border, Font, PatternFill, Side
        from openpyxl.utils import get_column_letter
    except ImportError:
        logger.warning("openpyxl not installed — falling back to CSV")
        return save_bom_csv(df, step_path)

    # Display frame: renamed columns, missing ones added blank, sheet order.
    disp = df.rename(columns=_XLSX_RENAME).copy()
    absent = [col for col in _XLSX_ORDER if col not in disp.columns]
    for col in absent:
        disp[col] = None
    disp = disp[_XLSX_ORDER]

    wb = Workbook()
    ws = wb.active
    ws.title = "Bill of Materials"

    # Header row: dark fill, light bold text, gold rule underneath.
    header_style = {
        "font": Font(name="Montserrat", bold=True, color=_MPM_LIGHT_SHADE, size=10),
        "fill": PatternFill("solid", fgColor=_MPM_DARK_SHADE),
        "alignment": Alignment(horizontal="center", vertical="center", wrap_text=True),
        "border": Border(bottom=Side(style="medium", color=_MPM_MIDDLE_GOLD)),
    }
    for col_idx, col in enumerate(_XLSX_ORDER, 1):
        header_cell = ws.cell(row=1, column=col_idx, value=_XLSX_HEADERS.get(col, col))
        for attr, style in header_style.items():
            setattr(header_cell, attr, style)
        ws.column_dimensions[get_column_letter(col_idx)].width = _XLSX_WIDTHS.get(col, 15)
    ws.row_dimensions[1].height = 28

    # Data rows: even sheet rows are tinted, odd ones stay white.
    body_font = Font(name="Open Sans", size=10, color=_MPM_DARK_SHADE)
    body_align = Alignment(horizontal="left", vertical="center")
    tinted_fill = PatternFill("solid", fgColor=_MPM_WARM_OFF_WHITE)
    white_fill = PatternFill("solid", fgColor="FFFFFF")
    for offset, (_, record) in enumerate(disp.iterrows()):
        sheet_row = offset + 2  # row 1 holds the header
        row_fill = tinted_fill if sheet_row % 2 == 0 else white_fill
        for col_idx, col in enumerate(_XLSX_ORDER, 1):
            body_cell = ws.cell(row=sheet_row, column=col_idx, value=_safe(record[col]))
            body_cell.font = body_font
            body_cell.fill = row_fill
            body_cell.alignment = body_align

    ws.freeze_panes = "A2"  # keep the header visible while scrolling
    wb.save(str(out_path))
    logger.info(f"BOM XLSX → {out_path.name}")
    return out_path