276 lines
10 KiB
Python
276 lines
10 KiB
Python
"""
|
|
bom.py — BOM extraction from STEP assembly tree.
|
|
|
|
Primary: build123d assembly traversal.
|
|
Fallback: STEP ISO 10303-21 text parser for PRODUCT entities.
|
|
Always produces a complete DataFrame; saved as MPM-branded Excel (.xlsx).
|
|
"""
|
|
import logging
|
|
import math
|
|
import re
|
|
from collections import Counter
|
|
from pathlib import Path
|
|
|
|
import pandas as pd
|
|
|
|
from .loader import StepModel
|
|
|
|
# Module-level logger (explicit name rather than __name__).
logger = logging.getLogger("step_processor.bom")

# Internal BOM schema: extract_bom() builds its DataFrame with exactly these
# columns, in this order.
BOM_COLUMNS = [
    "part_number",
    "part_name_original",
    "part_name_english",
    "quantity",
    "level",
    "parent",
    "bbox_x_mm",
    "bbox_y_mm",
    "bbox_z_mm",
    "notes",
]
|
|
|
|
# ── Excel output — MPM brand palette (hex, no #) ─────────────────────────────
|
|
_MPM_DARK_SHADE = "232022" # header background + body text
|
|
_MPM_LIGHT_SHADE = "F5F1EC" # header text
|
|
_MPM_WARM_OFF_WHITE = "FAF7F2" # alternating row tint
|
|
_MPM_MIDDLE_GOLD = "DCBB4F" # accent border under header row
|
|
|
|
# Column rename + reorder for stakeholder-facing Excel output.
|
|
# Internal processing always uses BOM_COLUMNS names.
|
|
_XLSX_RENAME = {
|
|
"part_name_english": "part_description",
|
|
"part_name_original": "part_name_supplier",
|
|
}
|
|
_XLSX_ORDER = [
|
|
"part_number", "part_description", "quantity", "level", "parent",
|
|
"bbox_x_mm", "bbox_y_mm", "bbox_z_mm", "notes", "part_name_supplier",
|
|
]
|
|
_XLSX_HEADERS = {
|
|
"part_number": "Part #",
|
|
"part_description": "Part Description",
|
|
"quantity": "Qty",
|
|
"level": "Level",
|
|
"parent": "Parent",
|
|
"bbox_x_mm": "X (mm)",
|
|
"bbox_y_mm": "Y (mm)",
|
|
"bbox_z_mm": "Z (mm)",
|
|
"notes": "Notes",
|
|
"part_name_supplier": "Supplier Part Name",
|
|
}
|
|
_XLSX_WIDTHS = {
|
|
"part_number": 12, "part_description": 40, "quantity": 8,
|
|
"level": 7, "parent": 22, "bbox_x_mm": 11, "bbox_y_mm": 11,
|
|
"bbox_z_mm": 11, "notes": 34, "part_name_supplier": 40,
|
|
}
|
|
|
|
|
|
def _safe(v):
|
|
"""Convert NaN/None → None so openpyxl writes blank cells."""
|
|
if v is None:
|
|
return None
|
|
try:
|
|
if isinstance(v, float) and math.isnan(v):
|
|
return None
|
|
except Exception:
|
|
pass
|
|
return v
|
|
|
|
|
|
def extract_bom(model: StepModel) -> pd.DataFrame:
    """Build the BOM DataFrame (columns = BOM_COLUMNS) for a loaded StepModel.

    Sources are tried in this order until one yields rows:

    1. ``_bom_from_step_text(model.path)`` — the STEP text parser.  It is the
       primary source for part names because it decodes the file itself
       (UTF-8 / GBK detection in ``_read_step_text``).
    2. ``_bom_from_parts(model.parts)`` — only when the backend is
       ``"build123d"`` and ``model.parts`` is non-empty.  Per the original
       author's notes, labels coming from OCC's STEP reader may be garbled
       for CJK names, which is why this is a fallback and not the default.
    3. A single placeholder row named after the file stem.

    When the backend is ``"build123d"`` the frame is passed through
    ``_enrich_bboxes`` before being returned.
    """
    # Primary: STEP text parser — encoding-aware.
    records = _bom_from_step_text(model.path)

    if not records:
        if model.backend == "build123d" and model.parts:
            # Fallback: OCC assembly walk (names may be garbled, structure intact).
            logger.debug("STEP text parser empty — falling back to OCC assembly walk")
            records = _bom_from_parts(model.parts)

    if not records:
        # Last resort: describe the whole file as one part.
        logger.info("No assembly structure — treating as single part")
        stem = model.path.stem
        records = [
            {
                "part_number": "001",
                "part_name_original": stem,
                "part_name_english": stem,
                "quantity": 1,
                "level": 0,
                "parent": "",
                "bbox_x_mm": None,
                "bbox_y_mm": None,
                "bbox_z_mm": None,
                "notes": "single-body file",
            }
        ]

    df = pd.DataFrame(records, columns=BOM_COLUMNS)
    if model.backend == "build123d":
        df = _enrich_bboxes(model, df)

    logger.info(f"BOM extracted: {len(df)} parts")
    return df
|
|
|
|
|
|
def _bom_from_parts(parts: list) -> list:
|
|
name_counts = Counter(p["name"] for p in parts)
|
|
seen = set()
|
|
rows = []
|
|
for i, p in enumerate(parts):
|
|
name = p["name"]
|
|
if name in seen:
|
|
continue
|
|
seen.add(name)
|
|
rows.append({
|
|
"part_number": f"{len(rows)+1:03d}",
|
|
"part_name_original": name,
|
|
"part_name_english": name,
|
|
"quantity": name_counts[name],
|
|
"level": p.get("level", 0),
|
|
"parent": p.get("parent", ""),
|
|
"bbox_x_mm": None, "bbox_y_mm": None, "bbox_z_mm": None,
|
|
"notes": "",
|
|
})
|
|
return rows
|
|
|
|
|
|
def _read_step_text(step_path: Path) -> str:
|
|
"""Read STEP file text with CJK-aware encoding detection.
|
|
|
|
STEP files from Chinese manufacturers embed raw GBK bytes in name strings.
|
|
Strategy: try UTF-8 first (correct for modern files); if replacement chars
|
|
appear, retry as GBK (covers Chinese CAD exports); fall back to latin-1
|
|
which never fails (may contain mojibake, but at least it's readable).
|
|
"""
|
|
for enc in ('utf-8', 'gbk'):
|
|
try:
|
|
text = step_path.read_text(encoding=enc)
|
|
if enc == 'utf-8' and '�' in text:
|
|
# Replacement chars detected — GBK bytes can't be UTF-8
|
|
continue
|
|
return text
|
|
except (UnicodeDecodeError, LookupError):
|
|
continue
|
|
return step_path.read_text(encoding='latin-1', errors='replace')
|
|
|
|
|
|
# Matches `#<id> = PRODUCT('<first string>'` and captures the first string
# argument.  Inside a STEP string a literal apostrophe is written doubled
# (''), so the capture accepts '' pairs instead of stopping at the first quote.
_STEP_PRODUCT_RE = re.compile(
    r"#\d+\s*=\s*PRODUCT\s*\(\s*'((?:[^']|'')*)'", re.IGNORECASE
)

# Escape sequences that may occur inside an ISO 10303-21 string.
_STEP_ESCAPE_RE = re.compile(
    r"''"                               # literal apostrophe
    r"|\\\\"                            # literal backslash
    r"|\\X2\\((?:[0-9A-F]{4})+)\\X0\\"  # run of 4-hex-digit (UTF-16) units
    r"|\\X4\\((?:[0-9A-F]{8})+)\\X0\\"  # run of 8-hex-digit (UTF-32) units
    r"|\\X\\([0-9A-F]{2})"              # single code point U+0000..U+00FF
)


def _decode_step_string(raw: str) -> str:
    """Resolve ISO 10303-21 string escapes in *raw* and return plain text.

    Handles ``''``, ``\\\\``, ``\\X\\hh``, ``\\X2\\...\\X0\\`` and
    ``\\X4\\...\\X0\\``.  Anything else is left untouched; undecodable hex
    runs yield U+FFFD instead of raising.
    """
    if "'" not in raw and "\\" not in raw:
        return raw  # fast path: nothing that could be an escape

    def _expand(match):
        utf16_hex, utf32_hex, octet_hex = match.groups()
        if utf16_hex is not None:
            return bytes.fromhex(utf16_hex).decode("utf-16-be", errors="replace")
        if utf32_hex is not None:
            return bytes.fromhex(utf32_hex).decode("utf-32-be", errors="replace")
        if octet_hex is not None:
            return chr(int(octet_hex, 16))
        # Doubled apostrophe or doubled backslash: keep a single character.
        return match.group(0)[0]

    # Single pass, so an escaped backslash can never be re-read as the start
    # of a \X...\ directive.
    return _STEP_ESCAPE_RE.sub(_expand, raw)


def _bom_from_step_text(step_path: Path) -> list:
    """Parse STEP ISO 10303-21 PRODUCT entities directly from the file text.

    The first string argument of every ``PRODUCT(...)`` entity is taken as
    the part name, with STEP string escapes decoded.  Empty names and the
    name ``NONE`` (any case) are skipped.  Each distinct name produces one
    row; ``quantity`` counts how many PRODUCT entities carry that name.

    Returns a list of row dicts keyed like BOM_COLUMNS, or ``[]`` when the
    file cannot be read or contains no usable PRODUCT entity.
    """
    try:
        text = _read_step_text(step_path)
    except Exception as e:
        # Best effort: the caller falls back to other BOM sources.
        logger.warning(f"Could not read STEP text: {e}")
        return []

    seen = {}
    for match in _STEP_PRODUCT_RE.finditer(text):
        name = _decode_step_string(match.group(1)).strip()
        if not name or name.upper() == "NONE":
            continue
        entry = seen.get(name)
        if entry is not None:
            entry["quantity"] += 1
            continue
        seen[name] = {
            "part_number": f"{len(seen) + 1:03d}",
            "part_name_original": name,
            "part_name_english": name,
            "quantity": 1,
            "level": 0,
            "parent": "",
            "bbox_x_mm": None,
            "bbox_y_mm": None,
            "bbox_z_mm": None,
            "notes": "parsed from STEP text",
        }

    rows = list(seen.values())
    if rows:
        logger.info(f"STEP text parser found {len(rows)} unique part names")
    return rows
|
|
|
|
|
|
def _enrich_bboxes(model: StepModel, df: pd.DataFrame) -> pd.DataFrame:
    """Fill the bbox_*_mm columns of *df* from the model's shape. Best-effort.

    * One-row frame: row 0 receives the bounding box of ``model.shape``.
    * Otherwise: row ``i`` receives the bounding box of the ``i``-th entry of
      ``model.shape.children``, stopping at whichever runs out first.
      NOTE(review): this pairs rows and children purely by position — it
      assumes BOM row order matches child order; confirm against callers.

    Sizes are rounded to 2 decimals.  A failure on one child is ignored; any
    other failure is logged at debug level.  *df* is modified in place and
    returned.
    """
    targets = (("bbox_x_mm", "X"), ("bbox_y_mm", "Y"), ("bbox_z_mm", "Z"))

    def _store(row_idx, box):
        # Write the three rounded extents of *box* into row *row_idx*.
        for column, axis in targets:
            df.at[row_idx, column] = round(getattr(box.size, axis), 2)

    try:
        overall = model.shape.bounding_box()
        if len(df) != 1:
            kids = getattr(model.shape, "children", []) or []
            for row_idx, kid in zip(range(len(df)), kids):
                try:
                    _store(row_idx, kid.bounding_box())
                except Exception:
                    pass  # leave this row's bbox as-is and move on
        else:
            _store(0, overall)
    except Exception as e:
        logger.debug(f"bbox enrichment skipped: {e}")
    return df
|
|
|
|
|
|
def save_bom_csv(df: pd.DataFrame, step_path: Path) -> Path:
    """Write *df* as ``<stem>_bom.csv`` beside *step_path* (legacy fallback).

    The index is not written.  Returns the path of the CSV file.
    """
    csv_name = f"{step_path.stem}_bom.csv"
    out_path = step_path.parent / csv_name
    df.to_csv(out_path, index=False)
    logger.info(f"BOM CSV → {out_path.name}")
    return out_path
|
|
|
|
|
|
def save_bom_xlsx(df: pd.DataFrame, step_path: Path) -> Path:
    """Write *df* as the MPM-branded workbook ``<stem>_bom.xlsx`` beside *step_path*.

    Column changes vs internal schema (BOM_COLUMNS):
        part_name_english  → Part Description   (column 2)
        part_name_original → Supplier Part Name (last column)

    Columns listed in ``_XLSX_ORDER`` but absent from *df* are written as
    blank columns.  Falls back to ``save_bom_csv`` if openpyxl is
    unavailable.  Returns the path of the file that was written.
    """
    out_path = step_path.parent / f"{step_path.stem}_bom.xlsx"
    try:
        from openpyxl import Workbook
        from openpyxl.styles import Alignment, Border, Font, PatternFill, Side
        from openpyxl.utils import get_column_letter
    except ImportError:
        logger.warning("openpyxl not installed — falling back to CSV")
        return save_bom_csv(df, step_path)

    # Display frame: renamed, padded with any missing columns, reordered.
    table = df.rename(columns=_XLSX_RENAME).copy()
    for absent in [key for key in _XLSX_ORDER if key not in table.columns]:
        table[absent] = None
    table = table[_XLSX_ORDER]

    book = Workbook()
    sheet = book.active
    sheet.title = "Bill of Materials"

    # Header row: dark band, light bold text, gold rule underneath.
    header_look = {
        "font": Font(name="Montserrat", bold=True, color=_MPM_LIGHT_SHADE, size=10),
        "fill": PatternFill("solid", fgColor=_MPM_DARK_SHADE),
        "alignment": Alignment(horizontal="center", vertical="center", wrap_text=True),
        "border": Border(bottom=Side(style="medium", color=_MPM_MIDDLE_GOLD)),
    }
    for col_no, key in enumerate(_XLSX_ORDER, start=1):
        head = sheet.cell(row=1, column=col_no, value=_XLSX_HEADERS.get(key, key))
        for attribute, style in header_look.items():
            setattr(head, attribute, style)
        letter = get_column_letter(col_no)
        sheet.column_dimensions[letter].width = _XLSX_WIDTHS.get(key, 15)
    sheet.row_dimensions[1].height = 28

    # Data rows start at sheet row 2; even sheet rows get the warm tint.
    body_font = Font(name="Open Sans", size=10, color=_MPM_DARK_SHADE)
    body_align = Alignment(horizontal="left", vertical="center")
    tinted = PatternFill("solid", fgColor=_MPM_WARM_OFF_WHITE)
    plain = PatternFill("solid", fgColor="FFFFFF")
    for offset, (_, record) in enumerate(table.iterrows()):
        row_no = offset + 2
        row_fill = plain if offset % 2 else tinted
        for col_no, key in enumerate(_XLSX_ORDER, start=1):
            cell = sheet.cell(row=row_no, column=col_no, value=_safe(record[key]))
            cell.font = body_font
            cell.fill = row_fill
            cell.alignment = body_align

    sheet.freeze_panes = "A2"  # keep the header row visible while scrolling
    book.save(str(out_path))
    logger.info(f"BOM XLSX → {out_path.name}")
    return out_path
|