Phase 1: FastAPI backend with async job model
- backend/app: FastAPI API wrapping the CAD skill modules - upload -> job -> poll -> model / BOM / artifacts -> geometry query - SQLite via SQLModel (Model, Job, BomRow, QueryLog) - ThreadPoolExecutor worker, serialized, with live stage updates - docker-compose.yml: dev server (mounts source, --reload) on :8000 - api-test.sh: end-to-end live validation script - requirements.txt: add fastapi, uvicorn, python-multipart, sqlmodel - external_diagram.py: port active-area detection OCC.Core -> OCP - .gitignore, PHASE1.md Validated live: MR16 round-trip passes (28 BOM rows, 12 artifacts, bounding-box query, xlsx download; active-area detection working). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,22 @@
|
||||
"""Runtime configuration. All paths overridable via environment variables so the
|
||||
same image runs locally (repo root) and on Unraid (/data volume)."""
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
# Repo root = two levels up from backend/app/config.py
ROOT = Path(__file__).resolve().parents[2]

# The CAD skill source (loader, bom, renderer, query_engine, external_diagram, ...)
SKILL_SRC = Path(os.environ.get("SKILL_SRC", ROOT / "skill.src"))

# Where uploads, per-model output dirs, and the SQLite DB live.
DATA_DIR = Path(os.environ.get("DATA_DIR", ROOT / "_data"))
DB_PATH = Path(os.environ.get("DB_PATH", DATA_DIR / "step_parser.db"))

# CAD jobs are heavy and largely CPU-bound — serialize by default (one at a time).
# Clamped to at least 1: ThreadPoolExecutor raises ValueError for max_workers <= 0,
# which would otherwise take the whole API down at import time on a bad env value.
MAX_WORKERS = max(1, int(os.environ.get("MAX_WORKERS", "1")))

# Accepted upload extensions (compared against the lower-cased file suffix).
ALLOWED_SUFFIXES = {".step", ".stp"}

# One sub-directory per model id lives under here.
MODELS_DIR = DATA_DIR / "models"
|
||||
@@ -0,0 +1,28 @@
|
||||
"""SQLite engine + session helpers.
|
||||
|
||||
SQLModel (SQLAlchemy + Pydantic) is used rather than Jason's usual Prisma — Prisma
|
||||
is JS-first and its Python client is unofficial, whereas SQLModel is the FastAPI-
|
||||
native ORM. SQLite stays the DB, single-file under the data volume.
|
||||
"""
|
||||
from sqlmodel import SQLModel, Session, create_engine
|
||||
|
||||
from .config import DATA_DIR, DB_PATH, MODELS_DIR
|
||||
|
||||
# Module-level engine shared by the request handlers (via get_session) and the
# background worker.
# check_same_thread=False: the worker thread shares the engine with request handlers.
engine = create_engine(
    f"sqlite:///{DB_PATH}",
    connect_args={"check_same_thread": False},
)
|
||||
|
||||
|
||||
def init_db() -> None:
    """Create the on-disk directories and all SQLModel tables.

    Safe to call repeatedly: directories are created with ``exist_ok=True``
    and the tables are created through ``SQLModel.metadata.create_all``.
    """
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    MODELS_DIR.mkdir(parents=True, exist_ok=True)
    # DB_PATH can be overridden independently of DATA_DIR, so its directory may
    # not exist yet; make sure it does before the first connection is opened.
    DB_PATH.parent.mkdir(parents=True, exist_ok=True)
    # Import models so they register on SQLModel.metadata before create_all.
    from . import models  # noqa: F401

    SQLModel.metadata.create_all(engine)
|
||||
|
||||
|
||||
def get_session():
    """Yield one Session bound to the shared engine; closed when the generator finishes.

    Written as a generator so it can be used with FastAPI's ``Depends``.
    """
    with Session(engine) as db_session:
        yield db_session
|
||||
@@ -0,0 +1,157 @@
|
||||
"""STEP Parser API — Phase 1.
|
||||
|
||||
Endpoints:
|
||||
GET /api/health
|
||||
POST /api/upload multipart STEP upload -> creates model + job
|
||||
GET /api/jobs/{job_id} job status / stage / artifacts
|
||||
GET /api/models list models
|
||||
GET /api/models/{id} model metadata + BOM + artifacts
|
||||
GET /api/models/{id}/artifacts/{name} download a generated file
|
||||
POST /api/models/{id}/query run a geometry query
|
||||
"""
|
||||
import json
|
||||
import logging
|
||||
from contextlib import asynccontextmanager
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import Depends, FastAPI, File, Form, HTTPException, UploadFile
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import FileResponse
|
||||
from pydantic import BaseModel
|
||||
from sqlmodel import Session, select
|
||||
|
||||
from . import processing
|
||||
from .config import ALLOWED_SUFFIXES, MODELS_DIR
|
||||
from .db import get_session, init_db
|
||||
from .models import BomRow, Job, Model, QueryLog
|
||||
from .worker import submit_job
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(levelname)s %(name)s: %(message)s")
|
||||
log = logging.getLogger("step_parser.api")
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: initialise the database before serving.

    Everything before ``yield`` runs at startup; nothing is done at shutdown.
    """
    init_db()
    log.info("DB initialized")
    yield
|
||||
|
||||
|
||||
# The ASGI application; `lifespan` runs init_db() at startup.
app = FastAPI(title="STEP Parser API", version="0.1.0", lifespan=lifespan)

# Dev-permissive CORS; tighten to the Unraid frontend origin before it leaves the LAN.
# Every origin, method and header is currently allowed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
|
||||
|
||||
|
||||
class QueryRequest(BaseModel):
    """JSON body of POST /api/models/{id}/query."""

    # Query text, passed unchanged to processing.run_query.
    query: str
|
||||
|
||||
|
||||
@app.get("/api/health")
def health():
    """Liveness probe: always returns ``{"status": "ok"}`` (does not touch the DB)."""
    return {"status": "ok"}
|
||||
|
||||
|
||||
@app.post("/api/upload")
async def upload(
    file: UploadFile = File(...),
    thumbnails: bool = Form(True),
    bom: bool = Form(True),
    diagram: bool = Form(False),
    translate: bool = Form(False),
    diagram_mode: str = Form("enclosure_only"),
    session: Session = Depends(get_session),
):
    """Accept a STEP upload, persist it, and queue a processing job.

    The file is stored as ``MODELS_DIR/<model id>/<original filename>``. The
    form flags are serialized to JSON on the Job row and read back by the
    worker.

    Returns:
        ``{"model_id", "job_id", "status"}`` for the newly created rows.

    Raises:
        HTTPException(400): the filename suffix is not in ALLOWED_SUFFIXES.
        OSError: the file could not be written (the Model row is removed first).
    """
    # Keep only the final path component so a crafted filename cannot escape out_dir.
    safe_name = Path(file.filename or "").name
    suffix = Path(safe_name).suffix.lower()
    if suffix not in ALLOWED_SUFFIXES:
        raise HTTPException(400, f"Unsupported file type '{suffix}'. Expected .step or .stp")

    stem = Path(safe_name).stem
    model = Model(name=stem, original_filename=safe_name, stem=stem)
    session.add(model)
    session.commit()
    session.refresh(model)  # populate model.id, which names the output directory

    out_dir = MODELS_DIR / str(model.id)
    dest = out_dir / safe_name
    chunk_size = 1024 * 1024
    try:
        out_dir.mkdir(parents=True, exist_ok=True)
        # Stream in bounded chunks instead of buffering the whole (potentially
        # very large) STEP file in memory.
        with dest.open("wb") as fh:
            while chunk := await file.read(chunk_size):
                fh.write(chunk)
    except OSError:
        # Don't leave a Model row that points at a missing or partial file.
        dest.unlink(missing_ok=True)
        session.delete(model)
        session.commit()
        raise

    options = {
        "thumbnails": thumbnails, "bom": bom, "diagram": diagram,
        "translate": translate, "diagram_mode": diagram_mode,
    }
    job = Job(model_id=model.id, status="pending", options=json.dumps(options))
    session.add(job)
    session.commit()
    session.refresh(job)

    submit_job(job.id)
    return {"model_id": model.id, "job_id": job.id, "status": job.status}
|
||||
|
||||
|
||||
@app.get("/api/jobs/{job_id}")
def get_job(job_id: int, session: Session = Depends(get_session)):
    """Return one job row as a dict, with its JSON columns decoded.

    ``artifacts`` becomes a list (empty when unset) and ``options`` a dict
    (empty when unset). Responds 404 when the job does not exist.
    """
    job = session.get(Job, job_id)
    if job is None:
        raise HTTPException(404, "job not found")

    payload = job.model_dump()
    # Swap the raw JSON strings for their decoded values.
    payload.update(
        artifacts=json.loads(job.artifacts) if job.artifacts else [],
        options=json.loads(job.options) if job.options else {},
    )
    return payload
|
||||
|
||||
|
||||
@app.get("/api/models")
def list_models(session: Session = Depends(get_session)):
    """List every model, highest id first."""
    newest_first = select(Model).order_by(Model.id.desc())
    return session.exec(newest_first).all()
|
||||
|
||||
|
||||
@app.get("/api/models/{model_id}")
def get_model(model_id: int, session: Session = Depends(get_session)):
    """Return a model with its BOM rows and the state of its most recent job.

    BOM rows are ordered by (level, id). ``artifacts`` and ``latest_job`` come
    from the job with the highest id for this model; they are ``[]`` and
    ``None`` when the model has no job. Responds 404 for an unknown model.
    """
    model = session.get(Model, model_id)
    if model is None:
        raise HTTPException(404, "model not found")

    bom_stmt = (
        select(BomRow)
        .where(BomRow.model_id == model_id)
        .order_by(BomRow.level, BomRow.id)
    )
    job_stmt = select(Job).where(Job.model_id == model_id).order_by(Job.id.desc())

    bom_rows = session.exec(bom_stmt).all()
    latest = session.exec(job_stmt).first()

    if latest is None:
        artifacts = []
        job_summary = None
    else:
        artifacts = json.loads(latest.artifacts) if latest.artifacts else []
        job_summary = {"id": latest.id, "status": latest.status, "stage": latest.stage}

    return {
        "model": model,
        "bom": bom_rows,
        "artifacts": artifacts,
        "latest_job": job_summary,
    }
|
||||
|
||||
|
||||
@app.get("/api/models/{model_id}/artifacts/{name}")
def get_artifact(model_id: int, name: str):
    """Serve one file from the model's directory, or 404 if it is not a regular file."""
    # Only the final path component is honoured, which blocks path traversal.
    safe = Path(name).name
    candidate = MODELS_DIR.joinpath(str(model_id), safe)
    if candidate.is_file():
        return FileResponse(candidate, filename=safe)
    raise HTTPException(404, "artifact not found")
|
||||
|
||||
|
||||
@app.post("/api/models/{model_id}/query")
def query_model(model_id: int, req: QueryRequest, session: Session = Depends(get_session)):
    """Run a geometry query against a model's stored STEP file and log it.

    The query runs synchronously via ``processing.run_query``; on success a
    QueryLog row is written.

    Returns:
        ``{"query": <input text>, "result": <query result>}``.

    Raises:
        HTTPException(404): unknown model, or its STEP file is missing on disk.
        HTTPException(500): the query raised; the detail names the exception.
    """
    model = session.get(Model, model_id)
    if model is None:
        raise HTTPException(404, "model not found")
    step_path = MODELS_DIR / str(model_id) / model.original_filename
    if not step_path.is_file():
        raise HTTPException(404, "model source file missing")
    try:
        result = processing.run_query(step_path, req.query)
    except Exception as e:  # noqa: BLE001
        # Keep the traceback server-side; the client only gets the summary line.
        log.exception("query failed for model %s", model_id)
        raise HTTPException(500, f"query failed: {type(e).__name__}: {e}") from e
    session.add(QueryLog(model_id=model_id, query=req.query, result=result))
    session.commit()
    return {"query": req.query, "result": result}
|
||||
@@ -0,0 +1,64 @@
|
||||
"""SQLModel tables: Model, Job, BomRow, QueryLog."""
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
|
||||
from sqlmodel import Field, SQLModel
|
||||
|
||||
|
||||
def utcnow() -> datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    return datetime.now(tz=timezone.utc)
|
||||
|
||||
|
||||
class Model(SQLModel, table=True):
    """One uploaded STEP file and its extracted metadata.

    The upload endpoint creates the row with only the name fields set; the
    worker fills in the geometry metadata once the pipeline has finished.
    """

    id: Optional[int] = Field(default=None, primary_key=True)
    name: str  # set to the filename stem at upload time
    original_filename: str  # file name of the upload inside the model's directory
    stem: str
    backend: Optional[str] = None  # "build123d" | "freecad"
    face_count: Optional[int] = None
    part_count: Optional[int] = None
    # Overall bounding box, taken from the level-0 BOM row when one exists.
    bbox_x_mm: Optional[float] = None
    bbox_y_mm: Optional[float] = None
    bbox_z_mm: Optional[float] = None
    has_chinese: bool = False  # any original part name flagged by has_chinese()
    created_at: datetime = Field(default_factory=utcnow)
|
||||
|
||||
|
||||
class Job(SQLModel, table=True):
    """A processing run for a model. Status: pending|running|done|error."""

    id: Optional[int] = Field(default=None, primary_key=True)
    model_id: int = Field(foreign_key="model.id", index=True)
    status: str = Field(default="pending", index=True)
    stage: Optional[str] = None  # current pipeline stage
    error: Optional[str] = None  # "<ExceptionType>: <message>" when the run failed
    artifacts: Optional[str] = None  # JSON list of relative filenames
    options: Optional[str] = None  # JSON of the request options
    created_at: datetime = Field(default_factory=utcnow)
    started_at: Optional[datetime] = None  # set when the worker picks the job up
    finished_at: Optional[datetime] = None  # set on both success and failure
|
||||
|
||||
|
||||
class BomRow(SQLModel, table=True):
    """One BOM line for a model.

    The worker deletes and re-inserts all rows of a model on every successful
    run, so the rows always reflect the latest pipeline output.
    """

    id: Optional[int] = Field(default=None, primary_key=True)
    model_id: int = Field(foreign_key="model.id", index=True)
    part_number: Optional[str] = None
    part_name_original: Optional[str] = None
    part_name_english: Optional[str] = None
    quantity: Optional[int] = None
    level: Optional[int] = None  # the pipeline treats level 0 as the root row
    parent: Optional[str] = None
    bbox_x_mm: Optional[float] = None
    bbox_y_mm: Optional[float] = None
    bbox_z_mm: Optional[float] = None
    notes: Optional[str] = None
|
||||
|
||||
|
||||
class QueryLog(SQLModel, table=True):
    """A natural-language geometry query and its result."""

    id: Optional[int] = Field(default=None, primary_key=True)
    model_id: int = Field(foreign_key="model.id", index=True)
    query: str  # the query text exactly as submitted
    result: Optional[str] = None  # value returned by processing.run_query
    created_at: datetime = Field(default_factory=utcnow)
|
||||
@@ -0,0 +1,142 @@
|
||||
"""Pipeline orchestration — wraps the CAD skill modules.
|
||||
|
||||
Mirrors step_processor.main() but writes artifacts into a per-model directory and
|
||||
returns structured metadata + BOM rows for the DB instead of printing a summary.
|
||||
Heavy kernel imports happen lazily inside each stage.
|
||||
"""
|
||||
import logging
|
||||
import math
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Callable, Optional
|
||||
|
||||
from . import skill_bridge # noqa: F401 — sets sys.path so `modules.*` imports resolve
|
||||
|
||||
log = logging.getLogger("step_parser.processing")
|
||||
|
||||
Progress = Callable[[str], None]
|
||||
|
||||
|
||||
def _f(v) -> Optional[float]:
|
||||
try:
|
||||
x = float(v)
|
||||
return None if math.isnan(x) else round(x, 2)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _i(v) -> Optional[int]:
|
||||
try:
|
||||
x = float(v)
|
||||
return None if math.isnan(x) else int(x)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _s(v) -> Optional[str]:
|
||||
if v is None:
|
||||
return None
|
||||
s = str(v).strip()
|
||||
return s or None
|
||||
|
||||
|
||||
def _bom_df_to_rows(df) -> list[dict]:
    """Turn a BOM dataframe into plain dicts, one per row, in dataframe order.

    Each listed column is read with ``row.get`` (so a missing column reads as
    None) and passed through its coercion helper. The output keys follow the
    order of the table below.
    """
    # Column name -> coercion helper, in output key order.
    converters = {
        "part_number": _s,
        "part_name_original": _s,
        "part_name_english": _s,
        "quantity": _i,
        "level": _i,
        "parent": _s,
        "bbox_x_mm": _f,
        "bbox_y_mm": _f,
        "bbox_z_mm": _f,
        "notes": _s,
    }
    return [
        {column: convert(record.get(column)) for column, convert in converters.items()}
        for _, record in df.iterrows()
    ]
|
||||
|
||||
|
||||
def _collect_artifacts(out_dir: Path) -> list[str]:
|
||||
"""All downloadable files in the model dir (original upload + generated outputs)."""
|
||||
return sorted(p.name for p in out_dir.iterdir() if p.is_file())
|
||||
|
||||
|
||||
def run_pipeline(step_path: Path, out_dir: Path, options: dict, progress: Progress) -> dict:
    """Run the requested pipeline stages. Returns metadata dict for the DB.

    Args:
        step_path: the uploaded STEP file; the skill modules are handed this path.
        out_dir: the model's directory, scanned for artifacts at the end.
        options: request options; keys read here are ``bom`` (default True),
            ``translate``, ``thumbnails`` (default True), ``diagram`` (default
            False), ``diagram_mode`` and ``diagram_pdf``.
        progress: callback invoked with the stage name as each stage starts.

    Returns:
        Dict with keys ``backend``, ``face_count``, ``part_count``,
        ``has_chinese``, ``bbox`` (x, y, z tuple), ``bom_rows`` and ``artifacts``.

    Raises:
        RuntimeError: the loader returned no model.
    """
    meta: dict = {
        "backend": None, "face_count": None, "part_count": None,
        "has_chinese": False, "bbox": (None, None, None),
        "bom_rows": [], "artifacts": [],
    }

    progress("loading")
    import modules.loader as loader_mod
    model = loader_mod.load_step(step_path)
    if model is None:
        raise RuntimeError("Failed to load STEP — build123d/FreeCAD unavailable or file invalid")
    meta["backend"] = model.backend
    meta["face_count"] = model.face_count
    meta["part_count"] = len(model.parts) if model.parts else None

    bom_df = None
    if options.get("bom", True):
        progress("bom")
        from modules.bom import extract_bom, save_bom_xlsx
        bom_df = extract_bom(model)
        save_bom_xlsx(bom_df, step_path)

    # Everything below needs a BOM; bom_df stays None when the stage was skipped.
    if bom_df is not None:
        from modules.translator import has_chinese
        meta["has_chinese"] = bool(bom_df["part_name_original"].apply(has_chinese).any())

        if options.get("translate") and meta["has_chinese"]:
            if os.environ.get("ANTHROPIC_API_KEY"):
                progress("translate")
                from modules.translator import get_translation_map, translate_bom
                bom_df = translate_bom(bom_df, model_name=step_path.stem)
                # Re-save so the xlsx reflects the translated BOM.
                save_bom_xlsx(bom_df, step_path)
                tmap = get_translation_map(bom_df)
                if tmap:
                    from modules.rewriter import rewrite_step
                    rewrite_step(step_path, tmap)
            else:
                log.warning("translate requested but ANTHROPIC_API_KEY not set — skipping")

        meta["bom_rows"] = _bom_df_to_rows(bom_df)
        # Best effort: the overall bbox comes from the first level-0 row. A BOM
        # without those columns must not fail the job, but say so in the log
        # instead of swallowing the error silently.
        try:
            root = bom_df[bom_df["level"] == 0]
            if len(root):
                r0 = root.iloc[0]
                meta["bbox"] = (_f(r0["bbox_x_mm"]), _f(r0["bbox_y_mm"]), _f(r0["bbox_z_mm"]))
        except Exception:  # noqa: BLE001 — bbox is optional metadata
            log.warning("could not derive model bbox from the root BOM row", exc_info=True)

    if options.get("thumbnails", True):
        progress("thumbnails")
        from modules.renderer import render_views
        render_views(model, step_path)

    if options.get("diagram", False):
        progress("diagram")
        from modules.external_diagram import step_external_diagram
        step_external_diagram(
            path=str(step_path),
            mode=options.get("diagram_mode", "enclosure_only"),
            options={"pdf": bool(options.get("diagram_pdf", False))},
        )

    progress("collect")
    meta["artifacts"] = _collect_artifacts(out_dir)
    return meta
|
||||
|
||||
|
||||
def run_query(step_path: Path, query: str) -> str:
    """Load the STEP file at *step_path* and answer one geometry query, synchronously.

    Raises:
        RuntimeError: the loader returned no model.
    """
    import modules.loader as loader_mod
    from modules.query_engine import run_query as _run_query

    loaded = loader_mod.load_step(step_path)
    if loaded is not None:
        return _run_query(loaded, query)
    raise RuntimeError("Failed to load STEP for query")
|
||||
@@ -0,0 +1,16 @@
|
||||
"""Make the CAD skill (skill.src/modules/*) importable.
|
||||
|
||||
skill.src uses package-relative imports under a top-level `modules` package, the
|
||||
same way step_processor.py runs it (script dir on sys.path[0]). We replicate that
|
||||
by putting SKILL_SRC on sys.path, then `import modules.loader` etc. resolves.
|
||||
|
||||
Heavy deps (build123d/OCP) are imported lazily inside the skill functions, so
|
||||
importing the modules here is cheap; the kernel only loads when a job runs.
|
||||
"""
|
||||
import sys
|
||||
|
||||
from .config import SKILL_SRC
|
||||
|
||||
# Import-time side effect: put SKILL_SRC at the front of sys.path, once, so
# that `import modules.<name>` resolves to the skill sources.
_p = str(SKILL_SRC)
if _p not in sys.path:
    sys.path.insert(0, _p)
|
||||
@@ -0,0 +1,88 @@
|
||||
"""Background job worker.
|
||||
|
||||
CAD processing is heavy and CPU-bound, so jobs run in a ThreadPoolExecutor
|
||||
(serialized by default, MAX_WORKERS=1) rather than blocking the event loop with
|
||||
FastAPI BackgroundTasks. Job state + results are persisted to SQLite as it runs,
|
||||
so a client polling GET /api/jobs/{id} sees live stage updates.
|
||||
"""
|
||||
import json
|
||||
import logging
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from sqlmodel import Session, delete
|
||||
|
||||
from .config import MAX_WORKERS, MODELS_DIR
|
||||
from .db import engine
|
||||
from .models import BomRow, Job, Model
|
||||
from .processing import run_pipeline
|
||||
|
||||
log = logging.getLogger("step_parser.worker")
|
||||
|
||||
_executor = ThreadPoolExecutor(max_workers=MAX_WORKERS, thread_name_prefix="cadjob")
|
||||
|
||||
|
||||
def _utcnow() -> datetime:
|
||||
return datetime.now(timezone.utc)
|
||||
|
||||
|
||||
def submit_job(job_id: int) -> None:
    """Queue ``_run_job(job_id)`` on the module's executor and return immediately.

    The returned Future is discarded; the outcome is recorded on the Job row.
    """
    _executor.submit(_run_job, job_id)
|
||||
|
||||
|
||||
def _run_job(job_id: int) -> None:
    """Execute one job end to end, persisting status and results as it goes.

    Runs on the executor thread with its own Session. The job goes to
    "running", the pipeline reports each stage through ``progress``, and on
    success the Model metadata, BOM rows and artifact list are written and the
    job is marked "done". Any exception, including one raised while preparing
    the run, is logged and recorded on the job as status "error", so a job can
    never stay "pending" or "running" after its run has ended.
    """
    with Session(engine) as s:
        job = s.get(Job, job_id)
        if job is None:
            log.error("job %s vanished before it ran", job_id)
            return

        def progress(stage: str) -> None:
            # Commit each stage change so pollers see it while the job runs.
            j = s.get(Job, job_id)
            j.stage = stage
            s.add(j)
            s.commit()

        try:
            # Setup lives inside the try: an exception escaping this function
            # would be swallowed by the executor's unobserved Future.
            model = s.get(Model, job.model_id)
            if model is None:
                raise RuntimeError(f"model {job.model_id} not found for job {job_id}")
            out_dir = MODELS_DIR / str(model.id)
            step_path = out_dir / model.original_filename
            options = json.loads(job.options or "{}")

            job.status = "running"
            job.stage = "loading"
            job.started_at = _utcnow()
            s.add(job)
            s.commit()

            meta = run_pipeline(step_path, out_dir, options, progress)

            model = s.get(Model, job.model_id)
            model.backend = meta["backend"]
            model.face_count = meta["face_count"]
            model.part_count = meta["part_count"]
            model.bbox_x_mm, model.bbox_y_mm, model.bbox_z_mm = meta["bbox"]
            model.has_chinese = meta["has_chinese"]
            s.add(model)

            # Replace any prior BOM rows for an idempotent re-run.
            s.exec(delete(BomRow).where(BomRow.model_id == model.id))
            for row in meta["bom_rows"]:
                s.add(BomRow(model_id=model.id, **row))

            job = s.get(Job, job_id)
            job.status = "done"
            job.stage = "done"
            job.artifacts = json.dumps(meta["artifacts"])
            job.finished_at = _utcnow()
            s.add(job)
            s.commit()
            log.info("job %s done — %d artifacts", job_id, len(meta["artifacts"]))
        except Exception as e:  # noqa: BLE001 — record any failure on the job
            log.exception("job %s failed", job_id)
            # Drop any half-written results before recording the failure.
            s.rollback()
            j = s.get(Job, job_id)
            if j is None:
                log.error("job %s vanished while it was running", job_id)
                return
            j.status = "error"
            j.error = f"{type(e).__name__}: {e}"
            j.finished_at = _utcnow()
            s.add(j)
            s.commit()
|
||||
Reference in New Issue
Block a user