Phase 1: FastAPI backend with async job model

- backend/app: FastAPI API wrapping the CAD skill modules
  - upload -> job -> poll -> model / BOM / artifacts -> geometry query
  - SQLite via SQLModel (Model, Job, BomRow, QueryLog)
  - ThreadPoolExecutor worker, serialized, with live stage updates
- docker-compose.yml: dev server (mounts source, --reload) on :8000
- api-test.sh: end-to-end live validation script
- requirements.txt: add fastapi, uvicorn, python-multipart, sqlmodel
- external_diagram.py: port active-area detection OCC.Core -> OCP
- .gitignore, PHASE1.md

Validated live: MR16 round-trip passes (28 BOM rows, 12 artifacts,
bounding-box query, xlsx download; active-area detection working).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Jason Stedwell
2026-06-17 16:38:26 -05:00
parent c1abe36822
commit b3c3e2a3b2
15 changed files with 701 additions and 5 deletions
View File
View File
+22
View File
@@ -0,0 +1,22 @@
"""Runtime configuration. All paths overridable via environment variables so the
same image runs locally (repo root) and on Unraid (/data volume)."""
import os
from pathlib import Path
# Repo root = two levels up from backend/app/config.py
# (parents[0] is backend/app, parents[1] is backend, parents[2] is the repo root).
ROOT = Path(__file__).resolve().parents[2]
# The CAD skill source (loader, bom, renderer, query_engine, external_diagram, ...)
# Override with the SKILL_SRC environment variable.
SKILL_SRC = Path(os.environ.get("SKILL_SRC", ROOT / "skill.src"))
# Where uploads, per-model output dirs, and the SQLite DB live.
# Override with the DATA_DIR environment variable.
DATA_DIR = Path(os.environ.get("DATA_DIR", ROOT / "_data"))
# SQLite database file. Defaults to a file inside DATA_DIR, but DB_PATH can point
# anywhere, including outside DATA_DIR.
DB_PATH = Path(os.environ.get("DB_PATH", DATA_DIR / "step_parser.db"))
# CAD jobs are heavy and largely CPU-bound — serialize by default (one at a time).
# Raises ValueError at import time if MAX_WORKERS is set to a non-integer string.
MAX_WORKERS = int(os.environ.get("MAX_WORKERS", "1"))
# Accepted upload extensions (lower-case, including the leading dot).
ALLOWED_SUFFIXES = {".step", ".stp"}
# Per-model working directories. Derived from DATA_DIR, so it has no env override
# of its own.
MODELS_DIR = DATA_DIR / "models"
+28
View File
@@ -0,0 +1,28 @@
"""SQLite engine + session helpers.
SQLModel (SQLAlchemy + Pydantic) is used rather than Jason's usual Prisma — Prisma
is JS-first and its Python client is unofficial, whereas SQLModel is the FastAPI-
native ORM. SQLite stays the DB, single-file under the data volume.
"""
from sqlmodel import SQLModel, Session, create_engine
from .config import DATA_DIR, DB_PATH, MODELS_DIR
# check_same_thread=False: the worker thread shares the engine with request handlers.
# The URL is built from DB_PATH at import time, so the database location is fixed
# once this module has been imported.
engine = create_engine(
    f"sqlite:///{DB_PATH}",
    connect_args={"check_same_thread": False},
)
def init_db() -> None:
    """Create the on-disk layout and all SQLModel tables.

    Safe to call repeatedly: directories are created with ``exist_ok=True`` and
    ``create_all`` only creates tables that do not exist yet.
    """
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    MODELS_DIR.mkdir(parents=True, exist_ok=True)
    # DB_PATH can be overridden independently of DATA_DIR, so its parent directory
    # is not guaranteed to exist; SQLite will not create missing directories.
    DB_PATH.parent.mkdir(parents=True, exist_ok=True)
    # Import models so they register on SQLModel.metadata before create_all.
    from . import models  # noqa: F401

    SQLModel.metadata.create_all(engine)
def get_session():
    """Yield one database session and close it when the consumer is finished.

    Written as a generator so it can be used as a FastAPI dependency.
    """
    db_session = Session(engine)
    try:
        yield db_session
    finally:
        # Mirrors what leaving a ``with Session(...)`` block does.
        db_session.close()
+157
View File
@@ -0,0 +1,157 @@
"""STEP Parser API — Phase 1.
Endpoints:
GET /api/health
POST /api/upload multipart STEP upload -> creates model + job
GET /api/jobs/{job_id} job status / stage / artifacts
GET /api/models list models
GET /api/models/{id} model metadata + BOM + artifacts
GET /api/models/{id}/artifacts/{name} download a generated file
POST /api/models/{id}/query run a geometry query
"""
import json
import logging
from contextlib import asynccontextmanager
from pathlib import Path
from fastapi import Depends, FastAPI, File, Form, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from pydantic import BaseModel
from sqlmodel import Session, select
from . import processing
from .config import ALLOWED_SUFFIXES, MODELS_DIR
from .db import get_session, init_db
from .models import BomRow, Job, Model, QueryLog
from .worker import submit_job
# Configure the root logger once at import time (INFO level, "LEVEL name: message").
logging.basicConfig(level=logging.INFO, format="%(levelname)s %(name)s: %(message)s")
# Logger used by the HTTP layer in this module.
log = logging.getLogger("step_parser.api")
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: initialize the database before serving requests.

    Everything before ``yield`` runs at startup; nothing runs after it, so there
    is no shutdown work.
    """
    init_db()
    log.info("DB initialized")
    yield
# The ASGI application; `lifespan` runs init_db() at startup.
app = FastAPI(title="STEP Parser API", version="0.1.0", lifespan=lifespan)
# Dev-permissive CORS; tighten to the Unraid frontend origin before it leaves the LAN.
# Any origin, method, and header is currently accepted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
class QueryRequest(BaseModel):
    """JSON body of POST /api/models/{id}/query."""

    # The query text, passed unchanged to processing.run_query.
    query: str
@app.get("/api/health")
def health():
    """Liveness probe: always reports an "ok" status."""
    return dict(status="ok")
@app.post("/api/upload")
async def upload(
    file: UploadFile = File(...),
    thumbnails: bool = Form(True),
    bom: bool = Form(True),
    diagram: bool = Form(False),
    translate: bool = Form(False),
    diagram_mode: str = Form("enclosure_only"),
    session: Session = Depends(get_session),
):
    """Accept a STEP upload, store it, and queue a processing job.

    The file is saved under ``MODELS_DIR/<model id>/<original filename>``. The
    form flags are serialized to JSON on the Job row and read back by the worker.

    Returns:
        ``{"model_id", "job_id", "status"}`` for the newly created rows.

    Raises:
        HTTPException: 400 when the filename does not end in an allowed suffix
            (this also covers a missing or empty filename).
    """
    # Keep only the final path component so a crafted filename cannot escape out_dir.
    safe_name = Path(file.filename or "").name
    suffix = Path(safe_name).suffix.lower()
    if suffix not in ALLOWED_SUFFIXES:
        raise HTTPException(400, f"Unsupported file type '{suffix}'. Expected .step or .stp")

    stem = Path(safe_name).stem
    model = Model(name=stem, original_filename=safe_name, stem=stem)
    session.add(model)
    session.commit()
    # Refresh to obtain the generated primary key, which names the output directory.
    session.refresh(model)

    out_dir = MODELS_DIR / str(model.id)
    out_dir.mkdir(parents=True, exist_ok=True)
    # Stream to disk in 1 MiB chunks instead of buffering the whole upload in
    # memory; STEP assemblies can be very large.
    with (out_dir / safe_name).open("wb") as dest:
        while chunk := await file.read(1024 * 1024):
            dest.write(chunk)

    options = {
        "thumbnails": thumbnails, "bom": bom, "diagram": diagram,
        "translate": translate, "diagram_mode": diagram_mode,
    }
    job = Job(model_id=model.id, status="pending", options=json.dumps(options))
    session.add(job)
    session.commit()
    session.refresh(job)

    # Hand off to the background worker only after the job row is committed, so
    # the worker can always find it.
    submit_job(job.id)
    return {"model_id": model.id, "job_id": job.id, "status": job.status}
@app.get("/api/jobs/{job_id}")
def get_job(job_id: int, session: Session = Depends(get_session)):
    """Return one job's fields, with the JSON-encoded columns decoded.

    ``artifacts`` becomes a list (empty when unset) and ``options`` a dict
    (empty when unset). Responds 404 when the job does not exist.
    """
    record = session.get(Job, job_id)
    if record is None:
        raise HTTPException(404, "job not found")

    payload = record.model_dump()
    # Both keys already exist in the dump as raw JSON strings; replace them in place.
    payload.update(
        artifacts=json.loads(record.artifacts) if record.artifacts else [],
        options=json.loads(record.options) if record.options else {},
    )
    return payload
@app.get("/api/models")
def list_models(session: Session = Depends(get_session)):
    """Return every model row, highest id first."""
    newest_first = select(Model).order_by(Model.id.desc())
    return session.exec(newest_first).all()
@app.get("/api/models/{model_id}")
def get_model(model_id: int, session: Session = Depends(get_session)):
    """Return a model with its BOM rows and the outcome of its most recent job.

    BOM rows are ordered by level, then id. ``artifacts`` and ``latest_job``
    come from the job with the highest id for this model; they are ``[]`` and
    ``None`` when the model has no job. Responds 404 for an unknown model.
    """
    model = session.get(Model, model_id)
    if model is None:
        raise HTTPException(404, "model not found")

    bom_stmt = (
        select(BomRow)
        .where(BomRow.model_id == model_id)
        .order_by(BomRow.level, BomRow.id)
    )
    bom = session.exec(bom_stmt).all()

    job_stmt = select(Job).where(Job.model_id == model_id).order_by(Job.id.desc())
    latest = session.exec(job_stmt).first()

    if latest is None:
        artifacts = []
        latest_job = None
    else:
        artifacts = json.loads(latest.artifacts) if latest.artifacts else []
        latest_job = {"id": latest.id, "status": latest.status, "stage": latest.stage}

    return {
        "model": model,
        "bom": bom,
        "artifacts": artifacts,
        "latest_job": latest_job,
    }
@app.get("/api/models/{model_id}/artifacts/{name}")
def get_artifact(model_id: int, name: str):
    """Serve one file from the model's directory, or 404 if it is not a file."""
    safe = Path(name).name  # block path traversal
    target = MODELS_DIR.joinpath(str(model_id), safe)
    if target.is_file():
        return FileResponse(target, filename=safe)
    raise HTTPException(404, "artifact not found")
@app.post("/api/models/{model_id}/query")
def query_model(model_id: int, req: QueryRequest, session: Session = Depends(get_session)):
    """Run a geometry query against a model's stored STEP file and log the result.

    Returns:
        ``{"query", "result"}`` echoing the query text and the engine's answer.

    Raises:
        HTTPException: 404 when the model row or its source file is missing;
            500 when the query itself fails.
    """
    model = session.get(Model, model_id)
    if model is None:
        raise HTTPException(404, "model not found")
    step_path = MODELS_DIR / str(model_id) / model.original_filename
    if not step_path.is_file():
        raise HTTPException(404, "model source file missing")
    try:
        result = processing.run_query(step_path, req.query)
    except Exception as e:  # noqa: BLE001
        # Keep the traceback in the server log; the client only gets the summary.
        log.exception("query failed for model %s", model_id)
        raise HTTPException(500, f"query failed: {type(e).__name__}: {e}") from e
    # Only successful queries are recorded.
    session.add(QueryLog(model_id=model_id, query=req.query, result=result))
    session.commit()
    return {"query": req.query, "result": result}
+64
View File
@@ -0,0 +1,64 @@
"""SQLModel tables: Model, Job, BomRow, QueryLog."""
from datetime import datetime, timezone
from typing import Optional
from sqlmodel import Field, SQLModel
def utcnow() -> datetime:
    """Return the current time as a timezone-aware datetime in UTC."""
    return datetime.now(tz=timezone.utc)
class Model(SQLModel, table=True):
    """One uploaded STEP file and its extracted metadata."""

    # Auto-assigned primary key; also names the model's directory on disk.
    id: Optional[int] = Field(default=None, primary_key=True)
    # Display name; the upload endpoint sets it to the filename stem.
    name: str
    # Sanitized upload filename (final path component only).
    original_filename: str
    # Filename without its suffix.
    stem: str
    # The fields below stay None/False until a job completes and fills them in.
    backend: Optional[str] = None  # "build123d" | "freecad"
    face_count: Optional[int] = None
    part_count: Optional[int] = None
    # Overall bounding box, taken from the level-0 BOM row when one exists.
    bbox_x_mm: Optional[float] = None
    bbox_y_mm: Optional[float] = None
    bbox_z_mm: Optional[float] = None
    # True when any original part name contains Chinese text.
    has_chinese: bool = False
    created_at: datetime = Field(default_factory=utcnow)
class Job(SQLModel, table=True):
    """A processing run for a model. Status: pending|running|done|error."""

    id: Optional[int] = Field(default=None, primary_key=True)
    # The model this run belongs to; a model can have several jobs.
    model_id: int = Field(foreign_key="model.id", index=True)
    status: str = Field(default="pending", index=True)
    stage: Optional[str] = None  # current pipeline stage
    # "<ExceptionType>: <message>" when the run failed, otherwise None.
    error: Optional[str] = None
    artifacts: Optional[str] = None  # JSON list of relative filenames
    options: Optional[str] = None  # JSON of the request options
    created_at: datetime = Field(default_factory=utcnow)
    # Set when the worker picks the job up / finishes it (successfully or not).
    started_at: Optional[datetime] = None
    finished_at: Optional[datetime] = None
class BomRow(SQLModel, table=True):
    """One BOM line for a model."""

    id: Optional[int] = Field(default=None, primary_key=True)
    model_id: int = Field(foreign_key="model.id", index=True)
    part_number: Optional[str] = None
    # Name as found in the STEP file, and its English translation when available.
    part_name_original: Optional[str] = None
    part_name_english: Optional[str] = None
    quantity: Optional[int] = None
    # Depth in the assembly tree; the pipeline treats level 0 as the root row.
    level: Optional[int] = None
    parent: Optional[str] = None
    # Bounding-box extents in millimetres (per the column names).
    bbox_x_mm: Optional[float] = None
    bbox_y_mm: Optional[float] = None
    bbox_z_mm: Optional[float] = None
    notes: Optional[str] = None
class QueryLog(SQLModel, table=True):
    """A natural-language geometry query and its result."""

    id: Optional[int] = Field(default=None, primary_key=True)
    model_id: int = Field(foreign_key="model.id", index=True)
    # The query text exactly as submitted.
    query: str
    # What the query engine returned for it.
    result: Optional[str] = None
    created_at: datetime = Field(default_factory=utcnow)
+142
View File
@@ -0,0 +1,142 @@
"""Pipeline orchestration — wraps the CAD skill modules.
Mirrors step_processor.main() but writes artifacts into a per-model directory and
returns structured metadata + BOM rows for the DB instead of printing a summary.
Heavy kernel imports happen lazily inside each stage.
"""
import logging
import math
import os
from pathlib import Path
from typing import Callable, Optional
from . import skill_bridge # noqa: F401 — sets sys.path so `modules.*` imports resolve
# Module logger for pipeline orchestration messages.
log = logging.getLogger("step_parser.processing")
# Stage-reporting callback: run_pipeline calls it with the stage name as each stage starts.
Progress = Callable[[str], None]
def _f(v) -> Optional[float]:
    """Coerce *v* to a float rounded to 2 decimals, or None if it has no usable value.

    None is returned for anything ``float()`` rejects, for NaN, and for ±infinity.
    Infinities are excluded because they are meaningless as dimensions and cannot
    be encoded as standard JSON.
    """
    try:
        x = float(v)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: an int too large to represent as a float.
        return None
    return round(x, 2) if math.isfinite(x) else None
def _i(v) -> Optional[int]:
    """Coerce *v* to an int (truncating toward zero), or None if it has no usable value.

    The value goes through ``float()`` first so numeric strings such as "3.0" are
    accepted. None is returned for anything ``float()`` rejects, for NaN, and for
    ±infinity (``int(inf)`` would otherwise raise OverflowError).
    """
    try:
        x = float(v)
    except (TypeError, ValueError, OverflowError):
        return None
    return int(x) if math.isfinite(x) else None
def _s(v) -> Optional[str]:
    """Coerce *v* to a stripped string, or None when it is missing or blank.

    A float NaN counts as missing: that is how an empty cell in a DataFrame
    shows up, and stringifying it would store the literal text "nan".
    """
    if v is None:
        return None
    if isinstance(v, float) and math.isnan(v):
        return None
    text = str(v).strip()
    return text or None
def _bom_df_to_rows(df) -> list[dict]:
    """Convert a BOM DataFrame into plain dicts, one per row.

    Each value is normalized with ``_s`` (text), ``_i`` (integers) or ``_f``
    (floats). A column missing from the frame yields None via ``Series.get``.
    """
    return [
        {
            "part_number": _s(rec.get("part_number")),
            "part_name_original": _s(rec.get("part_name_original")),
            "part_name_english": _s(rec.get("part_name_english")),
            "quantity": _i(rec.get("quantity")),
            "level": _i(rec.get("level")),
            "parent": _s(rec.get("parent")),
            "bbox_x_mm": _f(rec.get("bbox_x_mm")),
            "bbox_y_mm": _f(rec.get("bbox_y_mm")),
            "bbox_z_mm": _f(rec.get("bbox_z_mm")),
            "notes": _s(rec.get("notes")),
        }
        for _, rec in df.iterrows()
    ]
def _collect_artifacts(out_dir: Path) -> list[str]:
    """List the names of the regular files directly inside *out_dir*, sorted.

    Covers the original upload as well as generated outputs; subdirectories are
    neither listed nor descended into.
    """
    names = [entry.name for entry in out_dir.iterdir() if entry.is_file()]
    names.sort()
    return names
def run_pipeline(step_path: Path, out_dir: Path, options: dict, progress: Progress) -> dict:
    """Run the requested pipeline stages. Returns metadata dict for the DB.

    Args:
        step_path: The uploaded STEP file.
        out_dir: Directory whose files are reported as artifacts at the end.
        options: Stage switches. Keys read here: ``bom`` (default True),
            ``translate``, ``thumbnails`` (default True), ``diagram`` (default
            False), ``diagram_mode`` (default "enclosure_only"), ``diagram_pdf``
            (default False).
        progress: Called with the stage name as each stage starts.

    Returns:
        Dict with keys ``backend``, ``face_count``, ``part_count``,
        ``has_chinese``, ``bbox`` (3-tuple), ``bom_rows`` and ``artifacts``.

    Raises:
        RuntimeError: when the STEP file cannot be loaded.
    """
    meta: dict = {
        "backend": None, "face_count": None, "part_count": None,
        "has_chinese": False, "bbox": (None, None, None),
        "bom_rows": [], "artifacts": [],
    }

    progress("loading")
    import modules.loader as loader_mod
    model = loader_mod.load_step(step_path)
    if model is None:
        raise RuntimeError("Failed to load STEP — build123d/FreeCAD unavailable or file invalid")
    meta["backend"] = model.backend
    meta["face_count"] = model.face_count
    meta["part_count"] = len(model.parts) if model.parts else None

    bom_df = None
    if options.get("bom", True):
        progress("bom")
        from modules.bom import extract_bom, save_bom_xlsx
        bom_df = extract_bom(model)
        save_bom_xlsx(bom_df, step_path)

    # bom_df is only non-None when the BOM stage ran, so save_bom_xlsx is in scope below.
    if bom_df is not None:
        from modules.translator import has_chinese
        meta["has_chinese"] = bool(bom_df["part_name_original"].apply(has_chinese).any())

        if options.get("translate") and meta["has_chinese"]:
            if os.environ.get("ANTHROPIC_API_KEY"):
                progress("translate")
                from modules.translator import get_translation_map, translate_bom
                bom_df = translate_bom(bom_df, model_name=step_path.stem)
                # Re-save so the spreadsheet carries the translated names.
                save_bom_xlsx(bom_df, step_path)
                tmap = get_translation_map(bom_df)
                if tmap:
                    from modules.rewriter import rewrite_step
                    rewrite_step(step_path, tmap)
            else:
                log.warning("translate requested but ANTHROPIC_API_KEY not set — skipping")

        meta["bom_rows"] = _bom_df_to_rows(bom_df)
        # Best-effort: the overall bbox comes from the first level-0 row. A failure
        # here must not fail the job, but it is logged rather than silently dropped.
        try:
            root = bom_df[bom_df["level"] == 0]
            if len(root):
                r0 = root.iloc[0]
                meta["bbox"] = (_f(r0["bbox_x_mm"]), _f(r0["bbox_y_mm"]), _f(r0["bbox_z_mm"]))
        except Exception:  # noqa: BLE001 — deliberately non-fatal
            log.warning("could not derive model bbox from BOM root row", exc_info=True)

    if options.get("thumbnails", True):
        progress("thumbnails")
        from modules.renderer import render_views
        render_views(model, step_path)

    if options.get("diagram", False):
        progress("diagram")
        from modules.external_diagram import step_external_diagram
        step_external_diagram(
            path=str(step_path),
            mode=options.get("diagram_mode", "enclosure_only"),
            options={"pdf": bool(options.get("diagram_pdf", False))},
        )

    progress("collect")
    meta["artifacts"] = _collect_artifacts(out_dir)
    return meta
def run_query(step_path: Path, query: str) -> str:
    """Load the STEP file at *step_path* and answer one geometry query, synchronously.

    Raises RuntimeError when the loader returns no model.
    """
    import modules.loader as step_loader
    from modules.query_engine import run_query as engine_query

    loaded = step_loader.load_step(step_path)
    if loaded is not None:
        return engine_query(loaded, query)
    raise RuntimeError("Failed to load STEP for query")
+16
View File
@@ -0,0 +1,16 @@
"""Make the CAD skill (skill.src/modules/*) importable.
skill.src uses package-relative imports under a top-level `modules` package, the
same way step_processor.py runs it (script dir on sys.path[0]). We replicate that
by putting SKILL_SRC on sys.path, then `import modules.loader` etc. resolves.
Heavy deps (build123d/OCP) are imported lazily inside the skill functions, so
importing the modules here is cheap; the kernel only loads when a job runs.
"""
import sys
from .config import SKILL_SRC
# Put the skill source directory at the front of sys.path, once, so that
# `import modules.<name>` resolves against it.
_p = str(SKILL_SRC)
if not any(entry == _p for entry in sys.path):
    sys.path[:0] = [_p]
+88
View File
@@ -0,0 +1,88 @@
"""Background job worker.
CAD processing is heavy and CPU-bound, so jobs run in a ThreadPoolExecutor
(serialized by default, MAX_WORKERS=1) rather than blocking the event loop with
FastAPI BackgroundTasks. Job state + results are persisted to SQLite as it runs,
so a client polling GET /api/jobs/{id} sees live stage updates.
"""
import json
import logging
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timezone
from sqlmodel import Session, delete
from .config import MAX_WORKERS, MODELS_DIR
from .db import engine
from .models import BomRow, Job, Model
from .processing import run_pipeline
# Module logger for background job messages.
log = logging.getLogger("step_parser.worker")
# Process-wide pool that runs jobs; created at import time and sized by MAX_WORKERS.
# Its threads are named "cadjob_<n>".
_executor = ThreadPoolExecutor(max_workers=MAX_WORKERS, thread_name_prefix="cadjob")
def _utcnow() -> datetime:
    """Return the current time as a timezone-aware datetime in UTC."""
    return datetime.now(tz=timezone.utc)
def submit_job(job_id: int) -> None:
    """Queue *job_id* on the worker pool and return immediately.

    The Future is not returned to the caller, so a done-callback logs any
    exception that escapes ``_run_job``; otherwise it would be lost with the Future.
    """

    def _report(fut) -> None:
        # Asking a cancelled future for its exception would itself raise.
        if fut.cancelled():
            return
        exc = fut.exception()
        if exc is not None:
            log.error("job %s crashed outside its error handler", job_id, exc_info=exc)

    _executor.submit(_run_job, job_id).add_done_callback(_report)
def _run_job(job_id: int) -> None:
    """Execute one job end to end and persist its outcome.

    Marks the job "running", runs the pipeline (committing each stage change so
    pollers see it), then stores the model metadata, BOM rows and artifact list
    and marks the job "done". Any failure after the job row is found, including
    a missing model or unparsable options, is recorded on the job as
    status "error" instead of leaving it stuck in "pending".
    """
    with Session(engine) as s:
        job = s.get(Job, job_id)
        if job is None:
            log.error("job %s vanished before it ran", job_id)
            return
        # Captured once so later lookups do not depend on the job instance's state.
        model_id = job.model_id

        def progress(stage: str) -> None:
            """Persist the current pipeline stage for pollers."""
            j = s.get(Job, job_id)
            j.stage = stage
            s.add(j)
            s.commit()

        try:
            job.status = "running"
            job.stage = "loading"
            job.started_at = _utcnow()
            s.add(job)
            s.commit()

            model = s.get(Model, model_id)
            if model is None:
                raise RuntimeError(f"model {model_id} not found for job {job_id}")
            out_dir = MODELS_DIR / str(model.id)
            step_path = out_dir / model.original_filename
            options = json.loads(job.options or "{}")

            meta = run_pipeline(step_path, out_dir, options, progress)

            model = s.get(Model, model_id)
            model.backend = meta["backend"]
            model.face_count = meta["face_count"]
            model.part_count = meta["part_count"]
            model.bbox_x_mm, model.bbox_y_mm, model.bbox_z_mm = meta["bbox"]
            model.has_chinese = meta["has_chinese"]
            s.add(model)

            # Replace any prior BOM rows for an idempotent re-run.
            s.exec(delete(BomRow).where(BomRow.model_id == model.id))
            for row in meta["bom_rows"]:
                s.add(BomRow(model_id=model.id, **row))

            job = s.get(Job, job_id)
            job.status = "done"
            job.stage = "done"
            job.artifacts = json.dumps(meta["artifacts"])
            job.finished_at = _utcnow()
            s.add(job)
            # Model update, BOM replacement and job completion land in one commit.
            s.commit()
            log.info("job %s done — %d artifacts", job_id, len(meta["artifacts"]))
        except Exception as e:  # noqa: BLE001 — record any failure on the job
            log.exception("job %s failed", job_id)
            # Discard any half-applied changes before writing the failure.
            s.rollback()
            j = s.get(Job, job_id)
            if j is None:
                log.error("job %s disappeared while recording its failure", job_id)
                return
            j.status = "error"
            j.error = f"{type(e).__name__}: {e}"
            j.finished_at = _utcnow()
            s.add(j)
            s.commit()