scraper wait time and debug
Build and Push Docker Image / build (push) Successful in 14s

This commit is contained in:
2026-06-06 14:14:15 -05:00
parent 34e3166b97
commit ea6612125c
+63 -13
View File
@@ -70,24 +70,55 @@ export async function checkStockStatus(url: string): Promise<ScrapeResult> {
await page.goto(url, { waitUntil: 'networkidle2', timeout: 45000 });
// Wait for React hydration
// Wait for React hydration, then actively wait for a <button> to appear
// (hydration on store.ui.com sometimes takes 4-6s in the Unraid container).
await new Promise(r => setTimeout(r, 2500));
try {
await page.waitForFunction(
() => document.querySelectorAll('button').length > 0,
{ timeout: 8000 }
);
} catch {
// Fall through — the _debug payload below will record buttonCount: 0.
}
const result = await page.evaluate(() => {
let status: 'in_stock' | 'sold_out' | 'unknown' = 'unknown';
// In stock: primary button carries label="Add to Cart" as an attribute.
// Why: styled-components class hashes rebuild every deploy, and "Add to Cart"
// text alone can appear in related-product cards on the page.
if (document.querySelector('button[label="Add to Cart"]')) {
// Normalize text: collapse whitespace (incl. NBSP), lowercase. Necessary
// because the page text may include NBSPs, leading icons that contribute
// whitespace to textContent, or casing changes between deploys.
const norm = (s: string | null | undefined) =>
(s ?? '').replace(/ /g, ' ').replace(/\s+/g, ' ').trim().toLowerCase();
const buttons = Array.from(document.querySelectorAll('button'));
// In stock detection — try most specific signal first, fall back to text.
// 1. button with label="Add to Cart" attribute (styled-components prop pass-through)
// 2. button whose normalized text contains "add to cart"
// 3. any span text exactly "Add to Cart" inside a button (legacy DOM fallback)
const hasLabelAttr = buttons.some(
b => norm(b.getAttribute('label')) === 'add to cart'
);
const hasCartText = buttons.some(b => norm(b.textContent).includes('add to cart'));
if (hasLabelAttr || hasCartText) {
status = 'in_stock';
} else {
const soldOutPhrases = ['Notify me when available', 'Sold Out', 'Out of Stock'];
const buttons = document.querySelectorAll('button');
for (const btn of buttons) {
const text = btn.textContent?.trim();
if (text && soldOutPhrases.includes(text)) { status = 'sold_out'; break; }
}
// Sold-out / unavailable phrases — substring match on normalized button text.
const soldOutPhrases = [
'notify me when available',
'notify me',
'sold out',
'out of stock',
'currently unavailable',
'coming soon',
];
const hit = buttons.some(b => {
const t = norm(b.textContent);
return t && soldOutPhrases.some(p => t.includes(p));
});
if (hit) status = 'sold_out';
}
// Product name: og:title is most reliable for single-product pages
@@ -108,10 +139,29 @@ export async function checkStockStatus(url: string): Promise<ScrapeResult> {
thumbnail = ogImage.content;
}
return { status, name, thumbnail };
// Debug payload — populated only when we can't determine status, so we
// can see what the page actually looked like to the scraper.
let _debug: Record<string, unknown> | undefined;
if (status === 'unknown') {
_debug = {
title: document.title,
url: location.href,
bodyLen: document.body?.innerHTML.length ?? 0,
buttonCount: buttons.length,
buttonLabels: buttons.map(b => b.getAttribute('label')).filter(Boolean).slice(0, 20),
buttonTexts: buttons.map(b => norm(b.textContent)).filter(Boolean).slice(0, 30),
h1: document.querySelector('h1')?.textContent?.trim().slice(0, 120),
};
}
return { status, name, thumbnail, _debug };
});
return result as ScrapeResult;
if ((result as { _debug?: unknown })._debug) {
console.log(`[Scraper] UNKNOWN debug for ${url}:`, JSON.stringify((result as { _debug: unknown })._debug, null, 2));
}
return { status: result.status, name: result.name, thumbnail: result.thumbnail } as ScrapeResult;
} finally {
await browser.close();
scrapeSemaphore.release();