This commit is contained in:
+63
-13
@@ -70,24 +70,55 @@ export async function checkStockStatus(url: string): Promise<ScrapeResult> {
|
||||
|
||||
await page.goto(url, { waitUntil: 'networkidle2', timeout: 45000 });
|
||||
|
||||
// Wait for React hydration
|
||||
// Wait for React hydration, then actively wait for a <button> to appear
|
||||
// (hydration on store.ui.com sometimes takes 4-6s in the Unraid container).
|
||||
await new Promise(r => setTimeout(r, 2500));
|
||||
try {
|
||||
await page.waitForFunction(
|
||||
() => document.querySelectorAll('button').length > 0,
|
||||
{ timeout: 8000 }
|
||||
);
|
||||
} catch {
|
||||
// Fall through — the _debug payload below will record buttonCount: 0.
|
||||
}
|
||||
|
||||
const result = await page.evaluate(() => {
|
||||
let status: 'in_stock' | 'sold_out' | 'unknown' = 'unknown';
|
||||
|
||||
// In stock: primary button carries label="Add to Cart" as an attribute.
|
||||
// Why: styled-components class hashes rebuild every deploy, and "Add to Cart"
|
||||
// text alone can appear in related-product cards on the page.
|
||||
if (document.querySelector('button[label="Add to Cart"]')) {
|
||||
// Normalize text: collapse whitespace (incl. NBSP), lowercase. Necessary
|
||||
// because the page text may include NBSPs, leading icons that contribute
|
||||
// whitespace to textContent, or casing changes between deploys.
|
||||
// norm: returns '' for null/undefined; otherwise collapses every run of
// whitespace (NBSP included) to a single space, trims, and lowercases.
// The NBSP is spelled as the \u00a0 escape rather than a literal character so
// it stays visible in the source and cannot be silently lost by an editor.
// (\s already matches U+00A0 in JavaScript; the escape is listed for clarity.)
const norm = (s: string | null | undefined): string =>
  (s ?? '').replace(/[\u00a0\s]+/g, ' ').trim().toLowerCase();
|
||||
|
||||
const buttons = Array.from(document.querySelectorAll('button'));
|
||||
|
||||
// In stock detection — try most specific signal first, fall back to text.
|
||||
// 1. button with label="Add to Cart" attribute (styled-components prop pass-through)
|
||||
// 2. button whose normalized text contains "add to cart"
|
||||
// (No separate span-level fallback is needed: textContent in step 2 already includes text from spans nested inside the button.)
|
||||
const hasLabelAttr = buttons.some(
|
||||
b => norm(b.getAttribute('label')) === 'add to cart'
|
||||
);
|
||||
const hasCartText = buttons.some(b => norm(b.textContent).includes('add to cart'));
|
||||
|
||||
if (hasLabelAttr || hasCartText) {
|
||||
status = 'in_stock';
|
||||
} else {
|
||||
const soldOutPhrases = ['Notify me when available', 'Sold Out', 'Out of Stock'];
|
||||
const buttons = document.querySelectorAll('button');
|
||||
for (const btn of buttons) {
|
||||
const text = btn.textContent?.trim();
|
||||
if (text && soldOutPhrases.includes(text)) { status = 'sold_out'; break; }
|
||||
}
|
||||
// Sold-out / unavailable phrases — substring match on normalized button text.
|
||||
const soldOutPhrases = [
|
||||
'notify me when available',
|
||||
'notify me',
|
||||
'sold out',
|
||||
'out of stock',
|
||||
'currently unavailable',
|
||||
'coming soon',
|
||||
];
|
||||
const hit = buttons.some(b => {
|
||||
const t = norm(b.textContent);
|
||||
return t && soldOutPhrases.some(p => t.includes(p));
|
||||
});
|
||||
if (hit) status = 'sold_out';
|
||||
}
|
||||
|
||||
// Product name: og:title is most reliable for single-product pages
|
||||
@@ -108,10 +139,29 @@ export async function checkStockStatus(url: string): Promise<ScrapeResult> {
|
||||
thumbnail = ogImage.content;
|
||||
}
|
||||
|
||||
return { status, name, thumbnail };
|
||||
// Debug payload — populated only when we can't determine status, so we
|
||||
// can see what the page actually looked like to the scraper.
|
||||
let _debug: Record<string, unknown> | undefined;
|
||||
if (status === 'unknown') {
|
||||
_debug = {
|
||||
title: document.title,
|
||||
url: location.href,
|
||||
bodyLen: document.body?.innerHTML.length ?? 0,
|
||||
buttonCount: buttons.length,
|
||||
buttonLabels: buttons.map(b => b.getAttribute('label')).filter(Boolean).slice(0, 20),
|
||||
buttonTexts: buttons.map(b => norm(b.textContent)).filter(Boolean).slice(0, 30),
|
||||
h1: document.querySelector('h1')?.textContent?.trim().slice(0, 120),
|
||||
};
|
||||
}
|
||||
|
||||
return { status, name, thumbnail, _debug };
|
||||
});
|
||||
|
||||
return result as ScrapeResult;
|
||||
if ((result as { _debug?: unknown })._debug) {
|
||||
console.log(`[Scraper] UNKNOWN debug for ${url}:`, JSON.stringify((result as { _debug: unknown })._debug, null, 2));
|
||||
}
|
||||
|
||||
return { status: result.status, name: result.name, thumbnail: result.thumbnail } as ScrapeResult;
|
||||
} finally {
|
||||
await browser.close();
|
||||
scrapeSemaphore.release();
|
||||
|
||||
Reference in New Issue
Block a user