This commit is contained in:
+63
-13
@@ -70,24 +70,55 @@ export async function checkStockStatus(url: string): Promise<ScrapeResult> {
|
|||||||
|
|
||||||
await page.goto(url, { waitUntil: 'networkidle2', timeout: 45000 });
|
await page.goto(url, { waitUntil: 'networkidle2', timeout: 45000 });
|
||||||
|
|
||||||
// Wait for React hydration
|
// Wait for React hydration, then actively wait for a <button> to appear
|
||||||
|
// (hydration on store.ui.com sometimes takes 4-6s in the Unraid container).
|
||||||
await new Promise(r => setTimeout(r, 2500));
|
await new Promise(r => setTimeout(r, 2500));
|
||||||
|
try {
|
||||||
|
await page.waitForFunction(
|
||||||
|
() => document.querySelectorAll('button').length > 0,
|
||||||
|
{ timeout: 8000 }
|
||||||
|
);
|
||||||
|
} catch {
|
||||||
|
// Fall through — the _debug payload below will record buttonCount: 0.
|
||||||
|
}
|
||||||
|
|
||||||
const result = await page.evaluate(() => {
|
const result = await page.evaluate(() => {
|
||||||
let status: 'in_stock' | 'sold_out' | 'unknown' = 'unknown';
|
let status: 'in_stock' | 'sold_out' | 'unknown' = 'unknown';
|
||||||
|
|
||||||
// In stock: primary button carries label="Add to Cart" as an attribute.
|
// Normalize text: collapse whitespace (incl. NBSP), lowercase. Necessary
|
||||||
// Why: styled-components class hashes rebuild every deploy, and "Add to Cart"
|
// because the page text may include NBSPs, leading icons that contribute
|
||||||
// text alone can appear in related-product cards on the page.
|
// whitespace to textContent, or casing changes between deploys.
|
||||||
if (document.querySelector('button[label="Add to Cart"]')) {
|
const norm = (s: string | null | undefined) =>
|
||||||
|
(s ?? '').replace(/\u00a0/g, ' ').replace(/\s+/g, ' ').trim().toLowerCase();
|
||||||
|
|
||||||
|
const buttons = Array.from(document.querySelectorAll('button'));
|
||||||
|
|
||||||
|
// In stock detection — try most specific signal first, fall back to text.
|
||||||
|
// 1. button with label="Add to Cart" attribute (styled-components prop pass-through)
|
||||||
|
// 2. button whose normalized text contains "add to cart"
|
||||||
|
// 3. any span text exactly "Add to Cart" inside a button (legacy DOM fallback) — covered by 2, because a button's textContent includes the text of its descendant spans
|
||||||
|
const hasLabelAttr = buttons.some(
|
||||||
|
b => norm(b.getAttribute('label')) === 'add to cart'
|
||||||
|
);
|
||||||
|
const hasCartText = buttons.some(b => norm(b.textContent).includes('add to cart'));
|
||||||
|
|
||||||
|
if (hasLabelAttr || hasCartText) {
|
||||||
status = 'in_stock';
|
status = 'in_stock';
|
||||||
} else {
|
} else {
|
||||||
const soldOutPhrases = ['Notify me when available', 'Sold Out', 'Out of Stock'];
|
// Sold-out / unavailable phrases — substring match on normalized button text.
|
||||||
const buttons = document.querySelectorAll('button');
|
const soldOutPhrases = [
|
||||||
for (const btn of buttons) {
|
'notify me when available',
|
||||||
const text = btn.textContent?.trim();
|
'notify me',
|
||||||
if (text && soldOutPhrases.includes(text)) { status = 'sold_out'; break; }
|
'sold out',
|
||||||
}
|
'out of stock',
|
||||||
|
'currently unavailable',
|
||||||
|
'coming soon',
|
||||||
|
];
|
||||||
|
const hit = buttons.some(b => {
|
||||||
|
const t = norm(b.textContent);
|
||||||
|
return t && soldOutPhrases.some(p => t.includes(p));
|
||||||
|
});
|
||||||
|
if (hit) status = 'sold_out';
|
||||||
}
|
}
|
||||||
|
|
||||||
// Product name: og:title is most reliable for single-product pages
|
// Product name: og:title is most reliable for single-product pages
|
||||||
@@ -108,10 +139,29 @@ export async function checkStockStatus(url: string): Promise<ScrapeResult> {
|
|||||||
thumbnail = ogImage.content;
|
thumbnail = ogImage.content;
|
||||||
}
|
}
|
||||||
|
|
||||||
return { status, name, thumbnail };
|
// Debug payload — populated only when we can't determine status, so we
|
||||||
|
// can see what the page actually looked like to the scraper.
|
||||||
|
let _debug: Record<string, unknown> | undefined;
|
||||||
|
if (status === 'unknown') {
|
||||||
|
_debug = {
|
||||||
|
title: document.title,
|
||||||
|
url: location.href,
|
||||||
|
bodyLen: document.body?.innerHTML.length ?? 0,
|
||||||
|
buttonCount: buttons.length,
|
||||||
|
buttonLabels: buttons.map(b => b.getAttribute('label')).filter(Boolean).slice(0, 20),
|
||||||
|
buttonTexts: buttons.map(b => norm(b.textContent)).filter(Boolean).slice(0, 30),
|
||||||
|
h1: document.querySelector('h1')?.textContent?.trim().slice(0, 120),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
return { status, name, thumbnail, _debug };
|
||||||
});
|
});
|
||||||
|
|
||||||
return result as ScrapeResult;
|
if ((result as { _debug?: unknown })._debug) {
|
||||||
|
console.log(`[Scraper] UNKNOWN debug for ${url}:`, JSON.stringify((result as { _debug: unknown })._debug, null, 2));
|
||||||
|
}
|
||||||
|
|
||||||
|
return { status: result.status, name: result.name, thumbnail: result.thumbnail } as ScrapeResult;
|
||||||
} finally {
|
} finally {
|
||||||
await browser.close();
|
await browser.close();
|
||||||
scrapeSemaphore.release();
|
scrapeSemaphore.release();
|
||||||
|
|||||||
Reference in New Issue
Block a user