import { chromium, type Page } from "playwright";

/** Data scraped from a single Amazon product page; a field is `null` when it could not be extracted. */
export interface AmazonData {
  price: number | null;
  rating: number | null;
  reviewCount: number | null;
  url: string;
  image: string | null;
}

/**
 * Reads the product price from the page.
 *
 * Tries a list of known price selectors in order and returns the first one
 * whose text parses to a positive number.
 *
 * @param page - Page that has already navigated to the product URL.
 * @returns The parsed price, or `null` when no selector yields a positive number.
 */
async function extractPrice(page: Page): Promise<number | null> {
  const selectors = [
    ".a-price .a-offscreen",
    "#priceblock_ourprice",
    "#corePrice_feature_div .a-offscreen",
    ".apexPriceToPay .a-offscreen",
  ];

  for (const sel of selectors) {
    const el = await page.$(sel);
    if (!el) continue;

    const text = await el.textContent();
    if (!text) continue;

    // Drop everything except digits and the decimal point (currency symbol,
    // thousands separators, whitespace) before parsing.
    const cleaned = text.replace(/[^0-9.]/g, "");
    const parsed = parseFloat(cleaned);
    if (!Number.isNaN(parsed) && parsed > 0) return parsed;
  }

  return null;
}

/**
 * Reads the average star rating from the page.
 *
 * Looks for text of the form "<number> out of ..." in the customer-review
 * rating element.
 *
 * @param page - Page that has already navigated to the product URL.
 * @returns The parsed rating, or `null` when the element is missing or its
 *   text does not contain a parsable number.
 */
async function extractRating(page: Page): Promise<number | null> {
  const el = await page.$(
    '#averageCustomerReviews .a-icon-alt, [data-hook="rating-out-of-text"]',
  );
  if (!el) return null;

  const text = await el.textContent();
  if (!text) return null;

  const captured = text.match(/([\d.]+)\s*out of/)?.[1];
  if (captured === undefined) return null;

  // `[\d.]+` can also match a run of dots, which parses to NaN; report that
  // as "not found" instead of leaking NaN to callers.
  const parsed = parseFloat(captured);
  return Number.isNaN(parsed) ? null : parsed;
}

/**
 * Reads the number of customer reviews from the page.
 *
 * @param page - Page that has already navigated to the product URL.
 * @returns The review count, or `null` when the element is missing or
 *   contains no digits.
 */
async function extractReviewCount(page: Page): Promise<number | null> {
  const el = await page.$("#acrCustomerReviewText");
  if (!el) return null;

  const text = await el.textContent();
  if (!text) return null;

  // Keep digits only so separators and surrounding words are ignored.
  const cleaned = text.replace(/[^0-9]/g, "");
  const parsed = parseInt(cleaned, 10);
  return Number.isNaN(parsed) ? null : parsed;
}

/**
 * Reads the main product image URL from the page.
 *
 * @param page - Page that has already navigated to the product URL.
 * @returns The image `src` when it starts with "http", otherwise `null`
 *   (this excludes e.g. inline `data:` URIs).
 */
async function extractImage(page: Page): Promise<string | null> {
  const el = await page.$("#imgTagWrapperId img, #landingImage");
  if (!el) return null;

  const src = await el.getAttribute("src");
  return src && src.startsWith("http") ? src : null;
}

/**
 * Scrapes price, rating, review count and image for a list of Amazon products.
 *
 * Products are visited one at a time in a single headless Chromium instance,
 * with a random 2-5 second pause between consecutive products. A product whose
 * page fails to load or parse is still present in the result, with every
 * scraped field set to `null`.
 *
 * @param asins - Products to scrape; `name` becomes the key in the returned
 *   map, `asin` is used to build the product URL.
 * @returns A map from product name to the scraped data. Empty when `asins`
 *   is empty (no browser is launched in that case). If two entries share a
 *   name, the later one overwrites the earlier.
 */
export async function scrapeAmazonProducts(
  asins: { name: string; asin: string }[],
): Promise<Map<string, AmazonData>> {
  const results = new Map<string, AmazonData>();
  if (asins.length === 0) return results;

  const browser = await chromium.launch({ headless: true });

  // try/finally guarantees the browser process is shut down even when
  // something outside the per-product try block throws (newContext, newPage,
  // page.close).
  try {
    const context = await browser.newContext({
      userAgent:
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
      viewport: { width: 1280, height: 800 },
    });

    for (const [index, { name, asin }] of asins.entries()) {
      const page = await context.newPage();
      const url = `https://www.amazon.com/dp/${asin}`;

      try {
        await page.goto(url, {
          waitUntil: "domcontentloaded",
          timeout: 15000,
        });
        // Fixed pause after DOMContentLoaded; presumably gives late-rendered
        // price/review widgets time to appear.
        await page.waitForTimeout(1000);

        const price = await extractPrice(page);
        const rating = await extractRating(page);
        const reviewCount = await extractReviewCount(page);
        const image = await extractImage(page);

        results.set(name, { price, rating, reviewCount, url, image });
        console.log(
          `[amazon] ${name}: $${price ?? "N/A"}, ${rating ?? "N/A"}★, ${reviewCount ?? "N/A"} reviews`,
        );
      } catch (err) {
        // Best effort: record an all-null entry so callers can still see the
        // product was attempted, and keep going with the remaining products.
        console.warn(`[amazon] Failed for ${name} (${asin}): ${err}`);
        results.set(name, {
          price: null,
          rating: null,
          reviewCount: null,
          url,
          image: null,
        });
      } finally {
        await page.close();
      }

      // Random 2-5 s pause between products (presumably to avoid hammering
      // the site). Skipped after the last product, where it would only delay
      // the return.
      if (index < asins.length - 1) {
        const delay = 2000 + Math.random() * 3000;
        await new Promise((r) => setTimeout(r, delay));
      }
    }
  } finally {
    await browser.close();
  }

  return results;
}