"use strict";

const fs = require("fs");
const { chromium } = require("playwright");

// `--headed` shows the browser window; the default is a headless run.
const headed = process.argv.includes("--headed");

/**
 * Reads a `--name=value` option from the command line.
 *
 * @param {string} name - Option name without the leading dashes.
 * @param {string} defaultValue - Returned when the option is absent.
 * @returns {string} The text after `=` or `defaultValue`.
 */
function getArgValue(name, defaultValue) {
  const prefix = `--${name}=`;
  const found = process.argv.find((arg) => arg.startsWith(prefix));
  return found ? found.slice(prefix.length) : defaultValue;
}

/**
 * Reads a positive-integer option from the command line.
 *
 * A missing, non-numeric, zero or negative value falls back to the default.
 * Without this guard a NaN limit makes the scroll loop run zero times and the
 * color would still be recorded as completed.
 *
 * @param {string} name - Option name without the leading dashes.
 * @param {number} defaultValue - Fallback value.
 * @returns {number} A positive integer.
 */
function getPositiveIntArg(name, defaultValue) {
  const raw = getArgValue(name, String(defaultValue));
  const parsed = Number.parseInt(raw, 10);
  if (Number.isInteger(parsed) && parsed > 0) {
    return parsed;
  }
  console.warn(`Ignoring invalid --${name}=${raw}; using ${defaultValue}`);
  return defaultValue;
}

const maxScrollsPerColor = getPositiveIntArg("max-scrolls-per-color", 180);
const stopAfterNoNewScrolls = getPositiveIntArg("stop-after-no-new-scrolls", 10);

const baseUrl = "https://www.prismaticpowders.com/shop/powder-coating-colors";
const outputFile = "product-urls.txt";
const logFile = "color-discovery-log.json";

// Update this list if you find more color params in the site HTML.
const colorParams = [
  "pris_black",
  "pris_blue",
  "pris_bronze",
  "pris_brown",
  "pris_clear",
  "pris_copper",
  "pris_gold",
  "pris_gray",
  "pris_green",
  "pris_orange",
  "pris_pink",
  "pris_purple",
  "pris_red",
  "pris_silver",
  "pris_tan",
  "pris_white",
  "pris_yellow",
];

// Matches a product page path: the listing path followed by a slug segment
// and a trailing slash. No `g` flag, so `.test()` keeps no state between calls.
const PRODUCT_URL_PATTERN = /\/shop\/powder-coating-colors\/[A-Z0-9-]+\//i;

/**
 * Strips the query string and fragment from a URL and trims whitespace.
 *
 * @param {string} [url] - Raw URL; falsy values are treated as "".
 * @returns {string} The cleaned URL, possibly empty.
 */
function cleanUrl(url) {
  return (url || "").split("?")[0].split("#")[0].trim();
}

/**
 * Tells whether a URL points at a product page under the listing path.
 *
 * @param {string} [url] - URL to test; falsy values never match.
 * @returns {boolean} True when the product path pattern is present.
 */
function isProductUrl(url) {
  return PRODUCT_URL_PATTERN.test(url || "");
}

/**
 * Loads the URLs already saved in the output file.
 *
 * @returns {string[]} Cleaned, non-empty lines; empty when the file is absent.
 */
function readExistingUrls() {
  if (!fs.existsSync(outputFile)) {
    return [];
  }
  return fs
    .readFileSync(outputFile, "utf8")
    .split(/\r?\n/)
    .map((line) => cleanUrl(line))
    .filter(Boolean);
}

/**
 * Writes the URL collection to the output file, sorted, one per CRLF line.
 *
 * @param {Iterable<string>} urls - URLs to persist.
 */
function writeUrls(urls) {
  const sorted = [...urls].sort();
  fs.writeFileSync(outputFile, sorted.join("\r\n") + "\r\n", "utf8");
}

/**
 * Creates an empty progress log.
 *
 * @returns {{completed_colors: Object, runs: Array}} Fresh log object.
 */
function createEmptyLog() {
  return { completed_colors: {}, runs: [] };
}

/**
 * Tells whether a value is a non-null, non-array object.
 *
 * @param {*} value - Value to test.
 * @returns {boolean} True for plain-object-like values.
 */
function isRecord(value) {
  return value !== null && typeof value === "object" && !Array.isArray(value);
}

/**
 * Loads the progress log, repairing any missing or malformed sections.
 *
 * The rest of the script indexes `completed_colors` and pushes to `runs`, so
 * both are guaranteed to exist on the returned object. An unreadable log is
 * reported and replaced by an empty one rather than aborting the run.
 *
 * @returns {{completed_colors: Object, runs: Array}} Usable log object.
 */
function readLog() {
  if (!fs.existsSync(logFile)) {
    return createEmptyLog();
  }

  let parsed;
  try {
    parsed = JSON.parse(fs.readFileSync(logFile, "utf8"));
  } catch (err) {
    console.warn(`Could not read ${logFile} (${err.message}); starting with an empty log.`);
    return createEmptyLog();
  }

  if (!isRecord(parsed)) {
    return createEmptyLog();
  }
  if (!isRecord(parsed.completed_colors)) {
    parsed.completed_colors = {};
  }
  if (!Array.isArray(parsed.runs)) {
    parsed.runs = [];
  }
  return parsed;
}

/**
 * Persists the progress log as pretty-printed JSON.
 *
 * @param {Object} log - Log object to write.
 */
function writeLog(log) {
  fs.writeFileSync(logFile, JSON.stringify(log, null, 2), "utf8");
}

/**
 * Collects the product URLs currently present in the page's anchors.
 *
 * Only string hrefs are returned from the browser context; the product filter
 * runs here through `isProductUrl`, after query/fragment stripping, so a match
 * that exists only inside a query string is not accepted.
 *
 * @param {import("playwright").Page} page - Page to inspect.
 * @returns {Promise<string[]>} Cleaned product URLs (may contain duplicates).
 */
async function collectProductLinks(page) {
  const hrefs = await page
    .locator("a")
    .evaluateAll((anchors) =>
      anchors.map((a) => a.href).filter((href) => typeof href === "string" && href.length > 0)
    );
  return hrefs.map((href) => cleanUrl(href)).filter((href) => isProductUrl(href));
}

/**
 * Scrolls the current listing page and adds every product URL found to `urls`.
 *
 * Stops after `maxScrollsPerColor` rounds, or earlier once
 * `stopAfterNoNewScrolls` consecutive rounds added nothing.
 *
 * @param {import("playwright").Page} page - Page already showing the listing.
 * @param {Set<string>} urls - Shared URL set; mutated in place.
 * @param {string} label - Prefix used in progress messages.
 * @returns {Promise<number>} Number of URLs added by this call.
 */
async function scrollAndCollect(page, urls, label) {
  let noNewScrolls = 0;
  let totalAddedForThisColor = 0;

  for (let i = 0; i < maxScrollsPerColor; i++) {
    const before = urls.size;
    for (const link of await collectProductLinks(page)) {
      urls.add(link);
    }
    const after = urls.size;
    const added = after - before;
    totalAddedForThisColor += added;

    if (added === 0) {
      noNewScrolls++;
    } else {
      noNewScrolls = 0;
      // Save progress as soon as there is something new; an unchanged set
      // would only rewrite an identical file.
      writeUrls(urls);
    }

    console.log(
      `[${label}] Scroll ${i + 1}/${maxScrollsPerColor}: +${added}, total ${after}, no-new ${noNewScrolls}`
    );

    if (noNewScrolls >= stopAfterNoNewScrolls) {
      break;
    }

    await page.mouse.wheel(0, 2500);
    await page.waitForTimeout(1500);
  }

  return totalAddedForThisColor;
}

/**
 * Opens one color filter, harvests its product URLs and records the outcome.
 *
 * A color is marked completed only when the page loaded without an HTTP error
 * status; failures are recorded on the run so the color is retried next time.
 *
 * @param {import("playwright").Page} page - Page used for navigation.
 * @param {string} color - Value of the `color` query parameter.
 * @param {Set<string>} urls - Shared URL set; mutated in place.
 * @param {Object} log - Progress log; mutated and persisted.
 * @param {Object} runRecord - Record of the current run; mutated in place.
 * @returns {Promise<void>}
 */
async function processColor(page, color, urls, log, runRecord) {
  const url = `${baseUrl}?color=${encodeURIComponent(color)}`;

  console.log("");
  console.log(`Opening color filter: ${color}`);
  console.log(url);

  try {
    const response = await page.goto(url, {
      waitUntil: "domcontentloaded",
      timeout: 60000,
    });

    const status = response ? response.status() : "unknown";
    console.log(`HTTP status: ${status}`);

    // An error page contains no products; treating it as a success would make
    // later runs skip this color for good.
    if (response && status >= 400) {
      throw new Error(`HTTP ${status} for ${url}`);
    }

    // Give client-side rendering time to populate the grid.
    await page.waitForTimeout(5000);

    const before = urls.size;
    const addedDuringScroll = await scrollAndCollect(page, urls, color);
    const after = urls.size;
    const netAdded = after - before;

    log.completed_colors[color] = {
      url,
      http_status: status,
      added: netAdded,
      added_during_scroll: addedDuringScroll,
      total_after: after,
      completed_at: new Date().toISOString(),
    };

    runRecord.colors_attempted.push({
      color,
      url,
      http_status: status,
      added: netAdded,
      total_after: after,
    });

    writeLog(log);
    writeUrls(urls);

    console.log(`Color complete: ${color}; added ${netAdded}; total ${after}`);

    // Polite pause between filters.
    await page.waitForTimeout(3000);
  } catch (err) {
    console.log(`Color failed: ${color}; ${err.message}`);

    runRecord.colors_attempted.push({
      color,
      url,
      added: 0,
      error: err.message,
    });

    writeLog(log);
  }
}

/**
 * Entry point: visits every color filter not yet completed and saves the
 * discovered product URLs together with a run summary.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const urls = new Set(readExistingUrls());
  // Count unique entries so duplicate lines in the file cannot skew the
  // "new this run" figure.
  const existingCount = urls.size;
  const log = readLog();

  console.log(`Existing URLs in ${outputFile}: ${existingCount}`);

  const runRecord = {
    started_at: new Date().toISOString(),
    existing_at_start: existingCount,
    colors_attempted: [],
  };
  // Registered up front so every writeLog() call persists the run so far,
  // even when the process dies before the final summary.
  log.runs.push(runRecord);

  const browser = await chromium.launch({ headless: !headed });
  try {
    const context = await browser.newContext({
      userAgent:
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
      viewport: { width: 1365, height: 900 },
      locale: "en-US",
      timezoneId: "America/New_York",
    });
    const page = await context.newPage();

    for (const color of colorParams) {
      if (log.completed_colors[color]) {
        console.log(`Skipping completed color: ${color}`);
        continue;
      }
      await processColor(page, color, urls, log, runRecord);
    }
  } finally {
    // Always release the browser process, including on unexpected errors.
    await browser.close();
  }

  runRecord.finished_at = new Date().toISOString();
  runRecord.final_total = urls.size;
  runRecord.new_this_run = urls.size - existingCount;

  writeLog(log);
  writeUrls(urls);

  console.log("");
  console.log("Color-param discovery complete.");
  console.log(`Existing at start: ${existingCount}`);
  console.log(`Final total: ${urls.size}`);
  console.log(`New this run: ${urls.size - existingCount}`);
  console.log(`Output: ${outputFile}`);
  console.log(`Log: ${logFile}`);
}

main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});