// Source listing header (from the code viewer): 238 lines, 5.8 KiB, JavaScript.
const fs = require("fs");
const { chromium } = require("playwright");

// True when the script is started with the --headed flag; used to decide
// whether the browser is launched headless.
const headed = process.argv.includes("--headed");

/**
 * Looks up a `--name=value` style command-line option.
 *
 * @param {string} name - Option name without the leading dashes.
 * @param {*} defaultValue - Returned unchanged when the option is absent.
 * @param {string[]} [argv=process.argv] - Argument list to search.
 * @returns {*} The text after `--name=` of the first matching argument
 *   (possibly an empty string), or `defaultValue` when nothing matches.
 */
function getArgValue(name, defaultValue, argv = process.argv) {
  const prefix = `--${name}=`;
  const found = argv.find(arg => arg.startsWith(prefix));
  return found ? found.slice(prefix.length) : defaultValue;
}

/**
 * Reads a `--name=value` option as a non-negative integer.
 *
 * A value that does not parse as an integer, or that is negative, is ignored
 * with a warning. Without this check a NaN would silently disable the scroll
 * loop (`i < NaN`) or the early-stop condition (`n >= NaN`).
 *
 * @param {string} name - Option name without the leading dashes.
 * @param {number} fallback - Value used when the option is absent or invalid.
 * @returns {number} The parsed option value or `fallback`.
 */
function readIntArg(name, fallback) {
  const raw = getArgValue(name, String(fallback));
  const value = Number.parseInt(raw, 10);
  if (Number.isNaN(value) || value < 0) {
    console.warn(`Ignoring invalid --${name}=${raw}; using ${fallback}.`);
    return fallback;
  }
  return value;
}

// Upper bound on scroll iterations for a single color filter.
const maxScrollsPerColor = readIntArg("max-scrolls-per-color", 180);
// Number of consecutive iterations without new URLs before a filter is abandoned.
const stopAfterNoNewScrolls = readIntArg("stop-after-no-new-scrolls", 10);

// Listing page that the color filter query parameter is appended to.
const baseUrl = "https://www.prismaticpowders.com/shop/powder-coating-colors";
// Text file holding one discovered product URL per line.
const outputFile = "product-urls.txt";
// JSON file recording completed colors and per-run summaries.
const logFile = "color-discovery-log.json";

// Values for the `color` query parameter, one listing page per entry.
// Update this list if you find more color params in the site HTML.
const colorParams = [
  "pris_black",
  "pris_blue",
  "pris_bronze",
  "pris_brown",
  "pris_clear",
  "pris_copper",
  "pris_gold",
  "pris_gray",
  "pris_green",
  "pris_orange",
  "pris_pink",
  "pris_purple",
  "pris_red",
  "pris_silver",
  "pris_tan",
  "pris_white",
  "pris_yellow"
];

/**
 * Normalizes a URL for de-duplication: drops everything from the first "?"
 * and from the first "#", then trims surrounding whitespace.
 *
 * @param {string|null|undefined} url - Raw URL or href; falsy values count as "".
 * @returns {string} The normalized URL, or "" when nothing is left.
 */
function cleanUrl(url) {
  // String() keeps a truthy non-string input from throwing on .split().
  const text = String(url || "");
  return text.split("?")[0].split("#")[0].trim();
}

/**
 * Tests whether a URL contains a product path of the form
 * `/shop/powder-coating-colors/<segment>/`, where `<segment>` consists of
 * letters, digits and hyphens (case-insensitive) and is followed by a slash.
 *
 * @param {string|null|undefined} url - URL to test; falsy values never match.
 * @returns {boolean} True when the pattern occurs anywhere in `url`.
 */
function isProductUrl(url) {
  return /\/shop\/powder-coating-colors\/[A-Z0-9-]+\//i.test(url || "");
}

/**
 * Loads previously saved URLs so a new run can extend them.
 *
 * @param {string} [filePath=outputFile] - Text file with one URL per line.
 * @returns {string[]} Non-empty lines passed through cleanUrl, in file order;
 *   an empty array when the file does not exist.
 * @throws {Error} Any read error other than "file not found".
 */
function readExistingUrls(filePath = outputFile) {
  let contents;
  try {
    // Read directly instead of checking existence first, so there is no
    // window in which the file can disappear between the check and the read.
    contents = fs.readFileSync(filePath, "utf8");
  } catch (err) {
    if (err.code === "ENOENT") return [];
    throw err;
  }

  return contents
    .split(/\r?\n/)
    .map(line => cleanUrl(line))
    .filter(Boolean);
}

/**
 * Overwrites the URL file with the given URLs, sorted, one per line, using
 * CRLF line endings.
 *
 * @param {Iterable<string>} urls - URLs to persist (the script passes a Set).
 * @param {string} [filePath=outputFile] - Destination file.
 * @returns {void}
 */
function writeUrls(urls, filePath = outputFile) {
  const sorted = [...urls].sort();
  // An empty collection produces an empty file rather than a lone blank line.
  const contents = sorted.length > 0 ? sorted.join("\r\n") + "\r\n" : "";
  fs.writeFileSync(filePath, contents, "utf8");
}

/**
 * Loads the discovery log, always returning an object that has a
 * `completed_colors` object and a `runs` array.
 *
 * A missing file yields an empty log. A file that cannot be read or parsed
 * also yields an empty log, with a warning so the loss of recorded progress
 * is visible. A file that parses but lacks (or has the wrong type for) either
 * field gets that field replaced by its empty default, so callers can index
 * `completed_colors` and push to `runs` without further checks.
 *
 * @param {string} [filePath=logFile] - JSON log file.
 * @returns {{completed_colors: Object, runs: Array}} The normalized log.
 */
function readLog(filePath = logFile) {
  const emptyLog = () => ({ completed_colors: {}, runs: [] });
  const isPlainObject = value =>
    value !== null && typeof value === "object" && !Array.isArray(value);

  if (!fs.existsSync(filePath)) {
    return emptyLog();
  }

  let parsed;
  try {
    parsed = JSON.parse(fs.readFileSync(filePath, "utf8"));
  } catch (err) {
    console.warn(`Could not load ${filePath} (${err.message}); starting with an empty log.`);
    return emptyLog();
  }

  if (!isPlainObject(parsed)) {
    console.warn(`Unexpected content in ${filePath}; starting with an empty log.`);
    return emptyLog();
  }

  return {
    ...parsed,
    completed_colors: isPlainObject(parsed.completed_colors) ? parsed.completed_colors : {},
    runs: Array.isArray(parsed.runs) ? parsed.runs : []
  };
}

/**
 * Overwrites the log file with `log` serialized as JSON with 2-space indent.
 *
 * @param {Object} log - Log object to persist.
 * @param {string} [filePath=logFile] - Destination file.
 * @returns {void}
 */
function writeLog(log, filePath = logFile) {
  fs.writeFileSync(filePath, JSON.stringify(log, null, 2), "utf8");
}

/**
 * Collects the product links currently present on the page.
 *
 * @param {import("playwright").Page} page - Page showing a product listing.
 * @returns {Promise<string[]>} Unique cleaned product URLs in first-seen order.
 */
async function collectProductLinks(page) {
  // The callback is evaluated inside the page, so it cannot call helpers
  // defined in this script; the product pattern is therefore repeated inline.
  const hrefs = await page.locator("a").evaluateAll(anchors =>
    anchors
      .map(a => a.href)
      .filter(Boolean)
      .filter(h => /\/shop\/powder-coating-colors\/[A-Z0-9-]+\//i.test(h))
  );

  // Re-check after cleaning: the in-page test can match inside a query string
  // or fragment, which cleanUrl then strips, leaving a non-product URL.
  const cleaned = hrefs
    .map(href => cleanUrl(href))
    .filter(link => isProductUrl(link));

  return [...new Set(cleaned)];
}

/**
 * Repeatedly collects product links from the page and scrolls down, until
 * either `stopAfterNoNewScrolls` consecutive passes find nothing new or
 * `maxScrollsPerColor` passes have been made.
 *
 * @param {import("playwright").Page} page - Page already showing the listing.
 * @param {Set<string>} urls - Accumulator shared across colors; mutated in place.
 * @param {string} label - Prefix for progress log lines.
 * @returns {Promise<number>} Number of URLs newly added to `urls` by this call.
 */
async function scrollAndCollect(page, urls, label) {
  let noNewScrolls = 0;
  let totalAddedForThisColor = 0;

  // Adds the links currently on the page to `urls` and returns how many were new.
  const harvest = async () => {
    const before = urls.size;
    for (const link of await collectProductLinks(page)) {
      urls.add(link);
    }
    const added = urls.size - before;
    // Persist only when something changed; rewriting an identical file on
    // every pass is wasted work.
    if (added > 0) {
      writeUrls(urls);
    }
    return added;
  };

  for (let i = 0; i < maxScrollsPerColor; i++) {
    const added = await harvest();
    totalAddedForThisColor += added;
    noNewScrolls = added === 0 ? noNewScrolls + 1 : 0;

    console.log(`[${label}] Scroll ${i + 1}/${maxScrollsPerColor}: +${added}, total ${urls.size}, no-new ${noNewScrolls}`);

    if (noNewScrolls >= stopAfterNoNewScrolls) {
      return totalAddedForThisColor;
    }

    await page.mouse.wheel(0, 2500);
    await page.waitForTimeout(1500);
  }

  // The scroll budget ran out, so the last wheel event has not been followed
  // by a collection pass yet; pick up whatever it brought into the page.
  const addedAtEnd = await harvest();
  totalAddedForThisColor += addedAtEnd;
  console.log(`[${label}] Final pass: +${addedAtEnd}, total ${urls.size}`);

  return totalAddedForThisColor;
}

/**
 * Entry point. For every color in colorParams that the log does not mark as
 * completed: opens the filtered listing, scrolls and collects product URLs,
 * then records the result in the log. URLs and log are saved after each color
 * so an interrupted run can be resumed.
 */
(async () => {
  const existingUrls = readExistingUrls();
  const urls = new Set(existingUrls);
  const log = readLog();

  console.log(`Existing URLs in ${outputFile}: ${existingUrls.length}`);

  const browser = await chromium.launch({ headless: !headed });

  // try/finally guarantees the browser is closed even if something outside
  // the per-color error handling throws.
  try {
    const context = await browser.newContext({
      userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
      viewport: { width: 1365, height: 900 },
      locale: "en-US",
      timezoneId: "America/New_York"
    });

    const page = await context.newPage();

    const runRecord = {
      started_at: new Date().toISOString(),
      existing_at_start: existingUrls.length,
      colors_attempted: []
    };

    for (const color of colorParams) {
      if (log.completed_colors[color]) {
        console.log(`Skipping completed color: ${color}`);
        continue;
      }

      const url = `${baseUrl}?color=${encodeURIComponent(color)}`;
      console.log("");
      console.log(`Opening color filter: ${color}`);
      console.log(url);

      // Declared outside the try so a failure record can include it.
      let status = "unknown";

      try {
        const response = await page.goto(url, {
          waitUntil: "domcontentloaded",
          timeout: 60000
        });

        status = response ? response.status() : "unknown";
        console.log(`HTTP status: ${status}`);

        // An error response must not be recorded as a completed color,
        // otherwise the color would be skipped on every later run.
        if (typeof status === "number" && status >= 400) {
          throw new Error(`HTTP ${status}`);
        }

        // Give the listing time to render before the first collection pass.
        await page.waitForTimeout(5000);

        const before = urls.size;
        const addedDuringScroll = await scrollAndCollect(page, urls, color);
        const after = urls.size;
        const netAdded = after - before;

        log.completed_colors[color] = {
          url,
          http_status: status,
          added: netAdded,
          added_during_scroll: addedDuringScroll,
          total_after: after,
          completed_at: new Date().toISOString()
        };

        runRecord.colors_attempted.push({
          color,
          url,
          http_status: status,
          added: netAdded,
          total_after: after
        });

        writeLog(log);
        writeUrls(urls);

        console.log(`Color complete: ${color}; added ${netAdded}; total ${after}`);

        // Polite pause between filters.
        await page.waitForTimeout(3000);
      } catch (err) {
        console.log(`Color failed: ${color}; ${err.message}`);

        runRecord.colors_attempted.push({
          color,
          url,
          http_status: status,
          added: 0,
          error: err.message
        });

        writeLog(log);
      }
    }

    runRecord.finished_at = new Date().toISOString();
    runRecord.final_total = urls.size;
    runRecord.new_this_run = urls.size - existingUrls.length;

    log.runs.push(runRecord);
    writeLog(log);
    writeUrls(urls);

    console.log("");
    console.log("Color-param discovery complete.");
    console.log(`Existing at start: ${existingUrls.length}`);
    console.log(`Final total: ${urls.size}`);
    console.log(`New this run: ${urls.size - existingUrls.length}`);
    console.log(`Output: ${outputFile}`);
    console.log(`Log: ${logFile}`);
  } finally {
    await browser.close();
  }
})().catch(err => {
  // Report failures outside the per-color handling and signal them to the
  // caller through the exit code instead of an unhandled rejection.
  console.error(`Color-param discovery aborted: ${err && err.message ? err.message : err}`);
  process.exitCode = 1;
});