Commit misc scripts, feature specs, SQL deploy scripts, and settings updates

This commit is contained in:
2026-05-04 22:14:25 -04:00
parent ee3158b7d5
commit 4c58d57928
32 changed files with 171219 additions and 15 deletions
+30 -1
View File
@@ -142,7 +142,36 @@
"PowerShell(dotnet build *)", "PowerShell(dotnet build *)",
"PowerShell(New-Item *)", "PowerShell(New-Item *)",
"PowerShell(& \"Y:\\\\PCC\\\\PowderCoatingApp\\\\scripts\\\\generate-migration-script.ps1\")", "PowerShell(& \"Y:\\\\PCC\\\\PowderCoatingApp\\\\scripts\\\\generate-migration-script.ps1\")",
"PowerShell(if \\(Test-Path \"Y:\\\\pcc\\\\deployment\\\\migrations.sql\"\\) { $f = Get-Item \"Y:\\\\pcc\\\\deployment\\\\migrations.sql\"; Write-Host \"File exists: $\\($f.Length\\) bytes\" } else { Write-Host \"File not created\" })" "PowerShell(if \\(Test-Path \"Y:\\\\pcc\\\\deployment\\\\migrations.sql\"\\) { $f = Get-Item \"Y:\\\\pcc\\\\deployment\\\\migrations.sql\"; Write-Host \"File exists: $\\($f.Length\\) bytes\" } else { Write-Host \"File not created\" })",
"Bash(git add *)",
"Bash(git commit -m ' *)",
"Bash(git push *)",
"Bash(git commit *)",
"Bash(git checkout *)",
"Bash(git merge *)",
"Bash(dotnet package *)",
"Bash(dotnet test *)",
"Bash(git rm *)",
"Bash(git stash *)",
"Bash(dotnet ef *)",
"Bash(sqlcmd -S \".\\\\SQLEXPRESS\" -d PowderCoatingDb -Q \"SELECT Id, DisplayName, IsCoating, IsActive FROM InventoryCategoryLookups ORDER BY DisplayOrder\" -W)",
"Skill(schedule)",
"Bash(git -C \"//192.168.0.37/SCPSoftware/tmp/PowderCoatingApp-dev-perf\" log --oneline -10)",
"Bash(git -C \"//192.168.0.37/SCPSoftware/tmp/PowderCoatingApp-dev-perf\" status --short)",
"Bash(git *)",
"Bash(get-childitem -Recurse -Filter \"QuotesController.cs\")",
"Bash(Select-Object -ExpandProperty FullName)",
"Bash(dotnet user-secrets *)",
"Bash(Get-ChildItem -Path \"Y:\\\\PCC\\\\PowderCoatingApp\" -Directory)",
"Bash(Select-Object Name)",
"Bash(Get-Content *)",
"Bash(python -c \"import json; data=json.load\\(open\\('prismatic_powders.json','r',encoding='utf-8'\\)\\); print\\(f'Total records: {len\\(data\\)}'\\); print\\('First record:'\\); print\\(json.dumps\\(data[0], indent=2\\)\\)\")",
"Bash(python -c \"import json; data=json.load\\(open\\('prismatic_powders.json','r',encoding='utf-8'\\)\\); keys=list\\(data.keys\\(\\)\\); print\\('Top-level keys:', keys[:10]\\); first=data[keys[0]]; print\\('First record key:', keys[0]\\); print\\(json.dumps\\(first, indent=2\\)\\)\")",
"PowerShell(Get-ChildItem *)",
"PowerShell(Select-String *)",
"Bash(Select-Object -First 20)",
"PowerShell(node -e \"require\\('fs'\\).existsSync\\(require\\('path'\\).join\\(process.cwd\\(\\), 'node_modules', 'sharp'\\)\\) ? console.log\\('sharp ok'\\) : console.log\\('no sharp'\\)\")",
"WebFetch(domain:www.powdercoatinglogix.com)"
] ]
} }
} }
+12 -10
View File
@@ -1,15 +1,7 @@
Shop Management App TO DO List Shop Management App TO DO List
============================== ==============================
-Look into possibly having AI scan a product catalog and suggest prices for items. -Google review request email after a job
-Add images to product catalog items for easily identification of parts
-AI Company Lookup (similar to inventory lookup)
-Add ability to save a quoted item to the product catalog either from an AI Photo Quote or from the calculated item
-Check my ChatGPT chat about surface area for a few solid ideas for the system -Check my ChatGPT chat about surface area for a few solid ideas for the system
-Add SMS capabilities
-Fix up approve/decline messages between customer and user on quote approval feature -Fix up approve/decline messages between customer and user on quote approval feature
Done and need testing Done and need testing
@@ -178,7 +170,17 @@ AI Agent item where we upload a picture and it will calculate the approximate sq
-Make sure we're tracking logins. I see a user logged on, but the company health page states they have never logged in. -Make sure we're tracking logins. I see a user logged on, but the company health page states they have never logged in.
-Allow printing blank work orders (model after the SCP Powder Coating blank work order) -Allow printing blank work orders (model after the SCP Powder Coating blank work order)
-IDEA: Print powders to use on work order with their QR code so they can be scanned right from there and usage recorded. -IDEA: Print powders to use on work order with their QR code so they can be scanned right from there and usage recorded.
-Add ability to save a quoted item to the product catalog either from an AI Photo Quote or from the calculated item
-Add images to product catalog items for easy identification of parts
-Look into possibly having AI scan a product catalog and suggest prices for items.
-Add Oven and Add Blasting Setup don't work in Setup Wizard
-When scanning inventory QR Code, there is no cancel button
-Bug: When scanning Inventory QR Code, if not logged in...it takes you to the dashboard after login, not our inventory scanning screen
-Add SMS capabilities
-Lookup not working 100% correctly. If I type columbia as the manufacturer and a color name, it's finding blackmamba from prismatic incorrectly.
-Lookup Modal not showing ALL matches. Maybe make scrollable
-Pick up cure information from TDS Sheet if not found by AI Search
-ON AI Photo Quote page, when the AI info comes back we should scroll the modal window down so it's visible. It's not clear that new info has been added to the modal for all customers
Ideas Removed Ideas Removed
======================= =======================
+12 -4
View File
@@ -1,9 +1,11 @@
Shop Management App TO DO List Shop Management App TO DO List
============================== ==============================
-Add ability to save a quoted item to the product catalog either from an AI Photo Quote or from the calculated item -Lookup not working 100% correct. If I type columbia as the manufacturer and a color name....it's finding blackmamba from prismatic incorrectly.
-Lookup Modal not showing ALL matches. Maybe make scrollable
-Pick up cure information from TDS Sheet if not found by AI Search
-ON AI Photo Quote page, when the AI info comes back we should scroll the modal window down so it's visible. It's not clear that new info has been added to the modal for all customers
-Google review request email after a job
-Check my ChatGPT chat about surface area for a few solid ideas for the system -Check my ChatGPT chat about surface area for a few solid ideas for the system
-Add SMS capabilities
-Fix up approve/decline messages between customer and user on quote approval feature -Fix up approve/decline messages between customer and user on quote approval feature
Done and need testing Done and need testing
@@ -172,7 +174,13 @@ AI Agent item where we upload a picture and it will calculate the approximate sq
-Make sure we're tracking logins. I see a user logged on, but the company health page states they have never logged in. -Make sure we're tracking logins. I see a user logged on, but the company health page states they have never logged in.
-Allow printing blank work orders (model after the SCP Powder Coating blank work order) -Allow printing blank work orders (model after the SCP Powder Coating blank work order)
-IDEA: Print powders to use on work order with their QR code so they can be scanned right from there and usage recorded. -IDEA: Print powders to use on work order with their QR code so they can be scanned right from there and usage recorded.
-Add ability to save a quoted item to the product catalog either from an AI Photo Quote or from the calculated item
-Add images to product catalog items for easy identification of parts
-Look into possibly having AI scan a product catalog and suggest prices for items.
-Add Oven and Add Blasting Setup don't work in Setup Wizard
-When scanning inventory QR Code, there is no cancel button
-Bug: When scanning Inventory QR Code, if not logged in...it takes you to the dashboard after login, not our inventory scanning screen
-Add SMS capabilities
Ideas Removed Ideas Removed
======================= =======================
+258
View File
@@ -0,0 +1,258 @@
# Guided Activation Flow Feature Spec
## Overview
This feature introduces a **post-setup guided activation flow** for new companies.
After completing the setup wizard, users should be guided through their **first real workflow** so they understand how to use the system immediately.
This is NOT a tooltip tour.
This is a **guided outcome flow using real system actions** (quotes, jobs, invoices).
---
## Problem
Current behavior:
- Users complete setup wizard
- Land on dashboard
- Do not create quotes, jobs, or invoices
- Drop off
Goal:
- Ensure users complete at least ONE real workflow
- Create an "aha moment" within first session
---
## Business Workflows
### 1. Quote-First Workflow
- Create Quote
- Send to customer
- Convert Quote → Job
- Process Job
- Create Invoice
- Customer Pays
### 2. Job-First Workflow (Walk-in)
- Create Job directly
- Process Job
- Create Invoice
- Customer Pays
---
## Feature Behavior
### Trigger Condition
IF:
- setup wizard is completed
- AND firstWorkflowCompleted == false
THEN:
→ redirect user to guided activation flow
---
## Step 1: Workflow Selection
Display full-screen page:
### Title:
"Your shop is set up. Let's run your first workflow."
### Subtitle:
"Choose how jobs usually start for your shop and we'll guide you through it."
### Question:
"How do jobs usually start for your shop?"
### Options:
#### Option A:
Title: "I send a quote first"
Description: "Create a quote, convert it to a job, then invoice when work is complete."
#### Option B:
Title: "I start with a job"
Description: "For walk-ins or approved work where you start immediately."
---
### On Selection:
Save:
- onboardingPath = "quote_first" | "job_first"
Then continue into guided flow
---
## Step 2: Guided Flow
### Path A — Quote First
#### Step A1: Create Quote
- Use existing quote creation logic
- Pre-fill fields:
- Customer: "Sample Customer"
- Item: "Wheel Set"
- Quantity: 4
- Notes: "Sample onboarding quote"
- Allow editing before submit
#### Step A2: Show Quote Created
Message:
"This is the quote you would send to your customer."
CTA:
"Convert to Job"
#### Step A3: Convert Quote → Job
- Use existing conversion logic
#### Step A4: Show Job
Message:
"This job is now tracked in your workflow."
CTA:
"Create Invoice" (if supported)
#### Step A5: Create Invoice (optional)
- Use existing invoice logic
#### Completion:
Set:
- firstWorkflowCompleted = true
---
### Path B — Job First
#### Step B1: Create Job
- Use existing job creation logic
- Pre-fill:
- Customer: "Walk-in Customer"
- Item: "Wheel Set"
- Quantity: 4
- Notes: "Sample onboarding job"
#### Step B2: Show Job
Message:
"This job is now in your workflow."
CTA:
"Create Invoice" (optional)
#### Step B3: Create Invoice (optional)
#### Completion:
Set:
- firstWorkflowCompleted = true
---
## Skipping
Provide "Skip for now" option.
If skipped:
- DO NOT set firstWorkflowCompleted
- Redirect to dashboard
- Continue showing activation banner
---
## Dashboard Behavior
If:
- setup complete
- AND firstWorkflowCompleted == false
Show persistent banner:
Title:
"Create your first job or quote"
Text:
"Run a quick 2-minute workflow to see how the system works."
CTA:
"Start first workflow"
---
## Data Model Changes
Add to Company or User:
- onboardingPath: string | null
- firstWorkflowCompleted: boolean
Optional:
- firstQuoteCreatedAt: datetime
- firstJobCreatedAt: datetime
- firstInvoiceCreatedAt: datetime
---
## Events / Tracking (if system exists)
Track:
- onboarding_path_selected
- first_quote_created
- first_job_created
- first_invoice_created
- first_workflow_completed
- first_workflow_skipped
---
## Implementation Constraints
- MUST reuse existing quote/job/invoice logic
- DO NOT duplicate business logic
- DO NOT create separate fake systems
- Use existing forms and APIs where possible
- Keep UI minimal and fast
- Pre-fill as much as possible
---
## UX Requirements
- No tooltip tours
- Linear guided flow only
- One action at a time
- Minimize user effort
- Show immediate visual feedback
---
## Developer Instructions
Before coding:
1. Inspect setup wizard completion logic
2. Identify routing after setup
3. Identify quote/job/invoice creation flows
4. Identify data model structure
Then:
5. Propose implementation plan
6. Wait for approval
7. Implement incrementally
8. Summarize changes
9. Provide manual QA steps
---
## Success Criteria
- % of users creating first job increases significantly
- Users complete at least one workflow during onboarding
- Reduced drop-off after setup wizard
Target:
≥ 30% of new users create at least one job or quote
+173
View File
@@ -0,0 +1,173 @@
Add a dashboard progress widget for post-onboarding activation.
Context:
This is a powder coating shop management app. We recently shortened the setup wizard and added a guided activation flow. Some setup items are intentionally deferred so users can evaluate the system quickly before fully configuring everything.
Goal:
Create a dashboard widget that helps users “get the most out of their shop” without making it feel like unfinished homework.
Do NOT call it “Complete setup.”
Recommended title:
“Get the most out of your shop”
Purpose:
Show progress based on real usage/configuration milestones and give users clear next actions.
Requirements:
1. Inspect existing dashboard structure
* Locate the dashboard controller/view/components.
* Reuse existing card, alert, progress bar, and button styles.
* Follow existing UI conventions.
2. Widget visibility
Show the widget for companies that:
* Have completed the setup wizard
* Are not yet meaningfully activated OR still have recommended setup tasks incomplete
It is okay to keep showing it until all tasks are complete.
3. Progress calculation
Create a checklist of 5-6 items max.
Suggested items:
A. Create your first quote or job
Complete when:
* company has at least one quote OR at least one job
CTA:
* “Create quote/job” or “Start workflow”
B. Move a job through your workflow
Complete when:
* at least one job has had a status/stage change
* If there is no existing way to detect this, use the closest available activity/history/status timestamp
CTA:
* “Open daily board”
C. Create your first invoice
Complete when:
* company has at least one invoice
CTA:
* “Create invoice”
D. Invite your team
Complete when:
* company has more than one active user/team member
CTA:
* “Invite team”
E. Customize pricing
Complete when:
* company has configured pricing tiers/custom pricing settings beyond defaults
* If this is hard to detect reliably, make this optional or use a simple existing flag/count
CTA:
* “Customize pricing”
F. Review payment terms
Complete when:
* company has customized payment terms from default
* If this is hard to detect reliably, make this optional or use a simple existing flag/value comparison
CTA:
* “Review terms”
4. UX copy
Use friendly, value-focused language.
Widget title:
“Get the most out of your shop”
Subtitle:
“Complete a few quick steps to unlock the full workflow.”
Progress text:
“X of Y complete”
Avoid wording like:
* “Incomplete setup”
* “Missing configuration”
* “Required steps”
5. Visual design
* Use a card-style widget near the top of the dashboard.
* Include a progress bar.
* Show checklist rows with completed and incomplete states.
* Completed items should feel rewarding.
* Incomplete items should have one clear CTA.
* Keep it compact and non-annoying.
6. Behavior
* Each checklist item should link to the most relevant existing page or action.
* Do not build new duplicate workflows.
* Reuse existing guided activation route for “Create your first quote or job” if available.
* If a task cannot be detected reliably yet, implement it conservatively or leave a TODO comment explaining why.
7. Data/query logic
* Prefer calculating progress server-side in the dashboard view model.
* Avoid expensive queries.
* Reuse existing repositories/services if available.
* Keep the logic readable and testable.
8. Dismissal behavior
Add optional dismissal if easy:
* Let user collapse or dismiss the widget.
* If dismissed, do not permanently hide it forever unless all tasks are complete.
* Prefer “collapse” over full dismissal.
* Store dismissal/collapse state only if there is already a simple place to store dashboard preferences.
9. Important product guidance
This widget should guide users from evaluation into real adoption.
The emotional framing should be:
“You're already making progress — here are the next valuable things to try.”
Not:
“You failed to finish setup.”
10. Implementation style
Before coding:
* Inspect relevant dashboard, setup wizard, guided activation, company preference, quote, job, invoice, user/team, pricing, and payment term structures.
* Propose a concise implementation plan.
* Then implement incrementally.
After coding:
* Summarize changed files.
* Explain how progress is calculated.
* Provide manual QA steps.
Manual QA scenarios:
* Brand new company after setup wizard
* Company with first quote/job created
* Company with moved job/status change
* Company with invoice created
* Company with invited team member
* Company with all tasks complete
View File
File diff suppressed because it is too large Load Diff
+49
View File
@@ -0,0 +1,49 @@
-- Deployment script for EF migration 20260428164026_AddGuidedActivationFields
-- (the migration id is taken from the __EFMigrationsHistory insert at the end).
-- Everything between BEGIN TRANSACTION and COMMIT is split into GO-separated batches.
-- NOTE(review): the ALTER TABLE statements have no existence guards, so this script
-- is presumably not safe to run twice against the same database - confirm before re-running.
BEGIN TRANSACTION;
GO
-- Guided-activation tracking columns on CompanyPreferences.
-- All timestamp columns are nullable datetime2.
ALTER TABLE [CompanyPreferences] ADD [FirstInvoiceCreatedAt] datetime2 NULL;
GO
ALTER TABLE [CompanyPreferences] ADD [FirstJobCreatedAt] datetime2 NULL;
GO
ALTER TABLE [CompanyPreferences] ADD [FirstQuoteCreatedAt] datetime2 NULL;
GO
-- Non-nullable flag; existing rows receive the default 0.
ALTER TABLE [CompanyPreferences] ADD [FirstWorkflowCompleted] bit NOT NULL DEFAULT CAST(0 AS bit);
GO
ALTER TABLE [CompanyPreferences] ADD [FirstWorkflowCompletedAt] datetime2 NULL;
GO
ALTER TABLE [CompanyPreferences] ADD [GuidedActivationDismissedAt] datetime2 NULL;
GO
-- NOTE(review): nvarchar(max) for what looks like a short path identifier - confirm a bounded length is not wanted.
ALTER TABLE [CompanyPreferences] ADD [OnboardingPath] nvarchar(max) NULL;
GO
-- The next three batches only rewrite CreatedAt on PricingTiers rows 1-3.
-- NOTE(review): presumably seed-data timestamp churn emitted by the migration generator;
-- confirm these overwrites are intended for the target database.
UPDATE [PricingTiers] SET [CreatedAt] = '2026-04-28T16:40:22.3595055Z'
WHERE [Id] = 1;
SELECT @@ROWCOUNT;
GO
UPDATE [PricingTiers] SET [CreatedAt] = '2026-04-28T16:40:22.3595063Z'
WHERE [Id] = 2;
SELECT @@ROWCOUNT;
GO
UPDATE [PricingTiers] SET [CreatedAt] = '2026-04-28T16:40:22.3595065Z'
WHERE [Id] = 3;
SELECT @@ROWCOUNT;
GO
-- Record the migration as applied.
INSERT INTO [__EFMigrationsHistory] ([MigrationId], [ProductVersion])
VALUES (N'20260428164026_AddGuidedActivationFields', N'8.0.11');
GO
COMMIT;
GO
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,319 @@
# Discover-Prismatic-Product-Urls-By-ColorParam.ps1
#
# Discovers Prismatic Powders product URLs by visiting color filter URLs like:
# https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_red
#
# Outputs:
# .\product-urls.txt
# .\color-discovery-log.json
#
# First-time setup:
# Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass
# .\Discover-Prismatic-Product-Urls-By-ColorParam.ps1 -InstallPlaywright -Headed
#
# Normal run:
# .\Discover-Prismatic-Product-Urls-By-ColorParam.ps1
#
# Watch browser:
# .\Discover-Prismatic-Product-Urls-By-ColorParam.ps1 -Headed
# Script parameters.
param(
# Forces the npm / Playwright install step even when .\node_modules\playwright already exists.
[switch]$InstallPlaywright,
# Adds --headed to the Node arguments, which launches Chromium with headless turned off.
[switch]$Headed,
# Forwarded as --max-scrolls-per-color: upper bound on scroll iterations for one color filter.
[int]$MaxScrollsPerColor = 180,
# Forwarded as --stop-after-no-new-scrolls: consecutive scrolls that add no URL before a color is finished.
[int]$StopAfterNoNewScrolls = 10
)
# Turn cmdlet errors into terminating errors so the try/catch at the bottom of the script sees them.
$ErrorActionPreference = "Stop"
# Verifies that both `node` and `npm` can be resolved as commands.
# Throws a descriptive message for the first one that is missing.
function Ensure-NodeAvailable {
    $nodeCommand = Get-Command node -ErrorAction SilentlyContinue
    if ($null -eq $nodeCommand) {
        throw "Node.js is required. Install Node.js LTS from https://nodejs.org/"
    }

    $npmCommand = Get-Command npm -ErrorAction SilentlyContinue
    if ($null -eq $npmCommand) {
        throw "npm is required. It usually comes with Node.js."
    }
}
# Installs the Playwright npm package and its Chromium build into the current directory.
#
# Parameters:
#   Requested - when $true the install runs even if .\node_modules\playwright already exists.
#
# Throws when Node/npm are missing or when any npm / npx step exits with a non-zero code.
function Install-PlaywrightIfNeeded {
    param([bool]$Requested)

    Ensure-NodeAvailable

    if (-not $Requested -and (Test-Path ".\node_modules\playwright")) {
        return
    }

    Write-Host "Installing Playwright package locally..."

    # Only create a package.json when there is none; an existing one is left untouched.
    if (-not (Test-Path ".\package.json")) {
        npm init -y | Out-Null
        if ($LASTEXITCODE -ne 0) {
            throw "npm init failed with exit code $LASTEXITCODE."
        }
    }

    # Native commands do not raise PowerShell errors on failure, so check the exit code explicitly.
    npm install playwright | Out-Null
    if ($LASTEXITCODE -ne 0) {
        throw "npm install playwright failed with exit code $LASTEXITCODE."
    }

    Write-Host "Installing Playwright Chromium browser..."
    npx playwright install chromium
    if ($LASTEXITCODE -ne 0) {
        throw "npx playwright install chromium failed with exit code $LASTEXITCODE."
    }
}
function Write-NodeDiscoveryScript {
$js = @'
const fs = require("fs");
const { chromium } = require("playwright");
const headed = process.argv.includes("--headed");
/**
 * Looks up a `--name=value` style flag on the Node command line.
 *
 * @param {string} name - Flag name without the leading dashes.
 * @param {string} defaultValue - Returned when no argument starts with `--name=`.
 * @returns {string} Text after the `=` of the first matching argument (may be empty), or `defaultValue`.
 */
function getArgValue(name, defaultValue) {
  const flag = `--${name}=`;
  for (const arg of process.argv) {
    if (arg.startsWith(flag)) {
      return arg.slice(flag.length);
    }
  }
  return defaultValue;
}
// Scroll limits read from the command line; the fallbacks are the strings "180" and "10".
// NOTE(review): parseInt yields NaN for a non-numeric value, and the comparisons that use
// these limits are then always false - confirm the wrapper only ever passes integers.
const maxScrollsPerColor = parseInt(getArgValue("max-scrolls-per-color", "180"), 10);
const stopAfterNoNewScrolls = parseInt(getArgValue("stop-after-no-new-scrolls", "10"), 10);
// Listing page that the color filters are applied to.
const baseUrl = "https://www.prismaticpowders.com/shop/powder-coating-colors";
// Result files, resolved relative to the current working directory.
const outputFile = "product-urls.txt";
const logFile = "color-discovery-log.json";
// Update this list if you find more color params in the site HTML.
// Each entry is sent as the `color` query parameter of baseUrl.
const colorParams = [
"pris_black",
"pris_blue",
"pris_bronze",
"pris_brown",
"pris_clear",
"pris_copper",
"pris_gold",
"pris_gray",
"pris_green",
"pris_orange",
"pris_pink",
"pris_purple",
"pris_red",
"pris_silver",
"pris_tan",
"pris_white",
"pris_yellow"
];
/**
 * Strips the query string and fragment from a URL and trims surrounding whitespace.
 *
 * @param {string} [url] - Raw URL text; falsy values are treated as an empty string.
 * @returns {string} Everything before the first `?` or `#`, trimmed.
 */
function cleanUrl(url) {
  if (!url) {
    return "";
  }
  const [withoutQuery] = url.split("?");
  const [withoutFragment] = withoutQuery.split("#");
  return withoutFragment.trim();
}
/**
 * Tests whether a URL points below the powder-coating-colors listing,
 * i.e. contains `/shop/powder-coating-colors/<code>/` (case-insensitive).
 *
 * @param {string} [url] - URL text; falsy values never match.
 * @returns {boolean} True when the product path pattern is found.
 */
function isProductUrl(url) {
  const candidate = url || "";
  const productPath = /\/shop\/powder-coating-colors\/[A-Z0-9-]+\//i;
  return productPath.test(candidate);
}
/**
 * Loads the URLs already stored in the output file so a new run can extend them.
 *
 * @returns {string[]} Cleaned, non-empty lines of the output file in file order;
 *   an empty array when the file does not exist.
 */
function readExistingUrls() {
  if (!fs.existsSync(outputFile)) {
    return [];
  }
  const known = [];
  for (const line of fs.readFileSync(outputFile, "utf8").split(/\r?\n/)) {
    const url = cleanUrl(line);
    if (url) {
      known.push(url);
    }
  }
  return known;
}
/**
 * Rewrites the output file with every URL, sorted, one per CRLF-terminated line.
 *
 * @param {Iterable<string>} urls - Collected URLs (the caller passes a Set).
 * @returns {void}
 */
function writeUrls(urls) {
  const lines = Array.from(urls);
  lines.sort();
  const body = `${lines.join("\r\n")}\r\n`;
  fs.writeFileSync(outputFile, body, "utf8");
}
/**
 * Loads the discovery log used to resume earlier runs.
 *
 * The result always has an object `completed_colors` and an array `runs`,
 * because the main loop indexes and pushes into them without further checks.
 * A missing file, unparseable JSON, or a JSON value that is not a plain object
 * yields a fresh empty log; any other keys of a valid log are kept.
 *
 * @returns {{completed_colors: Object, runs: Array}} The normalized log.
 */
function readLog() {
  const emptyLog = () => ({ completed_colors: {}, runs: [] });
  const isPlainObject = (value) =>
    value !== null && typeof value === "object" && !Array.isArray(value);

  if (!fs.existsSync(logFile)) {
    return emptyLog();
  }

  let parsed;
  try {
    parsed = JSON.parse(fs.readFileSync(logFile, "utf8"));
  } catch (err) {
    // Still best-effort, but say so: a fresh log means every color is processed again.
    console.warn(`Could not parse ${logFile}; starting with an empty log. ${err.message}`);
    return emptyLog();
  }

  if (!isPlainObject(parsed)) {
    return emptyLog();
  }

  return {
    ...parsed,
    completed_colors: isPlainObject(parsed.completed_colors) ? parsed.completed_colors : {},
    runs: Array.isArray(parsed.runs) ? parsed.runs : []
  };
}
/**
 * Persists the discovery log as JSON with two-space indentation.
 *
 * @param {Object} log - Log object to serialize.
 * @returns {void}
 */
function writeLog(log) {
  const serialized = JSON.stringify(log, null, 2);
  fs.writeFileSync(logFile, serialized, "utf8");
}
/**
 * Collects the product links currently present in the page.
 *
 * The callback handed to `evaluateAll` is self-contained (it carries its own
 * copy of the product path pattern) and returns the matching `href` values;
 * the cleaning step afterwards removes query strings and fragments.
 *
 * @param {object} page - Page object exposing `locator("a").evaluateAll(fn)`.
 * @returns {Promise<string[]>} Cleaned product URLs in document order (duplicates possible).
 */
async function collectProductLinks(page) {
  const hrefs = await page.locator("a").evaluateAll((anchors) => {
    const productPath = /\/shop\/powder-coating-colors\/[A-Z0-9-]+\//i;
    const matches = [];
    for (const anchor of anchors) {
      const href = anchor.href;
      if (href && productPath.test(href)) {
        matches.push(href);
      }
    }
    return matches;
  });

  const cleaned = [];
  for (const href of hrefs) {
    const url = cleanUrl(href);
    if (url) {
      cleaned.push(url);
    }
  }
  return cleaned;
}
/**
 * Repeatedly harvests product links and scrolls the page down.
 *
 * Each pass collects the links currently present, adds them to `urls`,
 * rewrites the output file and logs progress. The loop ends after
 * `maxScrollsPerColor` passes, or as soon as `stopAfterNoNewScrolls`
 * consecutive passes added nothing.
 *
 * @param {object} page - Page exposing `mouse.wheel` and `waitForTimeout`.
 * @param {Set<string>} urls - Shared URL set; mutated in place.
 * @param {string} label - Prefix for the progress lines.
 * @returns {Promise<number>} Number of URLs this call added to `urls`.
 */
async function scrollAndCollect(page, urls, label) {
  let idleStreak = 0;
  let addedForColor = 0;

  for (let pass = 1; pass <= maxScrollsPerColor; pass += 1) {
    const sizeBefore = urls.size;
    const found = await collectProductLinks(page);
    for (const link of found) {
      urls.add(link);
    }
    const sizeAfter = urls.size;
    const added = sizeAfter - sizeBefore;

    addedForColor += added;
    idleStreak = added === 0 ? idleStreak + 1 : 0;

    // Persist after every pass so an interrupted run keeps what it found.
    writeUrls(urls);
    console.log(`[${label}] Scroll ${pass}/${maxScrollsPerColor}: +${added}, total ${sizeAfter}, no-new ${idleStreak}`);

    if (idleStreak >= stopAfterNoNewScrolls) {
      break;
    }

    await page.mouse.wheel(0, 2500);
    await page.waitForTimeout(1500);
  }

  return addedForColor;
}
// Entry point: visits every color filter that is not yet marked completed in the
// log, harvests product URLs while scrolling, and persists URLs and log as it goes.
(async () => {
  const existingUrls = readExistingUrls();
  const urls = new Set(existingUrls);
  const log = readLog();

  console.log(`Existing URLs in ${outputFile}: ${existingUrls.length}`);

  const browser = await chromium.launch({ headless: !headed });

  // try/finally guarantees the browser process is closed even when something
  // outside the per-color error handling throws.
  try {
    const context = await browser.newContext({
      userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
      viewport: { width: 1365, height: 900 },
      locale: "en-US",
      timezoneId: "America/New_York"
    });
    const page = await context.newPage();

    const runRecord = {
      started_at: new Date().toISOString(),
      existing_at_start: existingUrls.length,
      colors_attempted: []
    };

    for (const color of colorParams) {
      if (log.completed_colors[color]) {
        console.log(`Skipping completed color: ${color}`);
        continue;
      }

      const url = `${baseUrl}?color=${encodeURIComponent(color)}`;
      console.log("");
      console.log(`Opening color filter: ${color}`);
      console.log(url);

      try {
        const response = await page.goto(url, {
          waitUntil: "domcontentloaded",
          timeout: 60000
        });
        const status = response ? response.status() : "unknown";
        console.log(`HTTP status: ${status}`);

        // An error response must not be recorded as a completed color, otherwise
        // the filter would be skipped on every later run without ever being scraped.
        if (typeof status === "number" && status >= 400) {
          throw new Error(`HTTP ${status} returned for color filter.`);
        }

        await page.waitForTimeout(5000);

        const before = urls.size;
        const addedDuringScroll = await scrollAndCollect(page, urls, color);
        const after = urls.size;
        const netAdded = after - before;

        log.completed_colors[color] = {
          url,
          http_status: status,
          added: netAdded,
          added_during_scroll: addedDuringScroll,
          total_after: after,
          completed_at: new Date().toISOString()
        };
        runRecord.colors_attempted.push({
          color,
          url,
          http_status: status,
          added: netAdded,
          total_after: after
        });
        writeLog(log);
        writeUrls(urls);
        console.log(`Color complete: ${color}; added ${netAdded}; total ${after}`);

        // Polite pause between filters.
        await page.waitForTimeout(3000);
      } catch (err) {
        // A failed color is only noted in this run's record; it stays out of
        // completed_colors so the next run tries it again.
        console.log(`Color failed: ${color}; ${err.message}`);
        runRecord.colors_attempted.push({
          color,
          url,
          added: 0,
          error: err.message
        });
        writeLog(log);
      }
    }

    runRecord.finished_at = new Date().toISOString();
    runRecord.final_total = urls.size;
    runRecord.new_this_run = urls.size - existingUrls.length;
    log.runs.push(runRecord);
    writeLog(log);
    writeUrls(urls);

    console.log("");
    console.log("Color-param discovery complete.");
    console.log(`Existing at start: ${existingUrls.length}`);
    console.log(`Final total: ${urls.size}`);
    console.log(`New this run: ${urls.size - existingUrls.length}`);
    console.log(`Output: ${outputFile}`);
    console.log(`Log: ${logFile}`);
  } finally {
    await browser.close();
  }
})();
'@
Set-Content -Path ".\discover-prismatic-by-color-param.js" -Value $js -Encoding UTF8
}
# Main flow: make sure Playwright is installed, write the Node script to disk, run it.
# Any failure, including a non-zero exit code from node, ends the script with exit code 1.
try {
    Install-PlaywrightIfNeeded -Requested:$InstallPlaywright
    Write-NodeDiscoveryScript

    Write-Host "Running color-param URL discovery..."

    $nodeArgs = @(
        ".\discover-prismatic-by-color-param.js",
        "--max-scrolls-per-color=$MaxScrollsPerColor",
        "--stop-after-no-new-scrolls=$StopAfterNoNewScrolls"
    )
    if ($Headed) {
        $nodeArgs += "--headed"
    }

    node @nodeArgs

    # A failing native command does not throw by itself; surface its exit code.
    if ($LASTEXITCODE -ne 0) {
        throw "Node discovery script exited with code $LASTEXITCODE."
    }
}
catch {
    # With $ErrorActionPreference = "Stop" a plain Write-Error is itself terminating
    # and would skip the exit below, so report the error as non-terminating here.
    Write-Error $_.Exception.Message -ErrorAction Continue
    exit 1
}
@@ -0,0 +1,410 @@
# Get-Product-Info-Resumable.ps1
#
# Resumable, slow/polite Prismatic Powders product scraper.
#
# Inputs:
# .\product-urls.txt
#
# Outputs:
# .\prismatic_powders.json
# .\prismatic-scrape-progress.log
#
# First-time setup:
# Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass
# .\Get-Product-Info-Resumable.ps1 -InstallPlaywright -Headed -MaxProducts 5
#
# Normal full run:
# .\Get-Product-Info-Resumable.ps1
#
# Test first 25 remaining:
# .\Get-Product-Info-Resumable.ps1 -MaxProducts 25 -Headed
#
# Retry failed URLs too:
# .\Get-Product-Info-Resumable.ps1 -RetryErrors
#
# Slow it down more:
# .\Get-Product-Info-Resumable.ps1 -MinDelaySeconds 12 -MaxDelaySeconds 25
# Script parameters.
# NOTE(review): the code that passes these values to the Node script is not visible here;
# the Node side reads matching --input-file / --output-json / --progress-log / --*-seconds /
# --max-products flags, so they are presumably forwarded one-to-one - confirm.
param(
# Forces the npm / Playwright install step even when .\node_modules\playwright already exists.
[switch]$InstallPlaywright,
# Presumably runs the browser with a visible window (see the usage examples above).
[switch]$Headed,
# Text file with the product URLs to scrape.
[string]$InputFile = ".\product-urls.txt",
# JSON file that receives the scraped products.
[string]$OutputJson = ".\prismatic_powders.json",
# Append-only progress log.
[string]$ProgressLog = ".\prismatic-scrape-progress.log",
# Bounds, in seconds, for the delay settings; the header example raises both to slow the run down.
[int]$MinDelaySeconds = 4,
[int]$MaxDelaySeconds = 10,
# Seconds to wait after a page load before reading the page.
[int]$PageSettleSeconds = 4,
# 0 means no limit.
[int]$MaxProducts = 0,
# By default, URLs in errors are skipped on resume.
# Use -RetryErrors to try failed URLs again.
[switch]$RetryErrors
)
# Turn cmdlet errors into terminating errors.
$ErrorActionPreference = "Stop"
# Checks, in order, that `node` and then `npm` resolve as commands.
# Throws the matching guidance message for the first tool that is missing.
function Ensure-NodeAvailable {
    $requirements = [ordered]@{
        node = "Node.js is required. Install Node.js LTS from https://nodejs.org/"
        npm  = "npm is required. It usually comes with Node.js."
    }

    foreach ($tool in $requirements.Keys) {
        if (-not (Get-Command $tool -ErrorAction SilentlyContinue)) {
            throw $requirements[$tool]
        }
    }
}
# Installs the Playwright npm package and its Chromium build into the current directory.
#
# Parameters:
#   Requested - when $true the install runs even if .\node_modules\playwright already exists.
#
# Throws when Node/npm are missing or when any npm / npx step exits with a non-zero code.
function Install-PlaywrightIfNeeded {
    param([bool]$Requested)

    Ensure-NodeAvailable

    $alreadyInstalled = Test-Path ".\node_modules\playwright"
    if ($alreadyInstalled -and -not $Requested) {
        return
    }

    Write-Host "Installing Playwright package locally..."

    # Only create a package.json when there is none; an existing one is left untouched.
    if (-not (Test-Path ".\package.json")) {
        npm init -y | Out-Null
        if ($LASTEXITCODE -ne 0) {
            throw "npm init failed with exit code $LASTEXITCODE."
        }
    }

    # Native commands do not raise PowerShell errors on failure, so check the exit code explicitly.
    npm install playwright | Out-Null
    if ($LASTEXITCODE -ne 0) {
        throw "npm install playwright failed with exit code $LASTEXITCODE."
    }

    Write-Host "Installing Playwright Chromium browser..."
    npx playwright install chromium
    if ($LASTEXITCODE -ne 0) {
        throw "npx playwright install chromium failed with exit code $LASTEXITCODE."
    }
}
function Write-NodeScraper {
$js = @'
const fs = require("fs");
const { chromium } = require("playwright");
const headed = process.argv.includes("--headed");
const retryErrors = process.argv.includes("--retry-errors");
/**
 * Reads the value of a `--name=value` command-line flag.
 *
 * @param {string} name - Flag name without the leading dashes.
 * @param {string} defaultValue - Returned when no argument starts with `--name=`.
 * @returns {string} Text after the `=` of the first matching argument (may be empty), or `defaultValue`.
 */
function getArgValue(name, defaultValue) {
  const marker = `--${name}=`;
  const index = process.argv.findIndex((arg) => arg.startsWith(marker));
  if (index === -1) {
    return defaultValue;
  }
  return process.argv[index].slice(marker.length);
}
// File locations; the fallbacks are names relative to the current working directory.
const inputFile = getArgValue("input-file", "product-urls.txt");
const outputJson = getArgValue("output-json", "prismatic_powders.json");
const progressLog = getArgValue("progress-log", "prismatic-scrape-progress.log");
// Delay bounds in seconds, parsed base 10.
// NOTE(review): the fallbacks here (8 and 18) differ from the PowerShell wrapper's
// parameter defaults (4 and 10); they presumably only apply when this script is run
// without the wrapper - confirm which pair is intended.
const minDelaySeconds = parseInt(getArgValue("min-delay-seconds", "8"), 10);
const maxDelaySeconds = parseInt(getArgValue("max-delay-seconds", "18"), 10);
// Seconds to wait after navigation before the page is read.
const pageSettleSeconds = parseInt(getArgValue("page-settle-seconds", "4"), 10);
// Maximum number of URLs handled in this run; values <= 0 leave the list uncapped.
const maxProducts = parseInt(getArgValue("max-products", "0"), 10);
/**
 * Normalizes text: every run of whitespace becomes one space, ends are trimmed.
 *
 * @param {string} [text] - Raw text; falsy values are treated as an empty string.
 * @returns {string} The normalized text.
 */
function clean(text) {
  const raw = text || "";
  return raw.split(/\s+/).filter(Boolean).join(" ");
}
/**
 * Drops the query string and fragment of a URL and trims surrounding whitespace.
 *
 * @param {string} [url] - Raw URL text; falsy values are treated as an empty string.
 * @returns {string} Everything before the first `?` or `#`, trimmed.
 */
function cleanUrl(url) {
  const raw = url || "";
  // Cut at the first "?" or "#", whichever comes first.
  return raw.replace(/[?#][\s\S]*$/, "").trim();
}
/**
 * Returns a promise that resolves (with no value) after a timer of `ms` milliseconds fires.
 *
 * @param {number} ms - Delay handed to `setTimeout`.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Picks a random whole number of milliseconds between the configured delay bounds.
 *
 * The lower bound is clamped to at least 0 and the upper bound to at least the
 * lower bound; both ends are inclusive.
 *
 * @returns {number} Delay in milliseconds.
 */
function randomDelayMs() {
  const lower = Math.max(0, minDelaySeconds * 1000);
  const upper = Math.max(lower, maxDelaySeconds * 1000);
  const span = upper - lower + 1;
  return Math.floor(lower + Math.random() * span);
}
/**
 * Writes a timestamped message to the console and appends it to the progress log.
 *
 * @param {string} message - Text to record.
 * @returns {void}
 */
function logLine(message) {
  const stamp = new Date().toISOString();
  const entry = `[${stamp}] ${message}`;
  console.log(entry);
  fs.appendFileSync(progressLog, `${entry}\r\n`, "utf8");
}
/**
 * Resolves a possibly relative URL against a base URL.
 *
 * @param {string} baseUrl - Base used for resolution.
 * @param {string} [maybeUrl] - URL or path to resolve.
 * @returns {string} The absolute URL; `maybeUrl` unchanged when it cannot be
 *   resolved; an empty string when `maybeUrl` is falsy.
 */
function absoluteUrl(baseUrl, maybeUrl) {
  if (maybeUrl) {
    let resolved = maybeUrl;
    try {
      resolved = new URL(maybeUrl, baseUrl).href;
    } catch {
      // Not resolvable against this base: hand the input back unchanged.
    }
    return resolved;
  }
  return "";
}
/**
 * Reads the list of product URLs to scrape.
 *
 * Each line is cleaned; blank lines, lines starting with `#` and lines that do
 * not contain `/shop/powder-coating-colors/<code>/` are ignored. Duplicates are
 * removed, keeping first-seen order.
 *
 * @returns {string[]} Unique product URLs.
 * @throws {Error} When the input file does not exist.
 */
function loadInputUrls() {
  if (!fs.existsSync(inputFile)) {
    throw new Error(`Input file not found: ${inputFile}`);
  }

  const productPath = /\/shop\/powder-coating-colors\/[A-Z0-9-]+\//i;
  const unique = new Set();

  for (const line of fs.readFileSync(inputFile, "utf8").split(/\r?\n/)) {
    const url = cleanUrl(line);
    if (!url || url.startsWith("#")) {
      continue;
    }
    if (productPath.test(url)) {
      unique.add(url);
    }
  }

  return [...unique];
}
/**
 * Loads previously scraped data so the run can resume.
 *
 * Accepts either a bare array (treated as the results list) or an object with
 * `results` / `errors`; anything that is not an array in those two slots
 * becomes an empty list.
 *
 * @returns {{results: Array, errors: Array}} Stored data, or empty lists when
 *   the output file does not exist.
 * @throws {Error} When the file exists but cannot be read as such data; the
 *   file is first copied to `<outputJson>.invalid-<timestamp>.bak`.
 */
function loadOutput() {
  const exists = fs.existsSync(outputJson);
  if (!exists) {
    return { results: [], errors: [] };
  }

  try {
    const parsed = JSON.parse(fs.readFileSync(outputJson, "utf8"));
    const asList = (value) => (Array.isArray(value) ? value : []);

    if (Array.isArray(parsed)) {
      return { results: parsed, errors: [] };
    }
    return { results: asList(parsed.results), errors: asList(parsed.errors) };
  } catch (err) {
    // Keep a copy of the unreadable file before refusing to continue.
    const backup = `${outputJson}.invalid-${Date.now()}.bak`;
    fs.copyFileSync(outputJson, backup);
    throw new Error(`Could not parse existing ${outputJson}. Backed it up to ${backup}. Error: ${err.message}`);
  }
}
/**
 * Saves the scrape data as indented JSON.
 *
 * The data is written to `<outputJson>.tmp` first and then renamed over the
 * real file, so the output file is only replaced by a completely written one.
 *
 * @param {Object} data - Data to serialize.
 * @returns {void}
 */
function saveOutput(data) {
  const staging = `${outputJson}.tmp`;
  const payload = JSON.stringify(data, null, 2);
  fs.writeFileSync(staging, payload, "utf8");
  fs.renameSync(staging, outputJson);
}
/**
 * Extracts weight-based price tiers from the page text.
 *
 * Recognizes "<min> - <max> lbs $<price>" and "<min>+ lbs $<price>"
 * (case-insensitive, flexible spacing). Open-ended tiers get `max: null`.
 *
 * @param {string} plainText - Text content of the product page.
 * @returns {Array<{min: (number|null), max: (number|null), price: number}>}
 *   Tiers in the order they appear in the text.
 */
function parsePriceTiers(plainText) {
  const tierPattern = /(\d+\s*-\s*\d+\s*lbs|\d+\s*\+\s*lbs)\s*\$([\d.]+)/gi;
  const tiers = [];

  for (const match of plainText.matchAll(tierPattern)) {
    const rangeText = clean(match[1]);
    const tier = { min: null, max: null, price: parseFloat(match[2]) };

    const bounded = /(\d+)\s*-\s*(\d+)/.exec(rangeText);
    if (bounded) {
      tier.min = parseInt(bounded[1], 10);
      tier.max = parseInt(bounded[2], 10);
    }

    const openEnded = /(\d+)\s*\+/.exec(rangeText);
    if (openEnded) {
      tier.min = parseInt(openEnded[1], 10);
      tier.max = null;
    }

    tiers.push(tier);
  }

  return tiers;
}
/**
 * Finds the first link on the page whose visible text matches one of the patterns.
 *
 * @param {object} page - Page exposing `locator("a").evaluateAll(fn)` and `url()`.
 * @param {string[]} patterns - Regular-expression sources, matched case-insensitively
 *   against each link's whitespace-normalized text.
 * @returns {Promise<string>} Absolute URL of the first matching link, or an empty
 *   string when nothing matches or the matching link has no `href`.
 */
async function getLinkByText(page, patterns) {
  const anchors = await page.locator("a").evaluateAll((nodes) => {
    const collected = [];
    for (const node of nodes) {
      const rawText = node.innerText || node.textContent || "";
      collected.push({
        text: rawText.replace(/\s+/g, " ").trim(),
        href: node.getAttribute("href") || ""
      });
    }
    return collected;
  });

  const hit = anchors.find(({ text }) =>
    patterns.some((source) => new RegExp(source, "i").test(text))
  );

  return hit ? absoluteUrl(page.url(), hit.href) : "";
}
/**
 * Picks the most likely product sample image on the page.
 *
 * Preference order over the non-empty image sources, in document order:
 *   1. an images.nicindustries.com URL that does not contain "thumbnail",
 *   2. any images.nicindustries.com URL,
 *   3. any URL containing "prismatic", "powder" or "color".
 *
 * @param {object} page - Page exposing `locator("img").evaluateAll(fn)`.
 * @returns {Promise<string>} The chosen image URL, or an empty string.
 */
async function getSampleImageUrl(page) {
  const sources = await page.locator("img").evaluateAll((imgs) => {
    const found = [];
    for (const img of imgs) {
      const src =
        img.currentSrc ||
        img.src ||
        img.getAttribute("src") ||
        img.getAttribute("data-src") ||
        "";
      if (src) {
        found.push(src);
      }
    }
    return found;
  });

  const preferences = [
    (src) => /images\.nicindustries\.com/i.test(src) && !/thumbnail/i.test(src),
    (src) => /images\.nicindustries\.com/i.test(src),
    (src) => /prismatic|powder|color/i.test(src)
  ];

  for (const accepts of preferences) {
    const match = sources.find((src) => accepts(src));
    if (match) {
      return match;
    }
  }
  return "";
}
/**
 * Tells whether a page title looks like a "not found" error page.
 *
 * A bare "404" substring is not enough: product titles may contain those
 * digits (for example inside an item code such as PMB-4044), so the number
 * must stand alone or be accompanied by "error" / "not found".
 *
 * @param {string} pageTitle - Whitespace-normalized page title.
 * @returns {boolean} True for titles such as "404", "Error 404", "404 - Not Found"
 *   or anything containing "Page Not Found".
 */
function isNotFoundTitle(pageTitle) {
  return /^404$|\berror\s+404\b|\b404\b[\s\-\u2013\u2014:|]*(?:error|not\s+found)|page\s+not\s+found/i.test(pageTitle);
}

/**
 * Opens one product page and extracts its data.
 *
 * @param {object} page - Playwright page used for navigation and queries.
 * @param {string} url - Product URL to scrape; stored as `product_url`.
 * @returns {Promise<Object>} Record with `sku`, `color_name`, `description`,
 *   `price_tiers`, the three document URLs, `sample_image_url`, `product_url`
 *   and `scraped_at` (ISO timestamp).
 * @throws {Error} On a 403 or 404 answer (by status or by page content), or
 *   when neither an item code nor a heading is found on the page.
 */
async function parseProduct(page, url) {
  logLine(`Scraping ${url}`);
  const response = await page.goto(url, {
    waitUntil: "domcontentloaded",
    timeout: 60000
  });
  // Give client-side rendering time to finish before reading the page.
  await page.waitForTimeout(pageSettleSeconds * 1000);

  const status = response ? response.status() : 0;
  const pageTitle = clean(await page.title().catch(() => ""));
  const plainText = clean(await page.locator("body").innerText().catch(() => ""));
  logLine(`HTTP status ${status}; title "${pageTitle}"`);

  if (status === 403 || /^403 Forbidden$/i.test(pageTitle) || /^403 Forbidden$/i.test(plainText)) {
    throw new Error("403 Forbidden returned by site.");
  }
  if (status === 404 || isNotFoundTitle(pageTitle)) {
    throw new Error("404 Not Found returned by site.");
  }

  const title = clean(await page.locator("h1").first().innerText().catch(() => ""));
  const skuMatch = plainText.match(/Item:\s*([A-Z0-9-]+)/i);
  const sku = skuMatch ? skuMatch[1] : "";
  if (!sku && !title) {
    throw new Error("Could not find SKU or title on product page.");
  }

  // The description runs from "Description:" up to the next known section marker.
  const descMatch = plainText.match(/Description:\s*(.*?)(WARNING:|What does this match\?|$)/is);
  const description = descMatch ? clean(descMatch[1]) : "";
  const priceTiers = parsePriceTiers(plainText);

  const safetyDataSheetUrl = await getLinkByText(page, ["Safety Data Sheet", "\\bSDS\\b"]);
  const applicationGuideUrl = await getLinkByText(page, ["Application Guide"]);
  const technicalDataSheetUrl = await getLinkByText(page, ["Tech Data Sheet", "Technical Data Sheet", "\\bTDS\\b"]);
  const sampleImageUrl = await getSampleImageUrl(page);

  return {
    sku,
    color_name: title,
    description,
    price_tiers: priceTiers,
    safety_data_sheet_url: safetyDataSheetUrl,
    technical_data_sheet_url: technicalDataSheetUrl,
    application_guide_url: applicationGuideUrl,
    sample_image_url: sampleImageUrl,
    product_url: url,
    scraped_at: new Date().toISOString()
  };
}
/**
 * Entry point. Loads the input URLs and any previous output, works out which
 * URLs still need scraping, then scrapes them one at a time, saving the
 * output file after every product so a run can be resumed.
 */
(async () => {
  const allUrls = loadInputUrls();
  const data = loadOutput();
  const completedUrls = new Set(data.results.map(r => cleanUrl(r.product_url)).filter(Boolean));
  const errorUrls = new Set(data.errors.map(e => cleanUrl(e.product_url)).filter(Boolean));
  let remainingUrls = allUrls.filter(url => {
    if (completedUrls.has(url)) return false;
    if (!retryErrors && errorUrls.has(url)) return false;
    return true;
  });
  if (maxProducts > 0) {
    remainingUrls = remainingUrls.slice(0, maxProducts);
  }
  logLine(`Input URLs: ${allUrls.length}`);
  logLine(`Already scraped: ${completedUrls.size}`);
  logLine(`Existing errors: ${errorUrls.size}`);
  logLine(`Retry errors: ${retryErrors ? "yes" : "no"}`);
  logLine(`This run target count: ${remainingUrls.length}`);
  logLine(`Delay range: ${minDelaySeconds}-${maxDelaySeconds} seconds; page settle: ${pageSettleSeconds} seconds`);
  if (remainingUrls.length === 0) {
    logLine("Nothing to scrape. Done.");
    saveOutput(data);
    return;
  }
  const browser = await chromium.launch({
    headless: !headed
  });
  try {
    const context = await browser.newContext({
      userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
      viewport: { width: 1365, height: 900 },
      locale: "en-US",
      timezoneId: "America/New_York"
    });
    const page = await context.newPage();
    let processedThisRun = 0;
    for (const [index, url] of remainingUrls.entries()) {
      try {
        const row = await parseProduct(page, url);
        // If retrying an old error, keep the old error history but avoid duplicate successful result.
        if (!completedUrls.has(url)) {
          data.results.push(row);
          completedUrls.add(url);
        }
        processedThisRun++;
        saveOutput(data);
        logLine(`Saved result ${processedThisRun}/${remainingUrls.length}: ${row.sku || "(no sku)"} ${row.color_name || ""}`);
      } catch (err) {
        const errorRecord = {
          product_url: url,
          error: err.message,
          scraped_at: new Date().toISOString()
        };
        data.errors.push(errorRecord);
        saveOutput(data);
        logLine(`ERROR ${url}: ${err.message}`);
      }
      // The delay only separates requests, so it is skipped after the last URL.
      if (index < remainingUrls.length - 1) {
        const delay = randomDelayMs();
        logLine(`Waiting ${(delay / 1000).toFixed(1)} seconds before next product...`);
        await sleep(delay);
      }
    }
  } finally {
    // Close the browser even when setup or the loop fails unexpectedly.
    await browser.close();
  }
  logLine(`Done. Results: ${data.results.length}; Errors: ${data.errors.length}; Output: ${outputJson}`);
})().catch((err) => {
  // Report start-up failures and make sure the process exits non-zero.
  console.error(err);
  process.exitCode = 1;
});
'@
Set-Content -Path ".\prismatic-browser-scraper.js" -Value $js -Encoding UTF8
}
# Script entry point: install Playwright if needed, write the Node scraper to
# disk, then run it with the script parameters translated to --flags.
try {
    Install-PlaywrightIfNeeded -Requested:$InstallPlaywright
    Write-NodeScraper
    Write-Host "Running resumable browser scraper..."
    $nodeArgs = @(
        ".\prismatic-browser-scraper.js",
        "--input-file=$InputFile",
        "--output-json=$OutputJson",
        "--progress-log=$ProgressLog",
        "--min-delay-seconds=$MinDelaySeconds",
        "--max-delay-seconds=$MaxDelaySeconds",
        "--page-settle-seconds=$PageSettleSeconds",
        "--max-products=$MaxProducts"
    )
    if ($Headed) {
        $nodeArgs += "--headed"
    }
    if ($RetryErrors) {
        $nodeArgs += "--retry-errors"
    }
    node @nodeArgs
    # A failing native command does not throw by itself, so check its exit code.
    if ($LASTEXITCODE -ne 0) {
        throw "Node scraper exited with code $LASTEXITCODE."
    }
}
catch {
    # -ErrorAction Continue keeps Write-Error non-terminating so that "exit 1"
    # is still reached when the error preference is "Stop".
    Write-Error $_.Exception.Message -ErrorAction Continue
    exit 1
}
@@ -0,0 +1,410 @@
# Get-Product-Info-Resumable.ps1
#
# Resumable, slow/polite Prismatic Powders product scraper.
#
# Inputs:
# .\product-urls.txt
#
# Outputs:
# .\prismatic_powders.json
# .\prismatic-scrape-progress.log
#
# First-time setup:
# Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass
# .\Get-Product-Info-Resumable.ps1 -InstallPlaywright -Headed -MaxProducts 5
#
# Normal full run:
# .\Get-Product-Info-Resumable.ps1
#
# Test first 25 remaining:
# .\Get-Product-Info-Resumable.ps1 -MaxProducts 25 -Headed
#
# Retry failed URLs too:
# .\Get-Product-Info-Resumable.ps1 -RetryErrors
#
# Slow it down more:
# .\Get-Product-Info-Resumable.ps1 -MinDelaySeconds 12 -MaxDelaySeconds 25
# Script parameters. Apart from -InstallPlaywright, each one is translated into
# a --flag for the generated Node scraper in the $nodeArgs list at the bottom.
param(
# Passed to Install-PlaywrightIfNeeded as -Requested; forces the install step.
[switch]$InstallPlaywright,
# Adds --headed, which makes the scraper launch Chromium with headless disabled.
[switch]$Headed,
# Text file with the product URLs to scrape, one per line.
[string]$InputFile = ".\product-urls.txt",
# JSON file that receives the results and errors; also read back on resume.
[string]$OutputJson = ".\prismatic_powders.json",
# File that every progress log line is appended to.
[string]$ProgressLog = ".\prismatic-scrape-progress.log",
# Lower and upper bound, in seconds, of the random wait between products.
[int]$MinDelaySeconds = 8,
[int]$MaxDelaySeconds = 18,
# Seconds to wait after navigation before the page is read.
[int]$PageSettleSeconds = 4,
# 0 means no limit.
[int]$MaxProducts = 0,
# By default, URLs in errors are skipped on resume.
# Use -RetryErrors to try failed URLs again.
[switch]$RetryErrors
)
# Turn non-terminating errors into terminating ones so the try/catch below sees them.
$ErrorActionPreference = "Stop"
# Throws unless both "node" and "npm" can be resolved as commands.
function Ensure-NodeAvailable {
    $nodeCommand = Get-Command node -ErrorAction SilentlyContinue
    if ($null -eq $nodeCommand) {
        throw "Node.js is required. Install Node.js LTS from https://nodejs.org/"
    }
    $npmCommand = Get-Command npm -ErrorAction SilentlyContinue
    if ($null -eq $npmCommand) {
        throw "npm is required. It usually comes with Node.js."
    }
}
# Installs the Playwright npm package and its Chromium browser into the
# current directory when explicitly requested or when
# .\node_modules\playwright is missing. Throws if any npm/npx step fails.
function Install-PlaywrightIfNeeded {
    param([bool]$Requested)
    Ensure-NodeAvailable
    if (-not $Requested -and (Test-Path ".\node_modules\playwright")) {
        return
    }
    Write-Host "Installing Playwright package locally..."
    # Only create a package.json when there is none, so an existing one is left untouched.
    if (-not (Test-Path ".\package.json")) {
        npm init -y | Out-Null
        if ($LASTEXITCODE -ne 0) {
            throw "npm init failed with exit code $LASTEXITCODE."
        }
    }
    # Native commands do not throw on failure; check each exit code explicitly.
    npm install playwright | Out-Null
    if ($LASTEXITCODE -ne 0) {
        throw "npm install playwright failed with exit code $LASTEXITCODE."
    }
    Write-Host "Installing Playwright Chromium browser..."
    npx playwright install chromium
    if ($LASTEXITCODE -ne 0) {
        throw "npx playwright install chromium failed with exit code $LASTEXITCODE."
    }
}
function Write-NodeScraper {
$js = @'
const fs = require("fs");
const { chromium } = require("playwright");
// Boolean switches: true when the bare flag is present on the command line.
const headed = process.argv.includes("--headed");
const retryErrors = process.argv.includes("--retry-errors");
/**
 * Read the value of a "--name=value" command-line argument.
 * @param {string} name - argument name without the leading dashes
 * @param {*} defaultValue - returned when the argument is not present
 * @returns {*} the text after "=" of the first matching argument, or defaultValue
 */
function getArgValue(name, defaultValue) {
  const flag = `--${name}=`;
  for (const arg of process.argv) {
    if (arg.startsWith(flag)) {
      return arg.slice(flag.length);
    }
  }
  return defaultValue;
}
// Settings read from "--name=value" flags; the second argument is the
// fallback used when the flag is absent.
const inputFile = getArgValue("input-file", "product-urls.txt");
const outputJson = getArgValue("output-json", "prismatic_powders.json");
const progressLog = getArgValue("progress-log", "prismatic-scrape-progress.log");
// Numeric settings arrive as strings and are parsed as base-10 integers.
const minDelaySeconds = parseInt(getArgValue("min-delay-seconds", "8"), 10);
const maxDelaySeconds = parseInt(getArgValue("max-delay-seconds", "18"), 10);
const pageSettleSeconds = parseInt(getArgValue("page-settle-seconds", "4"), 10);
// The main loop only applies this limit when it is greater than 0.
const maxProducts = parseInt(getArgValue("max-products", "0"), 10);
/**
 * Collapse every run of whitespace to a single space and trim the ends.
 * @param {string|null|undefined} text - falsy values are treated as ""
 * @returns {string} normalized text
 */
function clean(text) {
  const value = text || "";
  const collapsed = value.replace(/\s+/g, " ");
  return collapsed.trim();
}
/**
 * Strip the query string and fragment from a URL and trim whitespace.
 * @param {string|null|undefined} url - falsy values are treated as ""
 * @returns {string} everything before the first "?" or "#", trimmed
 */
function cleanUrl(url) {
  const [withoutQuery] = (url || "").split("?");
  const [withoutFragment] = withoutQuery.split("#");
  return withoutFragment.trim();
}
/**
 * Promise-based delay.
 * @param {number} ms - delay passed to setTimeout
 * @returns {Promise<void>} resolves once the timer fires
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
/**
 * Pick a random delay between the configured minimum and maximum.
 * The minimum is clamped to at least 0 and the maximum to at least the minimum.
 * @returns {number} whole number of milliseconds
 */
function randomDelayMs() {
  const lowerMs = Math.max(0, minDelaySeconds * 1000);
  const upperMs = Math.max(lowerMs, maxDelaySeconds * 1000);
  const span = upperMs - lowerMs + 1;
  return Math.floor(lowerMs + Math.random() * span);
}
/**
 * Print a timestamped message to the console and append it to the progress log.
 * @param {string} message - text to log
 */
function logLine(message) {
  const stamp = new Date().toISOString();
  const entry = `[${stamp}] ${message}`;
  console.log(entry);
  fs.appendFileSync(progressLog, `${entry}\r\n`, "utf8");
}
/**
 * Resolve a possibly relative URL against a base URL.
 * @param {string} baseUrl - URL to resolve against
 * @param {string} maybeUrl - absolute or relative URL; falsy yields ""
 * @returns {string} the resolved absolute URL, or maybeUrl unchanged when it
 *   cannot be resolved
 */
function absoluteUrl(baseUrl, maybeUrl) {
  if (maybeUrl) {
    try {
      const resolved = new URL(maybeUrl, baseUrl);
      return resolved.href;
    } catch {
      return maybeUrl;
    }
  }
  return "";
}
/**
 * Read the product URLs to scrape from the input file.
 *
 * Each line is passed through cleanUrl, which drops the query string and
 * everything from the first "#". A "#" comment line therefore becomes an
 * empty string, and the product-URL pattern below rejects it together with
 * blank lines, so no separate comment or blank-line filter is needed.
 *
 * @returns {string[]} de-duplicated product URLs in file order
 * @throws {Error} when the input file does not exist
 */
function loadInputUrls() {
  if (!fs.existsSync(inputFile)) {
    throw new Error(`Input file not found: ${inputFile}`);
  }
  const productUrlPattern = /\/shop\/powder-coating-colors\/[A-Z0-9-]+\//i;
  const urls = fs.readFileSync(inputFile, "utf8")
    .split(/\r?\n/)
    .map(line => cleanUrl(line))
    .filter(url => productUrlPattern.test(url));
  return [...new Set(urls)];
}
/**
 * Load previously saved scrape state from the output JSON file.
 * Accepts either a bare array (treated as the results list) or an object with
 * "results" / "errors" arrays; anything that is not an array becomes [].
 * @returns {{results: Array, errors: Array}} the saved state, or empty lists
 *   when the file does not exist
 * @throws {Error} when the file cannot be parsed; a timestamped ".bak" copy
 *   of it is made first
 */
function loadOutput() {
  const asArray = (value) => (Array.isArray(value) ? value : []);
  if (fs.existsSync(outputJson)) {
    try {
      const parsed = JSON.parse(fs.readFileSync(outputJson, "utf8"));
      return Array.isArray(parsed)
        ? { results: parsed, errors: [] }
        : { results: asArray(parsed.results), errors: asArray(parsed.errors) };
    } catch (err) {
      const backup = `${outputJson}.invalid-${Date.now()}.bak`;
      fs.copyFileSync(outputJson, backup);
      throw new Error(`Could not parse existing ${outputJson}. Backed it up to ${backup}. Error: ${err.message}`);
    }
  }
  return { results: [], errors: [] };
}
/**
 * Write the scrape state to the output JSON file.
 * The JSON is first written to "<outputJson>.tmp" and then renamed over the
 * real file, so the real file is only ever replaced by a fully written one.
 * @param {object} data - JSON-serializable state to persist
 */
function saveOutput(data) {
  const serialized = JSON.stringify(data, null, 2);
  const stagingPath = `${outputJson}.tmp`;
  fs.writeFileSync(stagingPath, serialized, "utf8");
  fs.renameSync(stagingPath, outputJson);
}
/**
 * Extract quantity-based price tiers from the page's visible text.
 * Recognizes "<min> - <max> lbs $<price>" and "<min> + lbs $<price>".
 * @param {string} plainText - text to scan
 * @returns {{min: number|null, max: number|null, price: number}[]} tiers in
 *   order of appearance; open-ended ("+") tiers have max === null
 */
function parsePriceTiers(plainText) {
  const tierPattern = /(\d+\s*-\s*\d+\s*lbs|\d+\s*\+\s*lbs)\s*\$([\d.]+)/gi;
  const tiers = [];
  for (const [, rawRange, rawPrice] of plainText.matchAll(tierPattern)) {
    const rangeText = clean(rawRange);
    const openEnded = rangeText.match(/(\d+)\s*\+/);
    const bounded = rangeText.match(/(\d+)\s*-\s*(\d+)/);
    let min = null;
    let max = null;
    if (openEnded) {
      // "N + lbs": lower bound only.
      min = parseInt(openEnded[1], 10);
    } else if (bounded) {
      min = parseInt(bounded[1], 10);
      max = parseInt(bounded[2], 10);
    }
    tiers.push({ min, max, price: parseFloat(rawPrice) });
  }
  return tiers;
}
/**
 * Find the first anchor on the page whose visible text matches one of the
 * given patterns and return its absolute URL.
 *
 * @param {import("playwright").Page} page - page to search
 * @param {string[]} patterns - regex sources, tested case-insensitively
 *   against the whitespace-collapsed link text
 * @returns {Promise<string>} absolute URL of the first matching anchor that
 *   has an href, or "" when there is none
 */
async function getLinkByText(page, patterns) {
  const links = await page.locator("a").evaluateAll((anchors) =>
    anchors.map(a => ({
      text: (a.innerText || a.textContent || "").replace(/\s+/g, " ").trim(),
      href: a.getAttribute("href") || ""
    }))
  );
  // Compile each pattern once instead of once per anchor.
  const matchers = patterns.map(p => new RegExp(p, "i"));
  // Anchors without an href are skipped so a later, real link can still win.
  const hit = links.find(link => link.href && matchers.some(re => re.test(link.text)));
  return hit ? absoluteUrl(page.url(), hit.href) : "";
}
/**
 * Pick the most likely product sample image from the page's <img> elements.
 * Preference order: an images.nicindustries.com URL that is not a thumbnail,
 * any images.nicindustries.com URL, then any URL containing
 * "prismatic", "powder" or "color".
 * @param {import("playwright").Page} page - page to inspect
 * @returns {Promise<string>} chosen image URL, or "" when nothing matches
 */
async function getSampleImageUrl(page) {
  const sources = await page.locator("img").evaluateAll((imgs) => {
    const found = [];
    for (const img of imgs) {
      const src = img.currentSrc || img.src || img.getAttribute("src") || img.getAttribute("data-src") || "";
      if (src) {
        found.push(src);
      }
    }
    return found;
  });
  const isCdnImage = (src) => /images\.nicindustries\.com/i.test(src);
  const preferences = [
    (src) => isCdnImage(src) && !/thumbnail/i.test(src),
    (src) => isCdnImage(src),
    (src) => /prismatic|powder|color/i.test(src)
  ];
  for (const accepts of preferences) {
    const match = sources.find(accepts);
    if (match) {
      return match;
    }
  }
  return "";
}
/**
 * Load one product page and extract its details.
 *
 * @param {import("playwright").Page} page - page used for navigation
 * @param {string} url - product URL to scrape
 * @returns {Promise<object>} row with sku, color_name, description,
 *   price_tiers, document/image URLs, product_url and scraped_at (ISO string)
 * @throws {Error} when the site answers 403/404, or when neither a SKU nor a
 *   title can be found
 */
async function parseProduct(page, url) {
  logLine(`Scraping ${url}`);
  const response = await page.goto(url, {
    waitUntil: "domcontentloaded",
    timeout: 60000
  });
  // Fixed settle delay before the page is read.
  await page.waitForTimeout(pageSettleSeconds * 1000);
  const status = response ? response.status() : 0;
  const pageTitle = clean(await page.title().catch(() => ""));
  const plainText = clean(await page.locator("body").innerText().catch(() => ""));
  logLine(`HTTP status ${status}; title "${pageTitle}"`);
  if (status === 403 || /^403 Forbidden$/i.test(pageTitle) || /^403 Forbidden$/i.test(plainText)) {
    throw new Error("403 Forbidden returned by site.");
  }
  // Only a standalone "404" counts: a title may contain a SKU-like token such
  // as "X-4040", which must not be mistaken for an error page.
  if (status === 404 || /(?<![\w-])404(?![\w-])|Page Not Found/i.test(pageTitle)) {
    throw new Error("404 Not Found returned by site.");
  }
  const title = clean(await page.locator("h1").first().innerText().catch(() => ""));
  const skuMatch = plainText.match(/Item:\s*([A-Z0-9-]+)/i);
  const sku = skuMatch ? skuMatch[1] : "";
  if (!sku && !title) {
    throw new Error("Could not find SKU or title on product page.");
  }
  // The description runs until the next known section marker or end of text.
  const descMatch = plainText.match(/Description:\s*(.*?)(WARNING:|What does this match\?|$)/is);
  const description = descMatch ? clean(descMatch[1]) : "";
  const priceTiers = parsePriceTiers(plainText);
  const safetyDataSheetUrl = await getLinkByText(page, ["Safety Data Sheet", "\\bSDS\\b"]);
  const applicationGuideUrl = await getLinkByText(page, ["Application Guide"]);
  const technicalDataSheetUrl = await getLinkByText(page, ["Tech Data Sheet", "Technical Data Sheet", "\\bTDS\\b"]);
  const sampleImageUrl = await getSampleImageUrl(page);
  return {
    sku,
    color_name: title,
    description,
    price_tiers: priceTiers,
    safety_data_sheet_url: safetyDataSheetUrl,
    technical_data_sheet_url: technicalDataSheetUrl,
    application_guide_url: applicationGuideUrl,
    sample_image_url: sampleImageUrl,
    product_url: url,
    scraped_at: new Date().toISOString()
  };
}
/**
 * Entry point. Loads the input URLs and any previous output, works out which
 * URLs still need scraping, then scrapes them one at a time, saving the
 * output file after every product so a run can be resumed.
 */
(async () => {
  const allUrls = loadInputUrls();
  const data = loadOutput();
  const completedUrls = new Set(data.results.map(r => cleanUrl(r.product_url)).filter(Boolean));
  const errorUrls = new Set(data.errors.map(e => cleanUrl(e.product_url)).filter(Boolean));
  let remainingUrls = allUrls.filter(url => {
    if (completedUrls.has(url)) return false;
    if (!retryErrors && errorUrls.has(url)) return false;
    return true;
  });
  if (maxProducts > 0) {
    remainingUrls = remainingUrls.slice(0, maxProducts);
  }
  logLine(`Input URLs: ${allUrls.length}`);
  logLine(`Already scraped: ${completedUrls.size}`);
  logLine(`Existing errors: ${errorUrls.size}`);
  logLine(`Retry errors: ${retryErrors ? "yes" : "no"}`);
  logLine(`This run target count: ${remainingUrls.length}`);
  logLine(`Delay range: ${minDelaySeconds}-${maxDelaySeconds} seconds; page settle: ${pageSettleSeconds} seconds`);
  if (remainingUrls.length === 0) {
    logLine("Nothing to scrape. Done.");
    saveOutput(data);
    return;
  }
  const browser = await chromium.launch({
    headless: !headed
  });
  try {
    const context = await browser.newContext({
      userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
      viewport: { width: 1365, height: 900 },
      locale: "en-US",
      timezoneId: "America/New_York"
    });
    const page = await context.newPage();
    let processedThisRun = 0;
    for (const [index, url] of remainingUrls.entries()) {
      try {
        const row = await parseProduct(page, url);
        // If retrying an old error, keep the old error history but avoid duplicate successful result.
        if (!completedUrls.has(url)) {
          data.results.push(row);
          completedUrls.add(url);
        }
        processedThisRun++;
        saveOutput(data);
        logLine(`Saved result ${processedThisRun}/${remainingUrls.length}: ${row.sku || "(no sku)"} ${row.color_name || ""}`);
      } catch (err) {
        const errorRecord = {
          product_url: url,
          error: err.message,
          scraped_at: new Date().toISOString()
        };
        data.errors.push(errorRecord);
        saveOutput(data);
        logLine(`ERROR ${url}: ${err.message}`);
      }
      // The delay only separates requests, so it is skipped after the last URL.
      if (index < remainingUrls.length - 1) {
        const delay = randomDelayMs();
        logLine(`Waiting ${(delay / 1000).toFixed(1)} seconds before next product...`);
        await sleep(delay);
      }
    }
  } finally {
    // Close the browser even when setup or the loop fails unexpectedly.
    await browser.close();
  }
  logLine(`Done. Results: ${data.results.length}; Errors: ${data.errors.length}; Output: ${outputJson}`);
})().catch((err) => {
  // Report start-up failures and make sure the process exits non-zero.
  console.error(err);
  process.exitCode = 1;
});
'@
Set-Content -Path ".\prismatic-browser-scraper.js" -Value $js -Encoding UTF8
}
# Script entry point: install Playwright if needed, write the Node scraper to
# disk, then run it with the script parameters translated to --flags.
try {
    Install-PlaywrightIfNeeded -Requested:$InstallPlaywright
    Write-NodeScraper
    Write-Host "Running resumable browser scraper..."
    $nodeArgs = @(
        ".\prismatic-browser-scraper.js",
        "--input-file=$InputFile",
        "--output-json=$OutputJson",
        "--progress-log=$ProgressLog",
        "--min-delay-seconds=$MinDelaySeconds",
        "--max-delay-seconds=$MaxDelaySeconds",
        "--page-settle-seconds=$PageSettleSeconds",
        "--max-products=$MaxProducts"
    )
    if ($Headed) {
        $nodeArgs += "--headed"
    }
    if ($RetryErrors) {
        $nodeArgs += "--retry-errors"
    }
    node @nodeArgs
    # A failing native command does not throw by itself, so check its exit code.
    if ($LASTEXITCODE -ne 0) {
        throw "Node scraper exited with code $LASTEXITCODE."
    }
}
catch {
    # $ErrorActionPreference is "Stop", which would turn Write-Error into a
    # terminating error and skip "exit 1"; -ErrorAction Continue prevents that.
    Write-Error $_.Exception.Message -ErrorAction Continue
    exit 1
}
+265
View File
@@ -0,0 +1,265 @@
# Crawl and Index Prismatic Colors - Known-Good Style JSON.ps1
#
# Rollback to the earlier working browser pattern:
# - Playwright Chromium
# - Full Chrome-style User-Agent
# - JSON output
# - Structured price tiers
# - Color matches from #collection-list (NOTE: not implemented by the embedded scraper below)
#
# First-time setup:
# Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass
# .\Crawl-and-Index-Prismatic-colors-known-good-json.ps1 -InstallPlaywright
#
# Normal run:
# .\Crawl-and-Index-Prismatic-colors-known-good-json.ps1
#
# Watch browser:
# .\Crawl-and-Index-Prismatic-colors-known-good-json.ps1 -Headed
# Script switches.
param(
# Passed to Install-PlaywrightIfNeeded as -Requested; forces the install step.
[switch]$InstallPlaywright,
# Runs the scraper with --headed, which launches Chromium with headless disabled.
[switch]$Headed
)
# Turn non-terminating errors into terminating ones so the try/catch below sees them.
$ErrorActionPreference = "Stop"
# Throws unless both "node" and "npm" can be resolved as commands.
function Ensure-NodeAvailable {
    $nodeCommand = Get-Command node -ErrorAction SilentlyContinue
    if ($null -eq $nodeCommand) {
        throw "Node.js is required. Install Node.js LTS from https://nodejs.org/"
    }
    $npmCommand = Get-Command npm -ErrorAction SilentlyContinue
    if ($null -eq $npmCommand) {
        throw "npm is required. It usually comes with Node.js."
    }
}
# Installs the Playwright npm package and its Chromium browser into the
# current directory when explicitly requested or when
# .\node_modules\playwright is missing. Throws if any npm/npx step fails.
function Install-PlaywrightIfNeeded {
    param([bool]$Requested)
    Ensure-NodeAvailable
    if (-not $Requested -and (Test-Path ".\node_modules\playwright")) {
        return
    }
    Write-Host "Installing Playwright package locally..."
    # Only create a package.json when there is none, so an existing one is left untouched.
    if (-not (Test-Path ".\package.json")) {
        npm init -y | Out-Null
        if ($LASTEXITCODE -ne 0) {
            throw "npm init failed with exit code $LASTEXITCODE."
        }
    }
    # Native commands do not throw on failure; check each exit code explicitly.
    npm install playwright | Out-Null
    if ($LASTEXITCODE -ne 0) {
        throw "npm install playwright failed with exit code $LASTEXITCODE."
    }
    Write-Host "Installing Playwright Chromium browser..."
    npx playwright install chromium
    if ($LASTEXITCODE -ne 0) {
        throw "npx playwright install chromium failed with exit code $LASTEXITCODE."
    }
}
function Write-NodeScraper {
# Single-quoted here-string prevents PowerShell from interpreting JavaScript regex/template strings.
$js = @'
const fs = require("fs");
const { chromium } = require("playwright");
// True when the bare --headed flag is present on the command line.
const headed = process.argv.includes("--headed");
// Hard-coded list of product pages to scrape.
const productUrls = [
"https://www.prismaticpowders.com/shop/powder-coating-colors/PSS-11248/high-gloss-black"
];
// Output file path (relative, so it resolves against the working directory).
const outputJson = "prismatic_powders.json";
/**
 * Collapse every run of whitespace to a single space and trim the ends.
 * @param {string|null|undefined} text - falsy values are treated as ""
 * @returns {string} normalized text
 */
function clean(text) {
  const value = text || "";
  const collapsed = value.replace(/\s+/g, " ");
  return collapsed.trim();
}
/**
 * Resolve a possibly relative URL against a base URL.
 * @param {string} baseUrl - URL to resolve against
 * @param {string} maybeUrl - absolute or relative URL; falsy yields ""
 * @returns {string} the resolved absolute URL, or maybeUrl unchanged when it
 *   cannot be resolved
 */
function absoluteUrl(baseUrl, maybeUrl) {
  if (maybeUrl) {
    try {
      const resolved = new URL(maybeUrl, baseUrl);
      return resolved.href;
    } catch {
      return maybeUrl;
    }
  }
  return "";
}
/**
 * Normalize a list of strings with clean() and drop empties and duplicates.
 * @param {Array<string|null|undefined>} items - raw values
 * @returns {string[]} distinct cleaned values in first-seen order
 */
function unique(items) {
  const seen = new Set();
  for (const item of items) {
    if (!item) {
      continue;
    }
    const normalized = clean(item);
    if (normalized) {
      seen.add(normalized);
    }
  }
  return [...seen];
}
/**
 * Find the first anchor on the page whose visible text matches one of the
 * given patterns and return its absolute URL.
 *
 * @param {import("playwright").Page} page - page to search
 * @param {string[]} patterns - regex sources, tested case-insensitively
 *   against the whitespace-collapsed link text
 * @returns {Promise<string>} absolute URL of the first matching anchor that
 *   has an href, or "" when there is none
 */
async function getLinkByText(page, patterns) {
  const links = await page.locator("a").evaluateAll((anchors) =>
    anchors.map(a => ({
      text: (a.innerText || a.textContent || "").replace(/\s+/g, " ").trim(),
      href: a.getAttribute("href") || ""
    }))
  );
  // Compile each pattern once instead of once per anchor.
  const matchers = patterns.map(p => new RegExp(p, "i"));
  // Anchors without an href are skipped so a later, real link can still win.
  const hit = links.find(link => link.href && matchers.some(re => re.test(link.text)));
  return hit ? absoluteUrl(page.url(), hit.href) : "";
}
/**
 * Extract quantity-based price tiers from the page's visible text.
 * Recognizes "<min> - <max> lbs $<price>" and "<min> + lbs $<price>".
 * @param {string} plainText - text to scan
 * @returns {{min: number|null, max: number|null, price: number}[]} tiers in
 *   order of appearance; open-ended ("+") tiers have max === null
 */
function parsePriceTiers(plainText) {
  const tierPattern = /(\d+\s*-\s*\d+\s*lbs|\d+\s*\+\s*lbs)\s*\$([\d.]+)/gi;
  const tiers = [];
  for (const [, rawRange, rawPrice] of plainText.matchAll(tierPattern)) {
    const rangeText = clean(rawRange);
    const openEnded = rangeText.match(/(\d+)\s*\+/);
    const bounded = rangeText.match(/(\d+)\s*-\s*(\d+)/);
    let min = null;
    let max = null;
    if (openEnded) {
      // "N + lbs": lower bound only.
      min = parseInt(openEnded[1], 10);
    } else if (bounded) {
      min = parseInt(bounded[1], 10);
      max = parseInt(bounded[2], 10);
    }
    tiers.push({ min, max, price: parseFloat(rawPrice) });
  }
  return tiers;
}
/**
 * Pick the most likely product sample image from the page's <img> elements.
 * Preference order: an images.nicindustries.com URL that is not a thumbnail,
 * any images.nicindustries.com URL, then any URL containing
 * "prismatic", "powder" or "color".
 * @param {import("playwright").Page} page - page to inspect
 * @returns {Promise<string>} chosen image URL, or "" when nothing matches
 */
async function getSampleImageUrl(page) {
  const sources = await page.locator("img").evaluateAll((imgs) => {
    const found = [];
    for (const img of imgs) {
      const src = img.currentSrc || img.src || img.getAttribute("src") || img.getAttribute("data-src") || "";
      if (src) {
        found.push(src);
      }
    }
    return found;
  });
  const isCdnImage = (src) => /images\.nicindustries\.com/i.test(src);
  const preferences = [
    (src) => isCdnImage(src) && !/thumbnail/i.test(src),
    (src) => isCdnImage(src),
    (src) => /prismatic|powder|color/i.test(src)
  ];
  for (const accepts of preferences) {
    const match = sources.find(accepts);
    if (match) {
      return match;
    }
  }
  return "";
}
/**
 * Load one product page and extract its details.
 * @param {import("playwright").Page} page - page used for navigation
 * @param {string} url - product URL to scrape
 * @returns {Promise<object>} row with sku, color_name, description,
 *   price_tiers, document/image URLs, product_url and scraped_at (ISO string)
 * @throws {Error} when the site answers with a 403
 */
async function parseProduct(page, url) {
  console.log(`Scraping ${url}`);
  const navigation = { waitUntil: "domcontentloaded", timeout: 60000 };
  const response = await page.goto(url, navigation);
  await page.waitForTimeout(3000);
  const status = response ? response.status() : 0;
  // Read a text value; a failed read counts as an empty string.
  const safeText = async (pending) => clean(await pending.catch(() => ""));
  const pageTitle = await safeText(page.title());
  const plainText = await safeText(page.locator("body").innerText());
  console.log(`HTTP status: ${status}`);
  console.log(`Page title: ${pageTitle}`);
  // Do not silently output a fake product if blocked.
  const looksForbidden = [pageTitle, plainText].some(text => /^403 Forbidden$/i.test(text));
  if (status === 403 || looksForbidden) {
    throw new Error("403 Forbidden returned by site.");
  }
  const title = await safeText(page.locator("h1").first().innerText());
  const [, sku = ""] = plainText.match(/Item:\s*([A-Z0-9-]+)/i) || [];
  const descMatch = plainText.match(/Description:\s*(.*?)(WARNING:|What does this match\?|$)/is);
  let description = "";
  if (descMatch) {
    description = clean(descMatch[1]);
  }
  const priceTiers = parsePriceTiers(plainText);
  const safetyDataSheetUrl = await getLinkByText(page, ["Safety Data Sheet", "\\bSDS\\b"]);
  const applicationGuideUrl = await getLinkByText(page, ["Application Guide"]);
  const technicalDataSheetUrl = await getLinkByText(page, ["Tech Data Sheet", "Technical Data Sheet", "\\bTDS\\b"]);
  const sampleImageUrl = await getSampleImageUrl(page);
  const scrapedAt = new Date().toISOString();
  return {
    sku,
    color_name: title,
    description,
    price_tiers: priceTiers,
    safety_data_sheet_url: safetyDataSheetUrl,
    technical_data_sheet_url: technicalDataSheetUrl,
    application_guide_url: applicationGuideUrl,
    sample_image_url: sampleImageUrl,
    product_url: url,
    scraped_at: scrapedAt
  };
}
/**
 * Entry point: scrape every URL in productUrls with one browser page and
 * write the collected results and errors to the output JSON file.
 */
(async () => {
  const browser = await chromium.launch({ headless: !headed });
  const context = await browser.newContext({
    userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
    viewport: { width: 1365, height: 900 },
    locale: "en-US",
    timezoneId: "America/New_York"
  });
  const page = await context.newPage();
  // If you prefer only the array, serialize output.results instead of output.
  const output = { results: [], errors: [] };
  for (const url of productUrls) {
    try {
      output.results.push(await parseProduct(page, url));
      await page.waitForTimeout(3000);
    } catch (err) {
      console.warn(`Failed ${url}: ${err.message}`);
      output.errors.push({
        product_url: url,
        error: err.message,
        scraped_at: new Date().toISOString()
      });
    }
  }
  await browser.close();
  fs.writeFileSync(outputJson, JSON.stringify(output, null, 2), "utf8");
  console.log(`Done. Output: ${outputJson}`);
})();
'@
Set-Content -Path ".\prismatic-browser-scraper.js" -Value $js -Encoding UTF8
}
# Script entry point: install Playwright if needed, write the Node scraper to
# disk, then run it (with --headed when -Headed was given).
try {
    Install-PlaywrightIfNeeded -Requested:$InstallPlaywright
    Write-NodeScraper
    Write-Host "Running browser scraper..."
    $nodeArgs = @(".\prismatic-browser-scraper.js")
    if ($Headed) {
        $nodeArgs += "--headed"
    }
    node @nodeArgs
    # A failing native command does not throw by itself, so check its exit code.
    if ($LASTEXITCODE -ne 0) {
        throw "Node scraper exited with code $LASTEXITCODE."
    }
}
catch {
    # $ErrorActionPreference is "Stop", which would turn Write-Error into a
    # terminating error and skip "exit 1"; -ErrorAction Continue prevents that.
    Write-Error $_.Exception.Message -ErrorAction Continue
    exit 1
}
@@ -0,0 +1,319 @@
# Discover-Prismatic-Product-Urls-By-ColorParam.ps1
#
# Discovers Prismatic Powders product URLs by visiting color filter URLs like:
# https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_red
#
# Outputs:
# .\product-urls.txt
# .\color-discovery-log.json
#
# First-time setup:
# Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass
# .\Discover-Prismatic-Product-Urls-By-ColorParam.ps1 -InstallPlaywright -Headed
#
# Normal run:
# .\Discover-Prismatic-Product-Urls-By-ColorParam.ps1
#
# Watch browser:
# .\Discover-Prismatic-Product-Urls-By-ColorParam.ps1 -Headed
# Script parameters.
param(
# Passed to Install-PlaywrightIfNeeded as -Requested; forces the install step.
[switch]$InstallPlaywright,
# Adds --headed, which makes the discovery script launch Chromium with headless disabled.
[switch]$Headed,
# Upper bound on scroll iterations per color filter (forwarded as --max-scrolls-per-color).
[int]$MaxScrollsPerColor = 180,
# Number of consecutive scrolls without new URLs after which a color is finished
# (forwarded as --stop-after-no-new-scrolls).
[int]$StopAfterNoNewScrolls = 10
)
# Turn non-terminating errors into terminating ones so the try/catch below sees them.
$ErrorActionPreference = "Stop"
# Throws unless both "node" and "npm" can be resolved as commands.
function Ensure-NodeAvailable {
    $nodeCommand = Get-Command node -ErrorAction SilentlyContinue
    if ($null -eq $nodeCommand) {
        throw "Node.js is required. Install Node.js LTS from https://nodejs.org/"
    }
    $npmCommand = Get-Command npm -ErrorAction SilentlyContinue
    if ($null -eq $npmCommand) {
        throw "npm is required. It usually comes with Node.js."
    }
}
# Installs the Playwright npm package and its Chromium browser into the
# current directory when explicitly requested or when
# .\node_modules\playwright is missing. Throws if any npm/npx step fails.
function Install-PlaywrightIfNeeded {
    param([bool]$Requested)
    Ensure-NodeAvailable
    if (-not $Requested -and (Test-Path ".\node_modules\playwright")) {
        return
    }
    Write-Host "Installing Playwright package locally..."
    # Only create a package.json when there is none, so an existing one is left untouched.
    if (-not (Test-Path ".\package.json")) {
        npm init -y | Out-Null
        if ($LASTEXITCODE -ne 0) {
            throw "npm init failed with exit code $LASTEXITCODE."
        }
    }
    # Native commands do not throw on failure; check each exit code explicitly.
    npm install playwright | Out-Null
    if ($LASTEXITCODE -ne 0) {
        throw "npm install playwright failed with exit code $LASTEXITCODE."
    }
    Write-Host "Installing Playwright Chromium browser..."
    npx playwright install chromium
    if ($LASTEXITCODE -ne 0) {
        throw "npx playwright install chromium failed with exit code $LASTEXITCODE."
    }
}
function Write-NodeDiscoveryScript {
$js = @'
const fs = require("fs");
const { chromium } = require("playwright");
// True when the bare --headed flag is present on the command line.
const headed = process.argv.includes("--headed");
/**
 * Read the value of a "--name=value" command-line argument.
 * @param {string} name - argument name without the leading dashes
 * @param {*} defaultValue - returned when the argument is not present
 * @returns {*} the text after "=" of the first matching argument, or defaultValue
 */
function getArgValue(name, defaultValue) {
  const flag = `--${name}=`;
  for (const arg of process.argv) {
    if (arg.startsWith(flag)) {
      return arg.slice(flag.length);
    }
  }
  return defaultValue;
}
// Scroll limits, read from "--name=value" flags and parsed as base-10 integers.
const maxScrollsPerColor = parseInt(getArgValue("max-scrolls-per-color", "180"), 10);
const stopAfterNoNewScrolls = parseInt(getArgValue("stop-after-no-new-scrolls", "10"), 10);
// Listing page; each color filter is requested as `${baseUrl}?color=<param>`.
const baseUrl = "https://www.prismaticpowders.com/shop/powder-coating-colors";
// Output files (relative, so they resolve against the working directory).
const outputFile = "product-urls.txt";
const logFile = "color-discovery-log.json";
// Update this list if you find more color params in the site HTML.
const colorParams = [
"pris_black",
"pris_blue",
"pris_bronze",
"pris_brown",
"pris_clear",
"pris_copper",
"pris_gold",
"pris_gray",
"pris_green",
"pris_orange",
"pris_pink",
"pris_purple",
"pris_red",
"pris_silver",
"pris_tan",
"pris_white",
"pris_yellow"
];
/**
 * Strip the query string and fragment from a URL and trim whitespace.
 * @param {string|null|undefined} url - falsy values are treated as ""
 * @returns {string} everything before the first "?" or "#", trimmed
 */
function cleanUrl(url) {
  const [withoutQuery] = (url || "").split("?");
  const [withoutFragment] = withoutQuery.split("#");
  return withoutFragment.trim();
}
/**
 * Test whether a URL contains a "/shop/powder-coating-colors/<code>/" path.
 * @param {string|null|undefined} url - falsy values are treated as ""
 * @returns {boolean} true when the path pattern is found (case-insensitive)
 */
function isProductUrl(url) {
  const candidate = url || "";
  const productPath = /\/shop\/powder-coating-colors\/[A-Z0-9-]+\//i;
  return productPath.test(candidate);
}
/**
 * Read the URLs already stored in the output file.
 * @returns {string[]} cleaned, non-empty lines in file order, or [] when the
 *   file does not exist
 */
function readExistingUrls() {
  if (fs.existsSync(outputFile)) {
    const lines = fs.readFileSync(outputFile, "utf8").split(/\r?\n/);
    const urls = [];
    for (const line of lines) {
      const url = cleanUrl(line);
      if (url) {
        urls.push(url);
      }
    }
    return urls;
  }
  return [];
}
/**
 * Write the URL collection to the output file, sorted, one URL per CRLF-ended line.
 * @param {Iterable<string>} urls - URLs to store; the argument is not modified
 */
function writeUrls(urls) {
  const lines = Array.from(urls);
  lines.sort();
  fs.writeFileSync(outputFile, `${lines.join("\r\n")}\r\n`, "utf8");
}
/**
 * Load the discovery log, always returning an object that has a
 * "completed_colors" object and a "runs" array.
 *
 * A missing file, unparseable JSON, or a value that is not a plain object
 * yields an empty log. Missing or malformed "completed_colors" / "runs"
 * fields are replaced by empty defaults, so callers can index into them
 * without further checks. A parse failure is reported with a warning instead
 * of being discarded silently.
 *
 * @returns {{completed_colors: object, runs: Array}} the usable log
 */
function readLog() {
  const emptyLog = () => ({ completed_colors: {}, runs: [] });
  const isPlainObject = (value) => value !== null && typeof value === "object" && !Array.isArray(value);
  if (!fs.existsSync(logFile)) {
    return emptyLog();
  }
  let parsed;
  try {
    parsed = JSON.parse(fs.readFileSync(logFile, "utf8"));
  } catch (err) {
    console.warn(`Could not parse ${logFile}; starting with an empty log. Error: ${err.message}`);
    return emptyLog();
  }
  if (!isPlainObject(parsed)) {
    return emptyLog();
  }
  return {
    ...parsed,
    completed_colors: isPlainObject(parsed.completed_colors) ? parsed.completed_colors : {},
    runs: Array.isArray(parsed.runs) ? parsed.runs : []
  };
}
/**
 * Persist the discovery log as pretty-printed JSON.
 * @param {object} log - JSON-serializable log object
 */
function writeLog(log) {
  const serialized = JSON.stringify(log, null, 2);
  fs.writeFileSync(logFile, serialized, "utf8");
}
/**
 * Collect the product links currently present on the page.
 * @param {import("playwright").Page} page - page to inspect
 * @returns {Promise<string[]>} cleaned hrefs of all anchors whose href
 *   contains a "/shop/powder-coating-colors/<code>/" path (duplicates kept)
 */
async function collectProductLinks(page) {
  const hrefs = await page.locator("a").evaluateAll((anchors) => {
    const productPath = /\/shop\/powder-coating-colors\/[A-Z0-9-]+\//i;
    const matches = [];
    for (const anchor of anchors) {
      const href = anchor.href;
      if (href && productPath.test(href)) {
        matches.push(href);
      }
    }
    return matches;
  });
  const cleaned = [];
  for (const href of hrefs) {
    const url = cleanUrl(href);
    if (url) {
      cleaned.push(url);
    }
  }
  return cleaned;
}
/**
 * Repeatedly collect product links from the page and scroll down, until the
 * page stops revealing new links or the scroll limit is reached.
 *
 * The stop counter is based on links that are new *for this page*, not on
 * links that are new to the overall set. When the first screens of a filter
 * only show URLs already known from another color or an earlier run, the
 * function therefore keeps scrolling instead of giving up early.
 *
 * @param {import("playwright").Page} page - page showing one color filter
 * @param {Set<string>} urls - overall URL set; newly found URLs are added to it
 * @param {string} label - prefix for the progress output
 * @returns {Promise<number>} how many URLs were newly added to `urls`
 */
async function scrollAndCollect(page, urls, label) {
  const seenOnPage = new Set();
  let noNewScrolls = 0;
  let totalAddedForThisColor = 0;
  for (let i = 0; i < maxScrollsPerColor; i++) {
    const before = urls.size;
    let newOnPage = 0;
    for (const link of await collectProductLinks(page)) {
      if (!seenOnPage.has(link)) {
        seenOnPage.add(link);
        newOnPage++;
      }
      urls.add(link);
    }
    const after = urls.size;
    const added = after - before;
    totalAddedForThisColor += added;
    noNewScrolls = newOnPage === 0 ? noNewScrolls + 1 : 0;
    // The file only changes when the overall set grew.
    if (added > 0) {
      writeUrls(urls);
    }
    console.log(`[${label}] Scroll ${i + 1}/${maxScrollsPerColor}: +${added}, total ${after}, no-new ${noNewScrolls}`);
    if (noNewScrolls >= stopAfterNoNewScrolls) {
      break;
    }
    await page.mouse.wheel(0, 2500);
    await page.waitForTimeout(1500);
  }
  return totalAddedForThisColor;
}
/**
 * Entry point: visit every color filter that is not yet marked completed in
 * the log, collect product URLs from it, and keep the URL file and the log
 * file up to date after each color.
 */
(async () => {
  const existingUrls = readExistingUrls();
  const urls = new Set(existingUrls);
  const log = readLog();
  console.log(`Existing URLs in ${outputFile}: ${existingUrls.length}`);
  const browser = await chromium.launch({ headless: !headed });
  try {
    const context = await browser.newContext({
      userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
      viewport: { width: 1365, height: 900 },
      locale: "en-US",
      timezoneId: "America/New_York"
    });
    const page = await context.newPage();
    const runRecord = {
      started_at: new Date().toISOString(),
      existing_at_start: existingUrls.length,
      colors_attempted: []
    };
    for (const color of colorParams) {
      if (log.completed_colors[color]) {
        console.log(`Skipping completed color: ${color}`);
        continue;
      }
      const url = `${baseUrl}?color=${encodeURIComponent(color)}`;
      console.log("");
      console.log(`Opening color filter: ${color}`);
      console.log(url);
      try {
        const response = await page.goto(url, {
          waitUntil: "domcontentloaded",
          timeout: 60000
        });
        const status = response ? response.status() : "unknown";
        console.log(`HTTP status: ${status}`);
        // An error response must not mark the color as completed, otherwise
        // it would be skipped on every later run.
        if (typeof status === "number" && status >= 400) {
          throw new Error(`HTTP ${status} returned for color filter.`);
        }
        await page.waitForTimeout(5000);
        const before = urls.size;
        const addedDuringScroll = await scrollAndCollect(page, urls, color);
        const after = urls.size;
        const netAdded = after - before;
        log.completed_colors[color] = {
          url,
          http_status: status,
          added: netAdded,
          added_during_scroll: addedDuringScroll,
          total_after: after,
          completed_at: new Date().toISOString()
        };
        runRecord.colors_attempted.push({
          color,
          url,
          http_status: status,
          added: netAdded,
          total_after: after
        });
        writeLog(log);
        writeUrls(urls);
        console.log(`Color complete: ${color}; added ${netAdded}; total ${after}`);
        // Polite pause between filters.
        await page.waitForTimeout(3000);
      } catch (err) {
        console.log(`Color failed: ${color}; ${err.message}`);
        runRecord.colors_attempted.push({
          color,
          url,
          added: 0,
          error: err.message
        });
        writeLog(log);
      }
    }
    runRecord.finished_at = new Date().toISOString();
    runRecord.final_total = urls.size;
    runRecord.new_this_run = urls.size - existingUrls.length;
    log.runs.push(runRecord);
    writeLog(log);
    writeUrls(urls);
    console.log("");
    console.log("Color-param discovery complete.");
    console.log(`Existing at start: ${existingUrls.length}`);
    console.log(`Final total: ${urls.size}`);
    console.log(`New this run: ${urls.size - existingUrls.length}`);
    console.log(`Output: ${outputFile}`);
    console.log(`Log: ${logFile}`);
  } finally {
    // Close the browser even when something above fails unexpectedly.
    await browser.close();
  }
})();
'@
Set-Content -Path ".\discover-prismatic-by-color-param.js" -Value $js -Encoding UTF8
}
# Script entry point: install Playwright if needed, write the Node discovery
# script to disk, then run it with the script parameters translated to --flags.
try {
    Install-PlaywrightIfNeeded -Requested:$InstallPlaywright
    Write-NodeDiscoveryScript
    Write-Host "Running color-param URL discovery..."
    $nodeArgs = @(
        ".\discover-prismatic-by-color-param.js",
        "--max-scrolls-per-color=$MaxScrollsPerColor",
        "--stop-after-no-new-scrolls=$StopAfterNoNewScrolls"
    )
    if ($Headed) {
        $nodeArgs += "--headed"
    }
    node @nodeArgs
    # A failing native command does not throw by itself, so check its exit code.
    if ($LASTEXITCODE -ne 0) {
        throw "Node discovery script exited with code $LASTEXITCODE."
    }
}
catch {
    # $ErrorActionPreference is "Stop", which would turn Write-Error into a
    # terminating error and skip "exit 1"; -ErrorAction Continue prevents that.
    Write-Error $_.Exception.Message -ErrorAction Continue
    exit 1
}
@@ -0,0 +1,265 @@
# Crawl and Index Prismatic Colors - Known-Good Style JSON.ps1
#
# Rollback to the earlier working browser pattern:
# - Playwright Chromium
# - Full Chrome-style User-Agent
# - JSON output
# - Structured price tiers
# - Color matches from #collection-list
#
# First-time setup:
# Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass
# .\Crawl-and-Index-Prismatic-colors-known-good-json.ps1 -InstallPlaywright
#
# Normal run:
# .\Crawl-and-Index-Prismatic-colors-known-good-json.ps1
#
# Watch browser:
# .\Crawl-and-Index-Prismatic-colors-known-good-json.ps1 -Headed
# Script switches.
param(
# Presumably forwarded to Install-PlaywrightIfNeeded -Requested to force the
# install step (call site not visible in this part of the file; confirm).
[switch]$InstallPlaywright,
# Presumably results in the --headed flag that the embedded scraper checks
# for (call site not visible in this part of the file; confirm).
[switch]$Headed
)
# Turn non-terminating errors into terminating ones.
$ErrorActionPreference = "Stop"
# Throws unless both "node" and "npm" can be resolved as commands.
function Ensure-NodeAvailable {
    $nodeCommand = Get-Command node -ErrorAction SilentlyContinue
    if ($null -eq $nodeCommand) {
        throw "Node.js is required. Install Node.js LTS from https://nodejs.org/"
    }
    $npmCommand = Get-Command npm -ErrorAction SilentlyContinue
    if ($null -eq $npmCommand) {
        throw "npm is required. It usually comes with Node.js."
    }
}
# Installs the Playwright npm package and its Chromium browser into the
# current directory when explicitly requested or when
# .\node_modules\playwright is missing. Throws if any npm/npx step fails.
function Install-PlaywrightIfNeeded {
    param([bool]$Requested)
    Ensure-NodeAvailable
    if (-not $Requested -and (Test-Path ".\node_modules\playwright")) {
        return
    }
    Write-Host "Installing Playwright package locally..."
    # Only create a package.json when there is none, so an existing one is left untouched.
    if (-not (Test-Path ".\package.json")) {
        npm init -y | Out-Null
        if ($LASTEXITCODE -ne 0) {
            throw "npm init failed with exit code $LASTEXITCODE."
        }
    }
    # Native commands do not throw on failure; check each exit code explicitly.
    npm install playwright | Out-Null
    if ($LASTEXITCODE -ne 0) {
        throw "npm install playwright failed with exit code $LASTEXITCODE."
    }
    Write-Host "Installing Playwright Chromium browser..."
    npx playwright install chromium
    if ($LASTEXITCODE -ne 0) {
        throw "npx playwright install chromium failed with exit code $LASTEXITCODE."
    }
}
function Write-NodeScraper {
# Single-quoted here-string prevents PowerShell from interpreting JavaScript regex/template strings.
$js = @'
const fs = require("fs");
const { chromium } = require("playwright");
// True when the bare --headed flag is present on the command line.
const headed = process.argv.includes("--headed");
// Hard-coded list of product page URLs.
const productUrls = [
"https://www.prismaticpowders.com/shop/powder-coating-colors/PSS-11248/high-gloss-black"
];
// Output file path (relative, so it resolves against the working directory).
const outputJson = "prismatic_powders.json";
/**
 * Collapse every run of whitespace to a single space and trim the ends.
 * @param {string|null|undefined} text - falsy values are treated as ""
 * @returns {string} normalized text
 */
function clean(text) {
  const value = text || "";
  const collapsed = value.replace(/\s+/g, " ");
  return collapsed.trim();
}
/**
 * Resolve a possibly relative URL against a base URL.
 * @param {string} baseUrl - URL to resolve against
 * @param {string} maybeUrl - absolute or relative URL; falsy yields ""
 * @returns {string} the resolved absolute URL, or maybeUrl unchanged when it
 *   cannot be resolved
 */
function absoluteUrl(baseUrl, maybeUrl) {
  if (maybeUrl) {
    try {
      const resolved = new URL(maybeUrl, baseUrl);
      return resolved.href;
    } catch {
      return maybeUrl;
    }
  }
  return "";
}
/**
 * Normalize a list of strings with clean() and drop empties and duplicates.
 * @param {Array<string|null|undefined>} items - raw values
 * @returns {string[]} distinct cleaned values in first-seen order
 */
function unique(items) {
  const seen = new Set();
  for (const item of items) {
    if (!item) {
      continue;
    }
    const normalized = clean(item);
    if (normalized) {
      seen.add(normalized);
    }
  }
  return [...seen];
}
/**
 * Find the first anchor on the page whose visible text matches one of the
 * given patterns and return its absolute URL.
 *
 * @param {import("playwright").Page} page - page to search
 * @param {string[]} patterns - regex sources, tested case-insensitively
 *   against the whitespace-collapsed link text
 * @returns {Promise<string>} absolute URL of the first matching anchor that
 *   has an href, or "" when there is none
 */
async function getLinkByText(page, patterns) {
  const links = await page.locator("a").evaluateAll((anchors) =>
    anchors.map(a => ({
      text: (a.innerText || a.textContent || "").replace(/\s+/g, " ").trim(),
      href: a.getAttribute("href") || ""
    }))
  );
  // Compile each pattern once instead of once per anchor.
  const matchers = patterns.map(p => new RegExp(p, "i"));
  // Anchors without an href are skipped so a later, real link can still win.
  const hit = links.find(link => link.href && matchers.some(re => re.test(link.text)));
  return hit ? absoluteUrl(page.url(), hit.href) : "";
}
/**
 * Extract quantity-based price tiers from the page's visible text.
 * Recognizes "<min> - <max> lbs $<price>" and "<min> + lbs $<price>".
 * @param {string} plainText - text to scan
 * @returns {{min: number|null, max: number|null, price: number}[]} tiers in
 *   order of appearance; open-ended ("+") tiers have max === null
 */
function parsePriceTiers(plainText) {
  const tierPattern = /(\d+\s*-\s*\d+\s*lbs|\d+\s*\+\s*lbs)\s*\$([\d.]+)/gi;
  const tiers = [];
  for (const [, rawRange, rawPrice] of plainText.matchAll(tierPattern)) {
    const rangeText = clean(rawRange);
    const openEnded = rangeText.match(/(\d+)\s*\+/);
    const bounded = rangeText.match(/(\d+)\s*-\s*(\d+)/);
    let min = null;
    let max = null;
    if (openEnded) {
      // "N + lbs": lower bound only.
      min = parseInt(openEnded[1], 10);
    } else if (bounded) {
      min = parseInt(bounded[1], 10);
      max = parseInt(bounded[2], 10);
    }
    tiers.push({ min, max, price: parseFloat(rawPrice) });
  }
  return tiers;
}
/**
 * Pick the most likely product sample image from the page's <img> elements.
 * Preference order: an images.nicindustries.com URL that is not a thumbnail,
 * any images.nicindustries.com URL, then any URL containing
 * "prismatic", "powder" or "color".
 * @param {import("playwright").Page} page - page to inspect
 * @returns {Promise<string>} chosen image URL, or "" when nothing matches
 */
async function getSampleImageUrl(page) {
  const sources = await page.locator("img").evaluateAll((imgs) => {
    const found = [];
    for (const img of imgs) {
      const src = img.currentSrc || img.src || img.getAttribute("src") || img.getAttribute("data-src") || "";
      if (src) {
        found.push(src);
      }
    }
    return found;
  });
  const isCdnImage = (src) => /images\.nicindustries\.com/i.test(src);
  const preferences = [
    (src) => isCdnImage(src) && !/thumbnail/i.test(src),
    (src) => isCdnImage(src),
    (src) => /prismatic|powder|color/i.test(src)
  ];
  for (const accepts of preferences) {
    const match = sources.find(accepts);
    if (match) {
      return match;
    }
  }
  return "";
}
/**
 * Loads one product page and extracts its fields.
 *
 * @param {object} page - Playwright page used for navigation.
 * @param {string} url - Product detail URL to scrape.
 * @returns {Promise<object>} Record with sku, color_name, description,
 *   price_tiers, document URLs, sample_image_url, product_url and scraped_at.
 * @throws {Error} When the site answers 403 / "403 Forbidden", or with any
 *   other HTTP status of 400 or above, so that an error page is never
 *   written out as a product.
 */
async function parseProduct(page, url) {
  console.log(`Scraping ${url}`);
  const response = await page.goto(url, {
    waitUntil: "domcontentloaded",
    timeout: 60000
  });
  // Fixed pause after DOMContentLoaded before reading the page.
  await page.waitForTimeout(3000);
  // goto() can resolve without a response; 0 then means "status unknown".
  const status = response ? response.status() : 0;
  const pageTitle = clean(await page.title().catch(() => ""));
  const plainText = clean(await page.locator("body").innerText().catch(() => ""));
  console.log(`HTTP status: ${status}`);
  console.log(`Page title: ${pageTitle}`);
  // Do not silently output a fake product if blocked.
  if (status === 403 || /^403 Forbidden$/i.test(pageTitle) || /^403 Forbidden$/i.test(plainText)) {
    throw new Error("403 Forbidden returned by site.");
  }
  // Not-found, rate-limit and server-error pages are not products either.
  if (status >= 400) {
    throw new Error(`HTTP ${status} returned by site.`);
  }
  const title = clean(await page.locator("h1").first().innerText().catch(() => ""));
  const skuMatch = plainText.match(/Item:\s*([A-Z0-9-]+)/i);
  const sku = skuMatch ? skuMatch[1] : "";
  // Description runs from "Description:" to the first known trailer or end of text.
  const descMatch = plainText.match(/Description:\s*(.*?)(WARNING:|What does this match\?|$)/is);
  const description = descMatch ? clean(descMatch[1]) : "";
  const priceTiers = parsePriceTiers(plainText);
  const safetyDataSheetUrl = await getLinkByText(page, ["Safety Data Sheet", "\\bSDS\\b"]);
  const applicationGuideUrl = await getLinkByText(page, ["Application Guide"]);
  const technicalDataSheetUrl = await getLinkByText(page, ["Tech Data Sheet", "Technical Data Sheet", "\\bTDS\\b"]);
  const sampleImageUrl = await getSampleImageUrl(page);
  return {
    sku,
    color_name: title,
    description,
    price_tiers: priceTiers,
    safety_data_sheet_url: safetyDataSheetUrl,
    technical_data_sheet_url: technicalDataSheetUrl,
    application_guide_url: applicationGuideUrl,
    sample_image_url: sampleImageUrl,
    product_url: url,
    scraped_at: new Date().toISOString()
  };
}
/**
 * Entry point: scrapes every URL in `productUrls` with one browser page and
 * writes `{ results, errors }` to `outputJson`.
 *
 * Per-URL failures are recorded in `errors` and do not stop the run. The
 * browser is closed even if setup or the loop throws, and any unexpected
 * failure is reported with a non-zero exit code.
 */
(async () => {
  const browser = await chromium.launch({
    headless: !headed
  });
  const results = [];
  const errors = [];
  try {
    const context = await browser.newContext({
      userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
      viewport: { width: 1365, height: 900 },
      locale: "en-US",
      timezoneId: "America/New_York"
    });
    const page = await context.newPage();
    for (const url of productUrls) {
      try {
        const row = await parseProduct(page, url);
        results.push(row);
        // Pause between successful product requests.
        await page.waitForTimeout(3000);
      } catch (err) {
        console.warn(`Failed ${url}: ${err.message}`);
        errors.push({
          product_url: url,
          error: err.message,
          scraped_at: new Date().toISOString()
        });
      }
    }
  } finally {
    // Release the browser process even when something above throws.
    await browser.close();
  }
  // If you prefer only the array, change this to JSON.stringify(results, null, 2)
  const output = {
    results,
    errors
  };
  fs.writeFileSync(outputJson, JSON.stringify(output, null, 2), "utf8");
  console.log(`Done. Output: ${outputJson}`);
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
'@
Set-Content -Path ".\prismatic-browser-scraper.js" -Value $js -Encoding UTF8
}
# Entry point: install Playwright when needed, regenerate the Node scraper
# script, then run it. Any failure is reported and the script exits with 1.
try {
    Install-PlaywrightIfNeeded -Requested:$InstallPlaywright
    Write-NodeScraper
    Write-Host "Running browser scraper..."
    if ($Headed) {
        node .\prismatic-browser-scraper.js --headed
    }
    else {
        node .\prismatic-browser-scraper.js
    }
    # A failing native command does not raise a PowerShell error, so the exit
    # code must be checked explicitly or a crashed scraper looks like success.
    if ($LASTEXITCODE -ne 0) {
        throw "Browser scraper exited with code $LASTEXITCODE."
    }
}
catch {
    Write-Error $_.Exception.Message
    exit 1
}
Binary file not shown.
@@ -0,0 +1,270 @@
{
"completed_colors": {
"pris_black": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_black",
"http_status": 200,
"added": 472,
"added_during_scroll": 472,
"total_after": 472,
"completed_at": "2026-04-30T00:47:46.289Z"
},
"pris_blue": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_blue",
"http_status": 200,
"added": 948,
"added_during_scroll": 948,
"total_after": 1420,
"completed_at": "2026-04-30T00:49:25.145Z"
},
"pris_bronze": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_bronze",
"http_status": 200,
"added": 358,
"added_during_scroll": 358,
"total_after": 1778,
"completed_at": "2026-04-30T00:50:18.466Z"
},
"pris_brown": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_brown",
"http_status": 200,
"added": 373,
"added_during_scroll": 373,
"total_after": 2151,
"completed_at": "2026-04-30T00:51:18.033Z"
},
"pris_clear": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_clear",
"http_status": 200,
"added": 19,
"added_during_scroll": 19,
"total_after": 2170,
"completed_at": "2026-04-30T00:51:42.889Z"
},
"pris_copper": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_copper",
"http_status": 200,
"added": 1094,
"added_during_scroll": 1094,
"total_after": 3264,
"completed_at": "2026-04-30T00:56:34.934Z"
},
"pris_gold": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_gold",
"http_status": 200,
"added": 152,
"added_during_scroll": 152,
"total_after": 3416,
"completed_at": "2026-04-30T00:57:26.775Z"
},
"pris_gray": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_gray",
"http_status": 200,
"added": 0,
"added_during_scroll": 0,
"total_after": 3416,
"completed_at": "2026-04-30T00:57:49.624Z"
},
"pris_green": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_green",
"http_status": 200,
"added": 0,
"added_during_scroll": 0,
"total_after": 3416,
"completed_at": "2026-04-30T00:58:12.277Z"
},
"pris_orange": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_orange",
"http_status": 200,
"added": 233,
"added_during_scroll": 233,
"total_after": 3649,
"completed_at": "2026-04-30T00:59:06.776Z"
},
"pris_pink": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_pink",
"http_status": 200,
"added": 169,
"added_during_scroll": 169,
"total_after": 3818,
"completed_at": "2026-04-30T00:59:49.323Z"
},
"pris_purple": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_purple",
"http_status": 200,
"added": 182,
"added_during_scroll": 182,
"total_after": 4000,
"completed_at": "2026-04-30T01:00:38.111Z"
},
"pris_red": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_red",
"http_status": 200,
"added": 346,
"added_during_scroll": 346,
"total_after": 4346,
"completed_at": "2026-04-30T01:01:51.910Z"
},
"pris_silver": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_silver",
"http_status": 200,
"added": 210,
"added_during_scroll": 210,
"total_after": 4556,
"completed_at": "2026-04-30T01:02:51.835Z"
},
"pris_tan": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_tan",
"http_status": 200,
"added": 219,
"added_during_scroll": 219,
"total_after": 4775,
"completed_at": "2026-04-30T01:03:43.244Z"
},
"pris_white": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_white",
"http_status": 200,
"added": 218,
"added_during_scroll": 218,
"total_after": 4993,
"completed_at": "2026-04-30T01:04:39.931Z"
},
"pris_yellow": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_yellow",
"http_status": 200,
"added": 199,
"added_during_scroll": 199,
"total_after": 5192,
"completed_at": "2026-04-30T01:05:31.945Z"
}
},
"runs": [
{
"started_at": "2026-04-30T00:46:47.692Z",
"existing_at_start": 0,
"colors_attempted": [
{
"color": "pris_black",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_black",
"http_status": 200,
"added": 472,
"total_after": 472
},
{
"color": "pris_blue",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_blue",
"http_status": 200,
"added": 948,
"total_after": 1420
},
{
"color": "pris_bronze",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_bronze",
"http_status": 200,
"added": 358,
"total_after": 1778
},
{
"color": "pris_brown",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_brown",
"http_status": 200,
"added": 373,
"total_after": 2151
},
{
"color": "pris_clear",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_clear",
"http_status": 200,
"added": 19,
"total_after": 2170
},
{
"color": "pris_copper",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_copper",
"http_status": 200,
"added": 1094,
"total_after": 3264
},
{
"color": "pris_gold",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_gold",
"http_status": 200,
"added": 152,
"total_after": 3416
},
{
"color": "pris_gray",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_gray",
"http_status": 200,
"added": 0,
"total_after": 3416
},
{
"color": "pris_green",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_green",
"http_status": 200,
"added": 0,
"total_after": 3416
},
{
"color": "pris_orange",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_orange",
"http_status": 200,
"added": 233,
"total_after": 3649
},
{
"color": "pris_pink",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_pink",
"http_status": 200,
"added": 169,
"total_after": 3818
},
{
"color": "pris_purple",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_purple",
"http_status": 200,
"added": 182,
"total_after": 4000
},
{
"color": "pris_red",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_red",
"http_status": 200,
"added": 346,
"total_after": 4346
},
{
"color": "pris_silver",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_silver",
"http_status": 200,
"added": 210,
"total_after": 4556
},
{
"color": "pris_tan",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_tan",
"http_status": 200,
"added": 219,
"total_after": 4775
},
{
"color": "pris_white",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_white",
"http_status": 200,
"added": 218,
"total_after": 4993
},
{
"color": "pris_yellow",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_yellow",
"http_status": 200,
"added": 199,
"total_after": 5192
}
],
"finished_at": "2026-04-30T01:05:34.987Z",
"final_total": 5192,
"new_this_run": 5192
}
]
}
@@ -0,0 +1,237 @@
const fs = require("fs");
const { chromium } = require("playwright");
const headed = process.argv.includes("--headed");
/**
 * Reads a `--name=value` option from the command line.
 *
 * @param {string} name - Option name without the leading dashes.
 * @param {string} defaultValue - Returned when the option is absent.
 * @returns {string} Text after `=` of the first matching argument, or
 *   `defaultValue`.
 */
function getArgValue(name, defaultValue) {
  const flag = "--" + name + "=";
  for (const arg of process.argv) {
    if (arg.startsWith(flag)) {
      return arg.slice(flag.length);
    }
  }
  return defaultValue;
}
/**
 * Reads a positive-integer `--name=value` option.
 *
 * A missing option yields `fallback`. A value that does not parse to an
 * integer above zero is rejected with a warning and `fallback` is used,
 * because NaN or 0 would make the scroll loop do no work while the color is
 * still logged as completed.
 *
 * @param {string} name - Option name without the leading dashes.
 * @param {number} fallback - Default value.
 * @returns {number} The parsed option or `fallback`.
 */
function readPositiveIntArg(name, fallback) {
  const raw = getArgValue(name, String(fallback));
  const parsed = Number.parseInt(raw, 10);
  if (Number.isInteger(parsed) && parsed > 0) {
    return parsed;
  }
  console.warn(`Ignoring invalid --${name}=${raw}; using ${fallback}.`);
  return fallback;
}
// Upper bound on scroll passes per color filter.
const maxScrollsPerColor = readPositiveIntArg("max-scrolls-per-color", 180);
// Consecutive passes without new URLs after which a color is considered done.
const stopAfterNoNewScrolls = readPositiveIntArg("stop-after-no-new-scrolls", 10);
// Listing page; each color filter is requested as `${baseUrl}?color=<param>`.
const baseUrl = "https://www.prismaticpowders.com/shop/powder-coating-colors";
// Text file of discovered product URLs, read at startup and rewritten as URLs are found.
const outputFile = "product-urls.txt";
// JSON progress log; colors listed under completed_colors are skipped on later runs.
const logFile = "color-discovery-log.json";
// Update this list if you find more color params in the site HTML.
// NOTE(review): the committed color-discovery-log.json records 0 URLs added
// for pris_gray and pris_green — confirm these parameter names are valid.
const colorParams = [
"pris_black",
"pris_blue",
"pris_bronze",
"pris_brown",
"pris_clear",
"pris_copper",
"pris_gold",
"pris_gray",
"pris_green",
"pris_orange",
"pris_pink",
"pris_purple",
"pris_red",
"pris_silver",
"pris_tan",
"pris_white",
"pris_yellow"
];
/**
 * Strips the query string and fragment from a URL and trims whitespace.
 *
 * @param {string|null|undefined} url - Raw URL; falsy values become "".
 * @returns {string} Everything before the first "?" or "#", trimmed.
 */
function cleanUrl(url) {
  const raw = url || "";
  const cutAt = raw.search(/[?#]/);
  const bare = cutAt === -1 ? raw : raw.slice(0, cutAt);
  return bare.trim();
}
/**
 * Tells whether a URL points at a product detail page, i.e. contains
 * "/shop/powder-coating-colors/<code>/" (case-insensitive).
 *
 * @param {string|null|undefined} url - URL to test; falsy values give false.
 * @returns {boolean} True when the URL has the product-page path shape.
 */
function isProductUrl(url) {
  const candidate = url || "";
  const productPath = /\/shop\/powder-coating-colors\/[A-Z0-9-]+\//i;
  return productPath.test(candidate);
}
/**
 * Loads previously discovered URLs from `outputFile`.
 *
 * @returns {string[]} One cleaned URL per non-empty line, or [] when the
 *   file does not exist.
 */
function readExistingUrls() {
  if (fs.existsSync(outputFile)) {
    const lines = fs.readFileSync(outputFile, "utf8").split(/\r?\n/);
    const kept = [];
    for (const line of lines) {
      const url = cleanUrl(line);
      if (url) {
        kept.push(url);
      }
    }
    return kept;
  }
  return [];
}
/**
 * Overwrites `outputFile` with the given URLs, sorted, one per CRLF-terminated
 * line.
 *
 * @param {Iterable<string>} urls - URLs to persist.
 */
function writeUrls(urls) {
  const ordered = Array.from(urls);
  ordered.sort();
  const body = `${ordered.join("\r\n")}\r\n`;
  fs.writeFileSync(outputFile, body, "utf8");
}
/**
 * Loads the discovery log from `logFile`.
 *
 * The result always has an object `completed_colors` and an array `runs`,
 * even when the file is missing, unparsable, or valid JSON of the wrong
 * shape, so callers can index into both without checks.
 *
 * @returns {{completed_colors: object, runs: Array}} The stored log, or an
 *   empty one.
 */
function readLog() {
  const emptyLog = () => ({ completed_colors: {}, runs: [] });
  const isPlainObject = (value) =>
    value !== null && typeof value === "object" && !Array.isArray(value);
  if (!fs.existsSync(logFile)) {
    return emptyLog();
  }
  let parsed;
  try {
    parsed = JSON.parse(fs.readFileSync(logFile, "utf8"));
  } catch (err) {
    // Still best-effort, but say so: the next writeLog() replaces the file.
    console.warn(`Could not read ${logFile}; starting with an empty log: ${err.message}`);
    return emptyLog();
  }
  if (!isPlainObject(parsed)) {
    console.warn(`Unexpected content in ${logFile}; starting with an empty log.`);
    return emptyLog();
  }
  return {
    ...parsed,
    completed_colors: isPlainObject(parsed.completed_colors) ? parsed.completed_colors : {},
    runs: Array.isArray(parsed.runs) ? parsed.runs : []
  };
}
/**
 * Overwrites `logFile` with the log serialized as 2-space-indented JSON.
 *
 * @param {object} log - Log object to persist.
 */
function writeLog(log) {
  const serialized = JSON.stringify(log, null, 2);
  fs.writeFileSync(logFile, serialized, "utf8");
}
/**
 * Collects the product-page links currently present in the page's DOM.
 *
 * @param {object} page - Playwright page to inspect.
 * @returns {Promise<string[]>} Cleaned product URLs (query and fragment
 *   removed); duplicates are not removed here.
 */
async function collectProductLinks(page) {
  // The callback runs in the browser, so the pattern is repeated inline
  // rather than referencing a helper from this file.
  const hrefs = await page.locator("a").evaluateAll((anchors) => {
    const matches = [];
    for (const anchor of anchors) {
      const href = anchor.href;
      if (href && /\/shop\/powder-coating-colors\/[A-Z0-9-]+\//i.test(href)) {
        matches.push(href);
      }
    }
    return matches;
  });
  const cleaned = [];
  for (const href of hrefs) {
    const url = cleanUrl(href);
    if (url) {
      cleaned.push(url);
    }
  }
  return cleaned;
}
/**
 * Repeatedly harvests product links and scrolls the page down until either
 * `maxScrollsPerColor` passes have run or `stopAfterNoNewScrolls` consecutive
 * passes found nothing new. The URL file is rewritten after every pass.
 *
 * @param {object} page - Playwright page showing a filtered listing.
 * @param {Set<string>} urls - Shared set of known URLs; mutated in place.
 * @param {string} label - Tag used in progress log lines.
 * @returns {Promise<number>} Number of URLs newly added to `urls`.
 */
async function scrollAndCollect(page, urls, label) {
  let idleScrolls = 0;
  let addedTotal = 0;
  let pass = 0;
  while (pass < maxScrollsPerColor) {
    const sizeBefore = urls.size;
    const found = await collectProductLinks(page);
    found.forEach((link) => urls.add(link));
    const sizeAfter = urls.size;
    const gained = sizeAfter - sizeBefore;
    addedTotal += gained;
    idleScrolls = gained === 0 ? idleScrolls + 1 : 0;
    // Checkpoint progress so an interrupted run keeps what it found.
    writeUrls(urls);
    console.log(`[${label}] Scroll ${pass + 1}/${maxScrollsPerColor}: +${gained}, total ${sizeAfter}, no-new ${idleScrolls}`);
    if (idleScrolls >= stopAfterNoNewScrolls) {
      break;
    }
    await page.mouse.wheel(0, 2500);
    await page.waitForTimeout(1500);
    pass += 1;
  }
  return addedTotal;
}
/**
 * Entry point: visits each color filter not yet marked complete in the log,
 * scroll-harvests product URLs, and persists the URL list and the log.
 *
 * A color is marked complete only when its listing page loads without an
 * HTTP error status; otherwise it is recorded as a failed attempt and retried
 * on the next run. The browser is closed even when something throws, and any
 * unexpected failure is reported with a non-zero exit code.
 */
(async () => {
  const existingUrls = readExistingUrls();
  const urls = new Set(existingUrls);
  const log = readLog();
  console.log(`Existing URLs in ${outputFile}: ${existingUrls.length}`);
  const browser = await chromium.launch({ headless: !headed });
  const runRecord = {
    started_at: new Date().toISOString(),
    existing_at_start: existingUrls.length,
    colors_attempted: []
  };
  try {
    const context = await browser.newContext({
      userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
      viewport: { width: 1365, height: 900 },
      locale: "en-US",
      timezoneId: "America/New_York"
    });
    const page = await context.newPage();
    for (const color of colorParams) {
      if (log.completed_colors[color]) {
        console.log(`Skipping completed color: ${color}`);
        continue;
      }
      const url = `${baseUrl}?color=${encodeURIComponent(color)}`;
      console.log("");
      console.log(`Opening color filter: ${color}`);
      console.log(url);
      try {
        const response = await page.goto(url, {
          waitUntil: "domcontentloaded",
          timeout: 60000
        });
        const status = response ? response.status() : "unknown";
        console.log(`HTTP status: ${status}`);
        // An error page must not be recorded as a completed color, or it
        // would be skipped on every later run.
        if (typeof status === "number" && status >= 400) {
          throw new Error(`HTTP ${status} returned for color filter ${color}`);
        }
        await page.waitForTimeout(5000);
        const before = urls.size;
        const addedDuringScroll = await scrollAndCollect(page, urls, color);
        const after = urls.size;
        const netAdded = after - before;
        log.completed_colors[color] = {
          url,
          http_status: status,
          added: netAdded,
          added_during_scroll: addedDuringScroll,
          total_after: after,
          completed_at: new Date().toISOString()
        };
        runRecord.colors_attempted.push({
          color,
          url,
          http_status: status,
          added: netAdded,
          total_after: after
        });
        writeLog(log);
        writeUrls(urls);
        console.log(`Color complete: ${color}; added ${netAdded}; total ${after}`);
        // Polite pause between filters.
        await page.waitForTimeout(3000);
      } catch (err) {
        console.log(`Color failed: ${color}; ${err.message}`);
        runRecord.colors_attempted.push({
          color,
          url,
          added: 0,
          error: err.message
        });
        writeLog(log);
      }
    }
  } finally {
    // Release the browser process even when something above throws.
    await browser.close();
  }
  runRecord.finished_at = new Date().toISOString();
  runRecord.final_total = urls.size;
  runRecord.new_this_run = urls.size - existingUrls.length;
  log.runs.push(runRecord);
  writeLog(log);
  writeUrls(urls);
  console.log("");
  console.log("Color-param discovery complete.");
  console.log(`Existing at start: ${existingUrls.length}`);
  console.log(`Final total: ${urls.size}`);
  console.log(`New this run: ${urls.size - existingUrls.length}`);
  console.log(`Output: ${outputFile}`);
  console.log(`Log: ${logFile}`);
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
+60
View File
@@ -0,0 +1,60 @@
{
"name": "web-scraping",
"version": "1.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "web-scraping",
"version": "1.0.0",
"license": "ISC",
"dependencies": {
"playwright": "^1.59.1"
}
},
"node_modules/fsevents": {
"version": "2.3.2",
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz",
"integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==",
"hasInstallScript": true,
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": "^8.16.0 || ^10.6.0 || >=11.0.0"
}
},
"node_modules/playwright": {
"version": "1.59.1",
"resolved": "https://registry.npmjs.org/playwright/-/playwright-1.59.1.tgz",
"integrity": "sha512-C8oWjPR3F81yljW9o5OxcWzfh6avkVwDD2VYdwIGqTkl+OGFISgypqzfu7dOe4QNLL2aqcWBmI3PMtLIK233lw==",
"license": "Apache-2.0",
"dependencies": {
"playwright-core": "1.59.1"
},
"bin": {
"playwright": "cli.js"
},
"engines": {
"node": ">=18"
},
"optionalDependencies": {
"fsevents": "2.3.2"
}
},
"node_modules/playwright-core": {
"version": "1.59.1",
"resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.59.1.tgz",
"integrity": "sha512-HBV/RJg81z5BiiZ9yPzIiClYV/QMsDCKUyogwH9p3MCP6IYjUFu/MActgYAvK0oWyV9NlwM3GLBjADyWgydVyg==",
"license": "Apache-2.0",
"bin": {
"playwright-core": "cli.js"
},
"engines": {
"node": ">=18"
}
}
}
}
+16
View File
@@ -0,0 +1,16 @@
{
"name": "web-scraping",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [],
"author": "",
"license": "ISC",
"type": "commonjs",
"dependencies": {
"playwright": "^1.59.1"
}
}
@@ -0,0 +1,189 @@
const fs = require("fs");
const { chromium } = require("playwright");
// Pass --headed on the command line to launch a visible browser; otherwise
// the main routine launches Chromium headless.
const headed = process.argv.includes("--headed");
// Product detail pages to scrape; the main loop visits them in this order.
const productUrls = [
"https://www.prismaticpowders.com/shop/powder-coating-colors/PSS-11248/high-gloss-black"
];
// File (relative to the working directory) that receives the JSON output.
const outputJson = "prismatic_powders.json";
/**
 * Normalizes scraped text: collapses every whitespace run to one space and
 * trims both ends.
 *
 * @param {string|null|undefined} text - Raw text; falsy values become "".
 * @returns {string} The normalized text.
 */
function clean(text) {
  const raw = text || "";
  const collapsed = raw.replace(/\s+/g, " ");
  return collapsed.trim();
}
/**
 * Resolves a possibly-relative URL against a base URL.
 *
 * @param {string} baseUrl - URL to resolve against.
 * @param {string} maybeUrl - Relative or absolute URL; falsy yields "".
 * @returns {string} The absolute URL, or `maybeUrl` unchanged when it cannot
 *   be resolved.
 */
function absoluteUrl(baseUrl, maybeUrl) {
  if (maybeUrl) {
    let resolved = maybeUrl;
    try {
      resolved = new URL(maybeUrl, baseUrl).href;
    } catch {
      // Unresolvable input: deliberately fall back to the raw value.
    }
    return resolved;
  }
  return "";
}
/**
 * Returns the distinct, non-empty, whitespace-normalized entries of `items`
 * in first-seen order.
 *
 * @param {Array<string|null|undefined>} items - Raw values.
 * @returns {string[]} De-duplicated cleaned values.
 */
function unique(items) {
  const seen = new Set();
  for (const item of items.filter(Boolean)) {
    const tidy = clean(item);
    if (tidy) {
      seen.add(tidy);
    }
  }
  return Array.from(seen);
}
/**
 * Finds the first anchor on the page whose visible text matches any of the
 * given patterns and returns its absolute URL.
 *
 * Anchors without an href are skipped so that a matching but link-less
 * element does not hide a later, usable link.
 *
 * @param {object} page - Playwright page to inspect.
 * @param {string[]} patterns - Regular-expression sources, matched
 *   case-insensitively against each anchor's text.
 * @returns {Promise<string>} Absolute URL of the first match, or "" if none.
 */
async function getLinkByText(page, patterns) {
  // Compile once instead of once per (link, pattern) pair.
  const matchers = patterns.map((pattern) => new RegExp(pattern, "i"));
  // The callback runs in the browser, so it must not reference outer variables.
  const links = await page.locator("a").evaluateAll((anchors) =>
    anchors.map((a) => ({
      text: (a.innerText || a.textContent || "").replace(/\s+/g, " ").trim(),
      href: a.getAttribute("href") || ""
    }))
  );
  for (const link of links) {
    if (!link.href) {
      continue;
    }
    if (matchers.some((matcher) => matcher.test(link.text))) {
      return absoluteUrl(page.url(), link.href);
    }
  }
  return "";
}
/**
 * Extracts quantity-based price tiers such as "1 - 49 lbs $12.47" or
 * "200+ lbs $11.22" from page text.
 *
 * Ranges may use a hyphen, en dash or em dash. Prices may contain thousands
 * separators ("$1,250.50"), which are removed before parsing.
 *
 * @param {string} plainText - Whitespace-normalized page text; falsy yields [].
 * @returns {Array<{min: number, max: (number|null), price: number}>} One entry
 *   per tier in document order; `max` is null for open-ended "N+" tiers.
 */
function parsePriceTiers(plainText) {
  // Groups: 1 = range min, 2 = range max, 3 = open-ended min, 4 = price.
  const tierPattern =
    /(?:(\d+)\s*[-\u2013\u2014]\s*(\d+)|(\d+)\s*\+)\s*lbs\s*\$((?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d+)?|\.\d+)/gi;
  const matches = [...(plainText || "").matchAll(tierPattern)];
  return matches.map(([, rangeMin, rangeMax, openMin, rawPrice]) => {
    const isOpenEnded = openMin !== undefined;
    return {
      min: parseInt(isOpenEnded ? openMin : rangeMin, 10),
      max: isOpenEnded ? null : parseInt(rangeMax, 10),
      price: parseFloat(rawPrice.replace(/,/g, ""))
    };
  });
}
/**
 * Picks the most likely product swatch image from the page's <img> elements.
 *
 * Preference order: a non-thumbnail images.nicindustries.com image, then any
 * images.nicindustries.com image, then any image whose URL mentions
 * "prismatic", "powder" or "color".
 *
 * @param {object} page - Playwright page to inspect.
 * @returns {Promise<string>} The chosen image URL, or "" if nothing matches.
 */
async function getSampleImageUrl(page) {
  // The callback runs in the browser, so it must not reference outer variables.
  const sources = await page.locator("img").evaluateAll((imgs) => {
    const found = [];
    for (const img of imgs) {
      const src =
        img.currentSrc ||
        img.src ||
        img.getAttribute("src") ||
        img.getAttribute("data-src") ||
        "";
      if (src) {
        found.push(src);
      }
    }
    return found;
  });
  const preferences = [
    (src) => /images\.nicindustries\.com/i.test(src) && !/thumbnail/i.test(src),
    (src) => /images\.nicindustries\.com/i.test(src),
    (src) => /prismatic|powder|color/i.test(src)
  ];
  for (const accepts of preferences) {
    const hit = sources.find(accepts);
    if (hit) {
      return hit;
    }
  }
  return "";
}
/**
 * Loads one product page and extracts its fields.
 *
 * @param {object} page - Playwright page used for navigation.
 * @param {string} url - Product detail URL to scrape.
 * @returns {Promise<object>} Record with sku, color_name, description,
 *   price_tiers, document URLs, sample_image_url, product_url and scraped_at.
 * @throws {Error} When the site answers 403 / "403 Forbidden", or with any
 *   other HTTP status of 400 or above, so that an error page is never
 *   written out as a product.
 */
async function parseProduct(page, url) {
  console.log(`Scraping ${url}`);
  const response = await page.goto(url, {
    waitUntil: "domcontentloaded",
    timeout: 60000
  });
  // Fixed pause after DOMContentLoaded before reading the page.
  await page.waitForTimeout(3000);
  // goto() can resolve without a response; 0 then means "status unknown".
  const status = response ? response.status() : 0;
  const pageTitle = clean(await page.title().catch(() => ""));
  const plainText = clean(await page.locator("body").innerText().catch(() => ""));
  console.log(`HTTP status: ${status}`);
  console.log(`Page title: ${pageTitle}`);
  // Do not silently output a fake product if blocked.
  if (status === 403 || /^403 Forbidden$/i.test(pageTitle) || /^403 Forbidden$/i.test(plainText)) {
    throw new Error("403 Forbidden returned by site.");
  }
  // Not-found, rate-limit and server-error pages are not products either.
  if (status >= 400) {
    throw new Error(`HTTP ${status} returned by site.`);
  }
  const title = clean(await page.locator("h1").first().innerText().catch(() => ""));
  const skuMatch = plainText.match(/Item:\s*([A-Z0-9-]+)/i);
  const sku = skuMatch ? skuMatch[1] : "";
  // Description runs from "Description:" to the first known trailer or end of text.
  const descMatch = plainText.match(/Description:\s*(.*?)(WARNING:|What does this match\?|$)/is);
  const description = descMatch ? clean(descMatch[1]) : "";
  const priceTiers = parsePriceTiers(plainText);
  const safetyDataSheetUrl = await getLinkByText(page, ["Safety Data Sheet", "\\bSDS\\b"]);
  const applicationGuideUrl = await getLinkByText(page, ["Application Guide"]);
  const technicalDataSheetUrl = await getLinkByText(page, ["Tech Data Sheet", "Technical Data Sheet", "\\bTDS\\b"]);
  const sampleImageUrl = await getSampleImageUrl(page);
  return {
    sku,
    color_name: title,
    description,
    price_tiers: priceTiers,
    safety_data_sheet_url: safetyDataSheetUrl,
    technical_data_sheet_url: technicalDataSheetUrl,
    application_guide_url: applicationGuideUrl,
    sample_image_url: sampleImageUrl,
    product_url: url,
    scraped_at: new Date().toISOString()
  };
}
/**
 * Entry point: scrapes every URL in `productUrls` with one browser page and
 * writes `{ results, errors }` to `outputJson`.
 *
 * Per-URL failures are recorded in `errors` and do not stop the run. The
 * browser is closed even if setup or the loop throws, and any unexpected
 * failure is reported with a non-zero exit code.
 */
(async () => {
  const browser = await chromium.launch({
    headless: !headed
  });
  const results = [];
  const errors = [];
  try {
    const context = await browser.newContext({
      userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
      viewport: { width: 1365, height: 900 },
      locale: "en-US",
      timezoneId: "America/New_York"
    });
    const page = await context.newPage();
    for (const url of productUrls) {
      try {
        const row = await parseProduct(page, url);
        results.push(row);
        // Pause between successful product requests.
        await page.waitForTimeout(3000);
      } catch (err) {
        console.warn(`Failed ${url}: ${err.message}`);
        errors.push({
          product_url: url,
          error: err.message,
          scraped_at: new Date().toISOString()
        });
      }
    }
  } finally {
    // Release the browser process even when something above throws.
    await browser.close();
  }
  // If you prefer only the array, change this to JSON.stringify(results, null, 2)
  const output = {
    results,
    errors
  };
  fs.writeFileSync(outputJson, JSON.stringify(output, null, 2), "utf8");
  console.log(`Done. Output: ${outputJson}`);
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
@@ -0,0 +1,33 @@
{
"results": [
{
"sku": "PSS-11248",
"color_name": "High Gloss Black",
"description": "Prismatic Powders developed High Gloss Black to be the only high gloss black powder coating you will ever need. It has an incredibly deep, mirror like finish with amazing flow out, yielding the highest gloss, true black finish available in a powder coating. High Gloss Black is a polyester solid tone and is the best option to use as a base coat with all of our clear metallics, because of its mirror-like finish. Gloss Units: 85+ Proposition 65 Warning",
"price_tiers": [
{
"min": 1,
"max": 49,
"price": 12.47
},
{
"min": 50,
"max": 199,
"price": 11.85
},
{
"min": 200,
"max": null,
"price": 11.22
}
],
"safety_data_sheet_url": "https://images.nicindustries.com/prismatic/documents/8099/prismatic-powders-p-series-sds-dt20260126212318272746.pdf?1769462600",
"technical_data_sheet_url": "https://images.nicindustries.com/prismatic/documents/5536/pss-11248-high-gloss-black-tds-dt20240111230300658308.pdf?1705014182",
"application_guide_url": "https://images.nicindustries.com/prismatic/documents/2274/prismatic-powders-application-guide-dt20230508192819506132.pdf?1683574101",
"sample_image_url": "https://images.nicindustries.com/prismatic/products/15027/high-gloss-black-pss-11248-dt20251107212621375559-thumbnail.jpg?size=600",
"product_url": "https://www.prismaticpowders.com/shop/powder-coating-colors/PSS-11248/high-gloss-black",
"scraped_at": "2026-04-30T12:52:36.244Z"
}
],
"errors": []
}
File diff suppressed because it is too large Load Diff
Binary file not shown.
@@ -0,0 +1,270 @@
{
"completed_colors": {
"pris_black": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_black",
"http_status": 200,
"added": 472,
"added_during_scroll": 472,
"total_after": 472,
"completed_at": "2026-04-30T00:47:46.289Z"
},
"pris_blue": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_blue",
"http_status": 200,
"added": 948,
"added_during_scroll": 948,
"total_after": 1420,
"completed_at": "2026-04-30T00:49:25.145Z"
},
"pris_bronze": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_bronze",
"http_status": 200,
"added": 358,
"added_during_scroll": 358,
"total_after": 1778,
"completed_at": "2026-04-30T00:50:18.466Z"
},
"pris_brown": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_brown",
"http_status": 200,
"added": 373,
"added_during_scroll": 373,
"total_after": 2151,
"completed_at": "2026-04-30T00:51:18.033Z"
},
"pris_clear": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_clear",
"http_status": 200,
"added": 19,
"added_during_scroll": 19,
"total_after": 2170,
"completed_at": "2026-04-30T00:51:42.889Z"
},
"pris_copper": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_copper",
"http_status": 200,
"added": 1094,
"added_during_scroll": 1094,
"total_after": 3264,
"completed_at": "2026-04-30T00:56:34.934Z"
},
"pris_gold": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_gold",
"http_status": 200,
"added": 152,
"added_during_scroll": 152,
"total_after": 3416,
"completed_at": "2026-04-30T00:57:26.775Z"
},
"pris_gray": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_gray",
"http_status": 200,
"added": 0,
"added_during_scroll": 0,
"total_after": 3416,
"completed_at": "2026-04-30T00:57:49.624Z"
},
"pris_green": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_green",
"http_status": 200,
"added": 0,
"added_during_scroll": 0,
"total_after": 3416,
"completed_at": "2026-04-30T00:58:12.277Z"
},
"pris_orange": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_orange",
"http_status": 200,
"added": 233,
"added_during_scroll": 233,
"total_after": 3649,
"completed_at": "2026-04-30T00:59:06.776Z"
},
"pris_pink": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_pink",
"http_status": 200,
"added": 169,
"added_during_scroll": 169,
"total_after": 3818,
"completed_at": "2026-04-30T00:59:49.323Z"
},
"pris_purple": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_purple",
"http_status": 200,
"added": 182,
"added_during_scroll": 182,
"total_after": 4000,
"completed_at": "2026-04-30T01:00:38.111Z"
},
"pris_red": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_red",
"http_status": 200,
"added": 346,
"added_during_scroll": 346,
"total_after": 4346,
"completed_at": "2026-04-30T01:01:51.910Z"
},
"pris_silver": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_silver",
"http_status": 200,
"added": 210,
"added_during_scroll": 210,
"total_after": 4556,
"completed_at": "2026-04-30T01:02:51.835Z"
},
"pris_tan": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_tan",
"http_status": 200,
"added": 219,
"added_during_scroll": 219,
"total_after": 4775,
"completed_at": "2026-04-30T01:03:43.244Z"
},
"pris_white": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_white",
"http_status": 200,
"added": 218,
"added_during_scroll": 218,
"total_after": 4993,
"completed_at": "2026-04-30T01:04:39.931Z"
},
"pris_yellow": {
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_yellow",
"http_status": 200,
"added": 199,
"added_during_scroll": 199,
"total_after": 5192,
"completed_at": "2026-04-30T01:05:31.945Z"
}
},
"runs": [
{
"started_at": "2026-04-30T00:46:47.692Z",
"existing_at_start": 0,
"colors_attempted": [
{
"color": "pris_black",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_black",
"http_status": 200,
"added": 472,
"total_after": 472
},
{
"color": "pris_blue",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_blue",
"http_status": 200,
"added": 948,
"total_after": 1420
},
{
"color": "pris_bronze",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_bronze",
"http_status": 200,
"added": 358,
"total_after": 1778
},
{
"color": "pris_brown",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_brown",
"http_status": 200,
"added": 373,
"total_after": 2151
},
{
"color": "pris_clear",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_clear",
"http_status": 200,
"added": 19,
"total_after": 2170
},
{
"color": "pris_copper",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_copper",
"http_status": 200,
"added": 1094,
"total_after": 3264
},
{
"color": "pris_gold",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_gold",
"http_status": 200,
"added": 152,
"total_after": 3416
},
{
"color": "pris_gray",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_gray",
"http_status": 200,
"added": 0,
"total_after": 3416
},
{
"color": "pris_green",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_green",
"http_status": 200,
"added": 0,
"total_after": 3416
},
{
"color": "pris_orange",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_orange",
"http_status": 200,
"added": 233,
"total_after": 3649
},
{
"color": "pris_pink",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_pink",
"http_status": 200,
"added": 169,
"total_after": 3818
},
{
"color": "pris_purple",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_purple",
"http_status": 200,
"added": 182,
"total_after": 4000
},
{
"color": "pris_red",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_red",
"http_status": 200,
"added": 346,
"total_after": 4346
},
{
"color": "pris_silver",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_silver",
"http_status": 200,
"added": 210,
"total_after": 4556
},
{
"color": "pris_tan",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_tan",
"http_status": 200,
"added": 219,
"total_after": 4775
},
{
"color": "pris_white",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_white",
"http_status": 200,
"added": 218,
"total_after": 4993
},
{
"color": "pris_yellow",
"url": "https://www.prismaticpowders.com/shop/powder-coating-colors?color=pris_yellow",
"http_status": 200,
"added": 199,
"total_after": 5192
}
],
"finished_at": "2026-04-30T01:05:34.987Z",
"final_total": 5192,
"new_this_run": 5192
}
]
}
@@ -0,0 +1,237 @@
const fs = require("fs");
const { chromium } = require("playwright");
const headed = process.argv.includes("--headed");
/**
 * Reads a `--name=value` option from the command line.
 *
 * @param {string} name - Option name without the leading dashes.
 * @param {string} defaultValue - Returned when the option is absent.
 * @returns {string} Text after `=` of the first matching argument, or
 *   `defaultValue`.
 */
function getArgValue(name, defaultValue) {
  const flag = "--" + name + "=";
  for (const arg of process.argv) {
    if (arg.startsWith(flag)) {
      return arg.slice(flag.length);
    }
  }
  return defaultValue;
}
/**
 * Reads a positive-integer `--name=value` option.
 *
 * A missing option yields `fallback`. A value that does not parse to an
 * integer above zero is rejected with a warning and `fallback` is used,
 * because NaN or 0 would make the scroll loop do no work while the color is
 * still logged as completed.
 *
 * @param {string} name - Option name without the leading dashes.
 * @param {number} fallback - Default value.
 * @returns {number} The parsed option or `fallback`.
 */
function readPositiveIntArg(name, fallback) {
  const raw = getArgValue(name, String(fallback));
  const parsed = Number.parseInt(raw, 10);
  if (Number.isInteger(parsed) && parsed > 0) {
    return parsed;
  }
  console.warn(`Ignoring invalid --${name}=${raw}; using ${fallback}.`);
  return fallback;
}
// Upper bound on scroll passes per color filter.
const maxScrollsPerColor = readPositiveIntArg("max-scrolls-per-color", 180);
// Consecutive passes without new URLs after which a color is considered done.
const stopAfterNoNewScrolls = readPositiveIntArg("stop-after-no-new-scrolls", 10);
// Listing page; each color filter is requested as `${baseUrl}?color=<param>`.
const baseUrl = "https://www.prismaticpowders.com/shop/powder-coating-colors";
// Text file of discovered product URLs, read at startup and rewritten as URLs are found.
const outputFile = "product-urls.txt";
// JSON progress log, read at startup and rewritten by writeLog().
const logFile = "color-discovery-log.json";
// Update this list if you find more color params in the site HTML.
// NOTE(review): the committed color-discovery-log.json records 0 URLs added
// for pris_gray and pris_green — confirm these parameter names are valid.
const colorParams = [
"pris_black",
"pris_blue",
"pris_bronze",
"pris_brown",
"pris_clear",
"pris_copper",
"pris_gold",
"pris_gray",
"pris_green",
"pris_orange",
"pris_pink",
"pris_purple",
"pris_red",
"pris_silver",
"pris_tan",
"pris_white",
"pris_yellow"
];
/**
 * Strips the query string and fragment from a URL and trims whitespace.
 *
 * @param {string|null|undefined} url - Raw URL; falsy values become "".
 * @returns {string} Everything before the first "?" or "#", trimmed.
 */
function cleanUrl(url) {
  const raw = url || "";
  const cutAt = raw.search(/[?#]/);
  const bare = cutAt === -1 ? raw : raw.slice(0, cutAt);
  return bare.trim();
}
/**
 * Tells whether a URL points at a product detail page, i.e. contains
 * "/shop/powder-coating-colors/<code>/" (case-insensitive).
 *
 * @param {string|null|undefined} url - URL to test; falsy values give false.
 * @returns {boolean} True when the URL has the product-page path shape.
 */
function isProductUrl(url) {
  const candidate = url || "";
  const productPath = /\/shop\/powder-coating-colors\/[A-Z0-9-]+\//i;
  return productPath.test(candidate);
}
/**
 * Load the URLs already saved in `outputFile`.
 *
 * @returns {string[]} One cleaned URL per non-empty line, in file order
 *   (duplicates are kept); an empty array when the file does not exist.
 */
function readExistingUrls() {
  if (!fs.existsSync(outputFile)) {
    return [];
  }
  const saved = [];
  const lines = fs.readFileSync(outputFile, "utf8").split(/\r?\n/);
  for (const line of lines) {
    const cleaned = cleanUrl(line);
    // Blank lines (and lines that clean down to nothing) are dropped.
    if (cleaned) {
      saved.push(cleaned);
    }
  }
  return saved;
}
/**
 * Overwrite `outputFile` with the given URLs, sorted, one per CRLF-terminated line.
 *
 * @param {Iterable<string>} urls - URLs to persist; the input is not modified.
 */
function writeUrls(urls) {
  const ordered = Array.from(urls);
  ordered.sort();
  const contents = `${ordered.join("\r\n")}\r\n`;
  fs.writeFileSync(outputFile, contents, "utf8");
}
/**
 * Load the discovery log from `logFile`.
 *
 * The caller indexes `completed_colors` and pushes onto `runs`, so the returned
 * object always carries both with usable types, even when the file is missing,
 * unparseable, or holds JSON of a different shape.
 *
 * @returns {{completed_colors: Object, runs: Array}} The stored log (other keys
 *   preserved), or a fresh empty log when nothing usable is on disk.
 */
function readLog() {
  const emptyLog = () => ({ completed_colors: {}, runs: [] });
  const isPlainObject = value =>
    value !== null && typeof value === "object" && !Array.isArray(value);
  if (!fs.existsSync(logFile)) {
    return emptyLog();
  }
  let parsed;
  try {
    parsed = JSON.parse(fs.readFileSync(logFile, "utf8"));
  } catch (err) {
    // Still best-effort (start over with an empty log), but say so: the next
    // writeLog call will overwrite the unreadable file.
    console.warn(`Could not parse ${logFile}; starting with an empty log. Error: ${err.message}`);
    return emptyLog();
  }
  if (!isPlainObject(parsed)) {
    // Valid JSON such as `null` or an array would crash the caller.
    console.warn(`Unexpected content in ${logFile}; starting with an empty log.`);
    return emptyLog();
  }
  return {
    ...parsed,
    completed_colors: isPlainObject(parsed.completed_colors) ? parsed.completed_colors : {},
    runs: Array.isArray(parsed.runs) ? parsed.runs : []
  };
}
/**
 * Overwrite `logFile` with the log serialized as 2-space-indented JSON.
 *
 * @param {Object} log - Log object to persist.
 */
function writeLog(log) {
  const serialized = JSON.stringify(log, null, 2);
  fs.writeFileSync(logFile, serialized, "utf8");
}
/**
 * Gather the product-page links currently present on the page.
 *
 * @param {object} page - Playwright page to inspect.
 * @returns {Promise<string[]>} Cleaned hrefs of every anchor whose href matches
 *   the product path pattern, in document order (duplicates are kept).
 */
async function collectProductLinks(page) {
  // The callback is handed to evaluateAll, so it stays self-contained and
  // keeps its own copy of the product path pattern.
  const hrefs = await page.locator("a").evaluateAll(anchors => {
    const matches = [];
    for (const anchor of anchors) {
      const href = anchor.href;
      if (href && /\/shop\/powder-coating-colors\/[A-Z0-9-]+\//i.test(href)) {
        matches.push(href);
      }
    }
    return matches;
  });
  const cleaned = [];
  for (const href of hrefs) {
    const stripped = cleanUrl(href);
    if (stripped) {
      cleaned.push(stripped);
    }
  }
  return cleaned;
}
/**
 * Scroll the current page and harvest product links until the results dry up.
 *
 * Each pass collects the links on the page, merges them into `urls`, rewrites
 * the URL file and logs progress. It then scrolls down 2500 px and waits
 * 1.5 s. Scrolling stops after `maxScrollsPerColor` passes or once
 * `stopAfterNoNewScrolls` consecutive passes added nothing.
 *
 * @param {object} page - Playwright page already showing the listing.
 * @param {Set<string>} urls - Accumulated URLs; mutated in place.
 * @param {string} label - Tag used in the progress log lines.
 * @returns {Promise<number>} How many URLs this call added to `urls`.
 */
async function scrollAndCollect(page, urls, label) {
  let idleStreak = 0;
  let addedTotal = 0;
  let pass = 0;
  while (pass < maxScrollsPerColor) {
    const sizeBefore = urls.size;
    const found = await collectProductLinks(page);
    found.forEach(link => urls.add(link));
    const sizeAfter = urls.size;
    const added = sizeAfter - sizeBefore;
    addedTotal += added;
    idleStreak = added === 0 ? idleStreak + 1 : 0;
    // Persist after every pass so an interrupted run keeps what it found.
    writeUrls(urls);
    console.log(`[${label}] Scroll ${pass + 1}/${maxScrollsPerColor}: +${added}, total ${sizeAfter}, no-new ${idleStreak}`);
    if (idleStreak >= stopAfterNoNewScrolls) {
      break;
    }
    await page.mouse.wheel(0, 2500);
    await page.waitForTimeout(1500);
    pass += 1;
  }
  return addedTotal;
}
/**
 * Entry point: visit each color filter that is not yet recorded as completed,
 * scroll it to collect product URLs, and persist both the URL list and the
 * discovery log after every color.
 *
 * The browser is always closed, including when an unexpected error escapes the
 * loop, and a fatal error sets a non-zero exit code.
 */
(async () => {
  const existingUrls = readExistingUrls();
  const urls = new Set(existingUrls);
  const log = readLog();
  console.log(`Existing URLs in ${outputFile}: ${existingUrls.length}`);
  const browser = await chromium.launch({ headless: !headed });
  try {
    const context = await browser.newContext({
      userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
      viewport: { width: 1365, height: 900 },
      locale: "en-US",
      timezoneId: "America/New_York"
    });
    const page = await context.newPage();
    const runRecord = {
      started_at: new Date().toISOString(),
      existing_at_start: existingUrls.length,
      colors_attempted: []
    };
    for (const color of colorParams) {
      if (log.completed_colors[color]) {
        console.log(`Skipping completed color: ${color}`);
        continue;
      }
      const url = `${baseUrl}?color=${encodeURIComponent(color)}`;
      console.log("");
      console.log(`Opening color filter: ${color}`);
      console.log(url);
      try {
        const response = await page.goto(url, {
          waitUntil: "domcontentloaded",
          timeout: 60000
        });
        const status = response ? response.status() : "unknown";
        console.log(`HTTP status: ${status}`);
        // page.goto resolves for any valid HTTP status, including 4xx/5xx.
        // Treat those as failures so the color is not marked completed and
        // is retried on the next run.
        if (typeof status === "number" && status >= 400) {
          throw new Error(`HTTP ${status} returned for color filter page.`);
        }
        await page.waitForTimeout(5000);
        const before = urls.size;
        const addedDuringScroll = await scrollAndCollect(page, urls, color);
        const after = urls.size;
        const netAdded = after - before;
        log.completed_colors[color] = {
          url,
          http_status: status,
          added: netAdded,
          added_during_scroll: addedDuringScroll,
          total_after: after,
          completed_at: new Date().toISOString()
        };
        runRecord.colors_attempted.push({
          color,
          url,
          http_status: status,
          added: netAdded,
          total_after: after
        });
        writeLog(log);
        writeUrls(urls);
        console.log(`Color complete: ${color}; added ${netAdded}; total ${after}`);
        // Polite pause between filters.
        await page.waitForTimeout(3000);
      } catch (err) {
        // A failed color is recorded for this run only; it stays out of
        // completed_colors so a later run attempts it again.
        console.log(`Color failed: ${color}; ${err.message}`);
        runRecord.colors_attempted.push({
          color,
          url,
          added: 0,
          error: err.message
        });
        writeLog(log);
      }
    }
    runRecord.finished_at = new Date().toISOString();
    runRecord.final_total = urls.size;
    runRecord.new_this_run = urls.size - existingUrls.length;
    log.runs.push(runRecord);
    writeLog(log);
    writeUrls(urls);
    console.log("");
    console.log("Color-param discovery complete.");
    console.log(`Existing at start: ${existingUrls.length}`);
    console.log(`Final total: ${urls.size}`);
    console.log(`New this run: ${urls.size - existingUrls.length}`);
    console.log(`Output: ${outputFile}`);
    console.log(`Log: ${logFile}`);
  } finally {
    // Always release the browser process, even when something above threw.
    await browser.close();
  }
})().catch(err => {
  console.error(err);
  process.exitCode = 1;
});
+60
View File
@@ -0,0 +1,60 @@
{
"name": "web-scraping",
"version": "1.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "web-scraping",
"version": "1.0.0",
"license": "ISC",
"dependencies": {
"playwright": "^1.59.1"
}
},
"node_modules/fsevents": {
"version": "2.3.2",
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz",
"integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==",
"hasInstallScript": true,
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": "^8.16.0 || ^10.6.0 || >=11.0.0"
}
},
"node_modules/playwright": {
"version": "1.59.1",
"resolved": "https://registry.npmjs.org/playwright/-/playwright-1.59.1.tgz",
"integrity": "sha512-C8oWjPR3F81yljW9o5OxcWzfh6avkVwDD2VYdwIGqTkl+OGFISgypqzfu7dOe4QNLL2aqcWBmI3PMtLIK233lw==",
"license": "Apache-2.0",
"dependencies": {
"playwright-core": "1.59.1"
},
"bin": {
"playwright": "cli.js"
},
"engines": {
"node": ">=18"
},
"optionalDependencies": {
"fsevents": "2.3.2"
}
},
"node_modules/playwright-core": {
"version": "1.59.1",
"resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.59.1.tgz",
"integrity": "sha512-HBV/RJg81z5BiiZ9yPzIiClYV/QMsDCKUyogwH9p3MCP6IYjUFu/MActgYAvK0oWyV9NlwM3GLBjADyWgydVyg==",
"license": "Apache-2.0",
"bin": {
"playwright-core": "cli.js"
},
"engines": {
"node": ">=18"
}
}
}
}
+16
View File
@@ -0,0 +1,16 @@
{
"name": "web-scraping",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [],
"author": "",
"license": "ISC",
"type": "commonjs",
"dependencies": {
"playwright": "^1.59.1"
}
}
@@ -0,0 +1,298 @@
const fs = require("fs");
const { chromium } = require("playwright");
// --headed: launch a visible browser window; without it the browser is headless.
const headed = process.argv.includes("--headed");
// --retry-errors: also re-queue URLs already listed in the output file's errors.
const retryErrors = process.argv.includes("--retry-errors");
/**
 * Fetch the value of a `--name=value` command-line option.
 *
 * @param {string} name - Option name without the leading dashes.
 * @param {string} defaultValue - Returned when the option was not supplied.
 * @returns {string} The text following `--name=` in the first matching
 *   argument, or `defaultValue` when there is none.
 */
function getArgValue(name, defaultValue) {
  const prefix = `--${name}=`;
  const match = process.argv.find(arg => arg.startsWith(prefix));
  if (match === undefined) {
    return defaultValue;
  }
  return match.slice(prefix.length);
}
/**
 * Read an integer `--name=value` option from the command line.
 *
 * @param {string} name - Option name without the leading dashes.
 * @param {number} defaultValue - Used when the option is absent or not numeric.
 * @returns {number} The parsed integer, or `defaultValue` when parsing yields NaN.
 */
function getIntArg(name, defaultValue) {
  const parsed = Number.parseInt(getArgValue(name, String(defaultValue)), 10);
  // parseInt returns NaN for non-numeric text. A NaN delay handed to the
  // timers would silently remove the pause between requests, so fall back
  // to the default instead.
  return Number.isNaN(parsed) ? defaultValue : parsed;
}
// File listing the product URLs to scrape, one per line.
const inputFile = getArgValue("input-file", "product-urls.txt");
// JSON file that accumulates scraped results and errors across runs.
const outputJson = getArgValue("output-json", "prismatic_powders.json");
// Append-only text log of progress messages.
const progressLog = getArgValue("progress-log", "prismatic-scrape-progress.log");
// Bounds, in seconds, of the random pause between product requests.
const minDelaySeconds = getIntArg("min-delay-seconds", 8);
const maxDelaySeconds = getIntArg("max-delay-seconds", 18);
// Seconds to wait after navigation before reading the page.
const pageSettleSeconds = getIntArg("page-settle-seconds", 4);
// Maximum number of products to process this run; 0 or less means no limit.
const maxProducts = getIntArg("max-products", 0);
/**
 * Collapse every run of whitespace to a single space and trim both ends.
 *
 * @param {string} [text] - Text to normalize; a falsy value is treated as "".
 * @returns {string} The normalized text.
 */
function clean(text) {
  const value = text || "";
  // Splitting on whitespace runs and dropping the empty edge pieces gives
  // the same result as collapsing the runs and then trimming.
  return value.split(/\s+/).filter(Boolean).join(" ");
}
/**
 * Drop the query string and fragment from a URL and trim surrounding whitespace.
 *
 * @param {string} [url] - URL text; a falsy value is treated as "".
 * @returns {string} The part before the first `?` and first `#`, trimmed.
 */
function cleanUrl(url) {
  let result = url || "";
  for (const delimiter of ["?", "#"]) {
    result = result.split(delimiter)[0];
  }
  return result.trim();
}
/**
 * Pause for the given time.
 *
 * @param {number} ms - Delay in milliseconds passed to setTimeout.
 * @returns {Promise<void>} Promise that resolves once the timer fires.
 */
function sleep(ms) {
  return new Promise(wake => {
    setTimeout(wake, ms);
  });
}
/**
 * Pick a random delay between the configured minimum and maximum.
 *
 * The lower bound is `minDelaySeconds` clamped to at least 0, and the upper
 * bound is never below the lower bound.
 *
 * @returns {number} Whole number of milliseconds within those bounds (inclusive).
 */
function randomDelayMs() {
  const lowerMs = Math.max(0, minDelaySeconds * 1000);
  const upperMs = Math.max(lowerMs, maxDelaySeconds * 1000);
  // The +1 makes the upper bound reachable after flooring.
  const span = upperMs - lowerMs + 1;
  return Math.floor(lowerMs + Math.random() * span);
}
/**
 * Print a timestamped message and append the same line to `progressLog`.
 *
 * @param {string} message - Text to record.
 */
function logLine(message) {
  const timestamp = new Date().toISOString();
  const stamped = `[${timestamp}] ${message}`;
  console.log(stamped);
  fs.appendFileSync(progressLog, `${stamped}\r\n`, "utf8");
}
/**
 * Resolve a possibly relative URL against a base URL.
 *
 * @param {string} baseUrl - URL that relative references are resolved against.
 * @param {string} maybeUrl - Absolute or relative URL; may be empty.
 * @returns {string} The resolved absolute URL, "" when `maybeUrl` is falsy, or
 *   `maybeUrl` unchanged when it cannot be resolved.
 */
function absoluteUrl(baseUrl, maybeUrl) {
  if (maybeUrl) {
    try {
      const resolved = new URL(maybeUrl, baseUrl);
      return resolved.href;
    } catch {
      // Unresolvable input is handed back untouched.
      return maybeUrl;
    }
  }
  return "";
}
/**
 * Read the product URLs to scrape from `inputFile`.
 *
 * Every line is cleaned of query string, fragment and surrounding whitespace.
 * Only lines matching the product path pattern are kept.
 *
 * @returns {string[]} Unique product URLs in first-seen order.
 * @throws {Error} When `inputFile` does not exist.
 */
function loadInputUrls() {
  if (!fs.existsSync(inputFile)) {
    throw new Error(`Input file not found: ${inputFile}`);
  }
  const productPath = /\/shop\/powder-coating-colors\/[A-Z0-9-]+\//i;
  const unique = new Set();
  const lines = fs.readFileSync(inputFile, "utf8").split(/\r?\n/);
  for (const line of lines) {
    const candidate = cleanUrl(line);
    if (!candidate || candidate.startsWith("#")) {
      continue;
    }
    if (productPath.test(candidate)) {
      unique.add(candidate);
    }
  }
  return [...unique];
}
/**
 * Load previously saved scrape data from `outputJson`.
 *
 * Accepts either a bare array of results or an object with `results` and
 * `errors`; a non-array field is replaced by an empty array.
 *
 * @returns {{results: Array, errors: Array}} Stored data, or empty lists when
 *   the file does not exist.
 * @throws {Error} When the file cannot be read as the expected JSON; the file
 *   is first copied to a timestamped `.bak` backup named in the message.
 */
function loadOutput() {
  if (!fs.existsSync(outputJson)) {
    return { results: [], errors: [] };
  }
  const arrayOrEmpty = value => (Array.isArray(value) ? value : []);
  try {
    const stored = JSON.parse(fs.readFileSync(outputJson, "utf8"));
    return Array.isArray(stored)
      ? { results: stored, errors: [] }
      : { results: arrayOrEmpty(stored.results), errors: arrayOrEmpty(stored.errors) };
  } catch (err) {
    // Keep a copy of the unreadable file before refusing to continue.
    const backupPath = `${outputJson}.invalid-${Date.now()}.bak`;
    fs.copyFileSync(outputJson, backupPath);
    throw new Error(`Could not parse existing ${outputJson}. Backed it up to ${backupPath}. Error: ${err.message}`);
  }
}
/**
 * Persist the scrape data to `outputJson` as 2-space-indented JSON.
 *
 * The JSON is written to a sibling `.tmp` file first and then renamed over
 * the real file.
 *
 * @param {Object} data - Data to persist.
 */
function saveOutput(data) {
  const stagingPath = `${outputJson}.tmp`;
  const payload = JSON.stringify(data, null, 2);
  fs.writeFileSync(stagingPath, payload, "utf8");
  fs.renameSync(stagingPath, outputJson);
}
/**
 * Extract weight-based price tiers from page text.
 *
 * Recognizes `<a> - <b> lbs $<price>` (bounded tier) and `<a>+ lbs $<price>`
 * (open-ended tier), case-insensitively.
 *
 * @param {string} plainText - Text to scan.
 * @returns {Array<{min: ?number, max: ?number, price: number}>} One entry per
 *   tier in order of appearance; `max` is null for open-ended tiers.
 */
function parsePriceTiers(plainText) {
  const tierPattern = /(\d+\s*-\s*\d+\s*lbs|\d+\s*\+\s*lbs)\s*\$([\d.]+)/gi;
  const tiers = [];
  for (const found of plainText.matchAll(tierPattern)) {
    const rangeText = clean(found[1]);
    const bounded = rangeText.match(/(\d+)\s*-\s*(\d+)/);
    const openEnded = rangeText.match(/(\d+)\s*\+/);
    let min = null;
    let max = null;
    if (bounded) {
      min = parseInt(bounded[1], 10);
      max = parseInt(bounded[2], 10);
    }
    if (openEnded) {
      min = parseInt(openEnded[1], 10);
      max = null;
    }
    tiers.push({ min, max, price: parseFloat(found[2]) });
  }
  return tiers;
}
/**
 * Find the first link on the page whose visible text matches any pattern.
 *
 * @param {object} page - Playwright page to search.
 * @param {string[]} patterns - Regular-expression sources, tested
 *   case-insensitively against each link's whitespace-normalized text.
 * @returns {Promise<string>} The matching link's href resolved against the
 *   page URL, or "" when no link matches.
 */
async function getLinkByText(page, patterns) {
  const anchors = await page.locator("a").evaluateAll((nodes) => {
    const summaries = [];
    for (const node of nodes) {
      const rawText = node.innerText || node.textContent || "";
      summaries.push({
        text: rawText.replace(/\s+/g, " ").trim(),
        href: node.getAttribute("href") || ""
      });
    }
    return summaries;
  });
  // Links are checked in the order returned; the first one matching any pattern wins.
  for (const { text, href } of anchors) {
    for (const pattern of patterns) {
      if (new RegExp(pattern, "i").test(text)) {
        return absoluteUrl(page.url(), href);
      }
    }
  }
  return "";
}
/**
 * Choose the most likely product sample image on the page.
 *
 * Preference order, first match in document order within each tier:
 *   1. an images.nicindustries.com URL that does not contain "thumbnail",
 *   2. any images.nicindustries.com URL,
 *   3. any URL containing "prismatic", "powder" or "color".
 *
 * @param {object} page - Playwright page to search.
 * @returns {Promise<string>} The chosen image URL, or "" when none qualifies.
 */
async function getSampleImageUrl(page) {
  const sources = await page.locator("img").evaluateAll((nodes) => {
    const found = [];
    for (const node of nodes) {
      const source =
        node.currentSrc ||
        node.src ||
        node.getAttribute("src") ||
        node.getAttribute("data-src") ||
        "";
      if (source) {
        found.push(source);
      }
    }
    return found;
  });
  const preferences = [
    (src) => /images\.nicindustries\.com/i.test(src) && !/thumbnail/i.test(src),
    (src) => /images\.nicindustries\.com/i.test(src),
    (src) => /prismatic|powder|color/i.test(src)
  ];
  for (const accepts of preferences) {
    const hit = sources.find(accepts);
    if (hit) {
      return hit;
    }
  }
  return "";
}
/**
 * Navigate to a product page and extract its details.
 *
 * @param {object} page - Playwright page used for the navigation.
 * @param {string} url - Product page URL.
 * @returns {Promise<Object>} Record with sku, color_name, description,
 *   price_tiers, data-sheet / guide / image URLs, product_url and scraped_at.
 * @throws {Error} When the site answers with an HTTP error status, when the
 *   page title or body identifies a 403/404 page, or when neither a SKU nor a
 *   title can be found.
 */
async function parseProduct(page, url) {
  logLine(`Scraping ${url}`);
  const response = await page.goto(url, {
    waitUntil: "domcontentloaded",
    timeout: 60000
  });
  await page.waitForTimeout(pageSettleSeconds * 1000);
  const status = response ? response.status() : 0;
  const pageTitle = clean(await page.title().catch(() => ""));
  const plainText = clean(await page.locator("body").innerText().catch(() => ""));
  logLine(`HTTP status ${status}; title "${pageTitle}"`);
  if (status === 403 || /^403 Forbidden$/i.test(pageTitle) || /^403 Forbidden$/i.test(plainText)) {
    throw new Error("403 Forbidden returned by site.");
  }
  // Only a standalone "404" counts as a Not Found marker. The digits inside a
  // longer number or a dash-joined code (e.g. a SKU such as PMB-1404 appearing
  // in the title) must not reject a page that loaded fine.
  if (status === 404 || /(?<![\w-])404(?![\w-])|Page Not Found/i.test(pageTitle)) {
    throw new Error("404 Not Found returned by site.");
  }
  // page.goto resolves for every valid HTTP status, so other error responses
  // (429, 5xx, ...) must be rejected here. Otherwise an error page with an
  // <h1> would be saved as if it were a product.
  if (status >= 400) {
    throw new Error(`HTTP ${status} returned by site.`);
  }
  const title = clean(await page.locator("h1").first().innerText().catch(() => ""));
  const skuMatch = plainText.match(/Item:\s*([A-Z0-9-]+)/i);
  const sku = skuMatch ? skuMatch[1] : "";
  if (!sku && !title) {
    throw new Error("Could not find SKU or title on product page.");
  }
  // The description runs from "Description:" up to the next known section
  // heading, or to the end of the text when no heading follows.
  const descMatch = plainText.match(/Description:\s*(.*?)(WARNING:|What does this match\?|$)/is);
  const description = descMatch ? clean(descMatch[1]) : "";
  const priceTiers = parsePriceTiers(plainText);
  const safetyDataSheetUrl = await getLinkByText(page, ["Safety Data Sheet", "\\bSDS\\b"]);
  const applicationGuideUrl = await getLinkByText(page, ["Application Guide"]);
  const technicalDataSheetUrl = await getLinkByText(page, ["Tech Data Sheet", "Technical Data Sheet", "\\bTDS\\b"]);
  const sampleImageUrl = await getSampleImageUrl(page);
  return {
    sku,
    color_name: title,
    description,
    price_tiers: priceTiers,
    safety_data_sheet_url: safetyDataSheetUrl,
    technical_data_sheet_url: technicalDataSheetUrl,
    application_guide_url: applicationGuideUrl,
    sample_image_url: sampleImageUrl,
    product_url: url,
    scraped_at: new Date().toISOString()
  };
}
/**
 * Entry point: scrape every input URL that has no saved result yet (and, unless
 * --retry-errors is given, no saved error), writing the output file after each
 * product so an interrupted run can be resumed.
 *
 * The browser is always closed, including when an unexpected error escapes the
 * loop, and a fatal error sets a non-zero exit code.
 */
(async () => {
  const allUrls = loadInputUrls();
  const data = loadOutput();
  const completedUrls = new Set(data.results.map(r => cleanUrl(r.product_url)).filter(Boolean));
  const errorUrls = new Set(data.errors.map(e => cleanUrl(e.product_url)).filter(Boolean));
  let remainingUrls = allUrls.filter(url => {
    if (completedUrls.has(url)) return false;
    if (!retryErrors && errorUrls.has(url)) return false;
    return true;
  });
  if (maxProducts > 0) {
    remainingUrls = remainingUrls.slice(0, maxProducts);
  }
  logLine(`Input URLs: ${allUrls.length}`);
  logLine(`Already scraped: ${completedUrls.size}`);
  logLine(`Existing errors: ${errorUrls.size}`);
  logLine(`Retry errors: ${retryErrors ? "yes" : "no"}`);
  logLine(`This run target count: ${remainingUrls.length}`);
  logLine(`Delay range: ${minDelaySeconds}-${maxDelaySeconds} seconds; page settle: ${pageSettleSeconds} seconds`);
  if (remainingUrls.length === 0) {
    logLine("Nothing to scrape. Done.");
    saveOutput(data);
    return;
  }
  const browser = await chromium.launch({
    headless: !headed
  });
  try {
    const context = await browser.newContext({
      userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
      viewport: { width: 1365, height: 900 },
      locale: "en-US",
      timezoneId: "America/New_York"
    });
    const page = await context.newPage();
    let processedThisRun = 0;
    for (const [index, url] of remainingUrls.entries()) {
      try {
        const row = await parseProduct(page, url);
        // If retrying an old error, keep the old error history but avoid duplicate successful result.
        if (!completedUrls.has(url)) {
          data.results.push(row);
          completedUrls.add(url);
        }
        processedThisRun++;
        saveOutput(data);
        logLine(`Saved result ${processedThisRun}/${remainingUrls.length}: ${row.sku || "(no sku)"} ${row.color_name || ""}`);
      } catch (err) {
        const errorRecord = {
          product_url: url,
          error: err.message,
          scraped_at: new Date().toISOString()
        };
        data.errors.push(errorRecord);
        saveOutput(data);
        logLine(`ERROR ${url}: ${err.message}`);
      }
      // No request follows the final product, so there is nothing to space out.
      if (index === remainingUrls.length - 1) {
        break;
      }
      const delay = randomDelayMs();
      logLine(`Waiting ${(delay / 1000).toFixed(1)} seconds before next product...`);
      await sleep(delay);
    }
  } finally {
    // Always release the browser process, even when something above threw.
    await browser.close();
  }
  logLine(`Done. Results: ${data.results.length}; Errors: ${data.errors.length}; Output: ${outputJson}`);
})().catch(err => {
  console.error(err);
  process.exitCode = 1;
});
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large Load Diff