diff --git a/scripts/Prismatic Data Scraper/.gitignore b/scripts/Prismatic Data Scraper/.gitignore
new file mode 100644
index 0000000..657abbc
--- /dev/null
+++ b/scripts/Prismatic Data Scraper/.gitignore
@@ -0,0 +1,8 @@
+# Build output
+bin/
+obj/
+
+# Transient scrape artifacts
+*.tmp
+*.invalid-*.bak
+prismatic-sync.log
diff --git a/scripts/Prismatic Data Scraper/Infrastructure/BrowserSession.cs b/scripts/Prismatic Data Scraper/Infrastructure/BrowserSession.cs
new file mode 100644
index 0000000..0443aa5
--- /dev/null
+++ b/scripts/Prismatic Data Scraper/Infrastructure/BrowserSession.cs
@@ -0,0 +1,43 @@
+using Microsoft.Playwright;
+
+namespace PrismaticSync.Infrastructure;
+
+/// <summary>
+/// A Chromium session with a realistic desktop fingerprint (UA, viewport, locale, timezone) —
+/// matching the original scraper's settings to look like a normal browser. Headless unless the
+/// caller asks for a headed run.
+/// </summary>
+public sealed class BrowserSession : IAsyncDisposable
+{
+    private IPlaywright? _pw;
+    private IBrowser? _browser;
+    private IBrowserContext? _context;
+
+    /// <summary>The single page opened in this session's browser context.</summary>
+    public IPage Page { get; private set; } = null!;
+
+    /// <summary>
+    /// Starts Playwright, launches Chromium and opens one page in a context configured with a fixed
+    /// user agent, viewport, locale and timezone.
+    /// </summary>
+    /// <param name="headed">When true the browser window is shown; otherwise it runs headless.</param>
+    /// <returns>A ready-to-use session that the caller is responsible for disposing.</returns>
+    public static async Task<BrowserSession> CreateAsync(bool headed)
+    {
+        var session = new BrowserSession();
+        try
+        {
+            session._pw = await Playwright.CreateAsync();
+            session._browser = await session._pw.Chromium.LaunchAsync(new BrowserTypeLaunchOptions
+            {
+                Headless = !headed
+            });
+            session._context = await session._browser.NewContextAsync(new BrowserNewContextOptions
+            {
+                UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " +
+                            "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
+                ViewportSize = new ViewportSize { Width = 1365, Height = 900 },
+                Locale = "en-US",
+                TimezoneId = "America/New_York"
+            });
+            session.Page = await session._context.NewPageAsync();
+            return session;
+        }
+        catch
+        {
+            // Startup failed part-way: release whatever was already created so nothing is left
+            // running, but report the original failure rather than any cleanup error.
+            try { await session.DisposeAsync(); }
+            catch { /* keep the startup failure as the reported error */ }
+            throw;
+        }
+    }
+
+    /// <summary>
+    /// Closes the context and the browser, then disposes Playwright. Each step runs even if an
+    /// earlier one throws, and calling it again is a no-op because the fields are cleared first.
+    /// </summary>
+    public async ValueTask DisposeAsync()
+    {
+        var context = _context;
+        var browser = _browser;
+        var pw = _pw;
+        _context = null;
+        _browser = null;
+        _pw = null;
+
+        try
+        {
+            if (context is not null) await context.CloseAsync();
+        }
+        finally
+        {
+            try
+            {
+                if (browser is not null) await browser.CloseAsync();
+            }
+            finally
+            {
+                pw?.Dispose();
+            }
+        }
+    }
+}
diff --git a/scripts/Prismatic Data Scraper/Infrastructure/JsonStore.cs b/scripts/Prismatic Data Scraper/Infrastructure/JsonStore.cs
new file mode 100644
index 0000000..5ee544b
--- /dev/null
+++ b/scripts/Prismatic Data Scraper/Infrastructure/JsonStore.cs
@@ -0,0 +1,65 @@
+using System.Text.Json;
+using PrismaticSync.Models;
+
+namespace PrismaticSync.Infrastructure;
+
+/// <summary>
+/// Loads/saves the scrape output and the URL list. Saves write to a sibling <c>.tmp</c> file and then
+/// move it over the target, so a crash mid-save can't leave a half-written file behind.
+/// </summary>
+public static class JsonStore
+{
+    private static readonly JsonSerializerOptions WriteOptions = new() { WriteIndented = true };
+    private static readonly JsonSerializerOptions ReadOptions = new() { PropertyNameCaseInsensitive = true };
+
+    /// <summary>
+    /// Reads the scrape output at <paramref name="path"/>. A missing file yields an empty output.
+    /// </summary>
+    /// <exception cref="InvalidOperationException">
+    /// The file exists but could not be parsed. A timestamped <c>.invalid-*.bak</c> copy is attempted
+    /// first, and the parse failure is attached as the inner exception.
+    /// </exception>
+    public static ScrapeOutput LoadOutput(string path)
+    {
+        if (!File.Exists(path))
+            return new ScrapeOutput();
+
+        var json = File.ReadAllText(path);
+        try
+        {
+            // Tolerate a bare array (older output format) as well as { results, errors }.
+            // The char overload compares ordinally, independent of the current culture.
+            if (json.TrimStart().StartsWith('['))
+            {
+                var results = JsonSerializer.Deserialize<List<ProductRecord>>(json, ReadOptions) ?? new();
+                return new ScrapeOutput { Results = results };
+            }
+            return JsonSerializer.Deserialize<ScrapeOutput>(json, ReadOptions) ?? new ScrapeOutput();
+        }
+        catch (Exception ex)
+        {
+            var backup = $"{path}.invalid-{DateTimeOffset.UtcNow.ToUnixTimeSeconds()}.bak";
+
+            // A failed backup must not hide the parse error that brought us here.
+            string backupNote;
+            try
+            {
+                File.Copy(path, backup, overwrite: true);
+                backupNote = $"Backed it up to {backup}.";
+            }
+            catch (Exception copyEx) when (copyEx is IOException or UnauthorizedAccessException)
+            {
+                backupNote = $"Could not back it up to {backup} ({copyEx.Message}).";
+            }
+
+            throw new InvalidOperationException($"Could not parse {path}. {backupNote} {ex.Message}", ex);
+        }
+    }
+
+    /// <summary>Serializes <paramref name="data"/> as indented JSON and replaces the file at <paramref name="path"/>.</summary>
+    public static void SaveOutput(string path, ScrapeOutput data) =>
+        ReplaceFile(path, JsonSerializer.Serialize(data, WriteOptions));
+
+    /// <summary>
+    /// Reads the URL list: one URL per line, cleaned with <see cref="CleanUrl"/>, empty results
+    /// dropped, and de-duplicated case-insensitively. A missing file yields an empty list.
+    /// </summary>
+    public static List<string> LoadUrls(string path)
+    {
+        if (!File.Exists(path))
+            return new List<string>();
+
+        // CleanUrl cuts everything from the first '#', so a "# comment" line is already reduced to
+        // an empty string here and needs no separate check.
+        return File.ReadAllLines(path)
+            .Select(CleanUrl)
+            .Where(u => u.Length > 0)
+            .Distinct(StringComparer.OrdinalIgnoreCase)
+            .ToList();
+    }
+
+    /// <summary>
+    /// Writes the URLs de-duplicated and sorted (both case-insensitive), one per line with a
+    /// trailing newline, replacing the file at <paramref name="path"/>.
+    /// </summary>
+    public static void SaveUrls(string path, IEnumerable<string> urls)
+    {
+        var sorted = urls
+            .Distinct(StringComparer.OrdinalIgnoreCase)
+            .OrderBy(u => u, StringComparer.OrdinalIgnoreCase);
+        ReplaceFile(path, string.Join(Environment.NewLine, sorted) + Environment.NewLine);
+    }
+
+    /// <summary>Strips the query string and fragment from a URL and trims whitespace; null becomes empty.</summary>
+    public static string CleanUrl(string? url) =>
+        (url ?? string.Empty).Split('?')[0].Split('#')[0].Trim();
+
+    /// <summary>Writes <paramref name="content"/> to <c>path + ".tmp"</c>, then moves it over <paramref name="path"/>.</summary>
+    private static void ReplaceFile(string path, string content)
+    {
+        var tmp = path + ".tmp";
+        File.WriteAllText(tmp, content);
+        File.Move(tmp, path, overwrite: true);
+    }
+}
diff --git a/scripts/Prismatic Data Scraper/Infrastructure/Log.cs b/scripts/Prismatic Data Scraper/Infrastructure/Log.cs
new file mode 100644
index 0000000..6a6df6b
--- /dev/null
+++ b/scripts/Prismatic Data Scraper/Infrastructure/Log.cs
@@ -0,0 +1,49 @@
+namespace PrismaticSync.Infrastructure;
+
+/// <summary>
+/// Tiny timestamped logger with no external dependencies. Every message is echoed to the console
+/// and appended to a log file, so an unattended (Task Scheduler) run still leaves an audit trail.
+/// </summary>
+public static class Log
+{
+    private static string _logFile = "prismatic-sync.log";
+    private static readonly object Gate = new();
+
+    /// <summary>Sets the file that subsequent messages are appended to.</summary>
+    public static void Configure(string logFile)
+    {
+        _logFile = logFile;
+    }
+
+    /// <summary>Logs an informational message (default console color).</summary>
+    public static void Info(string message)
+    {
+        Write("INFO", message);
+    }
+
+    /// <summary>Logs a warning (yellow on the console).</summary>
+    public static void Warn(string message)
+    {
+        Write("WARN", message);
+    }
+
+    /// <summary>Logs an error (red on the console).</summary>
+    public static void Error(string message)
+    {
+        Write("ERROR", message);
+    }
+
+    /// <summary>
+    /// Formats one line as <c>[UTC timestamp] LEVEL message</c> and emits it to the console and the
+    /// log file while holding the gate, so concurrent callers cannot interleave output.
+    /// </summary>
+    private static void Write(string level, string message)
+    {
+        var stamp = DateTime.UtcNow;
+        var entry = $"[{stamp:yyyy-MM-ddTHH:mm:ssZ}] {level,-5} {message}";
+
+        lock (Gate)
+        {
+            // Only warnings and errors get a highlight color; everything else prints as-is.
+            ConsoleColor? highlight = null;
+            if (level == "WARN")
+            {
+                highlight = ConsoleColor.Yellow;
+            }
+            else if (level == "ERROR")
+            {
+                highlight = ConsoleColor.Red;
+            }
+
+            if (highlight.HasValue)
+            {
+                var saved = Console.ForegroundColor;
+                Console.ForegroundColor = highlight.Value;
+                Console.WriteLine(entry);
+                Console.ForegroundColor = saved;
+            }
+            else
+            {
+                Console.WriteLine(entry);
+            }
+
+            // Best effort: a failure to write the log file must never abort the run.
+            try
+            {
+                File.AppendAllText(_logFile, entry + Environment.NewLine);
+            }
+            catch
+            {
+                /* ignore */
+            }
+        }
+    }
+}
diff --git a/scripts/Prismatic Data Scraper/Infrastructure/SyncConfig.cs b/scripts/Prismatic Data Scraper/Infrastructure/SyncConfig.cs
new file mode 100644
index 0000000..04e3020
--- /dev/null
+++ b/scripts/Prismatic Data Scraper/Infrastructure/SyncConfig.cs
@@ -0,0 +1,69 @@
+namespace PrismaticSync.Infrastructure;
+
+/// <summary>Strongly-typed config bound from the "Sync" section of appsettings.json.</summary>
+public class SyncConfig
+{
+    /// <summary>Site root; a trailing slash is tolerated.</summary>
+    public string BaseUrl { get; set; } = "https://www.prismaticpowders.com";
+
+    /// <summary>Path of the color listing relative to <see cref="BaseUrl"/>; a leading slash is optional.</summary>
+    public string ColorsPath { get; set; } = "/shop/powder-coating-colors";
+
+    /// <summary>File holding the discovered product URLs.</summary>
+    public string ProductUrlsFile { get; set; } = "product-urls.txt";
+
+    /// <summary>File holding the scraped output JSON.</summary>
+    public string OutputJsonFile { get; set; } = "prismatic_powders.json";
+
+    /// <summary>File the logger appends to.</summary>
+    public string LogFile { get; set; } = "prismatic-sync.log";
+
+    /// <summary>Politeness delay between product scrapes (randomized within the range).</summary>
+    public int MinDelaySeconds { get; set; } = 6;
+    public int MaxDelaySeconds { get; set; } = 14;
+
+    /// <summary>On a 403/block, cool down this many seconds × the consecutive-block count, then retry.</summary>
+    public int BlockedCooldownSeconds { get; set; } = 120;
+
+    /// <summary>Upper bound on a single cooldown so escalation can't run away.</summary>
+    public int BlockedCooldownMaxSeconds { get; set; } = 600;
+
+    /// <summary>How many times to cool-down-and-retry a blocked product before recording it as an error.</summary>
+    public int BlockedMaxRetries { get; set; } = 3;
+
+    /// <summary>Take a longer rest after this many products (0 disables). Eases load and looks less robotic.</summary>
+    public int LongRestEveryProducts { get; set; } = 150;
+
+    /// <summary>Length of the periodic long rest, in seconds.</summary>
+    public int LongRestSeconds { get; set; } = 45;
+
+    /// <summary>Extra settle time after a product page loads before reading it.</summary>
+    public int PageSettleSeconds { get; set; } = 4;
+
+    /// <summary>Pause after each scroll while a listing lazy-loads more items.</summary>
+    public int ScrollWaitMs { get; set; } = 1500;
+
+    /// <summary>Hard cap on scrolls per listing, as a safety stop.</summary>
+    public int MaxScrolls { get; set; } = 400;
+
+    /// <summary>Full discovery: stop a listing after this many scrolls add no new links.</summary>
+    public int StopAfterNoNewScrolls { get; set; } = 10;
+
+    /// <summary>
+    /// Incremental discovery: stop the newest-first listing after this many consecutive scrolls
+    /// that surfaced only already-known URLs — i.e. we've scrolled past the new products.
+    /// </summary>
+    public int StopAfterKnownScrolls { get; set; } = 8;
+
+    /// <summary>Color filter params used by full discovery.</summary>
+    public string[] ColorParams { get; set; } = Array.Empty<string>();
+
+    /// <summary>Settings for pushing the scraped catalog into the app.</summary>
+    public ImportConfig Import { get; set; } = new();
+
+    /// <summary>
+    /// Absolute URL of the color listing. <see cref="BaseUrl"/> and <see cref="ColorsPath"/> are joined
+    /// with exactly one slash whether or not either side supplies it; an empty path yields the
+    /// base URL without a trailing slash.
+    /// </summary>
+    public string ColorsUrl
+    {
+        get
+        {
+            var root = BaseUrl.TrimEnd('/');
+            var path = ColorsPath.TrimStart('/');
+            return path.Length == 0 ? root : $"{root}/{path}";
+        }
+    }
+}
+
+/// <summary>Destination and credentials used when pushing the scraped catalog into the app.</summary>
+public class ImportConfig
+{
+    /// <summary>Full URL of the app's token-authenticated catalog import endpoint. Empty by default.</summary>
+    public string EndpointUrl { get; set; } = string.Empty;
+
+    /// <summary>Shared secret sent in the X-Import-Token header. Must match the app's config.</summary>
+    public string Token { get; set; } = string.Empty;
+
+    /// <summary>Vendor name applied to every record on import.</summary>
+    public string VendorName { get; set; } = "Prismatic Powders";
+}
diff --git a/scripts/Prismatic Data Scraper/Models/ScrapeModels.cs b/scripts/Prismatic Data Scraper/Models/ScrapeModels.cs
new file mode 100644
index 0000000..f30d5ef
--- /dev/null
+++ b/scripts/Prismatic Data Scraper/Models/ScrapeModels.cs
@@ -0,0 +1,45 @@
+using System.Text.Json.Serialization;
+
+namespace PrismaticSync.Models;
+
+/// <summary>
+/// On-disk scrape output. Shape matches the app's catalog import (a top-level "results" array of
+/// snake_case product records), so the JSON drops straight into the import endpoint. "errors" tracks
+/// failed URLs for resumable re-runs.
+/// </summary>
+public class ScrapeOutput
+{
+    /// <summary>Scraped products, serialized as "results".</summary>
+    [JsonPropertyName("results")] public List<ProductRecord> Results { get; set; } = new();
+
+    /// <summary>URLs that failed to scrape, serialized as "errors".</summary>
+    [JsonPropertyName("errors")] public List<ScrapeError> Errors { get; set; } = new();
+}
+
+/// <summary>
+/// One scraped product, in the import's expected field shape. Every property is serialized under
+/// the snake_case name given by its <c>JsonPropertyName</c> attribute; string fields default to empty.
+/// </summary>
+public class ProductRecord
+{
+    [JsonPropertyName("sku")] public string Sku { get; set; } = "";
+    [JsonPropertyName("color_name")] public string ColorName { get; set; } = "";
+    [JsonPropertyName("description")] public string Description { get; set; } = "";
+
+    /// <summary>Quantity-break price tiers; empty when none were recorded.</summary>
+    [JsonPropertyName("price_tiers")] public List<PriceTier> PriceTiers { get; set; } = new();
+
+    [JsonPropertyName("safety_data_sheet_url")] public string SafetyDataSheetUrl { get; set; } = "";
+    [JsonPropertyName("technical_data_sheet_url")] public string TechnicalDataSheetUrl { get; set; } = "";
+    [JsonPropertyName("application_guide_url")] public string ApplicationGuideUrl { get; set; } = "";
+    [JsonPropertyName("sample_image_url")] public string SampleImageUrl { get; set; } = "";
+    [JsonPropertyName("product_url")] public string ProductUrl { get; set; } = "";
+
+    /// <summary>When this record was scraped.</summary>
+    [JsonPropertyName("scraped_at")] public DateTime ScrapedAt { get; set; }
+}
+
+/// <summary>
+/// One quantity-break price tier, serialized as {min, max, price}. <c>max</c> is null for an
+/// open-ended top tier.
+/// </summary>
+public class PriceTier
+{
+    /// <summary>Lower quantity bound of the tier, if known.</summary>
+    [JsonPropertyName("min")]
+    public int? Min { get; set; }
+
+    /// <summary>Upper quantity bound of the tier; null when the tier has no upper bound.</summary>
+    [JsonPropertyName("max")]
+    public int? Max { get; set; }
+
+    /// <summary>Price that applies within this tier.</summary>
+    [JsonPropertyName("price")]
+    public decimal Price { get; set; }
+}
+
+/// <summary>A product URL whose scrape failed, recorded so resumable runs can skip or retry it.</summary>
+public class ScrapeError
+{
+    /// <summary>The URL that failed.</summary>
+    [JsonPropertyName("product_url")]
+    public string ProductUrl { get; set; } = string.Empty;
+
+    /// <summary>Description of the failure.</summary>
+    [JsonPropertyName("error")]
+    public string Error { get; set; } = string.Empty;
+
+    /// <summary>When the failure was recorded.</summary>
+    [JsonPropertyName("scraped_at")]
+    public DateTime ScrapedAt { get; set; }
+}
diff --git a/scripts/Prismatic Data Scraper/PrismaticSync.csproj b/scripts/Prismatic Data Scraper/PrismaticSync.csproj
new file mode 100644
index 0000000..775228e
--- /dev/null
+++ b/scripts/Prismatic Data Scraper/PrismaticSync.csproj
@@ -0,0 +1,36 @@
+
+
+
+
+ Exe
+ net8.0
+ enable
+ enable
+ PrismaticSync
+ PrismaticSync
+ true
+
+
+
+
+
+
+
+
+
+
+
+ PreserveNewest
+
+
+
+
diff --git a/scripts/Prismatic Data Scraper/Program.cs b/scripts/Prismatic Data Scraper/Program.cs
new file mode 100644
index 0000000..4c5b624
--- /dev/null
+++ b/scripts/Prismatic Data Scraper/Program.cs
@@ -0,0 +1,106 @@
+using Microsoft.Extensions.Configuration;
+using PrismaticSync.Infrastructure;
+using PrismaticSync.Services;
+
+// ── Load config ───────────────────────────────────────────────────────────────
+// appsettings.json is resolved next to the executable and is required.
+var configRoot = new ConfigurationBuilder()
+    .SetBasePath(AppContext.BaseDirectory)
+    .AddJsonFile("appsettings.json", optional: false)
+    .Build();
+
+// Fall back to the built-in defaults when the "Sync" section is absent.
+var config = configRoot.GetSection("Sync").Get<SyncConfig>() ?? new SyncConfig();
+Log.Configure(config.LogFile);
+
+// ── Parse args ────────────────────────────────────────────────────────────────
+// The first argument is the command unless it looks like an option; no command means "run".
+// Ordinal comparison: "--" is a fixed token, not linguistic text.
+var command = args.Length > 0 && !args[0].StartsWith("--", StringComparison.Ordinal)
+    ? args[0].ToLowerInvariant()
+    : "run";
+var headed = args.Contains("--headed");
+var retryErrors = args.Contains("--retry-errors");
+var maxProducts = GetIntArg("--max-products", 0);
+// "run" refreshes products older than 30 days by default; explicit commands default to new-only.
+var refreshOlderThanDays = GetIntArg("--refresh-older-than", command == "run" ? 30 : 0);
+
+Log.Info($"PrismaticSync — command '{command}' (headed={headed}, refreshOlderThan={refreshOlderThanDays}d, maxProducts={maxProducts})");
+
+// Exit code: 0 when the command ran to completion, 1 for an unknown command or an unhandled error.
+try
+{
+    switch (command)
+    {
+        case "discover-new":
+            await WithBrowser(browser => new PrismaticDiscoverer(browser, config).DiscoverNewAsync());
+            break;
+
+        case "discover-full":
+            await WithBrowser(browser => new PrismaticDiscoverer(browser, config).DiscoverFullAsync());
+            break;
+
+        case "scrape":
+            await WithBrowser(browser => new PrismaticScraper(browser, config).ScrapeAsync(refreshOlderThanDays, maxProducts, retryErrors));
+            break;
+
+        case "push":
+            await new CatalogPusher(config).PushAsync();
+            break;
+
+        case "run":
+            // The scheduled default: find new colors, scrape new + stale, then push.
+            // The browser is closed before the push starts.
+            await WithBrowser(async browser =>
+            {
+                await new PrismaticDiscoverer(browser, config).DiscoverNewAsync();
+                await new PrismaticScraper(browser, config).ScrapeAsync(refreshOlderThanDays, maxProducts, retryErrors);
+            });
+            await new CatalogPusher(config).PushAsync();
+            break;
+
+        default:
+            PrintUsage();
+            return 1;
+    }
+
+    Log.Info("Done.");
+    return 0;
+}
+catch (Exception ex)
+{
+    // Log the full exception (type, message, stack) so an unattended run leaves a usable trail.
+    Log.Error($"Fatal: {ex}");
+    return 1;
+}
+
+// ── Helpers ───────────────────────────────────────────────────────────────────
+// Opens a browser session (headed or headless per the --headed flag), runs the given action
+// against it, and disposes the session afterwards — also when the action throws.
+async Task WithBrowser(Func<BrowserSession, Task> action)
+{
+    await using var session = await BrowserSession.CreateAsync(headed);
+    await action(session);
+}
+
+// Reads an integer option written as "name=value" (name matched case-insensitively).
+// Returns the fallback when the option is absent or its value is not a valid integer.
+int GetIntArg(string name, int fallback)
+{
+    var key = name + "=";
+    foreach (var candidate in args)
+    {
+        if (!candidate.StartsWith(key, StringComparison.OrdinalIgnoreCase))
+        {
+            continue;
+        }
+
+        // Only the first matching argument counts, even if it fails to parse.
+        if (int.TryParse(candidate[key.Length..], out var parsed))
+        {
+            return parsed;
+        }
+
+        return fallback;
+    }
+
+    return fallback;
+}
+
+// Prints the command-line help text (commands, options, first-run setup hint) to the console.
+// Called when the command is not recognized; the text is a raw string literal, so its
+// whitespace is emitted exactly as written.
+void PrintUsage()
+{
+ Console.WriteLine(
+ """
+ PrismaticSync — scrape Prismatic Powders and push to the app catalog.
+
+ Usage: PrismaticSync [command] [options]
+
+ Commands:
+ run (default) discover-new + scrape (new + stale) + push
+ discover-new Incremental discovery via newest-first sort (cheap; finds new colors)
+ discover-full Full discovery across all color filters (heavy; reconciles the whole set)
+ scrape Scrape product pages from the URL list (resumable)
+ push Push the scraped JSON to the import endpoint
+
+ Options:
+ --refresh-older-than=N Re-scrape products whose data is older than N days (default 30 for 'run')
+ --max-products=N Cap products scraped this run (0 = no cap)
+ --retry-errors Retry URLs previously recorded as errors
+ --headed Show the browser window (debugging)
+
+ Config: appsettings.json (delays, file paths, import endpoint + token).
+ First run on a new machine: dotnet build, then `pwsh bin/Debug/net8.0/playwright.ps1 install chromium`.
+ """);
+}
diff --git a/scripts/Prismatic Data Scraper/README.md b/scripts/Prismatic Data Scraper/README.md
new file mode 100644
index 0000000..52d9d38
--- /dev/null
+++ b/scripts/Prismatic Data Scraper/README.md
@@ -0,0 +1,82 @@
+# PrismaticSync
+
+A standalone .NET console tool that scrapes the Prismatic Powders catalog and pushes it into the
+Powder Coating Logix catalog import endpoint. It exists because Prismatic has **no API** (unlike
+Columbia Coatings) — so the data has to be scraped via browser automation.
+
+> **Runs on a workstation you control — never on the deployed app server.** Scraping from the cloud
+> app's IP would get blocked and isn't appropriate. This tool is deliberately *not* part of
+> `PowderCoating.sln`; build and run it independently.
+
+## First-time setup (per machine)
+
+```powershell
+cd "scripts/Prismatic Data Scraper"
+dotnet build
+pwsh bin/Debug/net8.0/playwright.ps1 install chromium # one-time browser download
+```
+
+## Commands
+
+```powershell
+dotnet run -- run # default: discover-new + scrape (new + stale >30d) + push
+dotnet run -- discover-new # cheap: find newly-added colors (newest-first, stops at known)
+dotnet run -- discover-full # heavy: crawl all color filters (reconcile whole set / removals)
+dotnet run -- scrape # scrape product pages from product-urls.txt (resumable)
+dotnet run -- scrape --refresh-older-than=30 # also re-scrape products older than 30 days (price changes)
+dotnet run -- push # push prismatic_powders.json to the import endpoint
+```
+
+Options: `--max-products=N`, `--retry-errors`, `--headed` (show the browser for debugging).
+
+Everything streams to the console live (warnings/errors in color) **and** to `prismatic-sync.log`.
+
+## Operating model (suggested cadence)
+
+| Run | Command | Cadence | Why |
+|-----|---------|---------|-----|
+| Find new colors | `run` (discover-new + scrape new and stale >30d + push) | Weekly | Discovery is cheap; Prismatic adds colors often |
+| Price refresh | `scrape --refresh-older-than=30` then `push` | Monthly | Re-scrapes stale products to catch price changes (slow, ~hours) |
+| Full reconcile | `discover-full` then `scrape` | Quarterly | Catches removed/discontinued colors |
+
+A full scrape of ~5,000 products takes hours (polite delays). It saves after every product and is
+fully resumable, so stop/restart any time.
+
+## Politeness / anti-block
+
+Configurable in `appsettings.json`: randomized 6–14s base delay, an escalating **cooldown + retry on
+403** (so a temporary block doesn't get you hard-banned mid-run), and a periodic long rest. Leave
+these conservative — getting blocked is worse than being slow, and Prismatic is a partner.
+
+## Pushing into the app
+
+Set `Sync.Import.EndpointUrl` + `Sync.Import.Token` in `appsettings.json`. The tool POSTs the JSON
+with an `X-Import-Token` header to the app's token-authenticated import endpoint, which runs it
+through the same upsert as the Columbia sync. If the endpoint isn't configured, `push` is skipped and
+you upload `prismatic_powders.json` manually via the Powder Catalog admin page.
+
+> **App-side dependency:** the token-authenticated import endpoint must exist in the web app for
+> unattended push to work. Until then, use the manual upload.
+
+## Scheduling (Windows Task Scheduler)
+
+Point a scheduled task at the published exe (or `dotnet run`). Example weekly task command:
+
+```
+Program/script: C:\Tools\PrismaticSync\PrismaticSync.exe
+Arguments: run
+Start in: C:\Tools\PrismaticSync
+```
+
+Publish a framework-dependent build to drop on the workstation (the command passes `--self-contained false`, so the .NET 8 runtime must be installed there):
+
+```powershell
+dotnet publish -c Release -r win-x64 --self-contained false -o C:\Tools\PrismaticSync
+pwsh C:\Tools\PrismaticSync\playwright.ps1 install chromium
+```
+
+## The long game
+
+This is the interim path. The durable endgame is a real Prismatic **API** (the partnership), at which
+point this tool is replaced by a clean in-app sync like Columbia's — reusing the same upsert,
+propagation, and discontinued handling.
diff --git a/scripts/Prismatic Data Scraper/Services/CatalogPusher.cs b/scripts/Prismatic Data Scraper/Services/CatalogPusher.cs
new file mode 100644
index 0000000..d16b708
--- /dev/null
+++ b/scripts/Prismatic Data Scraper/Services/CatalogPusher.cs
@@ -0,0 +1,63 @@
+using System.Text;
+using PrismaticSync.Infrastructure;
+
+namespace PrismaticSync.Services;
+
+/// <summary>
+/// Pushes the scraped JSON to the app's token-authenticated catalog import endpoint. When no
+/// endpoint is configured it no-ops (the JSON is still on disk for a manual upload), so the tool is
+/// useful before the endpoint exists.
+/// </summary>
+public class CatalogPusher
+{
+    private readonly SyncConfig _config;
+
+    public CatalogPusher(SyncConfig config) => _config = config;
+
+    /// <summary>
+    /// POSTs the output JSON file to the configured endpoint with the <c>X-Import-Token</c> and
+    /// <c>X-Vendor-Name</c> headers.
+    /// </summary>
+    /// <returns>
+    /// True only when the endpoint answered with a success status. False when the push was skipped
+    /// (no endpoint configured or no output file), the endpoint returned a non-success status, or
+    /// the request threw; each case is logged rather than thrown.
+    /// </returns>
+    public async Task<bool> PushAsync()
+    {
+        if (string.IsNullOrWhiteSpace(_config.Import.EndpointUrl))
+        {
+            Log.Warn($"No import endpoint configured (Sync.Import.EndpointUrl) — skipping push. " +
+                     $"Upload {_config.OutputJsonFile} manually via the Powder Catalog admin instead.");
+            return false;
+        }
+
+        if (!File.Exists(_config.OutputJsonFile))
+        {
+            Log.Warn($"Output file {_config.OutputJsonFile} not found — nothing to push.");
+            return false;
+        }
+
+        var json = await File.ReadAllTextAsync(_config.OutputJsonFile);
+        Log.Info($"Pushing {_config.OutputJsonFile} to {_config.Import.EndpointUrl} (vendor: {_config.Import.VendorName})...");
+
+        // The whole catalog goes up in one request, hence the generous timeout.
+        using var http = new HttpClient { Timeout = TimeSpan.FromMinutes(5) };
+        using var request = new HttpRequestMessage(HttpMethod.Post, _config.Import.EndpointUrl);
+        request.Headers.Add("X-Import-Token", _config.Import.Token);
+        request.Headers.Add("X-Vendor-Name", _config.Import.VendorName);
+        request.Content = new StringContent(json, Encoding.UTF8, "application/json");
+
+        try
+        {
+            using var response = await http.SendAsync(request);
+            var body = await response.Content.ReadAsStringAsync();
+
+            if (response.IsSuccessStatusCode)
+            {
+                Log.Info($"Push succeeded ({(int)response.StatusCode}): {Trim(body)}");
+                return true;
+            }
+
+            Log.Error($"Push failed ({(int)response.StatusCode}): {Trim(body)}");
+            return false;
+        }
+        catch (Exception ex)
+        {
+            // Network failures and timeouts are reported as a failed push, not as a crash.
+            Log.Error($"Push error: {ex.Message}");
+            return false;
+        }
+    }
+
+    /// <summary>Caps a response body at 500 characters (plus an ellipsis) for logging.</summary>
+    private static string Trim(string s) => s.Length > 500 ? s[..500] + "…" : s;
+}
diff --git a/scripts/Prismatic Data Scraper/Services/PrismaticDiscoverer.cs b/scripts/Prismatic Data Scraper/Services/PrismaticDiscoverer.cs
new file mode 100644
index 0000000..12e86e0
--- /dev/null
+++ b/scripts/Prismatic Data Scraper/Services/PrismaticDiscoverer.cs
@@ -0,0 +1,138 @@
+using System.Text.RegularExpressions;
+using Microsoft.Playwright;
+using PrismaticSync.Infrastructure;
+
+namespace PrismaticSync.Services;
+
+/// <summary>
+/// Discovers product URLs from the Prismatic color listing (infinite-scroll). Two modes:
+/// incremental (newest-first via <c>?category=created_at</c>, stop once we reach already-known
+/// URLs) for cheap frequent runs, and full (every color filter to the bottom) for occasional
+/// reconciliation. Both append to the URL list file.
+/// </summary>
+public class PrismaticDiscoverer
+{
+    private static readonly Regex ProductUrlRegex =
+        new(@"/shop/powder-coating-colors/[A-Z0-9-]+/", RegexOptions.IgnoreCase | RegexOptions.Compiled);
+
+    private readonly BrowserSession _session;
+    private readonly SyncConfig _config;
+
+    public PrismaticDiscoverer(BrowserSession session, SyncConfig config)
+    {
+        _session = session;
+        _config = config;
+    }
+
+    /// <summary>
+    /// Incremental discovery: crawl the newest-first listing and stop once a run of consecutive
+    /// scrolls surfaces only already-known URLs — meaning we've scrolled past the new products.
+    /// </summary>
+    /// <returns>The count of newly found URLs.</returns>
+    public async Task<int> DiscoverNewAsync()
+    {
+        var known = new HashSet<string>(JsonStore.LoadUrls(_config.ProductUrlsFile), StringComparer.OrdinalIgnoreCase);
+        var startCount = known.Count;
+        Log.Info($"Incremental discovery (newest first). Known URLs: {startCount}");
+
+        await GotoAsync($"{_config.ColorsUrl}?category=created_at");
+
+        var knownStreak = 0;
+        for (var i = 0; i < _config.MaxScrolls; i++)
+        {
+            var addedNew = await CollectAndSaveAsync(known);
+
+            // Any new link resets the streak; only an unbroken run of "nothing new" stops the crawl.
+            knownStreak = addedNew == 0 ? knownStreak + 1 : 0;
+            Log.Info($"Scroll {i + 1}: +{addedNew} new, total {known.Count}, known-streak {knownStreak}");
+
+            if (knownStreak >= _config.StopAfterKnownScrolls)
+            {
+                Log.Info("Reached known territory — stopping incremental discovery.");
+                break;
+            }
+
+            await ScrollAsync();
+        }
+
+        var newCount = known.Count - startCount;
+        Log.Info($"Incremental discovery done. New URLs: {newCount}; total {known.Count}");
+        return newCount;
+    }
+
+    /// <summary>
+    /// Full discovery: crawl every color filter to the bottom. Heavier — use occasionally to
+    /// reconcile the whole set. A filter that throws is logged as a warning and skipped so the
+    /// remaining filters still run.
+    /// </summary>
+    /// <returns>The count of newly found URLs.</returns>
+    public async Task<int> DiscoverFullAsync()
+    {
+        var known = new HashSet<string>(JsonStore.LoadUrls(_config.ProductUrlsFile), StringComparer.OrdinalIgnoreCase);
+        var startCount = known.Count;
+        Log.Info($"Full discovery across {_config.ColorParams.Length} color filters. Known URLs: {startCount}");
+
+        foreach (var color in _config.ColorParams)
+        {
+            Log.Info($"Color filter: {color}");
+            try
+            {
+                await GotoAsync($"{_config.ColorsUrl}?color={Uri.EscapeDataString(color)}");
+
+                var noNew = 0;
+                for (var i = 0; i < _config.MaxScrolls; i++)
+                {
+                    var added = await CollectAndSaveAsync(known);
+
+                    noNew = added == 0 ? noNew + 1 : 0;
+                    if (noNew >= _config.StopAfterNoNewScrolls)
+                        break;
+
+                    await ScrollAsync();
+                }
+
+                Log.Info($"Color {color} done. Total {known.Count}");
+
+                // Short pause between filters.
+                await _session.Page.WaitForTimeoutAsync(3000);
+            }
+            catch (Exception ex)
+            {
+                Log.Warn($"Color {color} failed: {ex.Message}");
+            }
+        }
+
+        var newCount = known.Count - startCount;
+        Log.Info($"Full discovery done. New this run: {newCount}; total {known.Count}");
+        return newCount;
+    }
+
+    /// <summary>
+    /// Adds the product links currently on the page to <paramref name="known"/> and rewrites the URL
+    /// list file on every call, so progress survives an interrupted run.
+    /// </summary>
+    /// <returns>How many of the collected links were not already known.</returns>
+    private async Task<int> CollectAndSaveAsync(HashSet<string> known)
+    {
+        var added = 0;
+        foreach (var link in await CollectProductLinksAsync())
+        {
+            if (known.Add(link)) added++;
+        }
+
+        JsonStore.SaveUrls(_config.ProductUrlsFile, known);
+        return added;
+    }
+
+    /// <summary>Navigates to <paramref name="url"/> (60s timeout) and waits the configured settle time.</summary>
+    private async Task GotoAsync(string url)
+    {
+        await _session.Page.GotoAsync(url, new PageGotoOptions
+        {
+            WaitUntil = WaitUntilState.DOMContentLoaded,
+            Timeout = 60000
+        });
+        await _session.Page.WaitForTimeoutAsync(_config.PageSettleSeconds * 1000);
+    }
+
+    /// <summary>Scrolls down with the mouse wheel, then waits for the listing to lazy-load more items.</summary>
+    private async Task ScrollAsync()
+    {
+        await _session.Page.Mouse.WheelAsync(0, 2500);
+        await _session.Page.WaitForTimeoutAsync(_config.ScrollWaitMs);
+    }
+
+    /// <summary>
+    /// Reads every anchor href on the page and keeps those matching the product URL pattern,
+    /// cleaned of query string and fragment.
+    /// </summary>
+    private async Task<List<string>> CollectProductLinksAsync()
+    {
+        var hrefs = await _session.Page.EvalOnSelectorAllAsync<string[]>(
+            "a", "els => els.map(a => a.href).filter(Boolean)");
+
+        return hrefs
+            .Where(h => ProductUrlRegex.IsMatch(h))
+            .Select(JsonStore.CleanUrl)
+            .Where(u => u.Length > 0)
+            .ToList();
+    }
+}
diff --git a/scripts/Prismatic Data Scraper/Services/PrismaticScraper.cs b/scripts/Prismatic Data Scraper/Services/PrismaticScraper.cs
new file mode 100644
index 0000000..38b24d9
--- /dev/null
+++ b/scripts/Prismatic Data Scraper/Services/PrismaticScraper.cs
@@ -0,0 +1,295 @@
+using System.Diagnostics;
+using System.Globalization;
+using System.Text.RegularExpressions;
+using Microsoft.Playwright;
+using PrismaticSync.Infrastructure;
+using PrismaticSync.Models;
+
+namespace PrismaticSync.Services;
+
+/// <summary>
+/// Scrapes individual Prismatic product pages into <see cref="ProductRecord"/>s. Resumable (skips
+/// already-scraped URLs, optionally retries past errors) and supports a refresh window so stale
+/// records get re-scraped to catch price changes. Saves after every product so a long run can be
+/// stopped and resumed safely, and logs continuously — including the delay between products — so a
+/// manual run always shows it's alive.
+/// </summary>
+public class PrismaticScraper
+{
+ // Matches a product-page path: /shop/powder-coating-colors/<letters, digits, hyphens>/ (case-insensitive).
+ private static readonly Regex ProductUrlRegex =
+ new(@"/shop/powder-coating-colors/[A-Z0-9-]+/", RegexOptions.IgnoreCase | RegexOptions.Compiled);
+ // Captures the code that follows an "Item:" label in the page text.
+ private static readonly Regex SkuRegex =
+ new(@"Item:\s*([A-Z0-9-]+)", RegexOptions.IgnoreCase | RegexOptions.Compiled);
+ // Captures the text after "Description:" up to "WARNING:", "What does this match?", or end of input.
+ private static readonly Regex DescRegex =
+ new(@"Description:\s*(.*?)(WARNING:|What does this match\?|$)", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled);
+ // Captures a weight tier ("N - M lbs" or "N + lbs") followed by its "$" price (digits and dots only).
+ private static readonly Regex PriceTierRegex =
+ new(@"(\d+\s*-\s*\d+\s*lbs|\d+\s*\+\s*lbs)\s*\$([\d.]+)", RegexOptions.IgnoreCase | RegexOptions.Compiled);
+ // Splits a tier label into its bounds: "N - M" (both bounds) or "N +" (lower bound only).
+ private static readonly Regex RangeRegex = new(@"(\d+)\s*-\s*(\d+)", RegexOptions.Compiled);
+ private static readonly Regex PlusRegex = new(@"(\d+)\s*\+", RegexOptions.Compiled);
+ // Any run of whitespace; used by Clean to collapse it to a single space.
+ private static readonly Regex WhitespaceRegex = new(@"\s+", RegexOptions.Compiled);
+
+ // Browser session whose page is used for every navigation and DOM query.
+ private readonly BrowserSession _session;
+ // Settings read for file paths, delays, and retry/cooldown limits.
+ private readonly SyncConfig _config;
+ // Source of the randomized delay between products (see RandomDelayMs).
+ private readonly Random _random = new();
+
+ /// <summary>Creates a scraper that works through the given browser session using the given settings.</summary>
+ /// <param name="session">Browser session whose page is used for all navigation.</param>
+ /// <param name="config">Sync settings (file paths, delays, retry limits).</param>
+ public PrismaticScraper(BrowserSession session, SyncConfig config) =>
+     (_session, _config) = (session, config);
+
+ /// <summary>
+ /// Scrapes products needing work: those not yet scraped, plus (when <paramref name="refreshOlderThanDays"/>
+ /// &gt; 0) any whose data is older than that window. Returns (scraped, errors).
+ /// </summary>
+ /// <param name="refreshOlderThanDays">Re-scrape records older than this many days; 0 or less disables refreshing.</param>
+ /// <param name="maxProducts">Maximum number of products to process this run; 0 or less means no limit.</param>
+ /// <param name="retryErrors">When true, URLs that only have an error on file are attempted again.</param>
+ /// <returns>How many products were saved and how many failed during this run.</returns>
+ public async Task<(int Scraped, int Errors)> ScrapeAsync(int refreshOlderThanDays, int maxProducts, bool retryErrors)
+ {
+     var allUrls = JsonStore.LoadUrls(_config.ProductUrlsFile)
+         .Where(u => ProductUrlRegex.IsMatch(u))
+         .ToList();
+
+     var data = JsonStore.LoadOutput(_config.OutputJsonFile);
+
+     // Index existing results by URL (keep the most recent if the file has dupes).
+     var resultByUrl = data.Results
+         .GroupBy(r => JsonStore.CleanUrl(r.ProductUrl), StringComparer.OrdinalIgnoreCase)
+         .ToDictionary(g => g.Key, g => g.OrderByDescending(r => r.ScrapedAt).First(), StringComparer.OrdinalIgnoreCase);
+
+     var errorUrls = new HashSet<string>(
+         data.Errors.Select(e => JsonStore.CleanUrl(e.ProductUrl)), StringComparer.OrdinalIgnoreCase);
+
+     var staleCutoff = DateTime.UtcNow.AddDays(-Math.Max(0, refreshOlderThanDays));
+
+     var toScrape = new List<string>();
+     foreach (var url in allUrls)
+     {
+         if (resultByUrl.TryGetValue(url, out var existing))
+         {
+             if (refreshOlderThanDays > 0 && existing.ScrapedAt < staleCutoff)
+                 toScrape.Add(url); // stale → refresh for price changes
+         }
+         else
+         {
+             if (retryErrors || !errorUrls.Contains(url))
+                 toScrape.Add(url); // never scraped (skip known errors unless retrying)
+         }
+     }
+
+     if (maxProducts > 0)
+         toScrape = toScrape.Take(maxProducts).ToList();
+
+     var total = toScrape.Count;
+     Log.Info($"URLs: {allUrls.Count}; already scraped: {resultByUrl.Count}; errors on file: {errorUrls.Count}");
+     Log.Info($"To scrape this run: {total} (refresh older than {refreshOlderThanDays}d, retry errors: {retryErrors})");
+
+     if (total == 0)
+     {
+         Log.Info("Nothing to scrape. Done.");
+         return (0, 0);
+     }
+
+     // Rough estimate only: average inter-product delay + settle time + ~2 s per page.
+     // Cooldowns and long rests are not included.
+     var avgDelaySec = (_config.MinDelaySeconds + _config.MaxDelaySeconds) / 2.0;
+     var etaMinutes = total * (avgDelaySec + _config.PageSettleSeconds + 2) / 60.0;
+     Log.Info($"Estimated run time: ~{FormatDuration(TimeSpan.FromMinutes(etaMinutes))} " +
+         $"(grab a coffee if that's a while — it saves after every product and is resumable).");
+
+     var stopwatch = Stopwatch.StartNew();
+     int scraped = 0, errors = 0, index = 0, consecutiveBlocks = 0;
+
+     foreach (var url in toScrape)
+     {
+         index++;
+
+         // Retry loop for the current product; exits via break on success or on a recorded error.
+         for (var attempt = 1; ; attempt++)
+         {
+             try
+             {
+                 var row = await ParseProductAsync(url, index, total);
+
+                 // Replace the existing record in place when refreshing, otherwise append.
+                 if (resultByUrl.TryGetValue(url, out var existing))
+                     data.Results[data.Results.IndexOf(existing)] = row;
+                 else
+                     data.Results.Add(row);
+
+                 resultByUrl[url] = row;
+                 data.Errors.RemoveAll(e => JsonStore.CleanUrl(e.ProductUrl).Equals(url, StringComparison.OrdinalIgnoreCase));
+
+                 scraped++;
+                 consecutiveBlocks = 0;
+                 JsonStore.SaveOutput(_config.OutputJsonFile, data);
+
+                 var basePrice = row.PriceTiers.Count > 0 ? row.PriceTiers.Min(t => t.Price) : 0m;
+                 Log.Info($"[{index}/{total}] Saved {row.Sku} \"{row.ColorName}\" " +
+                     $"({row.PriceTiers.Count} tier(s), base ${basePrice:0.00}) | elapsed {FormatDuration(stopwatch.Elapsed)}");
+                 break;
+             }
+             catch (Exception ex) when (IsBlocked(ex) && attempt <= _config.BlockedMaxRetries)
+             {
+                 // Site pushed back — back off (escalating) and retry the SAME product rather
+                 // than barreling on, which is how an unattended run gets hard-banned.
+                 consecutiveBlocks++;
+                 var cooldown = Math.Min(_config.BlockedCooldownSeconds * consecutiveBlocks, _config.BlockedCooldownMaxSeconds);
+                 Log.Warn($"[{index}/{total}] Blocked (403), attempt {attempt}. Cooling down {cooldown}s, then retrying this product...");
+                 await Task.Delay(cooldown * 1000);
+             }
+             catch (Exception ex)
+             {
+                 // Keep at most one error entry per URL: drop any earlier failure for this
+                 // product before recording the new one, so repeated retries do not pile up
+                 // duplicate entries in the output file.
+                 data.Errors.RemoveAll(e => JsonStore.CleanUrl(e.ProductUrl).Equals(url, StringComparison.OrdinalIgnoreCase));
+                 data.Errors.Add(new ScrapeError { ProductUrl = url, Error = ex.Message, ScrapedAt = DateTime.UtcNow });
+                 JsonStore.SaveOutput(_config.OutputJsonFile, data);
+                 errors++;
+                 Log.Error($"[{index}/{total}] {url} -> {ex.Message}");
+                 break;
+             }
+         }
+
+         // Periodic longer rest — eases server load and avoids a robotic, evenly-spaced cadence.
+         if (_config.LongRestEveryProducts > 0 && index % _config.LongRestEveryProducts == 0 && index < total)
+         {
+             Log.Info($"Resting {_config.LongRestSeconds}s after {index} products...");
+             await Task.Delay(_config.LongRestSeconds * 1000);
+         }
+
+         // Randomized pause between products; skipped after the last one.
+         if (index < total)
+         {
+             var delayMs = RandomDelayMs();
+             Log.Info($"[{index}/{total}] Waiting {delayMs / 1000.0:0.0}s before next product...");
+             await Task.Delay(delayMs);
+         }
+     }
+
+     Log.Info($"Scrape complete. Scraped {scraped}, errors {errors}. Total results on file: {data.Results.Count}. " +
+         $"Took {FormatDuration(stopwatch.Elapsed)}.");
+     return (scraped, errors);
+ }
+
+ /// <summary>
+ /// Opens one product page and extracts its fields from the page title, body text, links, and images.
+ /// </summary>
+ /// <param name="url">Product page URL to load.</param>
+ /// <param name="index">1-based position of this product in the current run (used for logging only).</param>
+ /// <param name="total">Number of products in the current run (used for logging only).</param>
+ /// <returns>A record populated from the page, stamped with the current UTC time.</returns>
+ /// <exception cref="InvalidOperationException">
+ /// The site answered 403 or 404 (by status code or page title), or neither a SKU nor an
+ /// <c>h1</c> heading could be found.
+ /// </exception>
+ private async Task<ProductRecord> ParseProductAsync(string url, int index, int total)
+ {
+     Log.Info($"[{index}/{total}] Scraping {url}");
+
+     var response = await _session.Page.GotoAsync(url, new PageGotoOptions
+     {
+         WaitUntil = WaitUntilState.DOMContentLoaded,
+         Timeout = 60000
+     });
+     await _session.Page.WaitForTimeoutAsync(_config.PageSettleSeconds * 1000);
+
+     // A null response is treated as "status unknown" (0) and falls through to the title checks.
+     var status = response?.Status ?? 0;
+     var title = Clean(await SafeTextAsync(() => _session.Page.TitleAsync()));
+     var plainText = Clean(await SafeTextAsync(() => _session.Page.Locator("body").InnerTextAsync()));
+
+     // The "403" text in this message is what IsBlocked keys on; keep it unchanged.
+     if (status == 403 || Regex.IsMatch(title, @"^403 Forbidden$", RegexOptions.IgnoreCase))
+         throw new InvalidOperationException("403 Forbidden returned by site.");
+
+     // Word boundaries keep "404" from matching inside a longer digit run in the title
+     // (e.g. "4040"), which would misreport a valid page as missing.
+     if (status == 404 || Regex.IsMatch(title, @"\b404\b|Page Not Found", RegexOptions.IgnoreCase))
+         throw new InvalidOperationException("404 Not Found returned by site.");
+
+     var colorName = Clean(await SafeTextAsync(() => _session.Page.Locator("h1").First.InnerTextAsync()));
+
+     var skuMatch = SkuRegex.Match(plainText);
+     var sku = skuMatch.Success ? skuMatch.Groups[1].Value : "";
+     if (string.IsNullOrEmpty(sku) && string.IsNullOrEmpty(colorName))
+         throw new InvalidOperationException("Could not find SKU or title on product page.");
+
+     var descMatch = DescRegex.Match(plainText);
+     var description = descMatch.Success ? Clean(descMatch.Groups[1].Value) : "";
+
+     return new ProductRecord
+     {
+         Sku = sku,
+         ColorName = colorName,
+         Description = description,
+         PriceTiers = ParsePriceTiers(plainText),
+         SafetyDataSheetUrl = await GetLinkByTextAsync(new[] { "Safety Data Sheet", @"\bSDS\b" }),
+         TechnicalDataSheetUrl = await GetLinkByTextAsync(new[] { "Tech Data Sheet", "Technical Data Sheet", @"\bTDS\b" }),
+         ApplicationGuideUrl = await GetLinkByTextAsync(new[] { "Application Guide" }),
+         SampleImageUrl = await GetSampleImageUrlAsync(),
+         ProductUrl = url,
+         ScrapedAt = DateTime.UtcNow
+     };
+ }
+
+ /// <summary>
+ /// Extracts every weight-tier price found by <c>PriceTierRegex</c> in <paramref name="text"/>.
+ /// </summary>
+ /// <param name="text">Whitespace-normalized page text to search.</param>
+ /// <returns>
+ /// One entry per match whose price parses as a decimal, in order of appearance. "N - M lbs"
+ /// yields Min = N and Max = M; "N + lbs" yields Min = N and Max = null.
+ /// </returns>
+ private static List<PriceTier> ParsePriceTiers(string text)
+ {
+     var tiers = new List<PriceTier>();
+     foreach (Match m in PriceTierRegex.Matches(text))
+     {
+         // The price capture is greedy over digits and dots, so a sentence-ending period
+         // right after the amount ("$12.50.") gets captured too; strip it so the tier is
+         // not dropped by a failed parse.
+         var priceText = m.Groups[2].Value.TrimEnd('.');
+         if (!decimal.TryParse(priceText, NumberStyles.Any, CultureInfo.InvariantCulture, out var price))
+             continue;
+
+         var rangeText = Clean(m.Groups[1].Value);
+         int? min = null, max = null;
+
+         var range = RangeRegex.Match(rangeText);
+         if (range.Success)
+         {
+             min = int.Parse(range.Groups[1].Value, CultureInfo.InvariantCulture);
+             max = int.Parse(range.Groups[2].Value, CultureInfo.InvariantCulture);
+         }
+
+         // An open-ended tier has a lower bound only.
+         var plus = PlusRegex.Match(rangeText);
+         if (plus.Success)
+         {
+             min = int.Parse(plus.Groups[1].Value, CultureInfo.InvariantCulture);
+             max = null;
+         }
+
+         tiers.Add(new PriceTier { Min = min, Max = max, Price = price });
+     }
+     return tiers;
+ }
+
+ /// <summary>
+ /// Returns the href of the first link whose text matches any pattern. Uses a single eval
+ /// returning "text + U+0001 + href" strings to avoid object deserialization quirks.
+ /// </summary>
+ /// <param name="patterns">Regular expressions tested case-insensitively against each link's visible text.</param>
+ /// <returns>The matching link's href, or an empty string when no link matches.</returns>
+ private async Task<string> GetLinkByTextAsync(string[] patterns)
+ {
+     // Must be the same character the script below inserts with String.fromCharCode(1).
+     const char separator = '\u0001';
+
+     var combined = await _session.Page.EvalOnSelectorAllAsync<string[]>(
+         "a",
+         "els => els.map(a => ((a.innerText || a.textContent || '').replace(/\\s+/g, ' ').trim()) " +
+         "+ String.fromCharCode(1) + (a.href || ''))");
+
+     foreach (var entry in combined)
+     {
+         // Split on the LAST separator: the href is appended after it, so everything
+         // before belongs to the link text even if that text itself contains U+0001.
+         var cut = entry.LastIndexOf(separator);
+         if (cut < 0)
+             continue;
+
+         var text = entry[..cut];
+         var href = entry[(cut + 1)..];
+         if (href.Length > 0 && patterns.Any(p => Regex.IsMatch(text, p, RegexOptions.IgnoreCase)))
+             return href;
+     }
+     return "";
+ }
+
+ /// <summary>
+ /// Picks a sample image URL from the page's <c>img</c> elements, in order of preference:
+ /// an images.nicindustries.com source that is not a thumbnail, then any
+ /// images.nicindustries.com source, then any source containing "prismatic", "powder" or "color".
+ /// </summary>
+ /// <returns>The chosen image URL, or an empty string when nothing qualifies.</returns>
+ private async Task<string> GetSampleImageUrlAsync()
+ {
+     // Each image contributes its first non-empty source; images with none are filtered out.
+     var srcs = await _session.Page.EvalOnSelectorAllAsync<string[]>(
+         "img",
+         "els => els.map(i => i.currentSrc || i.src || i.getAttribute('src') || i.getAttribute('data-src') || '')" +
+         ".filter(Boolean)");
+
+     return srcs.FirstOrDefault(s => Regex.IsMatch(s, @"images\.nicindustries\.com", RegexOptions.IgnoreCase)
+                                     && !Regex.IsMatch(s, "thumbnail", RegexOptions.IgnoreCase))
+            ?? srcs.FirstOrDefault(s => Regex.IsMatch(s, @"images\.nicindustries\.com", RegexOptions.IgnoreCase))
+            ?? srcs.FirstOrDefault(s => Regex.IsMatch(s, "prismatic|powder|color", RegexOptions.IgnoreCase))
+            ?? "";
+ }
+
+ /// <summary>
+ /// Tells whether <paramref name="ex"/> is the "403 Forbidden" failure raised when the site
+ /// blocks a product page.
+ /// </summary>
+ /// <remarks>
+ /// Matches the phrase "403 Forbidden" rather than a bare "403", so an unrelated error whose
+ /// message merely contains those digits (for instance inside a URL) is not treated as a block
+ /// and does not trigger a cooldown.
+ /// </remarks>
+ private static bool IsBlocked(Exception ex) =>
+     ex.Message.Contains("403 Forbidden", StringComparison.OrdinalIgnoreCase);
+
+ /// <summary>
+ /// Awaits <paramref name="fn"/> and returns its text, or an empty string if it throws.
+ /// </summary>
+ /// <param name="fn">Asynchronous text lookup to attempt.</param>
+ /// <returns>The text produced by <paramref name="fn"/>, or "" on any exception.</returns>
+ private static async Task<string> SafeTextAsync(Func<Task<string>> fn)
+ {
+     // Deliberately best-effort: any failure is reported to the caller as an empty string.
+     try { return await fn(); } catch { return ""; }
+ }
+
+ /// <summary>
+ /// Collapses every run of whitespace in <paramref name="text"/> to a single space and trims
+ /// the ends; a null input is treated as an empty string.
+ /// </summary>
+ private static string Clean(string? text)
+ {
+     var raw = text ?? "";
+     var collapsed = WhitespaceRegex.Replace(raw, " ");
+     return collapsed.Trim();
+ }
+
+ /// <summary>
+ /// Picks a random delay in milliseconds between the configured minimum and maximum
+ /// (both inclusive). A negative minimum is treated as 0, and a maximum below the minimum
+ /// is raised to the minimum.
+ /// </summary>
+ private int RandomDelayMs()
+ {
+     var lowerMs = _config.MinDelaySeconds * 1000;
+     if (lowerMs < 0)
+     {
+         lowerMs = 0;
+     }
+
+     var upperMs = _config.MaxDelaySeconds * 1000;
+     if (upperMs < lowerMs)
+     {
+         upperMs = lowerMs;
+     }
+
+     // Random.Next's upper bound is exclusive, hence the +1.
+     return _random.Next(lowerMs, upperMs + 1);
+ }
+
+ /// <summary>
+ /// Formats a duration for log output: "Xh Ym" from one hour up, "Xm Ys" from one minute up,
+ /// otherwise "Ys".
+ /// </summary>
+ private static string FormatDuration(TimeSpan t)
+ {
+     if (t.TotalHours >= 1)
+     {
+         // Whole hours come from TotalHours so durations past 24 h keep counting up.
+         var wholeHours = (int)t.TotalHours;
+         return $"{wholeHours}h {t.Minutes}m";
+     }
+
+     if (t.TotalMinutes >= 1)
+     {
+         var wholeMinutes = (int)t.TotalMinutes;
+         return $"{wholeMinutes}m {t.Seconds}s";
+     }
+
+     return $"{t.Seconds}s";
+ }
+}
diff --git a/scripts/Prismatic Data Scraper/appsettings.json b/scripts/Prismatic Data Scraper/appsettings.json
new file mode 100644
index 0000000..90062bf
--- /dev/null
+++ b/scripts/Prismatic Data Scraper/appsettings.json
@@ -0,0 +1,38 @@
+{
+ "Sync": {
+ "BaseUrl": "https://www.prismaticpowders.com",
+ "ColorsPath": "/shop/powder-coating-colors",
+
+ "ProductUrlsFile": "product-urls.txt",
+ "OutputJsonFile": "prismatic_powders.json",
+ "LogFile": "prismatic-sync.log",
+
+ "MinDelaySeconds": 6,
+ "MaxDelaySeconds": 14,
+ "PageSettleSeconds": 4,
+
+ "BlockedCooldownSeconds": 120,
+ "BlockedCooldownMaxSeconds": 600,
+ "BlockedMaxRetries": 3,
+ "LongRestEveryProducts": 150,
+ "LongRestSeconds": 45,
+
+ "ScrollWaitMs": 1500,
+ "MaxScrolls": 400,
+ "StopAfterNoNewScrolls": 10,
+ "StopAfterKnownScrolls": 8,
+
+ "ColorParams": [
+ "pris_black", "pris_blue", "pris_bronze", "pris_brown", "pris_clear",
+ "pris_copper", "pris_gold", "pris_gray", "pris_green", "pris_orange",
+ "pris_pink", "pris_purple", "pris_red", "pris_silver", "pris_tan",
+ "pris_white", "pris_yellow"
+ ],
+
+ "Import": {
+ "EndpointUrl": "",
+ "Token": "",
+ "VendorName": "Prismatic Powders"
+ }
+ }
+}