c59d55529f
Standalone .NET 8 console app (not part of the main solution) that scrapes the Prismatic Powders catalog via Playwright and pushes it into the app's catalog import. Prismatic has no API, so this runs on a workstation (Task Scheduler), never the deployed server. - Discovery: incremental newest-first via ?category=created_at (stops once it reaches already-known URLs — cheap, finds new colors) and a full all-colors crawl for occasional reconcile. - Scraper: resumable product-page scrape (sku/color/description/price tiers/ SDS/TDS/app-guide/image), with --refresh-older-than to re-scrape stale products and catch price changes. Output matches the app import format so it flows through the same shared upsert as the Columbia sync. - Resilience: brisk randomized base delay, escalating 403 cooldown-and-retry to avoid hard bans, periodic rest. All configurable. - Visibility: streams every product + the inter-product wait to the console (colored) and a log file, with an up-front ETA. - Push: token-authenticated POST to the app import endpoint (skips to manual upload when unconfigured). The app-side token import endpoint is a separate follow-up. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
107 lines
4.3 KiB
C#
107 lines
4.3 KiB
C#
using Microsoft.Extensions.Configuration;
|
|
using PrismaticSync.Infrastructure;
|
|
using PrismaticSync.Services;
|
|
|
|
// ── Load config ───────────────────────────────────────────────────────────────
// appsettings.json is mandatory (optional: false) and is resolved relative to
// AppContext.BaseDirectory.
IConfigurationBuilder configBuilder = new ConfigurationBuilder();
configBuilder.SetBasePath(AppContext.BaseDirectory);
configBuilder.AddJsonFile("appsettings.json", optional: false);
var configRoot = configBuilder.Build();

// Bind the "Sync" section; use a default-constructed SyncConfig when binding yields null.
var syncSection = configRoot.GetSection("Sync");
var config = syncSection.Get<SyncConfig>() ?? new SyncConfig();

// Logging is configured before anything else writes a message.
Log.Configure(config.LogFile);
|
|
|
|
// ── Parse args ────────────────────────────────────────────────────────────────
// Bare flags are matched case-insensitively, the same way GetIntArg matches
// "--name=value" options, so "--Headed" and "--headed" behave alike.
bool HasFlag(string name) => args.Contains(name, StringComparer.OrdinalIgnoreCase);

// "--help" starts with "--", so without special handling it would select the default
// "run" command and launch a full discover/scrape/push. Map it to a command name the
// dispatcher does not recognise so its fallback branch prints the usage text instead.
var helpRequested = HasFlag("--help");

// The command is the first argument unless that argument is an option (or absent),
// in which case the scheduled default "run" applies. Ordinal comparison: "--" is a
// literal marker, not linguistic text.
var firstIsOption = args.Length == 0 || args[0].StartsWith("--", StringComparison.Ordinal);
var command = helpRequested ? "help"
    : firstIsOption ? "run"
    : args[0].ToLowerInvariant();

var headed = HasFlag("--headed");
var retryErrors = HasFlag("--retry-errors");
var maxProducts = GetIntArg("--max-products", 0);

// "run" refreshes products older than 30 days by default; explicit commands default to new-only.
var refreshOlderThanDays = GetIntArg("--refresh-older-than", command == "run" ? 30 : 0);

Log.Info($"PrismaticSync — command '{command}' (headed={headed}, refreshOlderThan={refreshOlderThanDays}d, maxProducts={maxProducts})");
|
|
|
|
// Dispatch the selected command. The process exit code is 0 when the command completes,
// and 1 when the command is not recognised or an exception escapes from it.
try
{
    // Pipeline stages that need a browser session; shared by the single-stage commands
    // and by "run".
    Func<BrowserSession, Task> discoverNew =
        browser => new PrismaticDiscoverer(browser, config).DiscoverNewAsync();
    Func<BrowserSession, Task> discoverFull =
        browser => new PrismaticDiscoverer(browser, config).DiscoverFullAsync();
    Func<BrowserSession, Task> scrapeStage =
        browser => new PrismaticScraper(browser, config).ScrapeAsync(refreshOlderThanDays, maxProducts, retryErrors);

    switch (command)
    {
        case "discover-new":
            await WithBrowser(discoverNew);
            break;

        case "discover-full":
            await WithBrowser(discoverFull);
            break;

        case "scrape":
            await WithBrowser(scrapeStage);
            break;

        case "push":
            await new CatalogPusher(config).PushAsync();
            break;

        case "run":
            // The scheduled default: find new colors, scrape new + stale, then push.
            // Discovery and scraping share one browser session; the push runs after
            // that session has been disposed.
            await WithBrowser(async browser =>
            {
                await discoverNew(browser);
                await scrapeStage(browser);
            });
            await new CatalogPusher(config).PushAsync();
            break;

        default:
            // Unknown command: show help and exit non-zero without logging "Done.".
            PrintUsage();
            return 1;
    }

    Log.Info("Done.");
    return 0;
}
catch (Exception failure)
{
    // Last-chance handler: log the full exception text and exit non-zero.
    Log.Error($"Fatal: {failure}");
    return 1;
}
|
|
|
|
// ── Helpers ───────────────────────────────────────────────────────────────────
|
|
// Creates a BrowserSession (passing the `headed` flag to BrowserSession.CreateAsync),
// runs `action` against it, and asynchronously disposes the session afterwards,
// including when `action` throws.
async Task WithBrowser(Func<BrowserSession, Task> action)
{
    await using (var browser = await BrowserSession.CreateAsync(headed))
    {
        await action(browser);
    }
}
|
|
|
|
// Reads an integer option written as "--name=value" from the command-line arguments.
//   name     - option name including the leading dashes, e.g. "--max-products";
//              matched case-insensitively.
//   fallback - value returned when the option is absent or its value is not an integer.
// The first matching argument wins. The value is parsed with the invariant culture so the
// result does not depend on the machine's regional settings. A malformed value is reported
// on stderr rather than silently replaced, because a silent fallback can change the run
// (for "--max-products" the fallback 0 means "no cap").
int GetIntArg(string name, int fallback)
{
    var prefix = name + "=";
    var match = args.FirstOrDefault(a => a.StartsWith(prefix, StringComparison.OrdinalIgnoreCase));
    if (match is null)
    {
        return fallback;
    }

    var rawValue = match[prefix.Length..];
    if (int.TryParse(
            rawValue,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out var value))
    {
        return value;
    }

    Console.Error.WriteLine($"Ignoring invalid value '{rawValue}' for {name}; using {fallback}.");
    return fallback;
}
|
|
|
|
// Writes the command-line help text (commands, options, and first-run setup note)
// to standard output.
void PrintUsage()
{
    const string usage =
        """
        PrismaticSync — scrape Prismatic Powders and push to the app catalog.

        Usage: PrismaticSync [command] [options]

        Commands:
        run (default) discover-new + scrape (new + stale) + push
        discover-new Incremental discovery via newest-first sort (cheap; finds new colors)
        discover-full Full discovery across all color filters (heavy; reconciles the whole set)
        scrape Scrape product pages from the URL list (resumable)
        push Push the scraped JSON to the import endpoint

        Options:
        --refresh-older-than=N Re-scrape products whose data is older than N days (default 30 for 'run')
        --max-products=N Cap products scraped this run (0 = no cap)
        --retry-errors Retry URLs previously recorded as errors
        --headed Show the browser window (debugging)

        Config: appsettings.json (delays, file paths, import endpoint + token).
        First run on a new machine: dotnet build, then `pwsh bin/Debug/net8.0/playwright.ps1 install chromium`.
        """;

    Console.WriteLine(usage);
}
|