Add PrismaticSync console tool for unattended Prismatic catalog sync
Standalone .NET 8 console app (not part of the main solution) that scrapes the Prismatic Powders catalog via Playwright and pushes it into the app's catalog import. Prismatic has no API, so this runs on a workstation (Task Scheduler), never the deployed server. - Discovery: incremental newest-first via ?category=created_at (stops once it reaches already-known URLs — cheap, finds new colors) and a full all-colors crawl for occasional reconciliation. - Scraper: resumable product-page scrape (sku/color/description/price tiers/SDS/TDS/app-guide/image), with --refresh-older-than to re-scrape stale products and catch price changes. Output matches the app import format so it flows through the same shared upsert as the Columbia sync. - Resilience: brisk randomized base delay, escalating 403 cooldown-and-retry to avoid hard bans, periodic rest. All configurable. - Visibility: streams every product + the inter-product wait to the console (colored) and a log file, with an up-front ETA. - Push: token-authenticated POST to the app import endpoint (skips to manual upload when unconfigured). The app-side token import endpoint is a separate follow-up. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,63 @@
|
||||
using System.Text;
|
||||
using PrismaticSync.Infrastructure;
|
||||
|
||||
namespace PrismaticSync.Services;
|
||||
|
||||
/// <summary>
/// Pushes the scraped JSON to the app's token-authenticated catalog import endpoint. When no
/// endpoint is configured it no-ops (the JSON is still on disk for a manual upload), so the tool is
/// useful before the endpoint exists.
/// </summary>
public class CatalogPusher
{
    /// <summary>Longest response-body excerpt (in characters) that is written to the log.</summary>
    private const int MaxLoggedBodyLength = 500;

    private readonly SyncConfig _config;

    public CatalogPusher(SyncConfig config) => _config = config;

    /// <summary>
    /// POSTs the contents of the configured output JSON file to the configured import endpoint,
    /// sending the import token and vendor name as the <c>X-Import-Token</c> and
    /// <c>X-Vendor-Name</c> request headers.
    /// </summary>
    /// <returns>
    /// <c>true</c> only when the endpoint answered with a success status code. <c>false</c> when the
    /// push was skipped (no endpoint configured, or the output file is missing) or when it failed
    /// for any reason; failures are logged rather than thrown.
    /// </returns>
    public async Task<bool> PushAsync()
    {
        if (string.IsNullOrWhiteSpace(_config.Import.EndpointUrl))
        {
            Log.Warn("No import endpoint configured (Sync.Import.EndpointUrl) — skipping push. " +
                     $"Upload {_config.OutputJsonFile} manually via the Powder Catalog admin instead.");
            return false;
        }

        if (!File.Exists(_config.OutputJsonFile))
        {
            Log.Warn($"Output file {_config.OutputJsonFile} not found — nothing to push.");
            return false;
        }

        try
        {
            // Reading the file and building the request happen inside the try on purpose: a locked
            // or unreadable file, a malformed endpoint URL, or a header value the HTTP stack
            // rejects must be reported as a failed push (false), not crash the unattended run.
            var json = await File.ReadAllTextAsync(_config.OutputJsonFile);
            Log.Info($"Pushing {_config.OutputJsonFile} to {_config.Import.EndpointUrl} (vendor: {_config.Import.VendorName})...");

            using var http = new HttpClient { Timeout = TimeSpan.FromMinutes(5) };
            using var request = new HttpRequestMessage(HttpMethod.Post, _config.Import.EndpointUrl);
            request.Headers.Add("X-Import-Token", _config.Import.Token);
            request.Headers.Add("X-Vendor-Name", _config.Import.VendorName);
            request.Content = new StringContent(json, Encoding.UTF8, "application/json");

            using var response = await http.SendAsync(request);
            var body = await response.Content.ReadAsStringAsync();

            if (response.IsSuccessStatusCode)
            {
                Log.Info($"Push succeeded ({(int)response.StatusCode}): {Trim(body)}");
                return true;
            }

            Log.Error($"Push failed ({(int)response.StatusCode}): {Trim(body)}");
            return false;
        }
        catch (Exception ex)
        {
            Log.Error($"Push error: {ex.Message}");
            return false;
        }
    }

    /// <summary>Shortens a response body for logging, appending an ellipsis when it was cut.</summary>
    private static string Trim(string s) =>
        s.Length > MaxLoggedBodyLength ? s[..MaxLoggedBodyLength] + "…" : s;
}
|
||||
@@ -0,0 +1,138 @@
|
||||
using System.Text.RegularExpressions;
|
||||
using Microsoft.Playwright;
|
||||
using PrismaticSync.Infrastructure;
|
||||
|
||||
namespace PrismaticSync.Services;
|
||||
|
||||
/// <summary>
/// Finds product URLs on the Prismatic color listing page, which loads more items as it is
/// scrolled. Offers an incremental pass (listing opened with <c>?category=created_at</c>, ended
/// once several scrolls in a row yield nothing new) and a full pass (one crawl per configured
/// color filter). Either pass writes the merged URL set back to the URL list file.
/// </summary>
public class PrismaticDiscoverer
{
    private static readonly Regex ProductUrlRegex =
        new(@"/shop/powder-coating-colors/[A-Z0-9-]+/", RegexOptions.IgnoreCase | RegexOptions.Compiled);

    private readonly BrowserSession _session;
    private readonly SyncConfig _config;

    public PrismaticDiscoverer(BrowserSession session, SyncConfig config)
    {
        _session = session;
        _config = config;
    }

    /// <summary>
    /// Incremental pass: opens the listing with <c>?category=created_at</c> and keeps scrolling
    /// until <c>StopAfterKnownScrolls</c> consecutive scrolls add no unseen URL, or
    /// <c>MaxScrolls</c> is reached.
    /// </summary>
    /// <returns>How many URLs were added to the known set during this call.</returns>
    public async Task<int> DiscoverNewAsync()
    {
        var urls = LoadKnownUrls();
        var initialCount = urls.Count;
        Log.Info($"Incremental discovery (newest first). Known URLs: {initialCount}");

        await GotoAsync($"{_config.ColorsUrl}?category=created_at");

        var idleScrolls = 0;
        var scrollNumber = 0;
        while (scrollNumber < _config.MaxScrolls)
        {
            scrollNumber++;

            var fresh = await HarvestVisibleLinksAsync(urls);
            if (fresh == 0)
            {
                idleScrolls++;
            }
            else
            {
                idleScrolls = 0;
            }

            Log.Info($"Scroll {scrollNumber}: +{fresh} new, total {urls.Count}, known-streak {idleScrolls}");

            if (idleScrolls >= _config.StopAfterKnownScrolls)
            {
                Log.Info("Reached known territory — stopping incremental discovery.");
                break;
            }

            await ScrollAsync();
        }

        var discovered = urls.Count - initialCount;
        Log.Info($"Incremental discovery done. New URLs: {discovered}; total {urls.Count}");
        return discovered;
    }

    /// <summary>
    /// Full pass: for each entry in <c>ColorParams</c>, opens the listing filtered by that color and
    /// scrolls until <c>StopAfterNoNewScrolls</c> consecutive scrolls add nothing, or
    /// <c>MaxScrolls</c> is reached. A failure on one color is logged as a warning and the pass
    /// continues with the next color.
    /// </summary>
    /// <returns>How many URLs were added to the known set during this call.</returns>
    public async Task<int> DiscoverFullAsync()
    {
        var urls = LoadKnownUrls();
        var initialCount = urls.Count;
        Log.Info($"Full discovery across {_config.ColorParams.Length} color filters. Known URLs: {initialCount}");

        foreach (var color in _config.ColorParams)
        {
            Log.Info($"Color filter: {color}");
            try
            {
                await GotoAsync($"{_config.ColorsUrl}?color={Uri.EscapeDataString(color)}");

                var idleScrolls = 0;
                for (var pass = 0; pass < _config.MaxScrolls; pass++)
                {
                    var fresh = await HarvestVisibleLinksAsync(urls);
                    idleScrolls = fresh > 0 ? 0 : idleScrolls + 1;

                    if (idleScrolls >= _config.StopAfterNoNewScrolls)
                    {
                        break;
                    }

                    await ScrollAsync();
                }

                Log.Info($"Color {color} done. Total {urls.Count}");

                // Short fixed pause before moving on to the next color filter.
                await _session.Page.WaitForTimeoutAsync(3000);
            }
            catch (Exception ex)
            {
                Log.Warn($"Color {color} failed: {ex.Message}");
            }
        }

        var discovered = urls.Count - initialCount;
        Log.Info($"Full discovery done. New this run: {discovered}; total {urls.Count}");
        return discovered;
    }

    /// <summary>Loads the URL list file into a case-insensitive set.</summary>
    private HashSet<string> LoadKnownUrls() =>
        new(JsonStore.LoadUrls(_config.ProductUrlsFile), StringComparer.OrdinalIgnoreCase);

    /// <summary>
    /// Adds every product link currently on the page to <paramref name="known"/>, then writes the
    /// whole set to the URL list file (written on every call, even when nothing was added).
    /// </summary>
    /// <returns>The number of links that were not already in the set.</returns>
    private async Task<int> HarvestVisibleLinksAsync(HashSet<string> known)
    {
        var links = await CollectProductLinksAsync();

        var fresh = 0;
        foreach (var link in links)
        {
            if (known.Add(link))
            {
                fresh++;
            }
        }

        JsonStore.SaveUrls(_config.ProductUrlsFile, known);
        return fresh;
    }

    /// <summary>Navigates to <paramref name="url"/> and then waits the configured settle time.</summary>
    private async Task GotoAsync(string url)
    {
        var options = new PageGotoOptions
        {
            WaitUntil = WaitUntilState.DOMContentLoaded,
            Timeout = 60000
        };

        await _session.Page.GotoAsync(url, options);
        await _session.Page.WaitForTimeoutAsync(_config.PageSettleSeconds * 1000);
    }

    /// <summary>Wheels the page down by 2500 and waits <c>ScrollWaitMs</c> for content to load.</summary>
    private async Task ScrollAsync()
    {
        await _session.Page.Mouse.WheelAsync(0, 2500);
        await _session.Page.WaitForTimeoutAsync(_config.ScrollWaitMs);
    }

    /// <summary>
    /// Reads the href of every anchor on the page, keeps those matching the product URL pattern,
    /// and returns them after <c>JsonStore.CleanUrl</c>, dropping any that clean to an empty string.
    /// </summary>
    private async Task<List<string>> CollectProductLinksAsync()
    {
        var hrefs = await _session.Page.EvalOnSelectorAllAsync<string[]>(
            "a", "els => els.map(a => a.href).filter(Boolean)");

        var productLinks = new List<string>();
        foreach (var href in hrefs)
        {
            if (!ProductUrlRegex.IsMatch(href))
            {
                continue;
            }

            var cleaned = JsonStore.CleanUrl(href);
            if (cleaned.Length > 0)
            {
                productLinks.Add(cleaned);
            }
        }

        return productLinks;
    }
}
|
||||
@@ -0,0 +1,295 @@
|
||||
using System.Diagnostics;
|
||||
using System.Globalization;
|
||||
using System.Text.RegularExpressions;
|
||||
using Microsoft.Playwright;
|
||||
using PrismaticSync.Infrastructure;
|
||||
using PrismaticSync.Models;
|
||||
|
||||
namespace PrismaticSync.Services;
|
||||
|
||||
/// <summary>
/// Scrapes individual Prismatic product pages into <see cref="ProductRecord"/>s. Resumable (skips
/// already-scraped URLs, optionally retries past errors) and supports a refresh window so stale
/// records get re-scraped to catch price changes. Saves after every product so a long run can be
/// stopped and resumed safely, and logs continuously — including the delay between products — so a
/// manual run always shows it's alive.
/// </summary>
public class PrismaticScraper
{
    /// <summary>
    /// Separator the in-page script puts between a link's text and its href
    /// (<c>String.fromCharCode(1)</c>). Written as an escape so the control character cannot be
    /// lost by editors or renderers.
    /// </summary>
    private const char LinkFieldSeparator = '\u0001';

    private static readonly Regex ProductUrlRegex =
        new(@"/shop/powder-coating-colors/[A-Z0-9-]+/", RegexOptions.IgnoreCase | RegexOptions.Compiled);
    private static readonly Regex SkuRegex =
        new(@"Item:\s*([A-Z0-9-]+)", RegexOptions.IgnoreCase | RegexOptions.Compiled);
    private static readonly Regex DescRegex =
        new(@"Description:\s*(.*?)(WARNING:|What does this match\?|$)", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled);
    private static readonly Regex PriceTierRegex =
        new(@"(\d+\s*-\s*\d+\s*lbs|\d+\s*\+\s*lbs)\s*\$([\d.]+)", RegexOptions.IgnoreCase | RegexOptions.Compiled);
    private static readonly Regex RangeRegex = new(@"(\d+)\s*-\s*(\d+)", RegexOptions.Compiled);
    private static readonly Regex PlusRegex = new(@"(\d+)\s*\+", RegexOptions.Compiled);
    private static readonly Regex WhitespaceRegex = new(@"\s+", RegexOptions.Compiled);

    private readonly BrowserSession _session;
    private readonly SyncConfig _config;
    private readonly Random _random = new();

    public PrismaticScraper(BrowserSession session, SyncConfig config)
    {
        _session = session;
        _config = config;
    }

    /// <summary>
    /// Raised when the site answers a product request with 403. A dedicated type lets the retry
    /// logic recognise a block without inspecting message text, which could also contain "403" for
    /// unrelated reasons (for example a product URL quoted inside a navigation-timeout message).
    /// </summary>
    private sealed class SiteBlockedException : Exception
    {
        public SiteBlockedException(string message) : base(message) { }
    }

    /// <summary>
    /// Scrapes products needing work: those not yet scraped, plus (when
    /// <paramref name="refreshOlderThanDays"/> is greater than 0) any whose data is older than that
    /// window.
    /// </summary>
    /// <param name="refreshOlderThanDays">Re-scrape records older than this many days; 0 or less disables refresh.</param>
    /// <param name="maxProducts">Upper bound on products handled this run; 0 or less means no limit.</param>
    /// <param name="retryErrors">When true, URLs that previously errored are attempted again.</param>
    /// <returns>The number of products scraped and the number that ended in an error.</returns>
    public async Task<(int Scraped, int Errors)> ScrapeAsync(int refreshOlderThanDays, int maxProducts, bool retryErrors)
    {
        var allUrls = JsonStore.LoadUrls(_config.ProductUrlsFile)
            .Where(u => ProductUrlRegex.IsMatch(u))
            .ToList();

        var data = JsonStore.LoadOutput(_config.OutputJsonFile);

        // Index existing results by URL (keep the most recent if the file has dupes).
        var resultByUrl = data.Results
            .GroupBy(r => JsonStore.CleanUrl(r.ProductUrl), StringComparer.OrdinalIgnoreCase)
            .ToDictionary(g => g.Key, g => g.OrderByDescending(r => r.ScrapedAt).First(), StringComparer.OrdinalIgnoreCase);

        var errorUrls = new HashSet<string>(
            data.Errors.Select(e => JsonStore.CleanUrl(e.ProductUrl)), StringComparer.OrdinalIgnoreCase);

        var staleCutoff = DateTime.UtcNow.AddDays(-Math.Max(0, refreshOlderThanDays));

        var toScrape = new List<string>();
        foreach (var url in allUrls)
        {
            if (resultByUrl.TryGetValue(url, out var existing))
            {
                if (refreshOlderThanDays > 0 && existing.ScrapedAt < staleCutoff)
                {
                    toScrape.Add(url); // stale → refresh for price changes
                }
            }
            else if (retryErrors || !errorUrls.Contains(url))
            {
                toScrape.Add(url); // never scraped (skip known errors unless retrying)
            }
        }

        if (maxProducts > 0)
        {
            toScrape = toScrape.Take(maxProducts).ToList();
        }

        var total = toScrape.Count;
        Log.Info($"URLs: {allUrls.Count}; already scraped: {resultByUrl.Count}; errors on file: {errorUrls.Count}");
        Log.Info($"To scrape this run: {total} (refresh older than {refreshOlderThanDays}d, retry errors: {retryErrors})");

        if (total == 0)
        {
            Log.Info("Nothing to scrape. Done.");
            return (0, 0);
        }

        // Rough estimate only: average inter-product delay + settle time + a 2s allowance per
        // product. Long rests and block cooldowns are not included.
        var avgDelaySec = (_config.MinDelaySeconds + _config.MaxDelaySeconds) / 2.0;
        var etaMinutes = total * (avgDelaySec + _config.PageSettleSeconds + 2) / 60.0;
        Log.Info($"Estimated run time: ~{FormatDuration(TimeSpan.FromMinutes(etaMinutes))} " +
                 $"(grab a coffee if that's a while — it saves after every product and is resumable).");

        var stopwatch = Stopwatch.StartNew();
        int scraped = 0, errors = 0, index = 0, consecutiveBlocks = 0;

        foreach (var url in toScrape)
        {
            index++;

            for (var attempt = 1; ; attempt++)
            {
                try
                {
                    var row = await ParseProductAsync(url, index, total);

                    // Replace the previous record in place when there is one; fall back to Add if
                    // it can no longer be located in the list, rather than indexing with -1.
                    var existingIndex = resultByUrl.TryGetValue(url, out var existing)
                        ? data.Results.IndexOf(existing)
                        : -1;
                    if (existingIndex >= 0)
                    {
                        data.Results[existingIndex] = row;
                    }
                    else
                    {
                        data.Results.Add(row);
                    }

                    resultByUrl[url] = row;
                    data.Errors.RemoveAll(e => IsSameUrl(e.ProductUrl, url));

                    scraped++;
                    consecutiveBlocks = 0;
                    JsonStore.SaveOutput(_config.OutputJsonFile, data);

                    var basePrice = row.PriceTiers.Count > 0 ? row.PriceTiers.Min(t => t.Price) : 0m;
                    Log.Info($"[{index}/{total}] Saved {row.Sku} \"{row.ColorName}\" " +
                             $"({row.PriceTiers.Count} tier(s), base ${basePrice:0.00}) | elapsed {FormatDuration(stopwatch.Elapsed)}");
                    break;
                }
                catch (Exception ex) when (IsBlocked(ex) && attempt <= _config.BlockedMaxRetries)
                {
                    // Site pushed back — back off (escalating) and retry the SAME product rather
                    // than barreling on, which is how an unattended run gets hard-banned.
                    consecutiveBlocks++;
                    var cooldown = Math.Min(_config.BlockedCooldownSeconds * consecutiveBlocks, _config.BlockedCooldownMaxSeconds);
                    Log.Warn($"[{index}/{total}] Blocked (403), attempt {attempt}. Cooling down {cooldown}s, then retrying this product...");
                    await Task.Delay(cooldown * 1000);
                }
                catch (Exception ex)
                {
                    // Keep at most one error entry per URL so repeated retry runs do not pile up
                    // duplicates in the output file.
                    data.Errors.RemoveAll(e => IsSameUrl(e.ProductUrl, url));
                    data.Errors.Add(new ScrapeError { ProductUrl = url, Error = ex.Message, ScrapedAt = DateTime.UtcNow });
                    JsonStore.SaveOutput(_config.OutputJsonFile, data);
                    errors++;
                    Log.Error($"[{index}/{total}] {url} -> {ex.Message}");
                    break;
                }
            }

            // Periodic longer rest — eases server load and avoids a robotic, evenly-spaced cadence.
            if (_config.LongRestEveryProducts > 0 && index % _config.LongRestEveryProducts == 0 && index < total)
            {
                Log.Info($"Resting {_config.LongRestSeconds}s after {index} products...");
                await Task.Delay(_config.LongRestSeconds * 1000);
            }

            if (index < total)
            {
                var delayMs = RandomDelayMs();
                Log.Info($"[{index}/{total}] Waiting {delayMs / 1000.0:0.0}s before next product...");
                await Task.Delay(delayMs);
            }
        }

        Log.Info($"Scrape complete. Scraped {scraped}, errors {errors}. Total results on file: {data.Results.Count}. " +
                 $"Took {FormatDuration(stopwatch.Elapsed)}.");
        return (scraped, errors);
    }

    /// <summary>
    /// Loads one product page and extracts its fields from the page title, body text, links and
    /// images.
    /// </summary>
    /// <exception cref="SiteBlockedException">The response status was 403 or the title was "403 Forbidden".</exception>
    /// <exception cref="InvalidOperationException">The page was a 404, or neither a SKU nor an h1 title was found.</exception>
    private async Task<ProductRecord> ParseProductAsync(string url, int index, int total)
    {
        Log.Info($"[{index}/{total}] Scraping {url}");

        var response = await _session.Page.GotoAsync(url, new PageGotoOptions
        {
            WaitUntil = WaitUntilState.DOMContentLoaded,
            Timeout = 60000
        });
        await _session.Page.WaitForTimeoutAsync(_config.PageSettleSeconds * 1000);

        var status = response?.Status ?? 0;
        var title = Clean(await SafeTextAsync(() => _session.Page.TitleAsync()));
        var plainText = Clean(await SafeTextAsync(() => _session.Page.Locator("body").InnerTextAsync()));

        if (status == 403 || Regex.IsMatch(title, @"^403 Forbidden$", RegexOptions.IgnoreCase))
        {
            throw new SiteBlockedException("403 Forbidden returned by site.");
        }

        // NOTE(review): the title test matches "404" anywhere in the title, so a genuine product
        // whose page title happens to contain "404" would be reported as missing — confirm against
        // real page titles before tightening.
        if (status == 404 || Regex.IsMatch(title, @"404|Page Not Found", RegexOptions.IgnoreCase))
        {
            throw new InvalidOperationException("404 Not Found returned by site.");
        }

        var colorName = Clean(await SafeTextAsync(() => _session.Page.Locator("h1").First.InnerTextAsync()));

        var skuMatch = SkuRegex.Match(plainText);
        var sku = skuMatch.Success ? skuMatch.Groups[1].Value : "";
        if (string.IsNullOrEmpty(sku) && string.IsNullOrEmpty(colorName))
        {
            throw new InvalidOperationException("Could not find SKU or title on product page.");
        }

        var descMatch = DescRegex.Match(plainText);
        var description = descMatch.Success ? Clean(descMatch.Groups[1].Value) : "";

        return new ProductRecord
        {
            Sku = sku,
            ColorName = colorName,
            Description = description,
            PriceTiers = ParsePriceTiers(plainText),
            SafetyDataSheetUrl = await GetLinkByTextAsync(new[] { "Safety Data Sheet", @"\bSDS\b" }),
            TechnicalDataSheetUrl = await GetLinkByTextAsync(new[] { "Tech Data Sheet", "Technical Data Sheet", @"\bTDS\b" }),
            ApplicationGuideUrl = await GetLinkByTextAsync(new[] { "Application Guide" }),
            SampleImageUrl = await GetSampleImageUrlAsync(),
            ProductUrl = url,
            ScrapedAt = DateTime.UtcNow
        };
    }

    /// <summary>
    /// Extracts every "N - M lbs $price" / "N+ lbs $price" occurrence from <paramref name="text"/>.
    /// Matches whose price does not parse as a decimal are skipped. An "N+" tier has no maximum.
    /// </summary>
    private static List<PriceTier> ParsePriceTiers(string text)
    {
        var tiers = new List<PriceTier>();
        foreach (Match m in PriceTierRegex.Matches(text))
        {
            if (!decimal.TryParse(m.Groups[2].Value, NumberStyles.Any, CultureInfo.InvariantCulture, out var price))
            {
                continue;
            }

            var rangeText = Clean(m.Groups[1].Value);
            int? min = null, max = null;

            var range = RangeRegex.Match(rangeText);
            if (range.Success)
            {
                min = int.Parse(range.Groups[1].Value);
                max = int.Parse(range.Groups[2].Value);
            }

            var plus = PlusRegex.Match(rangeText);
            if (plus.Success)
            {
                min = int.Parse(plus.Groups[1].Value);
                max = null;
            }

            tiers.Add(new PriceTier { Min = min, Max = max, Price = price });
        }
        return tiers;
    }

    /// <summary>
    /// Returns the href of the first link whose text matches any pattern, or an empty string when
    /// none does. Uses a single eval returning "text\u0001href" strings to avoid object
    /// deserialization quirks.
    /// </summary>
    private async Task<string> GetLinkByTextAsync(string[] patterns)
    {
        var combined = await _session.Page.EvalOnSelectorAllAsync<string[]>(
            "a",
            "els => els.map(a => ((a.innerText || a.textContent || '').replace(/\\s+/g, ' ').trim()) " +
            "+ String.fromCharCode(1) + (a.href || ''))");

        foreach (var entry in combined)
        {
            // Split into at most two parts: everything after the first separator is the href.
            var parts = entry.Split(LinkFieldSeparator, 2);
            var text = parts.Length > 0 ? parts[0] : "";
            var href = parts.Length > 1 ? parts[1] : "";
            if (href.Length > 0 && patterns.Any(p => Regex.IsMatch(text, p, RegexOptions.IgnoreCase)))
            {
                return href;
            }
        }
        return "";
    }

    /// <summary>
    /// Picks a sample image URL from the page's img sources, in order of preference: an
    /// images.nicindustries.com URL without "thumbnail", any images.nicindustries.com URL, any URL
    /// containing "prismatic", "powder" or "color"; otherwise an empty string.
    /// </summary>
    private async Task<string> GetSampleImageUrlAsync()
    {
        var srcs = await _session.Page.EvalOnSelectorAllAsync<string[]>(
            "img",
            "els => els.map(i => i.currentSrc || i.src || i.getAttribute('src') || i.getAttribute('data-src') || '')" +
            ".filter(Boolean)");

        return srcs.FirstOrDefault(s => Regex.IsMatch(s, @"images\.nicindustries\.com", RegexOptions.IgnoreCase)
                                        && !Regex.IsMatch(s, "thumbnail", RegexOptions.IgnoreCase))
               ?? srcs.FirstOrDefault(s => Regex.IsMatch(s, @"images\.nicindustries\.com", RegexOptions.IgnoreCase))
               ?? srcs.FirstOrDefault(s => Regex.IsMatch(s, "prismatic|powder|color", RegexOptions.IgnoreCase))
               ?? "";
    }

    /// <summary>True only for the block signal raised by <see cref="ParseProductAsync"/>.</summary>
    private static bool IsBlocked(Exception ex) => ex is SiteBlockedException;

    /// <summary>Compares a stored URL (after cleaning) with <paramref name="url"/>, ignoring case.</summary>
    private static bool IsSameUrl(string storedUrl, string url) =>
        JsonStore.CleanUrl(storedUrl).Equals(url, StringComparison.OrdinalIgnoreCase);

    /// <summary>
    /// Runs <paramref name="fn"/> and returns its text, or an empty string if it throws. Deliberate
    /// best-effort: a missing element must not abort the whole product.
    /// </summary>
    private static async Task<string> SafeTextAsync(Func<Task<string>> fn)
    {
        try { return await fn(); } catch { return ""; }
    }

    /// <summary>Collapses each run of whitespace to one space and trims; null becomes empty.</summary>
    private static string Clean(string? text) => WhitespaceRegex.Replace(text ?? "", " ").Trim();

    /// <summary>
    /// Random delay in milliseconds between the configured minimum and maximum (inclusive). A
    /// negative minimum is treated as 0 and a maximum below the minimum is raised to it.
    /// </summary>
    private int RandomDelayMs()
    {
        var min = Math.Max(0, _config.MinDelaySeconds * 1000);
        var max = Math.Max(min, _config.MaxDelaySeconds * 1000);
        return _random.Next(min, max + 1);
    }

    /// <summary>Formats a duration as "Hh Mm", "Mm Ss" or "Ss" depending on its size.</summary>
    private static string FormatDuration(TimeSpan t) =>
        t.TotalHours >= 1 ? $"{(int)t.TotalHours}h {t.Minutes}m" :
        t.TotalMinutes >= 1 ? $"{(int)t.TotalMinutes}m {t.Seconds}s" :
        $"{t.Seconds}s";
}
|
||||
Reference in New Issue
Block a user