From 26b8244422833023333503520441a37351b9f8c8 Mon Sep 17 00:00:00 2001 From: Scott Pouliot Date: Sat, 25 Apr 2026 21:54:32 -0400 Subject: [PATCH] Reduce to 2 concurrent batches to avoid Haiku output TPM bursting. 3 concurrent batches hit the rate limit simultaneously and then retry in unison, causing repeated 429s. Running 2 concurrently keeps the output rate lower. Co-Authored-By: Claude Sonnet 4.6 --- .../Services/AiCatalogPriceCheckService.cs | 2 +- src/PowderCoating.Web/wwwroot/js/catalog-price-check.js | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/PowderCoating.Infrastructure/Services/AiCatalogPriceCheckService.cs b/src/PowderCoating.Infrastructure/Services/AiCatalogPriceCheckService.cs index 423a211..c2d75fa 100644 --- a/src/PowderCoating.Infrastructure/Services/AiCatalogPriceCheckService.cs +++ b/src/PowderCoating.Infrastructure/Services/AiCatalogPriceCheckService.cs @@ -22,7 +22,7 @@ public class AiCatalogPriceCheckService : IAiCatalogPriceCheckService private const string Model = "claude-haiku-4-5-20251001"; private const int BatchSize = 25; - private const int MaxConcurrentBatches = 3; // Haiku has generous rate limits; retry logic handles any 429s + private const int MaxConcurrentBatches = 2; // 3 concurrent bursts past Haiku's output TPM limit private const int RateLimitRetrySeconds = 65; // wait just past the 60s window before retrying a 429 private static readonly JsonSerializerOptions JsonOpts = new() { PropertyNameCaseInsensitive = true }; diff --git a/src/PowderCoating.Web/wwwroot/js/catalog-price-check.js b/src/PowderCoating.Web/wwwroot/js/catalog-price-check.js index dedb789..ee0e0db 100644 --- a/src/PowderCoating.Web/wwwroot/js/catalog-price-check.js +++ b/src/PowderCoating.Web/wwwroot/js/catalog-price-check.js @@ -11,11 +11,11 @@ if (!form || !btn || !overlay) return; // Estimate total seconds based on item count. - // Haiku: 3 concurrent batches, ~8s per wave. + // Haiku: 2 concurrent batches, ~10s per wave. 
function estimateDuration(itemCount) { var batches = Math.max(1, Math.ceil(itemCount / 25)); - var waves = Math.ceil(batches / 3); - return Math.max(15, waves * 8); + var waves = Math.ceil(batches / 2); + return Math.max(15, waves * 10); } // Messages keyed to approximate progress milestones (0–100).