From 2d25f6db2b618e9ac4f71dd34fb20e3fc0bd1880 Mon Sep 17 00:00:00 2001 From: Scott Pouliot Date: Sat, 25 Apr 2026 22:01:22 -0400 Subject: [PATCH] Add proactive inter-batch pacing to avoid rate limit hits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than relying on reactive 65s retries, each semaphore slot is held for at least MinBatchIntervalSeconds (20s), which caps each slot at ~3 batches/min × ~2k tokens = ~6k output TPM. Note that this figure is per slot: with both of the 2 concurrent slots saturated the aggregate ceiling is ~6 batches/min (~12k output TPM), so staying under the 8k/min limit still depends on batches running longer than the 20s floor, with the 65s retry as the backstop. Co-Authored-By: Claude Sonnet 4.6 --- .../Services/AiCatalogPriceCheckService.cs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/PowderCoating.Infrastructure/Services/AiCatalogPriceCheckService.cs b/src/PowderCoating.Infrastructure/Services/AiCatalogPriceCheckService.cs index c2d75fa..783ea96 100644 --- a/src/PowderCoating.Infrastructure/Services/AiCatalogPriceCheckService.cs +++ b/src/PowderCoating.Infrastructure/Services/AiCatalogPriceCheckService.cs @@ -22,8 +22,9 @@ public class AiCatalogPriceCheckService : IAiCatalogPriceCheckService private const string Model = "claude-haiku-4-5-20251001"; private const int BatchSize = 25; - private const int MaxConcurrentBatches = 2; // 3 concurrent bursts past Haiku's output TPM limit - private const int RateLimitRetrySeconds = 65; // wait just past the 60s window before retrying a 429 + private const int MaxConcurrentBatches = 2; + private const int RateLimitRetrySeconds = 65; + private const int MinBatchIntervalSeconds = 20; // proactive pacing: ~3 batches/min × ~2k tokens = ~6k TPM, under the 8k limit private static readonly JsonSerializerOptions JsonOpts = new() { PropertyNameCaseInsensitive = true }; @@ -137,7 +138,13 @@ public class AiCatalogPriceCheckService : IAiCatalogPriceCheckService { _logger.LogInformation("Starting price check batch {Index}/{Total} ({Count} items)", index + 1, batches.Count, batch.Count); - return await AnalyzeBatchAsync(client, 
systemPrompt, batch); + var sw = System.Diagnostics.Stopwatch.StartNew(); + var result = await AnalyzeBatchAsync(client, systemPrompt, batch); + // Pace output token rate: hold the slot until MinBatchIntervalSeconds has elapsed + // so we stay under the per-minute output token limit without relying solely on retries. + var pad = (int)(MinBatchIntervalSeconds * 1000 - sw.ElapsedMilliseconds); + if (pad > 0) await Task.Delay(pad, cancellationToken); + return result; } finally {