Add proactive inter-batch pacing to avoid rate limit hits
Rather than relying on reactive 65s retries, each semaphore slot is held for at least MinBatchIntervalSeconds (20s). With 2 concurrent slots, that limits throughput to ~3 batches/min × ~2k tokens = ~6k output TPM, safely under the 8k/min limit.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -22,8 +22,9 @@ public class AiCatalogPriceCheckService : IAiCatalogPriceCheckService
|
|||||||
|
|
||||||
private const string Model = "claude-haiku-4-5-20251001";
|
private const string Model = "claude-haiku-4-5-20251001";
|
||||||
private const int BatchSize = 25;
|
private const int BatchSize = 25;
|
||||||
private const int MaxConcurrentBatches = 2; // 3 concurrent bursts past Haiku's output TPM limit
|
private const int MaxConcurrentBatches = 2;
|
||||||
private const int RateLimitRetrySeconds = 65; // wait just past the 60s window before retrying a 429
|
private const int RateLimitRetrySeconds = 65;
|
||||||
|
private const int MinBatchIntervalSeconds = 20; // proactive pacing: ~3 batches/min × ~2k tokens = ~6k TPM, under the 8k limit
|
||||||
|
|
||||||
private static readonly JsonSerializerOptions JsonOpts = new() { PropertyNameCaseInsensitive = true };
|
private static readonly JsonSerializerOptions JsonOpts = new() { PropertyNameCaseInsensitive = true };
|
||||||
|
|
||||||
@@ -137,7 +138,13 @@ public class AiCatalogPriceCheckService : IAiCatalogPriceCheckService
|
|||||||
{
|
{
|
||||||
_logger.LogInformation("Starting price check batch {Index}/{Total} ({Count} items)",
|
_logger.LogInformation("Starting price check batch {Index}/{Total} ({Count} items)",
|
||||||
index + 1, batches.Count, batch.Count);
|
index + 1, batches.Count, batch.Count);
|
||||||
return await AnalyzeBatchAsync(client, systemPrompt, batch);
|
var sw = System.Diagnostics.Stopwatch.StartNew();
|
||||||
|
var result = await AnalyzeBatchAsync(client, systemPrompt, batch);
|
||||||
|
// Pace output token rate: hold the slot until MinBatchIntervalSeconds has elapsed
|
||||||
|
// so we stay under the per-minute output token limit without relying solely on retries.
|
||||||
|
var pad = (int)(MinBatchIntervalSeconds * 1000 - sw.ElapsedMilliseconds);
|
||||||
|
if (pad > 0) await Task.Delay(pad, cancellationToken);
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
finally
|
finally
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user