From 7407d1cd966a75df309c4c6cb337be2dc21908f7 Mon Sep 17 00:00:00 2001 From: Scott Pouliot Date: Sat, 25 Apr 2026 20:54:30 -0400 Subject: [PATCH] Fix rate limit errors in AI price check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tier 1 Anthropic accounts are capped at 8,000 output tokens/minute on Sonnet. 3 concurrent batches burst well past that, causing 429s. - MaxConcurrentBatches: 3 → 1 (sequential prevents burst) - Add retry: on rate_limit_error, wait 65s then retry (up to 3 attempts in total) so the per-minute window resets before the next attempt Co-Authored-By: Claude Sonnet 4.6 --- .../Services/AiCatalogPriceCheckService.cs | 32 ++++++++++++++++--- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/src/PowderCoating.Infrastructure/Services/AiCatalogPriceCheckService.cs b/src/PowderCoating.Infrastructure/Services/AiCatalogPriceCheckService.cs index 451387d..6cfe391 100644 --- a/src/PowderCoating.Infrastructure/Services/AiCatalogPriceCheckService.cs +++ b/src/PowderCoating.Infrastructure/Services/AiCatalogPriceCheckService.cs @@ -1,3 +1,4 @@ +using System.Net.Http; using System.Text; using System.Text.Json; using Anthropic.SDK; @@ -20,8 +21,9 @@ public class AiCatalogPriceCheckService : IAiCatalogPriceCheckService private readonly ILogger _logger; private const string Model = "claude-sonnet-4-6"; - private const int BatchSize = 25; // 25 items × ~80 tokens (with word limits) ≈ 2000 output tokens, well within 8192 - private const int MaxConcurrentBatches = 3; // cap parallel API calls to stay within rate limits + private const int BatchSize = 25; + private const int MaxConcurrentBatches = 1; // Tier 1 output limit is 8,000 TPM — sequential avoids bursting past it + private const int RateLimitRetrySeconds = 65; // wait just past the 60s window before retrying a 429 private static readonly JsonSerializerOptions JsonOpts = new() { PropertyNameCaseInsensitive = true }; @@ -78,10 +80,30 @@ public class
AiCatalogPriceCheckService : IAiCatalogPriceCheckService return "[]"; } - private static async Task SendAsync(AnthropicClient client, MessageParameters parameters) + /// + /// Sends a message to Claude with up to 3 attempts. On a rate-limit 429, waits + /// RateLimitRetrySeconds before retrying so the per-minute token window can reset. + /// + private async Task SendAsync(AnthropicClient client, MessageParameters parameters) { - using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(90)); - return await client.Messages.GetClaudeMessageAsync(parameters, cts.Token); + const int maxAttempts = 3; + for (var attempt = 1; attempt <= maxAttempts; attempt++) + { + try + { + using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(90)); + return await client.Messages.GetClaudeMessageAsync(parameters, cts.Token); + } + catch (HttpRequestException ex) when (attempt < maxAttempts && ex.Message.Contains("rate_limit_error")) + { + _logger.LogWarning("Rate limit hit (attempt {Attempt}/{Max}), waiting {Seconds}s before retry", + attempt, maxAttempts, RateLimitRetrySeconds); + await Task.Delay(TimeSpan.FromSeconds(RateLimitRetrySeconds)); + } + } + // Final attempt — let any exception propagate to the batch error handler + using var finalCts = new CancellationTokenSource(TimeSpan.FromSeconds(90)); + return await client.Messages.GetClaudeMessageAsync(parameters, finalCts.Token); } ///