From 7407d1cd966a75df309c4c6cb337be2dc21908f7 Mon Sep 17 00:00:00 2001
From: Scott Pouliot <spouliot@scppowdercoating.com>
Date: Sat, 25 Apr 2026 20:54:30 -0400
Subject: [PATCH] Fix rate limit errors in AI price check
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Tier 1 Anthropic accounts are capped at 8,000 output tokens/minute on
Sonnet. 3 concurrent batches burst well past that, causing 429s.

- MaxConcurrentBatches: 3 → 1 (sequential prevents burst)
- Add retry: on rate_limit_error, wait 65s and try again, up to 3 attempts
  in total, so the per-minute window resets before the next attempt

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../Services/AiCatalogPriceCheckService.cs    | 32 ++++++++++++++++---
 1 file changed, 27 insertions(+), 5 deletions(-)
diff --git a/src/PowderCoating.Infrastructure/Services/AiCatalogPriceCheckService.cs b/src/PowderCoating.Infrastructure/Services/AiCatalogPriceCheckService.cs
index 451387d..6cfe391 100644
--- a/src/PowderCoating.Infrastructure/Services/AiCatalogPriceCheckService.cs
+++ b/src/PowderCoating.Infrastructure/Services/AiCatalogPriceCheckService.cs
@@ -1,3 +1,4 @@
+using System.Net.Http;
 using System.Text;
 using System.Text.Json;
 using Anthropic.SDK;
@@ -20,8 +21,9 @@ public class AiCatalogPriceCheckService : IAiCatalogPriceCheckService
     private readonly ILogger<AiCatalogPriceCheckService> _logger;
 
     private const string Model = "claude-sonnet-4-6";
-    private const int BatchSize = 25;    // 25 items × ~80 tokens (with word limits) ≈ 2000 output tokens, well within 8192
-    private const int MaxConcurrentBatches = 3; // cap parallel API calls to stay within rate limits
+    private const int BatchSize = 25; // items per request; earlier estimate was ~80 output tokens per item ≈ 2,000 per batch
+    private const int MaxConcurrentBatches = 1; // Tier 1 output limit is 8,000 TPM — sequential avoids bursting past it
+    private const int RateLimitRetrySeconds = 65; // wait just past the 60s window before retrying a 429
 
     private static readonly JsonSerializerOptions JsonOpts = new() { PropertyNameCaseInsensitive = true };
 
@@ -78,10 +80,30 @@ public class AiCatalogPriceCheckService : IAiCatalogPriceCheckService
         return "[]";
     }
 
-    private static async Task<MessageResponse> SendAsync(AnthropicClient client, MessageParameters parameters)
+    /// <summary>
+    /// Sends a message to Claude, making at most 3 attempts. After a rate-limit failure it waits
+    /// RateLimitRetrySeconds before the next attempt; every other failure propagates to the caller.
+    /// </summary>
+    private async Task<MessageResponse> SendAsync(AnthropicClient client, MessageParameters parameters)
     {
-        using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(90));
-        return await client.Messages.GetClaudeMessageAsync(parameters, cts.Token);
+        const int maxAttempts = 3;
+        for (var attempt = 1; attempt <= maxAttempts; attempt++)
+        {
+            try
+            {
+                using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(90)); // fresh 90s timeout per attempt
+                return await client.Messages.GetClaudeMessageAsync(parameters, cts.Token);
+            }
+            catch (HttpRequestException ex) when (attempt < maxAttempts && ex.Message.Contains("rate_limit_error"))
+            {
+                _logger.LogWarning("Rate limit hit (attempt {Attempt}/{Max}), waiting {Seconds}s before retry",
+                    attempt, maxAttempts, RateLimitRetrySeconds);
+                await Task.Delay(TimeSpan.FromSeconds(RateLimitRetrySeconds));
+            }
+        }
+        // Not reachable: on the last attempt the filter above is false, so that attempt either
+        // returns or lets its exception propagate. The throw only satisfies definite-return analysis.
+        throw new InvalidOperationException("SendAsync retry loop exited without returning or throwing.");
     }
 
     /// <inheritdoc/>