Fix rate limit errors in AI price check
Tier 1 Anthropic accounts are capped at 8,000 output tokens per minute on Sonnet. Three concurrent batches burst well past that limit, causing HTTP 429 (rate_limit_error) responses.

- MaxConcurrentBatches: 3 → 1 (sequential execution prevents the burst)
- Add retry: on rate_limit_error, wait 65s and then retry, for up to 3 attempts in total, so the per-minute window resets before the next attempt

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,3 +1,4 @@
|
|||||||
|
using System.Net.Http;
|
||||||
using System.Text;
|
using System.Text;
|
||||||
using System.Text.Json;
|
using System.Text.Json;
|
||||||
using Anthropic.SDK;
|
using Anthropic.SDK;
|
||||||
@@ -20,8 +21,9 @@ public class AiCatalogPriceCheckService : IAiCatalogPriceCheckService
|
|||||||
private readonly ILogger<AiCatalogPriceCheckService> _logger;
|
private readonly ILogger<AiCatalogPriceCheckService> _logger;
|
||||||
|
|
||||||
private const string Model = "claude-sonnet-4-6";
|
private const string Model = "claude-sonnet-4-6";
|
||||||
private const int BatchSize = 25; // 25 items × ~80 tokens (with word limits) ≈ 2000 output tokens, well within 8192
|
private const int BatchSize = 25;
|
||||||
private const int MaxConcurrentBatches = 3; // cap parallel API calls to stay within rate limits
|
private const int MaxConcurrentBatches = 1; // Tier 1 output limit is 8,000 TPM — sequential avoids bursting past it
|
||||||
|
private const int RateLimitRetrySeconds = 65; // wait just past the 60s window before retrying a 429
|
||||||
|
|
||||||
private static readonly JsonSerializerOptions JsonOpts = new() { PropertyNameCaseInsensitive = true };
|
private static readonly JsonSerializerOptions JsonOpts = new() { PropertyNameCaseInsensitive = true };
|
||||||
|
|
||||||
@@ -78,10 +80,30 @@ public class AiCatalogPriceCheckService : IAiCatalogPriceCheckService
|
|||||||
return "[]";
|
return "[]";
|
||||||
}
|
}
|
||||||
|
|
||||||
private static async Task<MessageResponse> SendAsync(AnthropicClient client, MessageParameters parameters)
|
/// <summary>
|
||||||
|
/// Sends a message to Claude with up to 3 attempts. On a rate-limit 429, waits
|
||||||
|
/// RateLimitRetrySeconds before retrying so the per-minute token window can reset.
|
||||||
|
/// </summary>
|
||||||
|
private async Task<MessageResponse> SendAsync(AnthropicClient client, MessageParameters parameters)
|
||||||
{
|
{
|
||||||
using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(90));
|
const int maxAttempts = 3;
|
||||||
return await client.Messages.GetClaudeMessageAsync(parameters, cts.Token);
|
for (var attempt = 1; attempt <= maxAttempts; attempt++)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(90));
|
||||||
|
return await client.Messages.GetClaudeMessageAsync(parameters, cts.Token);
|
||||||
|
}
|
||||||
|
catch (HttpRequestException ex) when (attempt < maxAttempts && ex.Message.Contains("rate_limit_error"))
|
||||||
|
{
|
||||||
|
_logger.LogWarning("Rate limit hit (attempt {Attempt}/{Max}), waiting {Seconds}s before retry",
|
||||||
|
attempt, maxAttempts, RateLimitRetrySeconds);
|
||||||
|
await Task.Delay(TimeSpan.FromSeconds(RateLimitRetrySeconds));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
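// NOTE: unreachable in practice — every loop iteration either returns or throws (on the last attempt the exception filter is false, so the error already propagates to the batch error handler). This trailing call only satisfies the compiler's return-path analysis.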
|
||||||
|
using var finalCts = new CancellationTokenSource(TimeSpan.FromSeconds(90));
|
||||||
|
return await client.Messages.GetClaudeMessageAsync(parameters, finalCts.Token);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <inheritdoc/>
|
/// <inheritdoc/>
|
||||||
|
|||||||
Reference in New Issue
Block a user