Files
PowderCoatingLogix/src/PowderCoating.Infrastructure/Services/AiCatalogPriceCheckService.cs
T
spouliot 2d25f6db2b Add proactive inter-batch pacing to avoid rate limit hits
Rather than relying on reactive 65s retries, each semaphore slot is held
for at least MinBatchIntervalSeconds (20s). With 2 concurrent slots that
limits throughput to ~3 batches/min × ~2k tokens = ~6k output TPM,
safely under the 8k/min limit.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-25 22:01:22 -04:00

329 lines
15 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
using System.Net.Http;
using System.Text;
using System.Text.Json;
using Anthropic.SDK;
using Anthropic.SDK.Messaging;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.Logging;
using PowderCoating.Application.DTOs.AI;
using PowderCoating.Application.Interfaces;
namespace PowderCoating.Infrastructure.Services;
/// <summary>
/// Sends catalog items to Claude in batches of 25 and collects per-item price verdicts.
/// Each batch produces one Claude call so large catalogs stay within the model's context
/// limits. Results across all batches are merged into a single flat list before returning.
/// </summary>
public class AiCatalogPriceCheckService : IAiCatalogPriceCheckService
{
// Configuration root; GetApiKey reads the "AI:Anthropic:ApiKey" setting from it.
private readonly IConfiguration _config;
// Logger used for batch progress, rate-limit retries, and batch failures.
private readonly ILogger<AiCatalogPriceCheckService> _logger;
// Model identifier sent with every request.
private const string Model = "claude-haiku-4-5-20251001";
// Number of catalog items sent per request (AnalyzeAsync splits the input into chunks of this size).
private const int BatchSize = 25;
// Initial and maximum count of the semaphore that bounds how many batches run at once.
private const int MaxConcurrentBatches = 2;
// Seconds to wait before retrying a request that failed with a rate-limit error.
private const int RateLimitRetrySeconds = 65;
// Minimum number of seconds a concurrency slot is held per batch.
// NOTE(review): each of the MaxConcurrentBatches slots can start a new batch every 20s, so the
// aggregate ceiling looks like 2 × 3 = ~6 batches/min rather than ~3 — confirm the token math below.
private const int MinBatchIntervalSeconds = 20; // proactive pacing: ~3 batches/min × ~2k tokens = ~6k TPM, under the 8k limit
// Shared deserializer options; property names in Claude's JSON are matched case-insensitively.
private static readonly JsonSerializerOptions JsonOpts = new() { PropertyNameCaseInsensitive = true };
/// <summary>
/// Creates the service, storing the configuration root (source of the Anthropic API key)
/// and the logger for later use.
/// </summary>
/// <param name="config">Application configuration.</param>
/// <param name="logger">Logger for this service.</param>
public AiCatalogPriceCheckService(IConfiguration config, ILogger<AiCatalogPriceCheckService> logger)
    => (_config, _logger) = (config, logger);
/// <summary>
/// Reads the Anthropic API key from configuration.
/// </summary>
/// <returns>
/// The configured key, or <c>null</c> when the setting is missing, blank, or still a
/// placeholder value beginning with "your-".
/// </returns>
private string? GetApiKey()
{
    var key = _config["AI:Anthropic:ApiKey"];
    if (string.IsNullOrWhiteSpace(key))
        return null;

    // Ordinal comparison: the placeholder prefix is a fixed ASCII token, so the check must not
    // vary with the current culture's collation rules.
    return key.StartsWith("your-", StringComparison.Ordinal) ? null : key;
}
/// <summary>
/// Extracts a JSON array from Claude's response text. Scanning starts at the first '[' and
/// tracks JSON string/escape state and bracket depth, which handles:
/// (1) ```json ... ``` fences or prose before the array (skipped — scanning starts at the first '['),
/// (2) prose after the array (scanning stops when the opening '[' is balanced),
/// (3) brackets or braces that appear inside string values (ignored while inside a string),
/// (4) truncated responses with no balancing ']' — the array is cut after the last element
///     object/array that was fully closed and a ']' is appended, so every completed item is kept.
/// </summary>
/// <param name="text">Raw response text.</param>
/// <returns>
/// The extracted array text, or <c>"[]"</c> when the text contains no '[' or no element was
/// completed before truncation.
/// </returns>
private static string ExtractJsonArray(string text)
{
    if (string.IsNullOrEmpty(text)) return "[]";

    var start = text.IndexOf('[');
    if (start < 0) return "[]";

    var depth = 0;
    var inString = false;
    var escaped = false;
    var lastElementEnd = -1; // index of the most recent '}' / ']' that closed a direct child of the array

    for (var i = start; i < text.Length; i++)
    {
        var c = text[i];

        if (inString)
        {
            // Inside a string only an unescaped quote is significant.
            if (escaped) escaped = false;
            else if (c == '\\') escaped = true;
            else if (c == '"') inString = false;
            continue;
        }

        switch (c)
        {
            case '"':
                inString = true;
                break;
            case '[':
            case '{':
                depth++;
                break;
            case ']':
            case '}':
                depth--;
                if (depth == 0) return text[start..(i + 1)]; // array closed: ignore anything after it
                if (depth == 1) lastElementEnd = i;          // a top-level element just completed
                break;
        }
    }

    // Ran out of text before the array closed — the response was truncated.
    return lastElementEnd >= 0
        ? text[start..(lastElementEnd + 1)] + "]"
        : "[]";
}
/// <summary>
/// Sends a message to Claude, making up to 3 attempts. When an attempt fails with an
/// <see cref="HttpRequestException"/> whose message contains "rate_limit_error", waits
/// RateLimitRetrySeconds before retrying so the per-minute token window can reset.
/// Any other exception — or a rate-limit error on the last attempt — propagates to the caller.
/// </summary>
/// <param name="client">Anthropic client used to send the request.</param>
/// <param name="parameters">The request to send.</param>
/// <param name="cancellationToken">
/// Caller's cancellation token. It is linked with a 90-second per-attempt timeout and also
/// interrupts the wait between retries.
/// </param>
/// <returns>The response from the first attempt that succeeds.</returns>
private async Task<MessageResponse> SendAsync(
    AnthropicClient client,
    MessageParameters parameters,
    CancellationToken cancellationToken = default)
{
    const int maxAttempts = 3;
    const int requestTimeoutSeconds = 90;

    // No loop condition: the loop is left only by returning a response or by an exception
    // that the filter below does not match (non-rate-limit error, or the final attempt).
    for (var attempt = 1; ; attempt++)
    {
        try
        {
            using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
            cts.CancelAfter(TimeSpan.FromSeconds(requestTimeoutSeconds));
            return await client.Messages.GetClaudeMessageAsync(parameters, cts.Token);
        }
        catch (HttpRequestException ex) when (attempt < maxAttempts && ex.Message.Contains("rate_limit_error"))
        {
            _logger.LogWarning("Rate limit hit (attempt {Attempt}/{Max}), waiting {Seconds}s before retry",
                attempt, maxAttempts, RateLimitRetrySeconds);
            await Task.Delay(TimeSpan.FromSeconds(RateLimitRetrySeconds), cancellationToken);
        }
    }
}

/// <inheritdoc/>
public async Task<List<CatalogItemPriceVerdict>> AnalyzeAsync(
    List<CatalogItemForPriceCheck> items,
    ShopOperatingCostSummary costs,
    CancellationToken cancellationToken = default)
{
    var apiKey = GetApiKey();
    if (apiKey == null)
    {
        _logger.LogWarning("AI Catalog Price Check called but Anthropic API key is not configured.");
        return new List<CatalogItemPriceVerdict>();
    }

    // Nothing to analyze: same (empty) result as before, without building a client or prompt.
    if (items.Count == 0)
        return new List<CatalogItemPriceVerdict>();

    var client = new AnthropicClient(apiKey);
    var systemPrompt = BuildSystemPrompt(costs);

    // Split into independent batches upfront.
    var batches = items.Chunk(BatchSize).Select(chunk => chunk.ToList()).ToList();

    // Number of batches that have not yet acquired a slot. Used to skip the pacing delay when
    // no further request is waiting behind the one that just finished.
    var unstarted = batches.Count;

    // Run up to MaxConcurrentBatches in parallel. Each batch is an independent API call
    // with its own fresh MessageParameters. Task.WhenAll below completes only after every
    // batch task has finished, so disposing the semaphore at method exit is safe.
    using var semaphore = new SemaphoreSlim(MaxConcurrentBatches, MaxConcurrentBatches);
    var batchTasks = batches.Select(async (batch, index) =>
    {
        await semaphore.WaitAsync(cancellationToken);
        try
        {
            Interlocked.Decrement(ref unstarted);

            _logger.LogInformation("Starting price check batch {Index}/{Total} ({Count} items)",
                index + 1, batches.Count, batch.Count);

            var sw = System.Diagnostics.Stopwatch.StartNew();
            var result = await AnalyzeBatchAsync(client, systemPrompt, batch, cancellationToken);

            // Pace output token rate: hold the slot until MinBatchIntervalSeconds has elapsed so
            // the next batch cannot start too soon. Pointless when no batch is left to start.
            if (Volatile.Read(ref unstarted) > 0)
            {
                var pad = (int)(MinBatchIntervalSeconds * 1000 - sw.ElapsedMilliseconds);
                if (pad > 0) await Task.Delay(pad, cancellationToken);
            }

            return result;
        }
        finally
        {
            semaphore.Release();
        }
    }).ToList();

    // Task.WhenAll returns results in task order, so batch order is preserved.
    var batchResults = await Task.WhenAll(batchTasks);
    return batchResults.SelectMany(r => r).ToList();
}

/// <summary>
/// Sends one batch of catalog items to Claude and maps the returned JSON array to verdicts.
/// If the request or parsing fails, the error is logged and a placeholder verdict
/// ("ok" / "low" confidence, with an "analysis unavailable" note) is returned for every
/// item in the batch instead of throwing.
/// </summary>
/// <param name="client">Anthropic client used to send the request.</param>
/// <param name="systemPrompt">System prompt shared by all batches.</param>
/// <param name="batch">Items to analyze in this request.</param>
/// <param name="cancellationToken">
/// Caller's cancellation token. Cancellation requested through it is rethrown rather than
/// converted into placeholder verdicts.
/// </param>
/// <returns>One verdict per element Claude returned, or one placeholder per batch item on failure.</returns>
private async Task<List<CatalogItemPriceVerdict>> AnalyzeBatchAsync(
    AnthropicClient client,
    string systemPrompt,
    List<CatalogItemForPriceCheck> batch,
    CancellationToken cancellationToken = default)
{
    var userPrompt = BuildUserPrompt(batch);
    var parameters = new MessageParameters
    {
        Model = Model,
        MaxTokens = 8192,
        SystemMessage = systemPrompt,
        Messages = new List<Message>
        {
            new() { Role = RoleType.User, Content = new List<ContentBase> { new TextContent { Text = userPrompt } } }
        }
    };

    // Declared outside the try so the failure log can include whatever text was received.
    var raw = string.Empty;
    try
    {
        var response = await SendAsync(client, parameters, cancellationToken);
        raw = response.Content.OfType<TextContent>().FirstOrDefault()?.Text ?? "[]";
        var json = ExtractJsonArray(raw);
        var claudeItems = JsonSerializer.Deserialize<List<ClaudePriceCheckItem>>(json, JsonOpts) ?? new();

        return claudeItems.Select(ci =>
        {
            // Name and current price come from our own data; fall back when the id is not in the batch.
            var source = batch.FirstOrDefault(b => b.Id == ci.catalogItemId);
            return new CatalogItemPriceVerdict
            {
                CatalogItemId = ci.catalogItemId,
                Name = source?.Name ?? $"Item #{ci.catalogItemId}",
                CurrentPrice = source?.CurrentPrice ?? 0,
                Assumptions = ci.assumptions,
                EstimatedSqFtMin = ci.estimatedSqFtMin,
                EstimatedSqFtMax = ci.estimatedSqFtMax,
                EstimatedMinutesMin = ci.estimatedMinutesMin,
                EstimatedMinutesMax = ci.estimatedMinutesMax,
                CostFloor = ci.costFloor,
                Verdict = ci.verdict,
                SuggestedPriceMin = ci.suggestedPriceMin,
                SuggestedPriceMax = ci.suggestedPriceMax,
                Confidence = ci.confidence,
                Reasoning = ci.reasoning
            };
        }).ToList();
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        // The caller asked to stop — do not disguise that as a failed analysis.
        // (A per-attempt timeout leaves the caller's token untouched and falls through below.)
        throw;
    }
    catch (Exception ex)
    {
        var preview = raw.Length > 300 ? raw[..300] + "…" : raw;
        _logger.LogError(ex,
            "AI price check batch failed for items [{ItemIds}]. Raw response preview: {RawPreview}",
            string.Join(", ", batch.Select(b => b.Id)), preview);

        return batch.Select(item => new CatalogItemPriceVerdict
        {
            CatalogItemId = item.Id,
            Name = item.Name,
            CurrentPrice = item.CurrentPrice,
            Verdict = "ok",
            Confidence = "low",
            Assumptions = "Analysis unavailable for this item.",
            Reasoning = "An error occurred during analysis. Please re-run the price check."
        }).ToList();
    }
}
/// <summary>
/// Builds the system prompt: the shop's operating costs, the optional shop profile, the
/// verdict/confidence definitions, and the exact JSON schema Claude must return.
/// Numbers are formatted with the invariant culture so the prompt text does not change with
/// the server's regional settings (e.g. "45.50", never "45,50").
/// </summary>
/// <param name="costs">Operating-cost figures and pricing settings to embed in the prompt.</param>
/// <returns>The complete system prompt text.</returns>
private static string BuildSystemPrompt(ShopOperatingCostSummary costs)
{
    var sb = new StringBuilder();
    sb.AppendLine("You are a pricing consultant for a powder coating business. Your job is to review catalog items and flag potential pricing problems against the shop's actual operating costs.");
    sb.AppendLine();
    sb.AppendLine("## Shop Operating Costs");
    sb.AppendLine(FormattableString.Invariant($"- Labor rate: ${costs.LaborRatePerHour:F2}/hr"));
    sb.AppendLine(FormattableString.Invariant($"- Oven operating cost: ${costs.OvenCostPerHour:F2}/hr"));
    sb.AppendLine(FormattableString.Invariant($"- Sandblaster cost: ${costs.SandblasterCostPerHour:F2}/hr"));
    sb.AppendLine(FormattableString.Invariant($"- Coating booth cost: ${costs.CoatingBoothCostPerHour:F2}/hr"));
    sb.AppendLine(FormattableString.Invariant($"- Powder material cost: ${costs.PowderCostPerSqFt:F2}/sqft"));
    sb.AppendLine(FormattableString.Invariant($"- Shop supplies surcharge: {costs.ShopSuppliesRatePercent:F1}%"));

    // The same percentage is described as a margin or a markup depending on the pricing mode.
    if (costs.PricingMode == "margin")
        sb.AppendLine(FormattableString.Invariant($"- Target gross margin: {costs.MarkupOrMarginPercent:F1}%"));
    else
        sb.AppendLine(FormattableString.Invariant($"- Markup on material: {costs.MarkupOrMarginPercent:F1}%"));

    if (costs.ShopMinimumCharge > 0)
        sb.AppendLine(FormattableString.Invariant($"- Shop minimum charge: ${costs.ShopMinimumCharge:F2}"));

    if (!string.IsNullOrWhiteSpace(costs.AiContextProfile))
    {
        sb.AppendLine();
        sb.AppendLine("## Shop Profile");
        sb.AppendLine(costs.AiContextProfile);
    }

    sb.AppendLine();
    sb.AppendLine("## Instructions");
    sb.AppendLine("For each item, use industry knowledge to estimate a plausible surface area and processing time. Then compute a cost floor = (labor + equipment + material) using the shop's rates above. Compare the cost floor to the item's current price and return a verdict.");
    sb.AppendLine();
    sb.AppendLine("Verdict values:");
    sb.AppendLine("- \"below-cost\" — price is at or below cost floor (the shop loses money)");
    sb.AppendLine("- \"low\" — price is above cost floor but margin is thin (< the shop's target margin)");
    sb.AppendLine("- \"high\" — price appears significantly above comparable market rates (risk of losing work)");
    sb.AppendLine("- \"ok\" — price is within a reasonable range");
    sb.AppendLine();
    sb.AppendLine("Confidence values:");
    sb.AppendLine("- \"high\" — item name clearly identifies part type and standard dimensions");
    sb.AppendLine("- \"medium\" — reasonable assumptions were possible");
    sb.AppendLine("- \"low\" — item is too vague to estimate reliably (e.g., 'Custom Part', 'Job Special')");
    sb.AppendLine();
    sb.AppendLine("The \"category\" field contains the full path, e.g. \"Cerakote > Firearms\" or \"Powder Coat > Wheels\". Use this to determine the coating process — Cerakote items have a very different cost profile than standard powder coat (different equipment, cure times, and market rates). Price accordingly.");
    sb.AppendLine();
    sb.AppendLine("If the item already has an ApproximateArea or EstimatedMinutes, use those instead of guessing.");
    sb.AppendLine();
    sb.AppendLine("IMPORTANT: Keep responses concise to avoid truncation. Limit assumptions to 20 words max. Limit reasoning to 25 words max.");
    sb.AppendLine();
    sb.AppendLine("Return ONLY a JSON array — no prose, no markdown fences, nothing before or after the '['. Use this exact schema for each element:");
    // Verbatim literal: its lines are part of the prompt text and are kept exactly as they were.
    sb.AppendLine(@"{
""catalogItemId"": <int>,
""assumptions"": ""<≤20 words>"",
""estimatedSqFtMin"": <decimal>,
""estimatedSqFtMax"": <decimal>,
""estimatedMinutesMin"": <int>,
""estimatedMinutesMax"": <int>,
""costFloor"": <decimal>,
""verdict"": ""ok|low|high|below-cost"",
""suggestedPriceMin"": <decimal>,
""suggestedPriceMax"": <decimal>,
""confidence"": ""high|medium|low"",
""reasoning"": ""<≤25 words>""
}");
    return sb.ToString();
}
// Local schema — mirrors the JSON shape Claude is asked to return. Kept private to
// the Infrastructure layer because it's a transport detail, not a domain concept.
// Property names deliberately match the JSON field names in the prompt's schema.
// String properties carry initializers, which remain in effect when a field is absent
// from the response.
private sealed class ClaudePriceCheckItem
{
// Id echoed back from the request; used to look up the source item in the batch.
public int catalogItemId { get; set; }
// Short free-text note on what the model assumed about the part.
public string assumptions { get; set; } = string.Empty;
// Estimated surface-area range.
public decimal estimatedSqFtMin { get; set; }
public decimal estimatedSqFtMax { get; set; }
// Estimated processing-time range in minutes.
// NOTE(review): these are ints — a fractional value in the response would fail
// deserialization of the whole array; confirm the model always returns integers.
public int estimatedMinutesMin { get; set; }
public int estimatedMinutesMax { get; set; }
// Model-computed cost floor for the item.
public decimal costFloor { get; set; }
// One of the verdict strings listed in the prompt ("ok", "low", "high", "below-cost").
public string verdict { get; set; } = "ok";
// Suggested price range.
public decimal suggestedPriceMin { get; set; }
public decimal suggestedPriceMax { get; set; }
// One of the confidence strings listed in the prompt ("high", "medium", "low").
public string confidence { get; set; } = "medium";
// Short free-text justification for the verdict.
public string reasoning { get; set; } = string.Empty;
}
/// <summary>
/// Builds the user message for one batch: a one-line instruction followed by the batch
/// serialized as a compact JSON array containing only the fields the model needs.
/// </summary>
/// <param name="batch">Items to include in the request.</param>
/// <returns>The user prompt text.</returns>
private static string BuildUserPrompt(List<CatalogItemForPriceCheck> batch)
{
    var payload = batch.Select(item => new
    {
        catalogItemId = item.Id,
        name = item.Name,
        category = item.CategoryName,
        currentPrice = item.CurrentPrice,
        approximateAreaSqFt = item.ApproximateAreaSqFt,
        estimatedMinutes = item.EstimatedMinutes,
        requiresSandblasting = item.RequiresSandblasting,
        requiresMasking = item.RequiresMasking
    });

    // Default serializer options already produce compact (non-indented) output. Using them
    // avoids allocating a fresh JsonSerializerOptions — and rebuilding its metadata cache —
    // on every call.
    var itemsJson = JsonSerializer.Serialize(payload);

    return $"Analyze these {batch.Count} catalog items and return the JSON verdict array:\n{itemsJson}";
}
}