2d25f6db2b
Rather than relying on reactive 65s retries, each semaphore slot is held for at least MinBatchIntervalSeconds (20s). With 2 concurrent slots that limits throughput to ~3 batches/min × ~2k tokens = ~6k output TPM, safely under the 8k/min limit. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
329 lines
15 KiB
C#
329 lines
15 KiB
C#
using System.Net.Http;
|
||
using System.Text;
|
||
using System.Text.Json;
|
||
using Anthropic.SDK;
|
||
using Anthropic.SDK.Messaging;
|
||
using Microsoft.Extensions.Configuration;
|
||
using Microsoft.Extensions.Logging;
|
||
using PowderCoating.Application.DTOs.AI;
|
||
using PowderCoating.Application.Interfaces;
|
||
|
||
namespace PowderCoating.Infrastructure.Services;
|
||
|
||
/// <summary>
/// Sends catalog items to Claude in batches of <see cref="BatchSize"/> and collects per-item
/// price verdicts. Each batch produces one independent Claude call so large catalogs stay
/// within the model's context limits. At most <see cref="MaxConcurrentBatches"/> calls run at
/// once and batch starts are paced to stay under the per-minute output-token limit. Results
/// across all batches are merged into a single flat list before returning.
/// </summary>
public class AiCatalogPriceCheckService : IAiCatalogPriceCheckService
{
    private const string Model = "claude-haiku-4-5-20251001";
    private const int BatchSize = 25;
    private const int MaxConcurrentBatches = 2;
    private const int MaxSendAttempts = 3;
    private const int RequestTimeoutSeconds = 90;
    private const int RateLimitRetrySeconds = 65;

    // Proactive pacing target: at most one batch start per MinBatchIntervalSeconds across ALL
    // slots, i.e. ~3 batches/min × ~2k output tokens ≈ 6k TPM, under the 8k/min limit.
    private const int MinBatchIntervalSeconds = 20;

    // Each slot must be held for interval × slot-count to hit that aggregate rate. Holding a
    // slot for only MinBatchIntervalSeconds would let MaxConcurrentBatches slots start
    // 2 × 3 = 6 batches/min, which is over the limit the pacing is meant to respect.
    private static readonly TimeSpan SlotHoldTime =
        TimeSpan.FromSeconds(MinBatchIntervalSeconds * MaxConcurrentBatches);

    private static readonly JsonSerializerOptions JsonOpts = new() { PropertyNameCaseInsensitive = true };

    private readonly IConfiguration _config;
    private readonly ILogger<AiCatalogPriceCheckService> _logger;

    /// <summary>Creates the service with the configuration (API key source) and logger it needs.</summary>
    /// <exception cref="ArgumentNullException">Either dependency is null.</exception>
    public AiCatalogPriceCheckService(IConfiguration config, ILogger<AiCatalogPriceCheckService> logger)
    {
        _config = config ?? throw new ArgumentNullException(nameof(config));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Reads the Anthropic API key from configuration. Returns null when the key is missing,
    /// blank, or still a "your-..." template placeholder.
    /// </summary>
    private string? GetApiKey()
    {
        var key = _config["AI:Anthropic:ApiKey"];
        if (string.IsNullOrWhiteSpace(key) || key.StartsWith("your-", StringComparison.Ordinal))
        {
            return null;
        }

        return key;
    }

    /// <summary>
    /// Extracts a JSON array from Claude's response, handling three common failure modes:
    /// (1) ```json ... ``` fences wrapping the array,
    /// (2) prose text before or after the JSON array,
    /// (3) truncated responses where the closing ] is missing — in that case everything after
    ///     the last complete top-level element is dropped and "]" is appended, so we recover
    ///     whatever items Claude did finish.
    /// Brackets and braces inside JSON string values are ignored while scanning.
    /// </summary>
    /// <param name="text">Raw text returned by the model.</param>
    /// <returns>The array text, or "[]" when no usable array content is found.</returns>
    private static string ExtractJsonArray(string text)
    {
        if (string.IsNullOrWhiteSpace(text))
        {
            return "[]";
        }

        var trimmed = text.Trim();

        // Strip code fences. Ordinal comparison: these are markup tokens, not linguistic text.
        if (trimmed.StartsWith("```", StringComparison.Ordinal))
        {
            var firstNewline = trimmed.IndexOf('\n');
            if (firstNewline >= 0)
            {
                trimmed = trimmed[(firstNewline + 1)..];
            }

            if (trimmed.EndsWith("```", StringComparison.Ordinal))
            {
                trimmed = trimmed[..^3];
            }

            trimmed = trimmed.Trim();
        }

        var arrayStart = trimmed.IndexOf('[');
        if (arrayStart < 0)
        {
            return "[]";
        }

        // Walk the text tracking nesting depth and string state so that ']' or '}' inside a
        // string value (or in trailing prose) cannot be mistaken for structural characters.
        var depth = 0;
        var inString = false;
        var escaped = false;
        var lastCompleteElementEnd = -1;

        for (var i = arrayStart; i < trimmed.Length; i++)
        {
            var c = trimmed[i];

            if (inString)
            {
                if (escaped)
                {
                    escaped = false;
                }
                else if (c == '\\')
                {
                    escaped = true;
                }
                else if (c == '"')
                {
                    inString = false;
                }

                continue;
            }

            switch (c)
            {
                case '"':
                    inString = true;
                    break;

                case '[':
                case '{':
                    depth++;
                    break;

                case ']':
                case '}':
                    depth--;
                    if (depth == 0)
                    {
                        // The outermost array is closed — anything after it is prose.
                        return trimmed[arrayStart..(i + 1)];
                    }

                    if (depth == 1)
                    {
                        // A direct child of the array (one verdict object) just completed.
                        lastCompleteElementEnd = i;
                    }

                    break;
            }
        }

        // The array never closed — the response was truncated. Keep the complete elements.
        return lastCompleteElementEnd >= 0
            ? trimmed[arrayStart..(lastCompleteElementEnd + 1)] + "]"
            : "[]";
    }

    /// <summary>True when the exception represents an Anthropic rate-limit (HTTP 429) response.</summary>
    private static bool IsRateLimitError(HttpRequestException ex) =>
        ex.StatusCode == System.Net.HttpStatusCode.TooManyRequests
        || ex.Message.Contains("rate_limit_error", StringComparison.Ordinal);

    /// <summary>
    /// Sends a message to Claude with up to <see cref="MaxSendAttempts"/> attempts. On a
    /// rate-limit error, waits <see cref="RateLimitRetrySeconds"/> before retrying so the
    /// per-minute token window can reset. Each attempt is limited to
    /// <see cref="RequestTimeoutSeconds"/>; the caller's token cancels both the request and
    /// the retry wait.
    /// </summary>
    /// <param name="client">Anthropic client to send with.</param>
    /// <param name="parameters">The request to send.</param>
    /// <param name="cancellationToken">Caller cancellation.</param>
    /// <returns>The model response of the first successful attempt.</returns>
    /// <remarks>
    /// Any non-rate-limit failure, and a rate-limit failure on the last attempt, propagates
    /// to the caller.
    /// </remarks>
    private async Task<MessageResponse> SendAsync(
        AnthropicClient client,
        MessageParameters parameters,
        CancellationToken cancellationToken = default)
    {
        for (var attempt = 1; ; attempt++)
        {
            try
            {
                using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
                cts.CancelAfter(TimeSpan.FromSeconds(RequestTimeoutSeconds));
                return await client.Messages.GetClaudeMessageAsync(parameters, cts.Token);
            }
            catch (HttpRequestException ex) when (attempt < MaxSendAttempts && IsRateLimitError(ex))
            {
                _logger.LogWarning("Rate limit hit (attempt {Attempt}/{Max}), waiting {Seconds}s before retry",
                    attempt, MaxSendAttempts, RateLimitRetrySeconds);
                await Task.Delay(TimeSpan.FromSeconds(RateLimitRetrySeconds), cancellationToken);
            }
        }
    }

    /// <inheritdoc/>
    public async Task<List<CatalogItemPriceVerdict>> AnalyzeAsync(
        List<CatalogItemForPriceCheck> items,
        ShopOperatingCostSummary costs,
        CancellationToken cancellationToken = default)
    {
        var apiKey = GetApiKey();
        if (apiKey == null)
        {
            _logger.LogWarning("AI Catalog Price Check called but Anthropic API key is not configured.");
            return new List<CatalogItemPriceVerdict>();
        }

        ArgumentNullException.ThrowIfNull(items);
        ArgumentNullException.ThrowIfNull(costs);

        if (items.Count == 0)
        {
            return new List<CatalogItemPriceVerdict>();
        }

        var client = new AnthropicClient(apiKey);
        var systemPrompt = BuildSystemPrompt(costs);

        // Split into independent batches upfront.
        var batches = items.Chunk(BatchSize).Select(chunk => chunk.ToList()).ToList();

        // Run up to MaxConcurrentBatches in parallel. Each batch is an independent API call
        // with its own fresh MessageParameters — no shared state, no growing context.
        using var semaphore = new SemaphoreSlim(MaxConcurrentBatches, MaxConcurrentBatches);

        // Number of batches that have not acquired a slot yet. Once it reaches zero nobody is
        // waiting, so holding a slot any longer would only delay the final result.
        var batchesNotStarted = batches.Count;

        var batchTasks = batches.Select(async (batch, index) =>
        {
            await semaphore.WaitAsync(cancellationToken);
            try
            {
                Interlocked.Decrement(ref batchesNotStarted);

                _logger.LogInformation("Starting price check batch {Index}/{Total} ({Count} items)",
                    index + 1, batches.Count, batch.Count);

                var stopwatch = System.Diagnostics.Stopwatch.StartNew();
                var verdicts = await AnalyzeBatchAsync(client, systemPrompt, batch, cancellationToken);

                // Pace output token rate: keep the slot until SlotHoldTime has elapsed so the
                // next queued batch cannot start too early. Skipped when nothing is queued.
                if (Volatile.Read(ref batchesNotStarted) > 0)
                {
                    var remainingHold = SlotHoldTime - stopwatch.Elapsed;
                    if (remainingHold > TimeSpan.Zero)
                    {
                        await Task.Delay(remainingHold, cancellationToken);
                    }
                }

                return verdicts;
            }
            finally
            {
                semaphore.Release();
            }
        }).ToList();

        // Task.WhenAll returns results in task order, which preserves original catalog order.
        var batchResults = await Task.WhenAll(batchTasks);
        return batchResults.SelectMany(verdicts => verdicts).ToList();
    }

    /// <summary>
    /// Analyzes one batch with a single Claude call and maps the response to verdicts.
    /// Batch items the response does not mention, and every item when the call or parsing
    /// fails, get a low-confidence "analysis unavailable" verdict instead of being dropped.
    /// </summary>
    /// <param name="client">Anthropic client to send with.</param>
    /// <param name="systemPrompt">System prompt shared by all batches.</param>
    /// <param name="batch">Items to analyze in this call.</param>
    /// <param name="cancellationToken">Caller cancellation; propagates instead of being turned into fallback verdicts.</param>
    private async Task<List<CatalogItemPriceVerdict>> AnalyzeBatchAsync(
        AnthropicClient client,
        string systemPrompt,
        List<CatalogItemForPriceCheck> batch,
        CancellationToken cancellationToken = default)
    {
        var parameters = new MessageParameters
        {
            Model = Model,
            MaxTokens = 8192,
            SystemMessage = systemPrompt,
            Messages = new List<Message>
            {
                new()
                {
                    Role = RoleType.User,
                    Content = new List<ContentBase> { new TextContent { Text = BuildUserPrompt(batch) } }
                }
            }
        };

        var raw = string.Empty;
        try
        {
            var response = await SendAsync(client, parameters, cancellationToken);
            raw = response.Content.OfType<TextContent>().FirstOrDefault()?.Text ?? "[]";

            var claudeItems =
                JsonSerializer.Deserialize<List<ClaudePriceCheckItem>>(ExtractJsonArray(raw), JsonOpts) ?? new();

            var verdicts = claudeItems.Select(ci =>
            {
                var source = batch.FirstOrDefault(b => b.Id == ci.CatalogItemId);
                return new CatalogItemPriceVerdict
                {
                    CatalogItemId = ci.CatalogItemId,
                    Name = source?.Name ?? $"Item #{ci.CatalogItemId}",
                    CurrentPrice = source?.CurrentPrice ?? 0,
                    Assumptions = ci.Assumptions,
                    EstimatedSqFtMin = ci.EstimatedSqFtMin,
                    EstimatedSqFtMax = ci.EstimatedSqFtMax,
                    EstimatedMinutesMin = ci.EstimatedMinutesMin,
                    EstimatedMinutesMax = ci.EstimatedMinutesMax,
                    CostFloor = ci.CostFloor,
                    Verdict = ci.Verdict,
                    SuggestedPriceMin = ci.SuggestedPriceMin,
                    SuggestedPriceMax = ci.SuggestedPriceMax,
                    Confidence = ci.Confidence,
                    Reasoning = ci.Reasoning
                };
            }).ToList();

            // A truncated or sloppy response can omit items; surface them rather than
            // silently returning fewer verdicts than were requested.
            var missing = batch.Where(b => !claudeItems.Any(ci => ci.CatalogItemId == b.Id)).ToList();
            if (missing.Count > 0)
            {
                _logger.LogWarning(
                    "AI price check response omitted {MissingCount} of {BatchCount} items [{ItemIds}]",
                    missing.Count, batch.Count, string.Join(", ", missing.Select(b => b.Id)));

                verdicts.AddRange(missing.Select(item => CreateUnavailableVerdict(
                    item, "The AI response did not include this item. Please re-run the price check.")));
            }

            return verdicts;
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // The caller asked to stop — do not disguise that as a completed analysis.
            // (A per-request timeout does not set this token and falls through below.)
            throw;
        }
        catch (Exception ex)
        {
            var preview = raw.Length > 300 ? raw[..300] + "…" : raw;
            _logger.LogError(ex,
                "AI price check batch failed for items [{ItemIds}]. Raw response preview: {RawPreview}",
                string.Join(", ", batch.Select(b => b.Id)), preview);

            return batch
                .Select(item => CreateUnavailableVerdict(
                    item, "An error occurred during analysis. Please re-run the price check."))
                .ToList();
        }
    }

    /// <summary>Builds the low-confidence placeholder verdict used when an item could not be analyzed.</summary>
    private static CatalogItemPriceVerdict CreateUnavailableVerdict(CatalogItemForPriceCheck item, string reasoning) =>
        new()
        {
            CatalogItemId = item.Id,
            Name = item.Name,
            CurrentPrice = item.CurrentPrice,
            Verdict = "ok",
            Confidence = "low",
            Assumptions = "Analysis unavailable for this item.",
            Reasoning = reasoning
        };

    /// <summary>
    /// Builds the system prompt: the shop's operating costs, optional shop profile, the
    /// verdict/confidence vocabulary, and the exact JSON schema Claude must return.
    /// Numbers are formatted with the invariant culture so the prompt does not change with
    /// the server's or request's current culture.
    /// </summary>
    private static string BuildSystemPrompt(ShopOperatingCostSummary costs)
    {
        var sb = new StringBuilder();

        // Appends one line with culture-invariant number formatting.
        void Line(FormattableString text) => sb.AppendLine(FormattableString.Invariant(text));

        sb.AppendLine("You are a pricing consultant for a powder coating business. Your job is to review catalog items and flag potential pricing problems against the shop's actual operating costs.");
        sb.AppendLine();
        sb.AppendLine("## Shop Operating Costs");
        Line($"- Labor rate: ${costs.LaborRatePerHour:F2}/hr");
        Line($"- Oven operating cost: ${costs.OvenCostPerHour:F2}/hr");
        Line($"- Sandblaster cost: ${costs.SandblasterCostPerHour:F2}/hr");
        Line($"- Coating booth cost: ${costs.CoatingBoothCostPerHour:F2}/hr");
        Line($"- Powder material cost: ${costs.PowderCostPerSqFt:F2}/sqft");
        Line($"- Shop supplies surcharge: {costs.ShopSuppliesRatePercent:F1}%");

        if (costs.PricingMode == "margin")
        {
            Line($"- Target gross margin: {costs.MarkupOrMarginPercent:F1}%");
        }
        else
        {
            Line($"- Markup on material: {costs.MarkupOrMarginPercent:F1}%");
        }

        if (costs.ShopMinimumCharge > 0)
        {
            Line($"- Shop minimum charge: ${costs.ShopMinimumCharge:F2}");
        }

        if (!string.IsNullOrWhiteSpace(costs.AiContextProfile))
        {
            sb.AppendLine();
            sb.AppendLine("## Shop Profile");
            sb.AppendLine(costs.AiContextProfile);
        }

        sb.AppendLine();
        sb.AppendLine("## Instructions");
        sb.AppendLine("For each item, use industry knowledge to estimate a plausible surface area and processing time. Then compute a cost floor = (labor + equipment + material) using the shop's rates above. Compare the cost floor to the item's current price and return a verdict.");
        sb.AppendLine();
        sb.AppendLine("Verdict values:");
        sb.AppendLine("- \"below-cost\" — price is at or below cost floor (the shop loses money)");
        sb.AppendLine("- \"low\" — price is above cost floor but margin is thin (< the shop's target margin)");
        sb.AppendLine("- \"high\" — price appears significantly above comparable market rates (risk of losing work)");
        sb.AppendLine("- \"ok\" — price is within a reasonable range");
        sb.AppendLine();
        sb.AppendLine("Confidence values:");
        sb.AppendLine("- \"high\" — item name clearly identifies part type and standard dimensions");
        sb.AppendLine("- \"medium\" — reasonable assumptions were possible");
        sb.AppendLine("- \"low\" — item is too vague to estimate reliably (e.g., 'Custom Part', 'Job Special')");
        sb.AppendLine();
        sb.AppendLine("The \"category\" field contains the full path, e.g. \"Cerakote > Firearms\" or \"Powder Coat > Wheels\". Use this to determine the coating process — Cerakote items have a very different cost profile than standard powder coat (different equipment, cure times, and market rates). Price accordingly.");
        sb.AppendLine();
        sb.AppendLine("If the item already has an ApproximateArea or EstimatedMinutes, use those instead of guessing.");
        sb.AppendLine();
        sb.AppendLine("IMPORTANT: Keep responses concise to avoid truncation. Limit assumptions to 20 words max. Limit reasoning to 25 words max.");
        sb.AppendLine();
        sb.AppendLine("Return ONLY a JSON array — no prose, no markdown fences, nothing before or after the '['. Use this exact schema for each element:");
        sb.AppendLine("{");
        sb.AppendLine("  \"catalogItemId\": <int>,");
        sb.AppendLine("  \"assumptions\": \"<≤20 words>\",");
        sb.AppendLine("  \"estimatedSqFtMin\": <decimal>,");
        sb.AppendLine("  \"estimatedSqFtMax\": <decimal>,");
        sb.AppendLine("  \"estimatedMinutesMin\": <int>,");
        sb.AppendLine("  \"estimatedMinutesMax\": <int>,");
        sb.AppendLine("  \"costFloor\": <decimal>,");
        sb.AppendLine("  \"verdict\": \"ok|low|high|below-cost\",");
        sb.AppendLine("  \"suggestedPriceMin\": <decimal>,");
        sb.AppendLine("  \"suggestedPriceMax\": <decimal>,");
        sb.AppendLine("  \"confidence\": \"high|medium|low\",");
        sb.AppendLine("  \"reasoning\": \"<≤25 words>\"");
        sb.AppendLine("}");

        return sb.ToString();
    }

    /// <summary>
    /// Builds the user prompt: the batch serialized as a compact JSON array using the field
    /// names the system prompt refers to.
    /// </summary>
    private static string BuildUserPrompt(List<CatalogItemForPriceCheck> batch)
    {
        // Default serializer options already produce compact (non-indented) JSON.
        var itemsJson = JsonSerializer.Serialize(batch.Select(item => new
        {
            catalogItemId = item.Id,
            name = item.Name,
            category = item.CategoryName,
            currentPrice = item.CurrentPrice,
            approximateAreaSqFt = item.ApproximateAreaSqFt,
            estimatedMinutes = item.EstimatedMinutes,
            requiresSandblasting = item.RequiresSandblasting,
            requiresMasking = item.RequiresMasking
        }));

        return $"Analyze these {batch.Count} catalog items and return the JSON verdict array:\n{itemsJson}";
    }

    // Local schema — mirrors the JSON shape Claude is asked to return. Kept private to
    // the Infrastructure layer because it's a transport detail, not a domain concept.
    // Property names are PascalCase; JsonOpts matches the camelCase JSON case-insensitively.
    private sealed class ClaudePriceCheckItem
    {
        public int CatalogItemId { get; set; }
        public string Assumptions { get; set; } = string.Empty;
        public decimal EstimatedSqFtMin { get; set; }
        public decimal EstimatedSqFtMax { get; set; }
        public int EstimatedMinutesMin { get; set; }
        public int EstimatedMinutesMax { get; set; }
        public decimal CostFloor { get; set; }
        public string Verdict { get; set; } = "ok";
        public decimal SuggestedPriceMin { get; set; }
        public decimal SuggestedPriceMax { get; set; }
        public string Confidence { get; set; } = "medium";
        public string Reasoning { get; set; } = string.Empty;
    }
}
|