Fix label scanner: full field mapping, vision follow-up lookup, SDS/TDS extraction

- LookupByUrlAsync now maps all identity + spec fields from Claude response
  (manufacturer, SKU, colorName, description, sdsUrl, tdsUrl, unitCostPerLb, etc.)
  Previously only augmenting fields were mapped, so the Columbia QR path left about 80% of the fields blank
- Vision scan follow-up: after ScanLabelAsync reads label text, automatically run
  LookupAsync using the extracted manufacturer + color/SKU to fill SDS/TDS URLs,
  product page, image, description, and any specs not printed on the bag;
  label values (cure schedule, SKU) remain authoritative and are never overwritten
- SDS/TDS URL extraction: added ExtractDocumentLinks() that scans anchor tags in
  raw HTML before tag-stripping, injects found URLs as [Structured Data] lines so
  Claude can read and echo them back in the JSON response; previously all hrefs
  were lost when the HTML tags were stripped
- Added SdsUrl/TdsUrl to InventoryAiLookupResult, Claude system prompt JSON schema,
  LookupAsync mapping, and ScanLabel response (catalog match ?? aiResult fallback)
- SDS/TDS now also stored on auto-contributed catalog entries
- jsQR inversionAttempts: 'dontInvert' → 'attemptBoth' for better QR detection
  under varying label contrast and lighting conditions

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-03 18:22:53 -04:00
parent 1fc79b77fe
commit f881b7dd53
4 changed files with 135 additions and 18 deletions
@@ -26,6 +26,8 @@ public class InventoryAiLookupResult
public string? VendorName { get; set; } // manufacturer/vendor name for dropdown matching
public string? SpecPageUrl { get; set; } // URL of the product page that was fetched
public string? ImageUrl { get; set; } // og:image or first product image found on the page
public string? SdsUrl { get; set; } // Safety Data Sheet URL if found on product page
public string? TdsUrl { get; set; } // Technical Data Sheet URL if found on product page
public string? Reasoning { get; set; } // brief explanation of what was found
}
@@ -51,6 +51,8 @@ Respond ONLY with a valid JSON object — no markdown, no explanation:
""transferEfficiency"": number or null,
""unitCostPerLb"": number or null,
""vendorName"": ""string or null — the retailer or distributor name if a price was found (not the manufacturer)"",
""sdsUrl"": ""full URL to the Safety Data Sheet (SDS/MSDS) if found in the page content or links — null if not found"",
""tdsUrl"": ""full URL to the Technical Data Sheet (TDS/Spec Sheet) if found in the page content or links — null if not found"",
""reasoning"": ""one sentence: what specific product data was found and how confident you are""
}
@@ -87,6 +89,8 @@ Rules:
* Cerakote: labeled ""Item:"" followed by a short code like F-122 (letter-dash-digits).
* Other brands: look for ""SKU"", ""Item #"", ""Part #"", ""Product Code"", ""Product ID"", ""Code"", or similar labels
- colorCode: RAL code (e.g. RAL 9005), NCS code, or manufacturer's own color code. Return if known — do not infer from the color name alone.
- sdsUrl: look for links or text labeled ""SDS"", ""Safety Data Sheet"", ""MSDS"". If a [Structured Data] SDS URL line is present, use it. Return the full URL or null.
- tdsUrl: look for links or text labeled ""TDS"", ""Technical Data Sheet"", ""Spec Sheet"", ""Data Sheet"". If a [Structured Data] TDS URL line is present, use it. Return the full URL or null.
- If a field cannot be confidently determined, use null.";
public InventoryAiLookupService(
@@ -250,6 +254,8 @@ Rules:
result.TransferEfficiency = GetDecimal(parsed, "transferEfficiency");
result.UnitCostPerLb = GetDecimal(parsed, "unitCostPerLb");
result.VendorName = GetString(parsed, "vendorName");
result.SdsUrl = GetString(parsed, "sdsUrl");
result.TdsUrl = GetString(parsed, "tdsUrl");
result.SpecPageUrl = specPageUrl;
result.ImageUrl = pageImageUrl;
result.Reasoning = GetString(parsed, "reasoning");
@@ -443,16 +449,26 @@ Rules:
var parsed = JsonSerializer.Deserialize<JsonElement>(rawText);
return new InventoryAiLookupResult
{
Success = true,
Finish = GetString(parsed, "finish"),
CureTemperatureF = GetDecimal(parsed, "cureTemperatureF"),
CureTimeMinutes = GetInt(parsed, "cureTimeMinutes"),
ColorFamilies = GetString(parsed, "colorFamilies"),
RequiresClearCoat = GetBool(parsed, "requiresClearCoat"),
CoverageSqFtPerLb = GetDecimal(parsed, "coverageSqFtPerLb"),
TransferEfficiency= GetDecimal(parsed, "transferEfficiency"),
ImageUrl = pageImageUrl,
Reasoning = GetString(parsed, "reasoning"),
Success = true,
Manufacturer = GetString(parsed, "manufacturer"),
ManufacturerPartNumber = GetString(parsed, "manufacturerPartNumber"),
ColorName = GetString(parsed, "colorName"),
ColorCode = GetString(parsed, "colorCode"),
Description = GetString(parsed, "description"),
Finish = GetString(parsed, "finish"),
CureTemperatureF = GetDecimal(parsed, "cureTemperatureF"),
CureTimeMinutes = GetInt(parsed, "cureTimeMinutes"),
ColorFamilies = GetString(parsed, "colorFamilies"),
RequiresClearCoat = GetBool(parsed, "requiresClearCoat"),
CoverageSqFtPerLb = GetDecimal(parsed, "coverageSqFtPerLb"),
TransferEfficiency = GetDecimal(parsed, "transferEfficiency"),
UnitCostPerLb = GetDecimal(parsed, "unitCostPerLb"),
VendorName = GetString(parsed, "vendorName"),
SdsUrl = GetString(parsed, "sdsUrl"),
TdsUrl = GetString(parsed, "tdsUrl"),
SpecPageUrl = url,
ImageUrl = pageImageUrl,
Reasoning = GetString(parsed, "reasoning"),
};
}
catch (Exception ex)
@@ -709,6 +725,9 @@ Rules:
// Extract product image from Open Graph / Twitter Card meta tags
var imageUrl = ExtractOgImageUrl(html);
// Extract SDS/TDS document links BEFORE stripping HTML so hrefs aren't lost.
var docLinks = ExtractDocumentLinks(html, url);
// Extract structured data (JSON-LD) BEFORE stripping scripts — it contains
// machine-readable price, SKU, and product info that would otherwise be lost.
var structuredData = ExtractJsonLdData(html);
@@ -730,9 +749,11 @@ Rules:
if (text.Length > maxChars)
text = text[..maxChars] + "…";
// Prepend structured data — Claude should treat this as high-confidence
if (!string.IsNullOrWhiteSpace(structuredData))
text = structuredData + "\n" + text;
// Prepend structured data + document links — Claude treats these as high-confidence
var header = new StringBuilder();
if (!string.IsNullOrWhiteSpace(structuredData)) header.Append(structuredData);
if (!string.IsNullOrWhiteSpace(docLinks)) header.Append(docLinks);
if (header.Length > 0) text = header + "\n" + text;
_logger.LogInformation("Fetched {Chars} chars from {Url} (structured data: {HasData}, image: {HasImage})",
text.Length, url, structuredData != null ? "yes" : "no", imageUrl != null ? "yes" : "no");
@@ -774,6 +795,64 @@ Rules:
return null;
}
/// <summary>
/// Scans raw HTML for anchor tags linking to SDS or TDS documents and returns them as
/// "[Structured Data]" lines that Claude can read and echo back in its JSON response.
/// Resolves relative hrefs to absolute URLs using the page's base URL. Stops after
/// finding one SDS and one TDS to avoid returning irrelevant links.
/// </summary>
/// <param name="html">Raw page HTML, before any tag stripping. Null or empty yields null.</param>
/// <param name="pageUrl">
/// URL the HTML was fetched from. Used only as the base for resolving relative hrefs;
/// when it is not a valid absolute URL, relative links are skipped.
/// </param>
/// <returns>
/// One "[Structured Data] SDS URL: ..." and/or "[Structured Data] TDS URL: ..." line
/// (each newline-terminated, in the order the links appear in the page), or null when
/// no matching http(s) link was found.
/// </returns>
private static string? ExtractDocumentLinks(string html, string pageUrl)
{
    if (string.IsNullOrEmpty(html)) return null;

    // TryCreate instead of try/catch: an unusable page URL just means "no base".
    Uri.TryCreate(pageUrl, UriKind.Absolute, out Uri? baseUri);

    var sb = new StringBuilder();
    string? sdsUrl = null, tdsUrl = null;

    var matches = System.Text.RegularExpressions.Regex.Matches(
        html,
        @"<a\s+[^>]*href=[""']([^""'#][^""']*)[""'][^>]*>([\s\S]*?)</a>",
        System.Text.RegularExpressions.RegexOptions.IgnoreCase);

    foreach (System.Text.RegularExpressions.Match m in matches)
    {
        if (sdsUrl != null && tdsUrl != null) break;

        // Attribute values are entity-encoded in HTML ("&amp;" etc.); decode so the
        // emitted URL is the one a browser would actually request.
        var href = System.Net.WebUtility.HtmlDecode(m.Groups[1].Value).Trim();

        // Strip nested markup, decode entities, and collapse whitespace so that
        // "Safety&nbsp;Data\nSheet" still matches the "safety data" keyword.
        var linkText = System.Text.RegularExpressions.Regex
            .Replace(m.Groups[2].Value, @"<[^>]+>", "");
        linkText = System.Text.RegularExpressions.Regex
            .Replace(System.Net.WebUtility.HtmlDecode(linkText), @"\s+", " ")
            .Trim();

        // Resolve relative hrefs to absolute. Test for a real scheme prefix so a
        // relative path such as "httpdocs/file.pdf" is not mistaken for absolute.
        string absHref;
        if (href.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
            href.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
        {
            absHref = href;
        }
        else if (baseUri != null && Uri.TryCreate(baseUri, href, out Uri? resolved))
        {
            // AbsoluteUri keeps percent-escapes intact; ToString() would unescape them.
            absHref = resolved.AbsoluteUri;
        }
        else
        {
            continue;
        }

        // Drops mailto:, javascript:, tel:, file: and similar non-web targets.
        if (!absHref.StartsWith("http", StringComparison.OrdinalIgnoreCase)) continue;

        bool looksLikeSds =
            ContainsKeyword(linkText, "sds") || ContainsKeyword(linkText, "safety data") ||
            ContainsKeyword(linkText, "msds") ||
            ContainsKeyword(href, "sds") || ContainsKeyword(href, "safety") ||
            ContainsKeyword(href, "msds");

        bool explicitTds =
            ContainsKeyword(linkText, "tds") || ContainsKeyword(linkText, "technical data") ||
            ContainsKeyword(linkText, "spec sheet") ||
            ContainsKeyword(href, "/tds") || ContainsKeyword(href, "technical-data") ||
            ContainsKeyword(href, "techdata");

        // "data sheet" alone is ambiguous: "Safety Data Sheet" contains it too. Only let
        // the generic keyword count toward TDS when the link does not look like an SDS,
        // otherwise a second SDS link (duplicate, other language) is reported as the TDS.
        bool genericDataSheet =
            ContainsKeyword(linkText, "data sheet") || ContainsKeyword(href, "datasheet");
        bool looksLikeTds = explicitTds || (genericDataSheet && !looksLikeSds);

        if (sdsUrl == null && looksLikeSds)
        {
            sdsUrl = absHref;
            sb.AppendLine($"[Structured Data] SDS URL: {absHref}");
        }
        else if (tdsUrl == null && looksLikeTds)
        {
            tdsUrl = absHref;
            sb.AppendLine($"[Structured Data] TDS URL: {absHref}");
        }
    }

    return sb.Length > 0 ? sb.ToString() : null;

    // Case-insensitive ordinal substring test (keywords are plain ASCII).
    static bool ContainsKeyword(string haystack, string keyword) =>
        haystack.Contains(keyword, StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// Extracts product name, SKU, and price from JSON-LD structured data blocks.
/// Many e-commerce sites (Shopify, WooCommerce, etc.) embed this in the page HTML
@@ -724,15 +724,49 @@ public class InventoryController : Controller
if (!string.IsNullOrWhiteSpace(qrUrl))
{
// QR path: fetch the product page and let Claude extract specs from its content
// QR path: fetch the product page; LookupByUrlAsync now maps all identity + spec fields
aiResult = await _aiLookupService.LookupByUrlAsync(qrUrl, null);
if (aiResult.Success && aiResult.SpecPageUrl == null)
aiResult.SpecPageUrl = qrUrl;
}
else if (!string.IsNullOrWhiteSpace(imageBase64))
{
// Vision path: Claude reads the label photo directly
// Vision path: Claude reads what's printed on the label (limited to visible text)
aiResult = await _aiLookupService.ScanLabelAsync(imageBase64, mediaType ?? "image/jpeg");
// Follow-up web lookup so we get SDS/TDS URLs, product page, image, description,
// and any specs not printed on the label. Label values are kept as-is (authoritative);
// the full lookup only fills fields that are still null.
if (aiResult.Success)
{
var mfr = aiResult.Manufacturer ?? aiResult.VendorName;
if (!string.IsNullOrWhiteSpace(mfr) &&
(!string.IsNullOrWhiteSpace(aiResult.ColorName) || !string.IsNullOrWhiteSpace(aiResult.ManufacturerPartNumber)))
{
var full = await _aiLookupService.LookupAsync(
mfr, aiResult.ColorName, aiResult.ColorCode, aiResult.ManufacturerPartNumber);
if (full.Success)
{
aiResult.Description ??= full.Description;
aiResult.SdsUrl ??= full.SdsUrl;
aiResult.TdsUrl ??= full.TdsUrl;
aiResult.ImageUrl ??= full.ImageUrl;
aiResult.SpecPageUrl ??= full.SpecPageUrl;
aiResult.UnitCostPerLb ??= full.UnitCostPerLb;
aiResult.VendorName ??= full.VendorName;
aiResult.ColorFamilies ??= full.ColorFamilies;
aiResult.Finish ??= full.Finish;
aiResult.CureTemperatureF ??= full.CureTemperatureF;
aiResult.CureTimeMinutes ??= full.CureTimeMinutes;
aiResult.RequiresClearCoat ??= full.RequiresClearCoat;
aiResult.CoverageSqFtPerLb ??= full.CoverageSqFtPerLb;
aiResult.TransferEfficiency ??= full.TransferEfficiency;
aiResult.ManufacturerPartNumber ??= full.ManufacturerPartNumber;
aiResult.ColorName ??= full.ColorName;
aiResult.ColorCode ??= full.ColorCode;
}
}
}
}
else
{
@@ -783,6 +817,8 @@ public class InventoryController : Controller
TransferEfficiency= aiResult.TransferEfficiency,
ImageUrl = aiResult.ImageUrl,
ProductUrl = aiResult.SpecPageUrl,
SdsUrl = aiResult.SdsUrl,
TdsUrl = aiResult.TdsUrl,
IsUserContributed = true,
CreatedAt = DateTime.UtcNow,
};
@@ -815,8 +851,8 @@ public class InventoryController : Controller
unitPrice = catalogMatch?.UnitPrice ?? 0m,
imageUrl = catalogMatch?.ImageUrl ?? aiResult.ImageUrl,
productUrl = catalogMatch?.ProductUrl ?? aiResult.SpecPageUrl,
sdsUrl = catalogMatch?.SdsUrl,
tdsUrl = catalogMatch?.TdsUrl,
sdsUrl = catalogMatch?.SdsUrl ?? aiResult.SdsUrl,
tdsUrl = catalogMatch?.TdsUrl ?? aiResult.TdsUrl,
vendorName = manufacturer,
wasInCatalog = wasInCatalog,
addedToCatalog = addedToCatalog,
@@ -104,7 +104,7 @@
ctx.drawImage(videoEl, 0, 0);
const imageData = ctx.getImageData(0, 0, canvasEl.width, canvasEl.height);
const code = window.jsQR(imageData.data, imageData.width, imageData.height, {
inversionAttempts: 'dontInvert'
inversionAttempts: 'attemptBoth'
});
if (code && code.data) {