Fix label scanner: full field mapping, vision follow-up lookup, SDS/TDS extraction

- LookupByUrlAsync now maps all identity + spec fields from Claude response
  (manufacturer, SKU, colorName, description, sdsUrl, tdsUrl, unitCostPerLb, etc.)
  Previously only augmenting fields were mapped; Columbia QR path left 80% blank
- Vision scan follow-up: after ScanLabelAsync reads label text, automatically run
  LookupAsync using the extracted manufacturer + color/SKU to fill SDS/TDS URLs,
  product page, image, description, and any specs not printed on the bag;
  label values (cure schedule, SKU) remain authoritative and are never overwritten
- SDS/TDS URL extraction: added ExtractDocumentLinks() that scans anchor tags in
  raw HTML before tag-stripping, injects found URLs as [Structured Data] lines so
  Claude can read and echo them back in the JSON response; previously all hrefs
  were lost with the HTML stripping
- Added SdsUrl/TdsUrl to InventoryAiLookupResult, Claude system prompt JSON schema,
  LookupAsync mapping, and ScanLabel response (catalog match ?? aiResult fallback)
- SDS/TDS now also stored on auto-contributed catalog entries
- jsQR inversionAttempts: 'dontInvert' → 'attemptBoth' for better QR detection
  under varying label contrast and lighting conditions

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-03 18:22:53 -04:00
parent 1fc79b77fe
commit f881b7dd53
4 changed files with 135 additions and 18 deletions
@@ -51,6 +51,8 @@ Respond ONLY with a valid JSON object — no markdown, no explanation:
""transferEfficiency"": number or null,
""unitCostPerLb"": number or null,
""vendorName"": ""string or null — the retailer or distributor name if a price was found (not the manufacturer)"",
""sdsUrl"": ""full URL to the Safety Data Sheet (SDS/MSDS) if found in the page content or links — null if not found"",
""tdsUrl"": ""full URL to the Technical Data Sheet (TDS/Spec Sheet) if found in the page content or links — null if not found"",
""reasoning"": ""one sentence: what specific product data was found and how confident you are""
}
@@ -87,6 +89,8 @@ Rules:
* Cerakote: labeled ""Item:"" followed by a short code like F-122 (letter-dash-digits).
* Other brands: look for ""SKU"", ""Item #"", ""Part #"", ""Product Code"", ""Product ID"", ""Code"", or similar labels
- colorCode: RAL code (e.g. RAL 9005), NCS code, or manufacturer's own color code. Return if known — do not infer from the color name alone.
- sdsUrl: look for links or text labeled ""SDS"", ""Safety Data Sheet"", ""MSDS"". If a [Structured Data] SDS URL line is present, use it. Return the full URL or null.
- tdsUrl: look for links or text labeled ""TDS"", ""Technical Data Sheet"", ""Spec Sheet"", ""Data Sheet"". If a [Structured Data] TDS URL line is present, use it. Return the full URL or null.
- If a field cannot be confidently determined, use null.";
public InventoryAiLookupService(
@@ -250,6 +254,8 @@ Rules:
result.TransferEfficiency = GetDecimal(parsed, "transferEfficiency");
result.UnitCostPerLb = GetDecimal(parsed, "unitCostPerLb");
result.VendorName = GetString(parsed, "vendorName");
result.SdsUrl = GetString(parsed, "sdsUrl");
result.TdsUrl = GetString(parsed, "tdsUrl");
result.SpecPageUrl = specPageUrl;
result.ImageUrl = pageImageUrl;
result.Reasoning = GetString(parsed, "reasoning");
@@ -443,16 +449,26 @@ Rules:
var parsed = JsonSerializer.Deserialize<JsonElement>(rawText);
return new InventoryAiLookupResult
{
Success = true,
Finish = GetString(parsed, "finish"),
CureTemperatureF = GetDecimal(parsed, "cureTemperatureF"),
CureTimeMinutes = GetInt(parsed, "cureTimeMinutes"),
ColorFamilies = GetString(parsed, "colorFamilies"),
RequiresClearCoat = GetBool(parsed, "requiresClearCoat"),
CoverageSqFtPerLb = GetDecimal(parsed, "coverageSqFtPerLb"),
TransferEfficiency= GetDecimal(parsed, "transferEfficiency"),
ImageUrl = pageImageUrl,
Reasoning = GetString(parsed, "reasoning"),
Success = true,
Manufacturer = GetString(parsed, "manufacturer"),
ManufacturerPartNumber = GetString(parsed, "manufacturerPartNumber"),
ColorName = GetString(parsed, "colorName"),
ColorCode = GetString(parsed, "colorCode"),
Description = GetString(parsed, "description"),
Finish = GetString(parsed, "finish"),
CureTemperatureF = GetDecimal(parsed, "cureTemperatureF"),
CureTimeMinutes = GetInt(parsed, "cureTimeMinutes"),
ColorFamilies = GetString(parsed, "colorFamilies"),
RequiresClearCoat = GetBool(parsed, "requiresClearCoat"),
CoverageSqFtPerLb = GetDecimal(parsed, "coverageSqFtPerLb"),
TransferEfficiency = GetDecimal(parsed, "transferEfficiency"),
UnitCostPerLb = GetDecimal(parsed, "unitCostPerLb"),
VendorName = GetString(parsed, "vendorName"),
SdsUrl = GetString(parsed, "sdsUrl"),
TdsUrl = GetString(parsed, "tdsUrl"),
SpecPageUrl = url,
ImageUrl = pageImageUrl,
Reasoning = GetString(parsed, "reasoning"),
};
}
catch (Exception ex)
@@ -709,6 +725,9 @@ Rules:
// Extract product image from Open Graph / Twitter Card meta tags
var imageUrl = ExtractOgImageUrl(html);
// Extract SDS/TDS document links BEFORE stripping HTML so hrefs aren't lost.
var docLinks = ExtractDocumentLinks(html, url);
// Extract structured data (JSON-LD) BEFORE stripping scripts — it contains
// machine-readable price, SKU, and product info that would otherwise be lost.
var structuredData = ExtractJsonLdData(html);
@@ -730,9 +749,11 @@ Rules:
if (text.Length > maxChars)
text = text[..maxChars] + "…";
// Prepend structured data — Claude should treat this as high-confidence
if (!string.IsNullOrWhiteSpace(structuredData))
text = structuredData + "\n" + text;
// Prepend structured data + document links — Claude treats these as high-confidence
var header = new StringBuilder();
if (!string.IsNullOrWhiteSpace(structuredData)) header.Append(structuredData);
if (!string.IsNullOrWhiteSpace(docLinks)) header.Append(docLinks);
if (header.Length > 0) text = header + "\n" + text;
_logger.LogInformation("Fetched {Chars} chars from {Url} (structured data: {HasData}, image: {HasImage})",
text.Length, url, structuredData != null ? "yes" : "no", imageUrl != null ? "yes" : "no");
@@ -774,6 +795,64 @@ Rules:
return null;
}
/// <summary>
/// Scans raw HTML for anchor tags linking to SDS or TDS documents and returns them as
/// "[Structured Data]" lines that Claude can read and echo back in its JSON response.
/// Resolves relative hrefs to absolute URLs using the page's base URL. Stops after
/// finding one SDS and one TDS to avoid returning irrelevant links.
/// </summary>
/// <remarks>
/// A link that matches any SDS keyword is never considered a TDS candidate, even when
/// an SDS URL has already been recorded. Without that rule a second SDS link such as
/// "Safety Data Sheet (Spanish)" would match the generic "data sheet" TDS keyword and
/// be reported as the TDS URL.
/// </remarks>
/// <param name="html">Raw page HTML, before any tag stripping.</param>
/// <param name="pageUrl">
/// URL the HTML was fetched from; used as the base for relative hrefs. When it is not
/// a valid absolute URI, relative hrefs cannot be resolved and are skipped.
/// </param>
/// <returns>
/// Up to two newline-terminated lines ("[Structured Data] SDS URL: ..." and
/// "[Structured Data] TDS URL: ..."), in the order the links were found, or
/// <c>null</c> when no matching http(s) link exists.
/// </returns>
private static string? ExtractDocumentLinks(string html, string pageUrl)
{
    if (string.IsNullOrEmpty(html)) return null;

    // An unparsable page URL only disables relative-href resolution; absolute links still work.
    Uri.TryCreate(pageUrl, UriKind.Absolute, out Uri? baseUri);

    var sb = new StringBuilder();
    string? sdsUrl = null, tdsUrl = null;

    var matches = System.Text.RegularExpressions.Regex.Matches(
        html,
        @"<a\s+[^>]*href=[""']([^""'#][^""']*)[""'][^>]*>([\s\S]*?)</a>",
        System.Text.RegularExpressions.RegexOptions.IgnoreCase);

    foreach (System.Text.RegularExpressions.Match m in matches)
    {
        if (sdsUrl != null && tdsUrl != null) break;

        // Attribute values are HTML-encoded in markup ("&amp;" in query strings), so decode
        // before treating the value as a URL.
        var href = System.Net.WebUtility.HtmlDecode(m.Groups[1].Value).Trim();
        if (href.Length == 0) continue;

        // Drop nested markup (e.g. <span>, <img>) so only the visible link text is matched.
        var linkText = System.Text.RegularExpressions.Regex
            .Replace(m.Groups[2].Value, @"<[^>]+>", "").Trim();

        // Resolve relative hrefs to absolute
        string absHref = href;
        if (baseUri != null && !href.StartsWith("http", StringComparison.OrdinalIgnoreCase))
        {
            if (!Uri.TryCreate(baseUri, href, out Uri? resolved)) continue;
            // AbsoluteUri keeps percent-escapes (e.g. %20); ToString() would unescape them
            // and could emit a URL containing raw spaces.
            absHref = resolved.AbsoluteUri;
        }

        // Filters out mailto:, javascript:, tel: and unresolved relative links.
        if (!absHref.StartsWith("http", StringComparison.OrdinalIgnoreCase)) continue;

        var hrefL = href.ToLowerInvariant();
        var textL = linkText.ToLowerInvariant();

        bool isSds =
            textL.Contains("sds") || textL.Contains("safety data") || textL.Contains("msds") ||
            hrefL.Contains("sds") || hrefL.Contains("safety") || hrefL.Contains("msds");

        // SDS takes priority: "safety data sheet" also contains the TDS keyword "data sheet".
        bool isTds = !isSds &&
            (textL.Contains("tds") || textL.Contains("technical data") || textL.Contains("spec sheet") ||
             textL.Contains("data sheet") || hrefL.Contains("/tds") || hrefL.Contains("technical-data") ||
             hrefL.Contains("techdata") || hrefL.Contains("datasheet"));

        if (isSds)
        {
            // Only the first SDS link is kept; later SDS links are ignored, not re-labelled.
            if (sdsUrl == null)
            {
                sdsUrl = absHref;
                sb.AppendLine($"[Structured Data] SDS URL: {absHref}");
            }
        }
        else if (isTds && tdsUrl == null)
        {
            tdsUrl = absHref;
            sb.AppendLine($"[Structured Data] TDS URL: {absHref}");
        }
    }

    return sb.Length > 0 ? sb.ToString() : null;
}
/// <summary>
/// Extracts product name, SKU, and price from JSON-LD structured data blocks.
/// Many e-commerce sites (Shopify, WooCommerce, etc.) embed this in the page HTML