From f881b7dd5353c6404fe72dbdfa88df0e27900a3b Mon Sep 17 00:00:00 2001 From: Scott Pouliot Date: Sun, 3 May 2026 18:22:53 -0400 Subject: [PATCH] Fix label scanner: full field mapping, vision follow-up lookup, SDS/TDS extraction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - LookupByUrlAsync now maps all identity + spec fields from Claude response (manufacturer, SKU, colorName, description, sdsUrl, tdsUrl, unitCostPerLb, etc.) Previously only augmenting fields were mapped; Columbia QR path left 80% blank - Vision scan follow-up: after ScanLabelAsync reads label text, automatically run LookupAsync using the extracted manufacturer + color/SKU to fill SDS/TDS URLs, product page, image, description, and any specs not printed on the bag; label values (cure schedule, SKU) remain authoritative and are never overwritten - SDS/TDS URL extraction: added ExtractDocumentLinks() that scans anchor tags in raw HTML before tag-stripping, injects found URLs as [Structured Data] lines so Claude can read and echo them back in the JSON response; previously all hrefs were lost with the HTML stripping - Added SdsUrl/TdsUrl to InventoryAiLookupResult, Claude system prompt JSON schema, LookupAsync mapping, and ScanLabel response (catalog match ?? 
aiResult fallback) - SDS/TDS now also stored on auto-contributed catalog entries - jsQR inversionAttempts: 'dontInvert' → 'attemptBoth' for better QR detection under varying label contrast and lighting conditions Co-Authored-By: Claude Sonnet 4.6 --- .../Interfaces/IInventoryAiLookupService.cs | 2 + .../Services/InventoryAiLookupService.cs | 105 +++++++++++++++--- .../Controllers/InventoryController.cs | 44 +++++++- .../wwwroot/js/inventory-label-scan.js | 2 +- 4 files changed, 135 insertions(+), 18 deletions(-) diff --git a/src/PowderCoating.Application/Interfaces/IInventoryAiLookupService.cs b/src/PowderCoating.Application/Interfaces/IInventoryAiLookupService.cs index 1fd9208..1ad72b1 100644 --- a/src/PowderCoating.Application/Interfaces/IInventoryAiLookupService.cs +++ b/src/PowderCoating.Application/Interfaces/IInventoryAiLookupService.cs @@ -26,6 +26,8 @@ public class InventoryAiLookupResult public string? VendorName { get; set; } // manufacturer/vendor name for dropdown matching public string? SpecPageUrl { get; set; } // URL of the product page that was fetched public string? ImageUrl { get; set; } // og:image or first product image found on the page + public string? SdsUrl { get; set; } // Safety Data Sheet URL if found on product page + public string? TdsUrl { get; set; } // Technical Data Sheet URL if found on product page public string? 
Reasoning { get; set; } // brief explanation of what was found } diff --git a/src/PowderCoating.Infrastructure/Services/InventoryAiLookupService.cs b/src/PowderCoating.Infrastructure/Services/InventoryAiLookupService.cs index c028a91..26875e4 100644 --- a/src/PowderCoating.Infrastructure/Services/InventoryAiLookupService.cs +++ b/src/PowderCoating.Infrastructure/Services/InventoryAiLookupService.cs @@ -51,6 +51,8 @@ Respond ONLY with a valid JSON object — no markdown, no explanation: ""transferEfficiency"": number or null, ""unitCostPerLb"": number or null, ""vendorName"": ""string or null — the retailer or distributor name if a price was found (not the manufacturer)"", + ""sdsUrl"": ""full URL to the Safety Data Sheet (SDS/MSDS) if found in the page content or links — null if not found"", + ""tdsUrl"": ""full URL to the Technical Data Sheet (TDS/Spec Sheet) if found in the page content or links — null if not found"", ""reasoning"": ""one sentence: what specific product data was found and how confident you are"" } @@ -87,6 +89,8 @@ Rules: * Cerakote: labeled ""Item:"" followed by a short code like F-122 (letter-dash-digits). * Other brands: look for ""SKU"", ""Item #"", ""Part #"", ""Product Code"", ""Product ID"", ""Code"", or similar labels - colorCode: RAL code (e.g. RAL 9005), NCS code, or manufacturer's own color code. Return if known — do not infer from the color name alone. +- sdsUrl: look for links or text labeled ""SDS"", ""Safety Data Sheet"", ""MSDS"". If a [Structured Data] SDS URL line is present, use it. Return the full URL or null. +- tdsUrl: look for links or text labeled ""TDS"", ""Technical Data Sheet"", ""Spec Sheet"", ""Data Sheet"". If a [Structured Data] TDS URL line is present, use it. Return the full URL or null. 
- If a field cannot be confidently determined, use null."; public InventoryAiLookupService( @@ -250,6 +254,8 @@ Rules: result.TransferEfficiency = GetDecimal(parsed, "transferEfficiency"); result.UnitCostPerLb = GetDecimal(parsed, "unitCostPerLb"); result.VendorName = GetString(parsed, "vendorName"); + result.SdsUrl = GetString(parsed, "sdsUrl"); + result.TdsUrl = GetString(parsed, "tdsUrl"); result.SpecPageUrl = specPageUrl; result.ImageUrl = pageImageUrl; result.Reasoning = GetString(parsed, "reasoning"); @@ -443,16 +449,26 @@ Rules: var parsed = JsonSerializer.Deserialize(rawText); return new InventoryAiLookupResult { - Success = true, - Finish = GetString(parsed, "finish"), - CureTemperatureF = GetDecimal(parsed, "cureTemperatureF"), - CureTimeMinutes = GetInt(parsed, "cureTimeMinutes"), - ColorFamilies = GetString(parsed, "colorFamilies"), - RequiresClearCoat = GetBool(parsed, "requiresClearCoat"), - CoverageSqFtPerLb = GetDecimal(parsed, "coverageSqFtPerLb"), - TransferEfficiency= GetDecimal(parsed, "transferEfficiency"), - ImageUrl = pageImageUrl, - Reasoning = GetString(parsed, "reasoning"), + Success = true, + Manufacturer = GetString(parsed, "manufacturer"), + ManufacturerPartNumber = GetString(parsed, "manufacturerPartNumber"), + ColorName = GetString(parsed, "colorName"), + ColorCode = GetString(parsed, "colorCode"), + Description = GetString(parsed, "description"), + Finish = GetString(parsed, "finish"), + CureTemperatureF = GetDecimal(parsed, "cureTemperatureF"), + CureTimeMinutes = GetInt(parsed, "cureTimeMinutes"), + ColorFamilies = GetString(parsed, "colorFamilies"), + RequiresClearCoat = GetBool(parsed, "requiresClearCoat"), + CoverageSqFtPerLb = GetDecimal(parsed, "coverageSqFtPerLb"), + TransferEfficiency = GetDecimal(parsed, "transferEfficiency"), + UnitCostPerLb = GetDecimal(parsed, "unitCostPerLb"), + VendorName = GetString(parsed, "vendorName"), + SdsUrl = GetString(parsed, "sdsUrl"), + TdsUrl = GetString(parsed, "tdsUrl"), + SpecPageUrl = 
url, + ImageUrl = pageImageUrl, + Reasoning = GetString(parsed, "reasoning"), }; } catch (Exception ex) @@ -709,6 +725,9 @@ Rules: // Extract product image from Open Graph / Twitter Card meta tags var imageUrl = ExtractOgImageUrl(html); + // Extract SDS/TDS document links BEFORE stripping HTML so hrefs aren't lost. + var docLinks = ExtractDocumentLinks(html, url); + // Extract structured data (JSON-LD) BEFORE stripping scripts — it contains // machine-readable price, SKU, and product info that would otherwise be lost. var structuredData = ExtractJsonLdData(html); @@ -730,9 +749,11 @@ Rules: if (text.Length > maxChars) text = text[..maxChars] + "…"; - // Prepend structured data — Claude should treat this as high-confidence - if (!string.IsNullOrWhiteSpace(structuredData)) - text = structuredData + "\n" + text; + // Prepend structured data + document links — Claude treats these as high-confidence + var header = new StringBuilder(); + if (!string.IsNullOrWhiteSpace(structuredData)) header.Append(structuredData); + if (!string.IsNullOrWhiteSpace(docLinks)) header.Append(docLinks); + if (header.Length > 0) text = header + "\n" + text; _logger.LogInformation("Fetched {Chars} chars from {Url} (structured data: {HasData}, image: {HasImage})", text.Length, url, structuredData != null ? "yes" : "no", imageUrl != null ? "yes" : "no"); @@ -774,6 +795,64 @@ Rules: return null; } + /// <summary> + /// Scans raw HTML for anchor tags linking to SDS or TDS documents and returns them as + /// "[Structured Data]" lines that Claude can read and echo back in its JSON response. + /// Resolves relative hrefs to absolute URLs using the page's base URL. Stops after + /// finding one SDS and one TDS to avoid returning irrelevant links. + /// </summary> + private static string? ExtractDocumentLinks(string html, string pageUrl) + { + Uri? baseUri = null; + try { baseUri = new Uri(pageUrl); } catch { } + + var sb = new StringBuilder(); + string? 
sdsUrl = null, tdsUrl = null; + + var matches = System.Text.RegularExpressions.Regex.Matches( + html, + @"<a[^>]*href=[""']([^""'#][^""']*)[""'][^>]*>([\s\S]*?)</a>", + System.Text.RegularExpressions.RegexOptions.IgnoreCase); + + foreach (System.Text.RegularExpressions.Match m in matches) + { + if (sdsUrl != null && tdsUrl != null) break; + + var href = m.Groups[1].Value.Trim(); + var linkText = System.Text.RegularExpressions.Regex + .Replace(m.Groups[2].Value, @"<[^>]+>", "").Trim(); + + // Resolve relative hrefs to absolute + string absHref = href; + if (baseUri != null && !href.StartsWith("http", StringComparison.OrdinalIgnoreCase)) + { + try { absHref = new Uri(baseUri, href).ToString(); } catch { continue; } + } + if (!absHref.StartsWith("http", StringComparison.OrdinalIgnoreCase)) continue; + + var hrefL = href.ToLowerInvariant(); + var textL = linkText.ToLowerInvariant(); + + if (sdsUrl == null && + (textL.Contains("sds") || textL.Contains("safety data") || textL.Contains("msds") || + hrefL.Contains("sds") || hrefL.Contains("safety") || hrefL.Contains("msds"))) + { + sdsUrl = absHref; + sb.AppendLine($"[Structured Data] SDS URL: {absHref}"); + } + else if (tdsUrl == null && + (textL.Contains("tds") || textL.Contains("technical data") || textL.Contains("spec sheet") || + textL.Contains("data sheet") || hrefL.Contains("/tds") || hrefL.Contains("technical-data") || + hrefL.Contains("techdata") || hrefL.Contains("datasheet"))) + { + tdsUrl = absHref; + sb.AppendLine($"[Structured Data] TDS URL: {absHref}"); + } + } + + return sb.Length > 0 ? sb.ToString() : null; + } + /// <summary> /// Extracts product name, SKU, and price from JSON-LD structured data blocks. /// Many e-commerce sites (Shopify, WooCommerce, etc.) 
embed this in the page HTML diff --git a/src/PowderCoating.Web/Controllers/InventoryController.cs b/src/PowderCoating.Web/Controllers/InventoryController.cs index 6eeba3d..7af264c 100644 --- a/src/PowderCoating.Web/Controllers/InventoryController.cs +++ b/src/PowderCoating.Web/Controllers/InventoryController.cs @@ -724,15 +724,49 @@ public class InventoryController : Controller if (!string.IsNullOrWhiteSpace(qrUrl)) { - // QR path: fetch the product page and let Claude extract specs from its content + // QR path: fetch the product page; LookupByUrlAsync now maps all identity + spec fields aiResult = await _aiLookupService.LookupByUrlAsync(qrUrl, null); if (aiResult.Success && aiResult.SpecPageUrl == null) aiResult.SpecPageUrl = qrUrl; } else if (!string.IsNullOrWhiteSpace(imageBase64)) { - // Vision path: Claude reads the label photo directly + // Vision path: Claude reads what's printed on the label (limited to visible text) aiResult = await _aiLookupService.ScanLabelAsync(imageBase64, mediaType ?? "image/jpeg"); + + // Follow-up web lookup so we get SDS/TDS URLs, product page, image, description, + // and any specs not printed on the label. Label values are kept as-is (authoritative); + // the full lookup only fills fields that are still null. + if (aiResult.Success) + { + var mfr = aiResult.Manufacturer ?? 
aiResult.VendorName; + if (!string.IsNullOrWhiteSpace(mfr) && + (!string.IsNullOrWhiteSpace(aiResult.ColorName) || !string.IsNullOrWhiteSpace(aiResult.ManufacturerPartNumber))) + { + var full = await _aiLookupService.LookupAsync( + mfr, aiResult.ColorName, aiResult.ColorCode, aiResult.ManufacturerPartNumber); + if (full.Success) + { + aiResult.Description ??= full.Description; + aiResult.SdsUrl ??= full.SdsUrl; + aiResult.TdsUrl ??= full.TdsUrl; + aiResult.ImageUrl ??= full.ImageUrl; + aiResult.SpecPageUrl ??= full.SpecPageUrl; + aiResult.UnitCostPerLb ??= full.UnitCostPerLb; + aiResult.VendorName ??= full.VendorName; + aiResult.ColorFamilies ??= full.ColorFamilies; + aiResult.Finish ??= full.Finish; + aiResult.CureTemperatureF ??= full.CureTemperatureF; + aiResult.CureTimeMinutes ??= full.CureTimeMinutes; + aiResult.RequiresClearCoat ??= full.RequiresClearCoat; + aiResult.CoverageSqFtPerLb ??= full.CoverageSqFtPerLb; + aiResult.TransferEfficiency ??= full.TransferEfficiency; + aiResult.ManufacturerPartNumber ??= full.ManufacturerPartNumber; + aiResult.ColorName ??= full.ColorName; + aiResult.ColorCode ??= full.ColorCode; + } + } + } } else { @@ -783,6 +817,8 @@ public class InventoryController : Controller TransferEfficiency= aiResult.TransferEfficiency, ImageUrl = aiResult.ImageUrl, ProductUrl = aiResult.SpecPageUrl, + SdsUrl = aiResult.SdsUrl, + TdsUrl = aiResult.TdsUrl, IsUserContributed = true, CreatedAt = DateTime.UtcNow, }; @@ -815,8 +851,8 @@ public class InventoryController : Controller unitPrice = catalogMatch?.UnitPrice ?? 0m, imageUrl = catalogMatch?.ImageUrl ?? aiResult.ImageUrl, productUrl = catalogMatch?.ProductUrl ?? aiResult.SpecPageUrl, - sdsUrl = catalogMatch?.SdsUrl, - tdsUrl = catalogMatch?.TdsUrl, + sdsUrl = catalogMatch?.SdsUrl ?? aiResult.SdsUrl, + tdsUrl = catalogMatch?.TdsUrl ?? 
aiResult.TdsUrl, vendorName = manufacturer, wasInCatalog = wasInCatalog, addedToCatalog = addedToCatalog, diff --git a/src/PowderCoating.Web/wwwroot/js/inventory-label-scan.js b/src/PowderCoating.Web/wwwroot/js/inventory-label-scan.js index 8fd8296..e2df43c 100644 --- a/src/PowderCoating.Web/wwwroot/js/inventory-label-scan.js +++ b/src/PowderCoating.Web/wwwroot/js/inventory-label-scan.js @@ -104,7 +104,7 @@ ctx.drawImage(videoEl, 0, 0); const imageData = ctx.getImageData(0, 0, canvasEl.width, canvasEl.height); const code = window.jsQR(imageData.data, imageData.width, imageData.height, { - inversionAttempts: 'dontInvert' + inversionAttempts: 'attemptBoth' }); if (code && code.data) {