Fix label scanner: full field mapping, vision follow-up lookup, SDS/TDS extraction
- LookupByUrlAsync now maps all identity + spec fields from the Claude response (manufacturer, SKU, colorName, description, sdsUrl, tdsUrl, unitCostPerLb, etc.). Previously only the augmenting fields were mapped, so the Columbia QR path left 80% of the fields blank.
- Vision scan follow-up: after ScanLabelAsync reads the label text, automatically run LookupAsync using the extracted manufacturer + color/SKU to fill SDS/TDS URLs, product page, image, description, and any specs not printed on the bag. Label values (cure schedule, SKU) remain authoritative and are never overwritten.
- SDS/TDS URL extraction: added ExtractDocumentLinks(), which scans anchor tags in the raw HTML before tag-stripping and injects the URLs it finds as [Structured Data] lines so Claude can read them and echo them back in the JSON response. Previously all hrefs were lost when the HTML was stripped.
- Added SdsUrl/TdsUrl to InventoryAiLookupResult, the Claude system prompt JSON schema, the LookupAsync mapping, and the ScanLabel response (catalog match ?? aiResult fallback).
- SDS/TDS URLs are now also stored on auto-contributed catalog entries.
- jsQR inversionAttempts: 'dontInvert' → 'attemptBoth' for better QR detection under varying label contrast and lighting conditions.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -26,6 +26,8 @@ public class InventoryAiLookupResult
|
||||
public string? VendorName { get; set; } // manufacturer/vendor name for dropdown matching
|
||||
public string? SpecPageUrl { get; set; } // URL of the product page that was fetched
|
||||
public string? ImageUrl { get; set; } // og:image or first product image found on the page
|
||||
public string? SdsUrl { get; set; } // Safety Data Sheet URL if found on product page
|
||||
public string? TdsUrl { get; set; } // Technical Data Sheet URL if found on product page
|
||||
|
||||
public string? Reasoning { get; set; } // brief explanation of what was found
|
||||
}
|
||||
|
||||
@@ -51,6 +51,8 @@ Respond ONLY with a valid JSON object — no markdown, no explanation:
|
||||
""transferEfficiency"": number or null,
|
||||
""unitCostPerLb"": number or null,
|
||||
""vendorName"": ""string or null — the retailer or distributor name if a price was found (not the manufacturer)"",
|
||||
""sdsUrl"": ""full URL to the Safety Data Sheet (SDS/MSDS) if found in the page content or links — null if not found"",
|
||||
""tdsUrl"": ""full URL to the Technical Data Sheet (TDS/Spec Sheet) if found in the page content or links — null if not found"",
|
||||
""reasoning"": ""one sentence: what specific product data was found and how confident you are""
|
||||
}
|
||||
|
||||
@@ -87,6 +89,8 @@ Rules:
|
||||
* Cerakote: labeled ""Item:"" followed by a short code like F-122 (letter-dash-digits).
|
||||
* Other brands: look for ""SKU"", ""Item #"", ""Part #"", ""Product Code"", ""Product ID"", ""Code"", or similar labels
|
||||
- colorCode: RAL code (e.g. RAL 9005), NCS code, or manufacturer's own color code. Return if known — do not infer from the color name alone.
|
||||
- sdsUrl: look for links or text labeled ""SDS"", ""Safety Data Sheet"", ""MSDS"". If a [Structured Data] SDS URL line is present, use it. Return the full URL or null.
|
||||
- tdsUrl: look for links or text labeled ""TDS"", ""Technical Data Sheet"", ""Spec Sheet"", ""Data Sheet"". If a [Structured Data] TDS URL line is present, use it. Return the full URL or null.
|
||||
- If a field cannot be confidently determined, use null.";
|
||||
|
||||
public InventoryAiLookupService(
|
||||
@@ -250,6 +254,8 @@ Rules:
|
||||
result.TransferEfficiency = GetDecimal(parsed, "transferEfficiency");
|
||||
result.UnitCostPerLb = GetDecimal(parsed, "unitCostPerLb");
|
||||
result.VendorName = GetString(parsed, "vendorName");
|
||||
result.SdsUrl = GetString(parsed, "sdsUrl");
|
||||
result.TdsUrl = GetString(parsed, "tdsUrl");
|
||||
result.SpecPageUrl = specPageUrl;
|
||||
result.ImageUrl = pageImageUrl;
|
||||
result.Reasoning = GetString(parsed, "reasoning");
|
||||
@@ -443,16 +449,26 @@ Rules:
|
||||
var parsed = JsonSerializer.Deserialize<JsonElement>(rawText);
|
||||
return new InventoryAiLookupResult
|
||||
{
|
||||
Success = true,
|
||||
Finish = GetString(parsed, "finish"),
|
||||
CureTemperatureF = GetDecimal(parsed, "cureTemperatureF"),
|
||||
CureTimeMinutes = GetInt(parsed, "cureTimeMinutes"),
|
||||
ColorFamilies = GetString(parsed, "colorFamilies"),
|
||||
RequiresClearCoat = GetBool(parsed, "requiresClearCoat"),
|
||||
CoverageSqFtPerLb = GetDecimal(parsed, "coverageSqFtPerLb"),
|
||||
TransferEfficiency= GetDecimal(parsed, "transferEfficiency"),
|
||||
ImageUrl = pageImageUrl,
|
||||
Reasoning = GetString(parsed, "reasoning"),
|
||||
Success = true,
|
||||
Manufacturer = GetString(parsed, "manufacturer"),
|
||||
ManufacturerPartNumber = GetString(parsed, "manufacturerPartNumber"),
|
||||
ColorName = GetString(parsed, "colorName"),
|
||||
ColorCode = GetString(parsed, "colorCode"),
|
||||
Description = GetString(parsed, "description"),
|
||||
Finish = GetString(parsed, "finish"),
|
||||
CureTemperatureF = GetDecimal(parsed, "cureTemperatureF"),
|
||||
CureTimeMinutes = GetInt(parsed, "cureTimeMinutes"),
|
||||
ColorFamilies = GetString(parsed, "colorFamilies"),
|
||||
RequiresClearCoat = GetBool(parsed, "requiresClearCoat"),
|
||||
CoverageSqFtPerLb = GetDecimal(parsed, "coverageSqFtPerLb"),
|
||||
TransferEfficiency = GetDecimal(parsed, "transferEfficiency"),
|
||||
UnitCostPerLb = GetDecimal(parsed, "unitCostPerLb"),
|
||||
VendorName = GetString(parsed, "vendorName"),
|
||||
SdsUrl = GetString(parsed, "sdsUrl"),
|
||||
TdsUrl = GetString(parsed, "tdsUrl"),
|
||||
SpecPageUrl = url,
|
||||
ImageUrl = pageImageUrl,
|
||||
Reasoning = GetString(parsed, "reasoning"),
|
||||
};
|
||||
}
|
||||
catch (Exception ex)
|
||||
@@ -709,6 +725,9 @@ Rules:
|
||||
// Extract product image from Open Graph / Twitter Card meta tags
|
||||
var imageUrl = ExtractOgImageUrl(html);
|
||||
|
||||
// Extract SDS/TDS document links BEFORE stripping HTML so hrefs aren't lost.
|
||||
var docLinks = ExtractDocumentLinks(html, url);
|
||||
|
||||
// Extract structured data (JSON-LD) BEFORE stripping scripts — it contains
|
||||
// machine-readable price, SKU, and product info that would otherwise be lost.
|
||||
var structuredData = ExtractJsonLdData(html);
|
||||
@@ -730,9 +749,11 @@ Rules:
|
||||
if (text.Length > maxChars)
|
||||
text = text[..maxChars] + "…";
|
||||
|
||||
// Prepend structured data — Claude should treat this as high-confidence
|
||||
if (!string.IsNullOrWhiteSpace(structuredData))
|
||||
text = structuredData + "\n" + text;
|
||||
// Prepend structured data + document links — Claude treats these as high-confidence
|
||||
var header = new StringBuilder();
|
||||
if (!string.IsNullOrWhiteSpace(structuredData)) header.Append(structuredData);
|
||||
if (!string.IsNullOrWhiteSpace(docLinks)) header.Append(docLinks);
|
||||
if (header.Length > 0) text = header + "\n" + text;
|
||||
|
||||
_logger.LogInformation("Fetched {Chars} chars from {Url} (structured data: {HasData}, image: {HasImage})",
|
||||
text.Length, url, structuredData != null ? "yes" : "no", imageUrl != null ? "yes" : "no");
|
||||
@@ -774,6 +795,64 @@ Rules:
|
||||
return null;
|
||||
}
|
||||
|
||||
/// <summary>
/// Scans raw HTML for anchor tags linking to SDS or TDS documents and returns them as
/// "[Structured Data]" lines that Claude can read and echo back in its JSON response.
/// Hrefs are HTML-entity decoded and resolved to absolute http(s) URLs against the page URL.
/// Stops after finding one SDS and one TDS link to avoid returning irrelevant links.
/// </summary>
/// <param name="html">Raw page HTML, before any tag stripping. Null or empty yields <c>null</c>.</param>
/// <param name="pageUrl">URL the HTML was fetched from; used as the base for relative hrefs.</param>
/// <returns>
/// One "[Structured Data] SDS URL: ..." and/or "[Structured Data] TDS URL: ..." line
/// (each terminated by a newline), or <c>null</c> when no document link was found.
/// </returns>
private static string? ExtractDocumentLinks(string html, string pageUrl)
{
    if (string.IsNullOrEmpty(html)) return null;

    // Relative hrefs can only be resolved when the page URL is itself a valid absolute URI;
    // when it is not, baseUri stays null and only absolute links are considered.
    Uri.TryCreate(pageUrl, UriKind.Absolute, out Uri? baseUri);

    static bool IsWebUri(Uri uri) =>
        uri.Scheme == Uri.UriSchemeHttp || uri.Scheme == Uri.UriSchemeHttps;

    static bool ContainsAny(string haystack, params string[] needles)
    {
        foreach (var needle in needles)
        {
            if (haystack.Contains(needle, StringComparison.Ordinal)) return true;
        }
        return false;
    }

    var sb = new StringBuilder();
    string? sdsUrl = null, tdsUrl = null;

    // The HTML comes from an arbitrary remote site, so bound the regex run time:
    // a pathological page must not be able to stall the lookup.
    var matches = System.Text.RegularExpressions.Regex.Matches(
        html,
        @"<a\s+[^>]*href=[""']([^""'#][^""']*)[""'][^>]*>([\s\S]*?)</a>",
        System.Text.RegularExpressions.RegexOptions.IgnoreCase,
        TimeSpan.FromSeconds(2));

    try
    {
        // Matches are evaluated lazily, so a timeout surfaces while enumerating.
        foreach (System.Text.RegularExpressions.Match m in matches)
        {
            if (sdsUrl != null && tdsUrl != null) break;

            // Attribute values are HTML-encoded in markup (e.g. "&amp;" between query
            // parameters); decode them so the emitted URL is directly usable.
            var href = System.Net.WebUtility.HtmlDecode(m.Groups[1].Value).Trim();
            if (href.Length == 0) continue;

            var linkText = System.Net.WebUtility.HtmlDecode(
                System.Text.RegularExpressions.Regex.Replace(m.Groups[2].Value, @"<[^>]+>", "")).Trim();

            // Use the href as-is when it is already an absolute http(s) URL; otherwise
            // resolve it against the page URL. Checking the parsed scheme (rather than a
            // "http" string prefix) also rejects mailto:, javascript:, file:, etc.
            Uri? resolved;
            if (!Uri.TryCreate(href, UriKind.Absolute, out resolved) || resolved is null || !IsWebUri(resolved))
            {
                if (baseUri is null || !Uri.TryCreate(baseUri, href, out resolved) || resolved is null)
                    continue;
            }
            if (!IsWebUri(resolved)) continue;

            // AbsoluteUri keeps percent-escaping intact; Uri.ToString() would unescape it.
            var absHref = resolved.AbsoluteUri;

            var hrefL = href.ToLowerInvariant();
            var textL = linkText.ToLowerInvariant();

            if (sdsUrl == null &&
                (ContainsAny(textL, "sds", "safety data", "msds") ||
                 ContainsAny(hrefL, "sds", "safety", "msds")))
            {
                sdsUrl = absHref;
                sb.AppendLine($"[Structured Data] SDS URL: {absHref}");
            }
            else if (tdsUrl == null &&
                (ContainsAny(textL, "tds", "technical data", "spec sheet", "data sheet") ||
                 ContainsAny(hrefL, "/tds", "technical-data", "techdata", "datasheet")))
            {
                tdsUrl = absHref;
                sb.AppendLine($"[Structured Data] TDS URL: {absHref}");
            }
        }
    }
    catch (System.Text.RegularExpressions.RegexMatchTimeoutException)
    {
        // Best effort: keep whatever links were found before the scan timed out.
    }

    return sb.Length > 0 ? sb.ToString() : null;
}
|
||||
|
||||
/// <summary>
|
||||
/// Extracts product name, SKU, and price from JSON-LD structured data blocks.
|
||||
/// Many e-commerce sites (Shopify, WooCommerce, etc.) embed this in the page HTML
|
||||
|
||||
@@ -724,15 +724,49 @@ public class InventoryController : Controller
|
||||
|
||||
if (!string.IsNullOrWhiteSpace(qrUrl))
|
||||
{
|
||||
// QR path: fetch the product page and let Claude extract specs from its content
|
||||
// QR path: fetch the product page; LookupByUrlAsync now maps all identity + spec fields
|
||||
aiResult = await _aiLookupService.LookupByUrlAsync(qrUrl, null);
|
||||
if (aiResult.Success && aiResult.SpecPageUrl == null)
|
||||
aiResult.SpecPageUrl = qrUrl;
|
||||
}
|
||||
else if (!string.IsNullOrWhiteSpace(imageBase64))
|
||||
{
|
||||
// Vision path: Claude reads the label photo directly
|
||||
// Vision path: Claude reads what's printed on the label (limited to visible text)
|
||||
aiResult = await _aiLookupService.ScanLabelAsync(imageBase64, mediaType ?? "image/jpeg");
|
||||
|
||||
// Follow-up web lookup so we get SDS/TDS URLs, product page, image, description,
|
||||
// and any specs not printed on the label. Label values are kept as-is (authoritative);
|
||||
// the full lookup only fills fields that are still null.
|
||||
if (aiResult.Success)
|
||||
{
|
||||
var mfr = aiResult.Manufacturer ?? aiResult.VendorName;
|
||||
if (!string.IsNullOrWhiteSpace(mfr) &&
|
||||
(!string.IsNullOrWhiteSpace(aiResult.ColorName) || !string.IsNullOrWhiteSpace(aiResult.ManufacturerPartNumber)))
|
||||
{
|
||||
var full = await _aiLookupService.LookupAsync(
|
||||
mfr, aiResult.ColorName, aiResult.ColorCode, aiResult.ManufacturerPartNumber);
|
||||
if (full.Success)
|
||||
{
|
||||
aiResult.Description ??= full.Description;
|
||||
aiResult.SdsUrl ??= full.SdsUrl;
|
||||
aiResult.TdsUrl ??= full.TdsUrl;
|
||||
aiResult.ImageUrl ??= full.ImageUrl;
|
||||
aiResult.SpecPageUrl ??= full.SpecPageUrl;
|
||||
aiResult.UnitCostPerLb ??= full.UnitCostPerLb;
|
||||
aiResult.VendorName ??= full.VendorName;
|
||||
aiResult.ColorFamilies ??= full.ColorFamilies;
|
||||
aiResult.Finish ??= full.Finish;
|
||||
aiResult.CureTemperatureF ??= full.CureTemperatureF;
|
||||
aiResult.CureTimeMinutes ??= full.CureTimeMinutes;
|
||||
aiResult.RequiresClearCoat ??= full.RequiresClearCoat;
|
||||
aiResult.CoverageSqFtPerLb ??= full.CoverageSqFtPerLb;
|
||||
aiResult.TransferEfficiency ??= full.TransferEfficiency;
|
||||
aiResult.ManufacturerPartNumber ??= full.ManufacturerPartNumber;
|
||||
aiResult.ColorName ??= full.ColorName;
|
||||
aiResult.ColorCode ??= full.ColorCode;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -783,6 +817,8 @@ public class InventoryController : Controller
|
||||
TransferEfficiency= aiResult.TransferEfficiency,
|
||||
ImageUrl = aiResult.ImageUrl,
|
||||
ProductUrl = aiResult.SpecPageUrl,
|
||||
SdsUrl = aiResult.SdsUrl,
|
||||
TdsUrl = aiResult.TdsUrl,
|
||||
IsUserContributed = true,
|
||||
CreatedAt = DateTime.UtcNow,
|
||||
};
|
||||
@@ -815,8 +851,8 @@ public class InventoryController : Controller
|
||||
unitPrice = catalogMatch?.UnitPrice ?? 0m,
|
||||
imageUrl = catalogMatch?.ImageUrl ?? aiResult.ImageUrl,
|
||||
productUrl = catalogMatch?.ProductUrl ?? aiResult.SpecPageUrl,
|
||||
sdsUrl = catalogMatch?.SdsUrl,
|
||||
tdsUrl = catalogMatch?.TdsUrl,
|
||||
sdsUrl = catalogMatch?.SdsUrl ?? aiResult.SdsUrl,
|
||||
tdsUrl = catalogMatch?.TdsUrl ?? aiResult.TdsUrl,
|
||||
vendorName = manufacturer,
|
||||
wasInCatalog = wasInCatalog,
|
||||
addedToCatalog = addedToCatalog,
|
||||
|
||||
@@ -104,7 +104,7 @@
|
||||
ctx.drawImage(videoEl, 0, 0);
|
||||
const imageData = ctx.getImageData(0, 0, canvasEl.width, canvasEl.height);
|
||||
const code = window.jsQR(imageData.data, imageData.width, imageData.height, {
|
||||
inversionAttempts: 'dontInvert'
|
||||
inversionAttempts: 'attemptBoth'
|
||||
});
|
||||
|
||||
if (code && code.data) {
|
||||
|
||||
Reference in New Issue
Block a user