Add product image to powder inventory via AI lookup
When AI Lookup fetches a manufacturer product page, it now extracts the og:image (Open Graph) meta tag before stripping HTML tags. The image URL is returned in InventoryAiLookupResult.ImageUrl and automatically shown as a preview on the Create/Edit form alongside the other filled fields. The preview includes a Remove button to clear the image, and the Wrong Match? button clears it along with the other AI-filled fields. On the inventory Details page a product image card is rendered above the Stock & Pricing card whenever ImageUrl is set. The field is nullable so existing records and powders without an image are unaffected. New field: InventoryItem.ImageUrl (nvarchar, nullable). Migration: AddInventoryItemImageUrl. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -171,14 +171,19 @@ Rules:
|
||||
_logger.LogInformation("Using direct manufacturer URL: {Url}", directUrl);
|
||||
|
||||
// Fetch product page
|
||||
var pageContent = fetchUrl != null ? await FetchPageTextAsync(fetchUrl) : null;
|
||||
string? pageContent = null;
|
||||
string? pageImageUrl = null;
|
||||
if (fetchUrl != null)
|
||||
{
|
||||
(pageContent, pageImageUrl) = await FetchPageAsync(fetchUrl);
|
||||
}
|
||||
|
||||
// If direct URL fetch failed, fall back to the search fetch URL
|
||||
if (pageContent == null && directUrl != null && searchFetchUrl != null && searchFetchUrl != directUrl)
|
||||
{
|
||||
_logger.LogInformation("Direct URL fetch failed; falling back to search URL: {Url}", searchFetchUrl);
|
||||
fetchUrl = searchFetchUrl;
|
||||
pageContent = await FetchPageTextAsync(searchFetchUrl);
|
||||
(pageContent, pageImageUrl) = await FetchPageAsync(searchFetchUrl);
|
||||
}
|
||||
|
||||
var userPrompt = BuildUserPrompt(manufacturer, colorName, colorCode, partNumber, snippets, fetchUrl, pageContent);
|
||||
@@ -246,6 +251,7 @@ Rules:
|
||||
result.UnitCostPerLb = GetDecimal(parsed, "unitCostPerLb");
|
||||
result.VendorName = GetString(parsed, "vendorName");
|
||||
result.SpecPageUrl = specPageUrl;
|
||||
result.ImageUrl = pageImageUrl;
|
||||
result.Reasoning = GetString(parsed, "reasoning");
|
||||
|
||||
return result;
|
||||
@@ -488,7 +494,13 @@ Rules:
|
||||
/// A browser-like User-Agent header is sent because some manufacturer sites return 403
|
||||
/// or empty responses to bare HttpClient default agents.
|
||||
/// </summary>
|
||||
private async Task<string?> FetchPageTextAsync(string url)
|
||||
/// <summary>
|
||||
/// Fetches a product page and returns both stripped plain text (for Claude) and the
|
||||
/// best product image URL found on the page. Extracts og:image (Open Graph) first,
|
||||
/// then falls back to twitter:image. The raw HTML is processed before tag-stripping
|
||||
/// so the image URL is captured while it still exists in the markup.
|
||||
/// </summary>
|
||||
private async Task<(string? text, string? imageUrl)> FetchPageAsync(string url)
|
||||
{
|
||||
try
|
||||
{
|
||||
@@ -499,6 +511,9 @@ Rules:
|
||||
|
||||
var html = await client.GetStringAsync(url);
|
||||
|
||||
// Extract product image from Open Graph / Twitter Card meta tags
|
||||
var imageUrl = ExtractOgImageUrl(html);
|
||||
|
||||
// Extract structured data (JSON-LD) BEFORE stripping scripts — it contains
|
||||
// machine-readable price, SKU, and product info that would otherwise be lost.
|
||||
var structuredData = ExtractJsonLdData(html);
|
||||
@@ -524,17 +539,46 @@ Rules:
|
||||
if (!string.IsNullOrWhiteSpace(structuredData))
|
||||
text = structuredData + "\n" + text;
|
||||
|
||||
_logger.LogInformation("Fetched {Chars} chars from {Url} (structured data: {HasData})",
|
||||
text.Length, url, structuredData != null ? "yes" : "no");
|
||||
return text;
|
||||
_logger.LogInformation("Fetched {Chars} chars from {Url} (structured data: {HasData}, image: {HasImage})",
|
||||
text.Length, url, structuredData != null ? "yes" : "no", imageUrl != null ? "yes" : "no");
|
||||
return (text, imageUrl);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogWarning(ex, "Failed to fetch page content from {Url}", url);
|
||||
return null;
|
||||
return (null, null);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Extracts the best product image URL from raw HTML. Checks og:image first (most
/// reliable for e-commerce product pages), then twitter:image as fallback.
/// </summary>
/// <remarks>
/// Each meta tag is matched in both attribute orders (property/name before content,
/// and content before property/name). The captured attribute value is HTML-decoded
/// before validation because attribute values are entity-encoded in markup
/// (e.g. <c>&amp;amp;</c> in a query string); returning it undecoded would yield a broken link.
/// Only absolute http/https URLs are accepted; a relative or otherwise unusable value
/// is skipped and the next pattern is tried.
/// </remarks>
/// <param name="html">Raw page markup, before any tag stripping.</param>
/// <returns>The first acceptable image URL, or <c>null</c> when none is found.</returns>
private static string? ExtractOgImageUrl(string html)
{
    if (string.IsNullOrEmpty(html))
        return null;

    var patterns = new[]
    {
        @"<meta[^>]+property=[""']og:image[""'][^>]+content=[""']([^""']+)[""']",
        @"<meta[^>]+content=[""']([^""']+)[""'][^>]+property=[""']og:image[""']",
        @"<meta[^>]+name=[""']twitter:image[""'][^>]+content=[""']([^""']+)[""']",
        @"<meta[^>]+content=[""']([^""']+)[""'][^>]+name=[""']twitter:image[""']",
    };

    foreach (var pattern in patterns)
    {
        var m = System.Text.RegularExpressions.Regex.Match(
            html, pattern, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
        if (!m.Success)
            continue;

        // Attribute values are entity-encoded in HTML; decode so "&amp;" becomes "&".
        var url = System.Net.WebUtility.HtmlDecode(m.Groups[1].Value).Trim();

        // Require a well-formed absolute http(s) URI rather than a bare "http" prefix,
        // which would also let through values such as "httpfoo://..." or "http-banner.png".
        if (System.Uri.TryCreate(url, System.UriKind.Absolute, out var parsed)
            && (parsed.Scheme == System.Uri.UriSchemeHttp || parsed.Scheme == System.Uri.UriSchemeHttps))
            return url;
    }

    return null;
}
|
||||
|
||||
/// <summary>
|
||||
/// Extracts product name, SKU, and price from JSON-LD structured data blocks.
|
||||
/// Many e-commerce sites (Shopify, WooCommerce, etc.) embed this in the page HTML
|
||||
|
||||
Reference in New Issue
Block a user