Add product image to powder inventory via AI lookup

When AI Lookup fetches a manufacturer product page, it now extracts the og:image (Open Graph) meta tag, falling back to twitter:image when og:image is absent, before stripping HTML tags. The image URL is returned in InventoryAiLookupResult.ImageUrl and automatically shown as a preview on the Create/Edit form alongside the other filled fields. The preview includes a Remove button to clear the image, and the Wrong Match? button clears it along with the other AI-filled fields. On the inventory Details page a product image card is rendered above the Stock & Pricing card whenever ImageUrl is set. The field is nullable so existing records and powders without an image are unaffected. New field: InventoryItem.ImageUrl (nvarchar, nullable). Migration: AddInventoryItemImageUrl. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-29 18:15:55 -04:00
parent 9221fcc783
commit 90a06c6acd
11 changed files with 9534 additions and 10 deletions
@@ -171,14 +171,19 @@ Rules:
                _logger.LogInformation("Using direct manufacturer URL: {Url}", directUrl);

            // Fetch product page
-            var pageContent = fetchUrl != null ? await FetchPageTextAsync(fetchUrl) : null;
+            string? pageContent = null;
+            string? pageImageUrl = null;
+            if (fetchUrl != null)
+            {
+                (pageContent, pageImageUrl) = await FetchPageAsync(fetchUrl);
+            }

            // If direct URL fetch failed, fall back to the search fetch URL
            if (pageContent == null && directUrl != null && searchFetchUrl != null && searchFetchUrl != directUrl)
            {
                _logger.LogInformation("Direct URL fetch failed; falling back to search URL: {Url}", searchFetchUrl);
                fetchUrl = searchFetchUrl;
-                pageContent = await FetchPageTextAsync(searchFetchUrl);
+                (pageContent, pageImageUrl) = await FetchPageAsync(searchFetchUrl);
            }

            var userPrompt = BuildUserPrompt(manufacturer, colorName, colorCode, partNumber, snippets, fetchUrl, pageContent);
@@ -246,6 +251,7 @@ Rules:
            result.UnitCostPerLb = GetDecimal(parsed, "unitCostPerLb");
            result.VendorName = GetString(parsed, "vendorName");
            result.SpecPageUrl = specPageUrl;
+            result.ImageUrl = pageImageUrl;
            result.Reasoning = GetString(parsed, "reasoning");

            return result;
@@ -488,7 +494,13 @@ Rules:
    /// A browser-like User-Agent header is sent because some manufacturer sites return 403
    /// or empty responses to bare HttpClient default agents.
    /// </summary>
-    private async Task<string?> FetchPageTextAsync(string url)
+    /// <remarks>
+    /// Fetches a product page and returns both stripped plain text (for Claude) and the
+    /// best product image URL found on the page. Extracts og:image (Open Graph) first,
+    /// then falls back to twitter:image. The raw HTML is processed before tag-stripping
+    /// so the image URL is captured while it still exists in the markup.
+    /// </remarks>
+    private async Task<(string? text, string? imageUrl)> FetchPageAsync(string url)
    {
        try
        {
@@ -499,6 +511,9 @@ Rules:

            var html = await client.GetStringAsync(url);

+            // Extract product image from Open Graph / Twitter Card meta tags
+            var imageUrl = ExtractOgImageUrl(html);
+
            // Extract structured data (JSON-LD) BEFORE stripping scripts — it contains
            // machine-readable price, SKU, and product info that would otherwise be lost.
            var structuredData = ExtractJsonLdData(html);
@@ -524,17 +539,46 @@ Rules:
            if (!string.IsNullOrWhiteSpace(structuredData))
                text = structuredData + "\n" + text;

-            _logger.LogInformation("Fetched {Chars} chars from {Url} (structured data: {HasData})",
-                text.Length, url, structuredData != null ? "yes" : "no");
-            return text;
+            _logger.LogInformation("Fetched {Chars} chars from {Url} (structured data: {HasData}, image: {HasImage})",
+                text.Length, url, structuredData != null ? "yes" : "no", imageUrl != null ? "yes" : "no");
+            return (text, imageUrl);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to fetch page content from {Url}", url);
-            return null;
+            return (null, null);
        }
    }

+    /// <summary>
+    /// Extracts the best product image URL from raw HTML. Checks og:image first (most
+    /// reliable for e-commerce product pages), then twitter:image as fallback.
+    /// </summary>
+    /// <param name="html">Raw page markup, before any tag stripping.</param>
+    /// <returns>
+    /// The first matching image URL that is an absolute http/https URL, with HTML
+    /// entities decoded; <c>null</c> when no acceptable URL is found.
+    /// </returns>
+    private static string? ExtractOgImageUrl(string html)
+    {
+        if (string.IsNullOrEmpty(html))
+            return null;
+
+        // Each tag is tried in both attribute orders: key before content, then content before key.
+        var patterns = new[]
+        {
+            @"<meta[^>]+property=[""']og:image[""'][^>]+content=[""']([^""']+)[""']",
+            @"<meta[^>]+content=[""']([^""']+)[""'][^>]+property=[""']og:image[""']",
+            @"<meta[^>]+name=[""']twitter:image[""'][^>]+content=[""']([^""']+)[""']",
+            @"<meta[^>]+content=[""']([^""']+)[""'][^>]+name=[""']twitter:image[""']",
+        };
+
+        foreach (var pattern in patterns)
+        {
+            var m = System.Text.RegularExpressions.Regex.Match(
+                html, pattern, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
+            if (!m.Success)
+                continue;
+
+            // Attribute values are HTML-escaped in markup ("&amp;" for "&"). Decode them so
+            // the returned URL is the real address rather than an entity-laden string.
+            var url = System.Net.WebUtility.HtmlDecode(m.Groups[1].Value).Trim();
+
+            // Accept only absolute http/https URLs; a bare "http" prefix check would also
+            // let through values such as "httpfoo". Other candidates fall to the next pattern.
+            if (Uri.TryCreate(url, UriKind.Absolute, out var parsed)
+                && (parsed.Scheme == Uri.UriSchemeHttp || parsed.Scheme == Uri.UriSchemeHttps))
+                return url;
+        }
+
+        return null;
+    }
+
    /// <summary>
    /// Extracts product name, SKU, and price from JSON-LD structured data blocks.
    /// Many e-commerce sites (Shopify, WooCommerce, etc.) embed this in the page HTML