// PowderCoatingLogix/scripts/Prismatic Data Scraper/Infrastructure/JsonStore.cs

using System.Text.Json;
using PrismaticSync.Models;

namespace PrismaticSync.Infrastructure;

/// <summary>Loads/saves the scrape output and the URL list, with atomic writes so a crash mid-save can't corrupt them.</summary>
public static class JsonStore
{
    private static readonly JsonSerializerOptions WriteOptions = new() { WriteIndented = true };
    private static readonly JsonSerializerOptions ReadOptions = new() { PropertyNameCaseInsensitive = true };

    /// <summary>Reads the scrape output stored at <paramref name="path"/>.</summary>
    /// <param name="path">Location of the JSON output file.</param>
    /// <returns>
    /// The deserialized output, or an empty <see cref="ScrapeOutput"/> when the file does not exist
    /// or deserializes to <c>null</c>.
    /// </returns>
    /// <exception cref="InvalidOperationException">
    /// Deserialization failed. The unreadable file is first copied to a timestamped
    /// <c>.invalid-*.bak</c> sibling; the original failure is attached as the inner exception.
    /// </exception>
    public static ScrapeOutput LoadOutput(string path)
    {
        if (!File.Exists(path))
            return new ScrapeOutput();

        var json = File.ReadAllText(path);
        try
        {
            // Tolerate a bare array (older output format) as well as { results, errors }.
            // The char overload is an ordinal comparison, so the check does not depend on the current culture.
            if (json.TrimStart().StartsWith('['))
            {
                var results = JsonSerializer.Deserialize<List<ProductRecord>>(json, ReadOptions) ?? new();
                return new ScrapeOutput { Results = results };
            }

            return JsonSerializer.Deserialize<ScrapeOutput>(json, ReadOptions) ?? new ScrapeOutput();
        }
        catch (Exception ex)
        {
            // Keep a copy of the unreadable file before surfacing the error.
            var backup = $"{path}.invalid-{DateTimeOffset.UtcNow.ToUnixTimeSeconds()}.bak";
            File.Copy(path, backup, overwrite: true);

            // Pass the original exception along so its type and stack trace stay available to the caller.
            throw new InvalidOperationException($"Could not parse {path}. Backed it up to {backup}. {ex.Message}", ex);
        }
    }

    /// <summary>Serializes <paramref name="data"/> as indented JSON and writes it to <paramref name="path"/>.</summary>
    /// <param name="path">Destination file; an existing file is replaced.</param>
    /// <param name="data">The output to persist.</param>
    public static void SaveOutput(string path, ScrapeOutput data) =>
        WriteAtomically(path, JsonSerializer.Serialize(data, WriteOptions));

    /// <summary>Reads the URL list stored at <paramref name="path"/>, one URL per line.</summary>
    /// <param name="path">Location of the URL list file.</param>
    /// <returns>
    /// The cleaned URLs (see <see cref="CleanUrl"/>) in file order, without blank lines, without lines
    /// whose first non-whitespace character is <c>#</c>, and without case-insensitive duplicates.
    /// Empty when the file does not exist.
    /// </returns>
    public static List<string> LoadUrls(string path)
    {
        if (!File.Exists(path))
            return new List<string>();

        return File.ReadAllLines(path)
            // Drop comment lines up front: CleanUrl cuts at the first '#', so a check made after
            // cleaning could never see a leading '#'.
            .Where(line => !line.TrimStart().StartsWith('#'))
            .Select(CleanUrl)
            .Where(url => url.Length > 0)
            .Distinct(StringComparer.OrdinalIgnoreCase)
            .ToList();
    }

    /// <summary>
    /// Writes <paramref name="urls"/> to <paramref name="path"/>, one per line, de-duplicated and
    /// sorted case-insensitively, with a trailing newline.
    /// </summary>
    /// <param name="path">Destination file; an existing file is replaced.</param>
    /// <param name="urls">The URLs to persist; they are written as given, without cleaning.</param>
    public static void SaveUrls(string path, IEnumerable<string> urls)
    {
        var sorted = urls.Distinct(StringComparer.OrdinalIgnoreCase).OrderBy(u => u, StringComparer.OrdinalIgnoreCase);
        WriteAtomically(path, string.Join(Environment.NewLine, sorted) + Environment.NewLine);
    }

    /// <summary>Normalizes a URL by removing its query string and fragment and trimming whitespace.</summary>
    /// <param name="url">The raw URL; <c>null</c> is treated as empty.</param>
    /// <returns>Everything before the first <c>?</c> or <c>#</c>, trimmed; possibly empty.</returns>
    public static string CleanUrl(string? url) =>
        (url ?? string.Empty).Split('?')[0].Split('#')[0].Trim();

    /// <summary>
    /// Writes <paramref name="contents"/> to a sibling <c>.tmp</c> file and then moves it over
    /// <paramref name="path"/>, so the destination is only replaced once the new content is fully written.
    /// </summary>
    private static void WriteAtomically(string path, string contents)
    {
        var tmp = path + ".tmp";
        File.WriteAllText(tmp, contents);
        File.Move(tmp, path, overwrite: true);
    }
}