From be89327c01de168f357042a798b87f392a6ec495 Mon Sep 17 00:00:00 2001 From: Scott Pouliot Date: Wed, 27 May 2026 13:02:10 -0400 Subject: [PATCH] Fix customer import dedup: use composite keys, not exclusive tiers New logic: Tier 1 - email present: email match -> skip (unchanged) Tier 2 - email absent + phone present: name + phone composite -> skip Tier 3 - email and phone absent: name + city/state/zip composite -> warn, import anyway Tier 2 requires BOTH name and phone to match so two people sharing an office line don't falsely collide. Tier 3 warns but imports because location data is too imprecise to hard-skip on. Co-Authored-By: Claude Sonnet 4.6 --- .../Services/CsvImportService.cs | 106 +++++++++++++----- 1 file changed, 75 insertions(+), 31 deletions(-) diff --git a/src/PowderCoating.Infrastructure/Services/CsvImportService.cs b/src/PowderCoating.Infrastructure/Services/CsvImportService.cs index ac9f75d..a86276d 100644 --- a/src/PowderCoating.Infrastructure/Services/CsvImportService.cs +++ b/src/PowderCoating.Infrastructure/Services/CsvImportService.cs @@ -389,12 +389,15 @@ public class CsvImportService : ICsvImportService /// Imports customers from a CSV stream and persists valid rows to the database for the given company. /// The import uses a two-phase approach: all rows are parsed and validated first, then each validated /// entity is saved individually so that a single bad row does not roll back the entire batch. - /// Duplicate detection uses a three-tier strategy: - /// Tier 1 — email address (case-insensitive): checked against DB and within the batch; row is skipped. - /// Tier 2 — normalised phone number (last 10 digits of MobilePhone, then Phone): used only when email - /// is absent; checked against DB and within the batch; row is skipped. - /// Tier 3 — FirstName + LastName: used only when both email and phone are absent; emits a warning - /// but still imports the row because name collisions across unrelated people are common. 
+ /// Duplicate detection uses a three-tier strategy, each tier only engaged when the previous + /// identifier is absent: + /// Tier 1 — email address (case-insensitive): if email is present and matches a DB record or + /// earlier batch row the row is skipped. + /// Tier 2 — name + normalised phone composite: used when email is absent. Combining name with + /// phone prevents false positives when two people share a number (e.g. a family). + /// Row is skipped on match. + /// Tier 3 — name + city/state/zip composite: used when both email and phone are absent. + /// Location data is imprecise so this emits a warning but still imports the row. /// Pricing tiers are resolved by tier name; an unrecognised name is demoted to a warning and the /// customer is imported without a tier rather than being skipped entirely. /// Contact names are split on the first space into FirstName / LastName because the CSV carries a @@ -428,13 +431,35 @@ public class CsvImportService : ICsvImportService var existingEmails = existingCustomers.Where(c => !string.IsNullOrEmpty(c.Email)) .ToDictionary(c => c.Email!.ToLower(), c => c, StringComparer.OrdinalIgnoreCase); - // Tier 2 lookup: normalised phone → existing customer (prefer MobilePhone, fall back to Phone) - var existingPhones = new Dictionary<string, Customer>(StringComparer.Ordinal); + // Tier 2 lookup: (normalised phone + "|" + display name) → existing customer. + // Combining name with phone avoids false positives when two people share a number. + var existingByPhoneAndName = new Dictionary<string, Customer>(StringComparer.OrdinalIgnoreCase); foreach (var c in existingCustomers) { var phone = NormalizePhone(c.MobilePhone) ?? NormalizePhone(c.Phone); - if (phone != null && !existingPhones.ContainsKey(phone)) - existingPhones[phone] = c; + if (phone == null) continue; + var name = string.IsNullOrWhiteSpace(c.CompanyName) + ? 
$"{c.ContactFirstName} {c.ContactLastName}".Trim() + : c.CompanyName; + var key = $"{phone}|{name}"; + if (!existingByPhoneAndName.ContainsKey(key)) + existingByPhoneAndName[key] = c; + } + + // Tier 3 lookup: (display name + "|" + city + "|" + state + "|" + zip) → existing customer. + // Only populated when a customer has at least one location field so the key isn't trivially blank. + var existingByNameAndLocation = new Dictionary<string, Customer>(StringComparer.OrdinalIgnoreCase); + foreach (var c in existingCustomers) + { + if (string.IsNullOrWhiteSpace(c.City) && string.IsNullOrWhiteSpace(c.State) && string.IsNullOrWhiteSpace(c.ZipCode)) + continue; + var name = string.IsNullOrWhiteSpace(c.CompanyName) + ? $"{c.ContactFirstName} {c.ContactLastName}".Trim() + : c.CompanyName; + if (string.IsNullOrWhiteSpace(name)) continue; + var key = $"{name}|{c.City}|{c.State}|{c.ZipCode}"; + if (!existingByNameAndLocation.ContainsKey(key)) + existingByNameAndLocation[key] = c; } // Get pricing tiers for lookup @@ -444,9 +469,9 @@ public class CsvImportService : ICsvImportService var customersToImport = new List<(int RowNumber, Customer Customer, string Email)>(); // Within-batch tracking sets (prevent duplicate detection against rows already queued) - var batchEmails = new HashSet<string>(StringComparer.OrdinalIgnoreCase); - var batchPhones = new HashSet<string>(StringComparer.Ordinal); - var batchNames = new HashSet<string>(StringComparer.OrdinalIgnoreCase); + var batchEmails = new HashSet<string>(StringComparer.OrdinalIgnoreCase); + var batchPhoneAndName = new HashSet<string>(StringComparer.OrdinalIgnoreCase); + var batchNameAndLocation = new HashSet<string>(StringComparer.OrdinalIgnoreCase); foreach (var record in records) { @@ -472,7 +497,13 @@ public class CsvImportService : ICsvImportService cleanCompanyName = derivedName; } - // --- Tier 1: email dedup (primary key) --- + // Canonical display name used as part of composite keys in Tiers 2 and 3 + var displayName = string.IsNullOrWhiteSpace(cleanCompanyName) + ? 
$"{firstName} {lastName}".Trim() + : cleanCompanyName; + + // --- Tier 1: email dedup --- + // Only engaged when the row has an email address. if (!string.IsNullOrEmpty(cleanEmail)) { if (existingEmails.ContainsKey(cleanEmail.ToLower())) @@ -490,34 +521,42 @@ public class CsvImportService : ICsvImportService } else { - // --- Tier 2: phone dedup (when email is absent) --- - // NormalizePhone strips to digits-only and returns the last 10, so formatting - // differences like (423) 331-9834 vs 423-331-9834 are treated as the same number. + // --- Tier 2: name + phone composite dedup (email absent) --- + // Requiring both name and phone to match avoids false positives when two + // unrelated people happen to share a phone number (e.g. a shared office line). var normalizedPhone = NormalizePhone(record.MobilePhone) ?? NormalizePhone(record.Phone); - if (normalizedPhone != null) { - if (existingPhones.TryGetValue(normalizedPhone, out var existingByPhone)) + var phoneNameKey = $"{normalizedPhone}|{displayName}"; + if (existingByPhoneAndName.TryGetValue(phoneNameKey, out var existingMatch)) { - result.Warnings.Add($"Row {rowNumber}: Customer '{cleanCompanyName}' has no email; phone '{normalizedPhone}' already belongs to existing customer '{existingByPhone.CompanyName}'. Skipping."); + result.Warnings.Add($"Row {rowNumber}: '{displayName}' has no email; name + phone matches existing customer '{existingMatch.CompanyName}'. Skipping."); result.SkippedCount++; continue; } - if (batchPhones.Contains(normalizedPhone)) + if (batchPhoneAndName.Contains(phoneNameKey)) { - result.Warnings.Add($"Row {rowNumber}: Customer '{cleanCompanyName}' has no email; duplicate phone '{normalizedPhone}' found in import file. Skipping."); + result.Warnings.Add($"Row {rowNumber}: '{displayName}' has no email; duplicate name + phone found in import file. 
Skipping."); result.SkippedCount++; continue; } } else { - // --- Tier 3: name warning (no email, no phone — import anyway, flag for review) --- - var nameKey = $"{firstName}|{lastName}".ToLowerInvariant(); - var hasName = !string.IsNullOrWhiteSpace(firstName) || !string.IsNullOrWhiteSpace(lastName); - if (hasName && batchNames.Contains(nameKey)) + // --- Tier 3: name + location composite warning (no email, no phone) --- + // Location data is imprecise so we warn but still import — a name + city + // collision across unrelated people is plausible enough not to hard-skip. + var city = record.City?.Trim(); + var state = record.State?.Trim(); + var zip = record.ZipCode?.Trim(); + var hasLocation = !string.IsNullOrWhiteSpace(city) || !string.IsNullOrWhiteSpace(state) || !string.IsNullOrWhiteSpace(zip); + if (hasLocation && !string.IsNullOrWhiteSpace(displayName)) { - result.Warnings.Add($"Row {rowNumber}: Customer '{firstName} {lastName}'.Trim() has no email or phone and shares a name with another row in the import file. Imported anyway — verify manually."); + var locationKey = $"{displayName}|{city}|{state}|{zip}"; + if (existingByNameAndLocation.ContainsKey(locationKey) || batchNameAndLocation.Contains(locationKey)) + { + result.Warnings.Add($"Row {rowNumber}: '{displayName}' has no email or phone; name + location matches an existing record. Imported anyway — verify manually."); + } } } } @@ -577,12 +616,17 @@ public class CsvImportService : ICsvImportService { var normalizedPhone = NormalizePhone(record.MobilePhone) ?? 
NormalizePhone(record.Phone); if (normalizedPhone != null) - batchPhones.Add(normalizedPhone); + { + batchPhoneAndName.Add($"{normalizedPhone}|{displayName}"); + } else { - var nameKey = $"{firstName}|{lastName}".ToLowerInvariant(); - if (!string.IsNullOrWhiteSpace(nameKey.Replace("|", ""))) - batchNames.Add(nameKey); + var city = record.City?.Trim(); + var state = record.State?.Trim(); + var zip = record.ZipCode?.Trim(); + var hasLocation = !string.IsNullOrWhiteSpace(city) || !string.IsNullOrWhiteSpace(state) || !string.IsNullOrWhiteSpace(zip); + if (hasLocation && !string.IsNullOrWhiteSpace(displayName)) + batchNameAndLocation.Add($"{displayName}|{city}|{state}|{zip}"); } } }