Improve customer import duplicate detection to 3-tier strategy

Tier 1 (email): existing behavior, now uses HashSet instead of O(n²) .Any()
Tier 2 (phone): when email is absent, deduplicate by normalised phone number
  (last 10 digits of MobilePhone then Phone) against both DB and within-batch
Tier 3 (name): when both email and phone are absent, warn but still import

Fixes customers with no email being silently skipped or left undetected as
duplicates. NormalizePhone strips formatting so (423) 331-9834 and
423-331-9834 match correctly.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-27 12:48:40 -04:00
parent 972123c7a2
commit 8f955851e5
@@ -389,8 +389,12 @@ public class CsvImportService : ICsvImportService
/// Imports customers from a CSV stream and persists valid rows to the database for the given company.
/// The import uses a two-phase approach: all rows are parsed and validated first, then each validated
/// entity is saved individually so that a single bad row does not roll back the entire batch.
/// Duplicate detection runs against both existing DB records (by email) and within the import file
/// itself, catching cases where the same email appears twice in one upload.
/// Duplicate detection uses a three-tier strategy:
/// Tier 1 — email address (case-insensitive): checked against DB and within the batch; row is skipped.
/// Tier 2 — normalised phone number (last 10 digits of MobilePhone, then Phone): used only when email
/// is absent; checked against DB and within the batch; row is skipped.
/// Tier 3 — FirstName + LastName: used only when both email and phone are absent; emits a warning
/// but still imports the row because name collisions across unrelated people are common.
/// Pricing tiers are resolved by tier name; an unrecognised name is demoted to a warning and the
/// customer is imported without a tier rather than being skipped entirely.
/// Contact names are split on the first space into FirstName / LastName because the CSV carries a
@@ -419,15 +423,31 @@ public class CsvImportService : ICsvImportService
// Get all existing customers for duplicate detection
var existingCustomers = await _unitOfWork.Customers.GetAllAsync();
// Tier 1 lookup: email → existing customer
var existingEmails = existingCustomers.Where(c => !string.IsNullOrEmpty(c.Email))
.ToDictionary(c => c.Email!.ToLower(), c => c, StringComparer.OrdinalIgnoreCase);
// Tier 2 lookup: normalised phone → existing customer (prefer MobilePhone, fall back to Phone)
var existingPhones = new Dictionary<string, Customer>(StringComparer.Ordinal);
foreach (var c in existingCustomers)
{
var phone = NormalizePhone(c.MobilePhone) ?? NormalizePhone(c.Phone);
if (phone != null && !existingPhones.ContainsKey(phone))
existingPhones[phone] = c;
}
// Get pricing tiers for lookup
var pricingTiers = await _unitOfWork.PricingTiers.GetAllAsync();
var pricingTierDict = pricingTiers.ToDictionary(pt => pt.TierName.ToUpper(), pt => pt, StringComparer.OrdinalIgnoreCase);
var customersToImport = new List<(int RowNumber, Customer Customer, string Email)>();
// Within-batch tracking sets: keys of rows already queued for import, so later rows in the same file can be detected as duplicates
var batchEmails = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
var batchPhones = new HashSet<string>(StringComparer.Ordinal);
var batchNames = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
foreach (var record in records)
{
rowNumber++;
@@ -452,20 +472,54 @@ public class CsvImportService : ICsvImportService
cleanCompanyName = derivedName;
}
// Check for duplicate email in existing data
if (!string.IsNullOrEmpty(cleanEmail) && existingEmails.ContainsKey(cleanEmail.ToLower()))
// --- Tier 1: email dedup (primary key) ---
if (!string.IsNullOrEmpty(cleanEmail))
{
result.Warnings.Add($"Row {rowNumber}: Customer with email '{cleanEmail}' already exists in database. Skipping.");
result.SkippedCount++;
continue;
if (existingEmails.ContainsKey(cleanEmail.ToLower()))
{
result.Warnings.Add($"Row {rowNumber}: Customer with email '{cleanEmail}' already exists in database. Skipping.");
result.SkippedCount++;
continue;
}
if (batchEmails.Contains(cleanEmail))
{
result.Warnings.Add($"Row {rowNumber}: Duplicate email '{cleanEmail}' found in import file. Skipping.");
result.SkippedCount++;
continue;
}
}
// Check for duplicate email within the import batch
if (!string.IsNullOrEmpty(cleanEmail) && customersToImport.Any(x => x.Email.Equals(cleanEmail, StringComparison.OrdinalIgnoreCase)))
else
{
result.Warnings.Add($"Row {rowNumber}: Duplicate email '{cleanEmail}' found in import file. Skipping.");
result.SkippedCount++;
continue;
// --- Tier 2: phone dedup (when email is absent) ---
// NormalizePhone strips to digits-only and returns the last 10, so formatting
// differences like (423) 331-9834 vs 423-331-9834 are treated as the same number.
var normalizedPhone = NormalizePhone(record.MobilePhone) ?? NormalizePhone(record.Phone);
if (normalizedPhone != null)
{
if (existingPhones.TryGetValue(normalizedPhone, out var existingByPhone))
{
result.Warnings.Add($"Row {rowNumber}: Customer '{cleanCompanyName}' has no email; phone '{normalizedPhone}' already belongs to existing customer '{existingByPhone.CompanyName}'. Skipping.");
result.SkippedCount++;
continue;
}
if (batchPhones.Contains(normalizedPhone))
{
result.Warnings.Add($"Row {rowNumber}: Customer '{cleanCompanyName}' has no email; duplicate phone '{normalizedPhone}' found in import file. Skipping.");
result.SkippedCount++;
continue;
}
}
else
{
// --- Tier 3: name warning (no email, no phone — import anyway, flag for review) ---
// The name key is only compared against batchNames (rows queued earlier from this file);
// a match adds a warning but does not skip the row.
var nameKey = $"{firstName}|{lastName}".ToLowerInvariant();
var hasName = !string.IsNullOrWhiteSpace(firstName) || !string.IsNullOrWhiteSpace(lastName);
if (hasName && batchNames.Contains(nameKey))
{
    // Trim() must run inside the interpolation hole; placed after the closing quote it was
    // emitted literally as ".Trim()" in the warning text and the name was never trimmed.
    result.Warnings.Add($"Row {rowNumber}: Customer '{$"{firstName} {lastName}".Trim()}' has no email or phone and shares a name with another row in the import file. Imported anyway — verify manually.");
}
}
}
// Resolve pricing tier
@@ -513,6 +567,24 @@ public class CsvImportService : ICsvImportService
};
customersToImport.Add((rowNumber, customer, cleanEmail ?? string.Empty));
// Register in batch tracking so later rows are checked against this one
if (!string.IsNullOrEmpty(cleanEmail))
{
batchEmails.Add(cleanEmail);
}
else
{
var normalizedPhone = NormalizePhone(record.MobilePhone) ?? NormalizePhone(record.Phone);
if (normalizedPhone != null)
batchPhones.Add(normalizedPhone);
else
{
var nameKey = $"{firstName}|{lastName}".ToLowerInvariant();
if (!string.IsNullOrWhiteSpace(nameKey.Replace("|", "")))
batchNames.Add(nameKey);
}
}
}
catch (Exception ex)
{
@@ -2837,6 +2909,23 @@ public class CsvImportService : ICsvImportService
return trimmed;
}
/// <summary>
/// Normalises a phone string to a digits-only key for duplicate-detection comparisons.
/// Every character that is not a decimal digit is dropped, so formatting differences such as
/// (423) 331-9834, 423-331-9834, and 4233319834 all produce the same key. Decimal digits from
/// other scripts (for example full-width or Arabic-Indic digits) are folded to '0'–'9' so they
/// compare equal to their ASCII form under ordinal comparison. Returns null when the input
/// contains fewer than 7 digits — too short to be a real phone number and avoids false
/// positive matches on placeholder values like "N/A" or extension-only strings.
/// </summary>
/// <param name="phone">Raw phone string as read from the CSV, or null.</param>
/// <returns>Last 10 (or all, if fewer than 10) digits of the input; null if input is unusable.</returns>
private static string? NormalizePhone(string? phone)
{
    if (string.IsNullOrWhiteSpace(phone)) return null;

    // char.IsDigit is true for every Unicode decimal digit, not only '0'-'9'. Map each kept
    // character to its ASCII digit so the resulting key is script-independent.
    var digits = new string(phone
        .Where(char.IsDigit)
        .Select(c => (char)('0' + (int)char.GetNumericValue(c)))
        .ToArray());

    if (digits.Length < 7) return null;

    // Keeping only the trailing 10 digits drops a leading country code such as "1".
    // NOTE(review): digits of a trailing extension (e.g. "x123") are part of the string at this
    // point and therefore shift which 10 digits are kept — confirm whether inputs carry extensions.
    return digits.Length >= 10 ? digits[^10..] : digits;
}
// ── Invoice Import ───────────────────────────────────────────────────────────
/// <summary>