Improve customer import duplicate detection to 3-tier strategy
Tier 1 (email): existing behavior, now uses a HashSet instead of an O(n²) .Any() scan.
Tier 2 (phone): when email is absent, deduplicate by normalised phone number (last 10 digits of MobilePhone, then Phone) against both the DB and the rows already queued in the batch.
Tier 3 (name): when both email and phone are absent, warn but still import.

Fixes customers with no email being silently skipped or left undetected as duplicates. NormalizePhone strips formatting so that (423) 331-9834 and 423-331-9834 match correctly.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -389,8 +389,12 @@ public class CsvImportService : ICsvImportService
|
||||
/// Imports customers from a CSV stream and persists valid rows to the database for the given company.
|
||||
/// The import uses a two-phase approach: all rows are parsed and validated first, then each validated
|
||||
/// entity is saved individually so that a single bad row does not roll back the entire batch.
|
||||
/// Duplicate detection runs against both existing DB records (by email) and within the import file
|
||||
/// itself, catching cases where the same email appears twice in one upload.
|
||||
/// Duplicate detection uses a three-tier strategy:
|
||||
/// Tier 1 — email address (case-insensitive): checked against DB and within the batch; row is skipped.
|
||||
/// Tier 2 — normalised phone number (last 10 digits of MobilePhone, then Phone): used only when email
|
||||
/// is absent; checked against DB and within the batch; row is skipped.
|
||||
/// Tier 3 — FirstName + LastName: used only when both email and phone are absent; emits a warning
|
||||
/// but still imports the row because name collisions across unrelated people are common.
|
||||
/// Pricing tiers are resolved by tier name; an unrecognised name is demoted to a warning and the
|
||||
/// customer is imported without a tier rather than being skipped entirely.
|
||||
/// Contact names are split on the first space into FirstName / LastName because the CSV carries a
|
||||
@@ -419,15 +423,31 @@ public class CsvImportService : ICsvImportService
|
||||
|
||||
// Get all existing customers for duplicate detection
|
||||
var existingCustomers = await _unitOfWork.Customers.GetAllAsync();
|
||||
|
||||
// Tier 1 lookup: email → existing customer
|
||||
var existingEmails = existingCustomers.Where(c => !string.IsNullOrEmpty(c.Email))
|
||||
.ToDictionary(c => c.Email!.ToLower(), c => c, StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
// Tier 2 lookup: normalised phone → existing customer (prefer MobilePhone, fall back to Phone)
|
||||
var existingPhones = new Dictionary<string, Customer>(StringComparer.Ordinal);
|
||||
foreach (var c in existingCustomers)
|
||||
{
|
||||
var phone = NormalizePhone(c.MobilePhone) ?? NormalizePhone(c.Phone);
|
||||
if (phone != null && !existingPhones.ContainsKey(phone))
|
||||
existingPhones[phone] = c;
|
||||
}
|
||||
|
||||
// Get pricing tiers for lookup
|
||||
var pricingTiers = await _unitOfWork.PricingTiers.GetAllAsync();
|
||||
var pricingTierDict = pricingTiers.ToDictionary(pt => pt.TierName.ToUpper(), pt => pt, StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
var customersToImport = new List<(int RowNumber, Customer Customer, string Email)>();
|
||||
|
||||
// Within-batch tracking sets (let later rows be checked for duplicates against rows already queued)
|
||||
var batchEmails = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
|
||||
var batchPhones = new HashSet<string>(StringComparer.Ordinal);
|
||||
var batchNames = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
foreach (var record in records)
|
||||
{
|
||||
rowNumber++;
|
||||
@@ -452,20 +472,54 @@ public class CsvImportService : ICsvImportService
|
||||
cleanCompanyName = derivedName;
|
||||
}
|
||||
|
||||
// Check for duplicate email in existing data
|
||||
if (!string.IsNullOrEmpty(cleanEmail) && existingEmails.ContainsKey(cleanEmail.ToLower()))
|
||||
// --- Tier 1: email dedup (primary key) ---
|
||||
if (!string.IsNullOrEmpty(cleanEmail))
|
||||
{
|
||||
result.Warnings.Add($"Row {rowNumber}: Customer with email '{cleanEmail}' already exists in database. Skipping.");
|
||||
result.SkippedCount++;
|
||||
continue;
|
||||
if (existingEmails.ContainsKey(cleanEmail.ToLower()))
|
||||
{
|
||||
result.Warnings.Add($"Row {rowNumber}: Customer with email '{cleanEmail}' already exists in database. Skipping.");
|
||||
result.SkippedCount++;
|
||||
continue;
|
||||
}
|
||||
if (batchEmails.Contains(cleanEmail))
|
||||
{
|
||||
result.Warnings.Add($"Row {rowNumber}: Duplicate email '{cleanEmail}' found in import file. Skipping.");
|
||||
result.SkippedCount++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Check for duplicate email within the import batch
|
||||
if (!string.IsNullOrEmpty(cleanEmail) && customersToImport.Any(x => x.Email.Equals(cleanEmail, StringComparison.OrdinalIgnoreCase)))
|
||||
else
|
||||
{
|
||||
result.Warnings.Add($"Row {rowNumber}: Duplicate email '{cleanEmail}' found in import file. Skipping.");
|
||||
result.SkippedCount++;
|
||||
continue;
|
||||
// --- Tier 2: phone dedup (when email is absent) ---
|
||||
// NormalizePhone strips to digits-only and returns the last 10, so formatting
|
||||
// differences like (423) 331-9834 vs 423-331-9834 are treated as the same number.
|
||||
var normalizedPhone = NormalizePhone(record.MobilePhone) ?? NormalizePhone(record.Phone);
|
||||
|
||||
if (normalizedPhone != null)
|
||||
{
|
||||
if (existingPhones.TryGetValue(normalizedPhone, out var existingByPhone))
|
||||
{
|
||||
result.Warnings.Add($"Row {rowNumber}: Customer '{cleanCompanyName}' has no email; phone '{normalizedPhone}' already belongs to existing customer '{existingByPhone.CompanyName}'. Skipping.");
|
||||
result.SkippedCount++;
|
||||
continue;
|
||||
}
|
||||
if (batchPhones.Contains(normalizedPhone))
|
||||
{
|
||||
result.Warnings.Add($"Row {rowNumber}: Customer '{cleanCompanyName}' has no email; duplicate phone '{normalizedPhone}' found in import file. Skipping.");
|
||||
result.SkippedCount++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// --- Tier 3: name warning (no email, no phone — import anyway, flag for review) ---
|
||||
var nameKey = $"{firstName}|{lastName}".ToLowerInvariant();
|
||||
var hasName = !string.IsNullOrWhiteSpace(firstName) || !string.IsNullOrWhiteSpace(lastName);
|
||||
if (hasName && batchNames.Contains(nameKey))
|
||||
{
|
||||
result.Warnings.Add($"Row {rowNumber}: Customer '{firstName} {lastName}'.Trim() has no email or phone and shares a name with another row in the import file. Imported anyway — verify manually.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Resolve pricing tier
|
||||
@@ -513,6 +567,24 @@ public class CsvImportService : ICsvImportService
|
||||
};
|
||||
|
||||
customersToImport.Add((rowNumber, customer, cleanEmail ?? string.Empty));
|
||||
|
||||
// Register in batch tracking so later rows are checked against this one
|
||||
if (!string.IsNullOrEmpty(cleanEmail))
|
||||
{
|
||||
batchEmails.Add(cleanEmail);
|
||||
}
|
||||
else
|
||||
{
|
||||
var normalizedPhone = NormalizePhone(record.MobilePhone) ?? NormalizePhone(record.Phone);
|
||||
if (normalizedPhone != null)
|
||||
batchPhones.Add(normalizedPhone);
|
||||
else
|
||||
{
|
||||
var nameKey = $"{firstName}|{lastName}".ToLowerInvariant();
|
||||
if (!string.IsNullOrWhiteSpace(nameKey.Replace("|", "")))
|
||||
batchNames.Add(nameKey);
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
@@ -2837,6 +2909,23 @@ public class CsvImportService : ICsvImportService
|
||||
return trimmed;
|
||||
}
|
||||
|
||||
/// <summary>
/// Normalises a phone string to a digits-only key for duplicate-detection comparisons.
/// Every non-digit character is discarded, so formatting differences such as
/// (423) 331-9834, 423-331-9834, and 4233319834 all produce the same key. When more than
/// 10 digits remain, only the last 10 are kept, which drops a leading country code such
/// as the "1" in +1 (423) 331-9834.
/// </summary>
/// <remarks>
/// Unicode decimal digits (for example full-width or Arabic-Indic digits) are mapped to
/// their ASCII equivalents, so the same number typed in a different digit script yields
/// the same key under ordinal comparison.
/// Known limitation: digits are collected from the whole string, so a trailing extension
/// ("423-331-9834 x12") shifts the 10-digit window and produces a different key.
/// </remarks>
/// <param name="phone">Raw phone string as read from the CSV, or null.</param>
/// <returns>
/// The last 10 digits of the input (or all of them when there are 7 to 9); null when the
/// input is null, blank, or contains fewer than 7 digits — too short to be a real phone
/// number, which avoids false-positive matches on placeholders like "N/A" or
/// extension-only strings.
/// </returns>
private static string? NormalizePhone(string? phone)
{
    const int minDigits = 7;   // anything shorter is treated as "no usable phone"
    const int keyLength = 10;  // national-number length used as the comparison key

    if (string.IsNullOrWhiteSpace(phone))
    {
        return null;
    }

    // char.IsDigit is true for any Unicode decimal digit, not just '0'-'9'; convert each
    // one to its ASCII form so keys built from different digit scripts compare equal.
    var digits = new string(phone
        .Where(char.IsDigit)
        .Select(c => (char)('0' + (int)char.GetNumericValue(c)))
        .ToArray());

    if (digits.Length < minDigits)
    {
        return null;
    }

    return digits.Length > keyLength ? digits[^keyLength..] : digits;
}
|
||||
|
||||
// ── Invoice Import ───────────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>
|
||||
|
||||
Reference in New Issue
Block a user