Fix customer import dedup: use composite keys, not exclusive tiers
New logic:
  Tier 1 - email present: email match -> skip (unchanged).
  Tier 2 - email absent + phone present: name + phone composite match -> skip.
  Tier 3 - email and phone both absent: name + city/state/zip composite match -> warn, import anyway.

Tier 2 requires BOTH name and phone to match so two people sharing an office line don't falsely collide. Tier 3 warns but imports because location data is too imprecise to hard-skip on.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -389,12 +389,15 @@ public class CsvImportService : ICsvImportService
|
||||
/// Imports customers from a CSV stream and persists valid rows to the database for the given company.
|
||||
/// The import uses a two-phase approach: all rows are parsed and validated first, then each validated
|
||||
/// entity is saved individually so that a single bad row does not roll back the entire batch.
|
||||
/// Duplicate detection uses a three-tier strategy:
|
||||
/// Tier 1 — email address (case-insensitive): checked against DB and within the batch; row is skipped.
|
||||
/// Tier 2 — normalised phone number (last 10 digits of MobilePhone, then Phone): used only when email
|
||||
/// is absent; checked against DB and within the batch; row is skipped.
|
||||
/// Tier 3 — FirstName + LastName: used only when both email and phone are absent; emits a warning
|
||||
/// but still imports the row because name collisions across unrelated people are common.
|
||||
/// Duplicate detection uses a three-tier strategy, each tier only engaged when the previous
|
||||
/// identifier is absent:
|
||||
/// Tier 1 — email address (case-insensitive): if email is present and matches a DB record or
|
||||
/// earlier batch row the row is skipped.
|
||||
/// Tier 2 — name + normalised phone composite: used when email is absent but a phone number is present. Combining name with
|
||||
/// phone prevents false positives when two people share a number (e.g. a family).
|
||||
/// Row is skipped on match.
|
||||
/// Tier 3 — name + city/state/zip composite: used when both email and phone are absent.
|
||||
/// Location data is imprecise so this emits a warning but still imports the row.
|
||||
/// Pricing tiers are resolved by tier name; an unrecognised name is demoted to a warning and the
|
||||
/// customer is imported without a tier rather than being skipped entirely.
|
||||
/// Contact names are split on the first space into FirstName / LastName because the CSV carries a
|
||||
@@ -428,13 +431,35 @@ public class CsvImportService : ICsvImportService
|
||||
var existingEmails = existingCustomers.Where(c => !string.IsNullOrEmpty(c.Email))
|
||||
.ToDictionary(c => c.Email!.ToLower(), c => c, StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
// Tier 2 lookup: normalised phone → existing customer (prefer MobilePhone, fall back to Phone)
|
||||
var existingPhones = new Dictionary<string, Customer>(StringComparer.Ordinal);
|
||||
// Tier 2 lookup: (normalised phone + "|" + display name) → existing customer.
|
||||
// Combining name with phone avoids false positives when two people share a number.
|
||||
var existingByPhoneAndName = new Dictionary<string, Customer>(StringComparer.OrdinalIgnoreCase);
|
||||
foreach (var c in existingCustomers)
|
||||
{
|
||||
var phone = NormalizePhone(c.MobilePhone) ?? NormalizePhone(c.Phone);
|
||||
if (phone != null && !existingPhones.ContainsKey(phone))
|
||||
existingPhones[phone] = c;
|
||||
if (phone == null) continue;
|
||||
var name = string.IsNullOrWhiteSpace(c.CompanyName)
|
||||
? $"{c.ContactFirstName} {c.ContactLastName}".Trim()
|
||||
: c.CompanyName;
|
||||
var key = $"{phone}|{name}";
|
||||
if (!existingByPhoneAndName.ContainsKey(key))
|
||||
existingByPhoneAndName[key] = c;
|
||||
}
|
||||
|
||||
// Tier 3 lookup: (display name + "|" + city + "|" + state + "|" + zip) → existing customer.
|
||||
// Only populated when a customer has at least one location field so the key isn't trivially blank.
|
||||
var existingByNameAndLocation = new Dictionary<string, Customer>(StringComparer.OrdinalIgnoreCase);
|
||||
foreach (var c in existingCustomers)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(c.City) && string.IsNullOrWhiteSpace(c.State) && string.IsNullOrWhiteSpace(c.ZipCode))
|
||||
continue;
|
||||
var name = string.IsNullOrWhiteSpace(c.CompanyName)
|
||||
? $"{c.ContactFirstName} {c.ContactLastName}".Trim()
|
||||
: c.CompanyName;
|
||||
if (string.IsNullOrWhiteSpace(name)) continue;
|
||||
var key = $"{name}|{c.City}|{c.State}|{c.ZipCode}";
|
||||
if (!existingByNameAndLocation.ContainsKey(key))
|
||||
existingByNameAndLocation[key] = c;
|
||||
}
|
||||
|
||||
// Get pricing tiers for lookup
|
||||
@@ -444,9 +469,9 @@ public class CsvImportService : ICsvImportService
|
||||
var customersToImport = new List<(int RowNumber, Customer Customer, string Email)>();
|
||||
|
||||
// Within-batch tracking sets (detect duplicates against rows already queued earlier in this import)
|
||||
var batchEmails = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
|
||||
var batchPhones = new HashSet<string>(StringComparer.Ordinal);
|
||||
var batchNames = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
|
||||
var batchEmails = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
|
||||
var batchPhoneAndName = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
|
||||
var batchNameAndLocation = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
foreach (var record in records)
|
||||
{
|
||||
@@ -472,7 +497,13 @@ public class CsvImportService : ICsvImportService
|
||||
cleanCompanyName = derivedName;
|
||||
}
|
||||
|
||||
// --- Tier 1: email dedup (primary key) ---
|
||||
// Canonical display name used as part of composite keys in Tiers 2 and 3
|
||||
var displayName = string.IsNullOrWhiteSpace(cleanCompanyName)
|
||||
? $"{firstName} {lastName}".Trim()
|
||||
: cleanCompanyName;
|
||||
|
||||
// --- Tier 1: email dedup ---
|
||||
// Only engaged when the row has an email address.
|
||||
if (!string.IsNullOrEmpty(cleanEmail))
|
||||
{
|
||||
if (existingEmails.ContainsKey(cleanEmail.ToLower()))
|
||||
@@ -490,34 +521,42 @@ public class CsvImportService : ICsvImportService
|
||||
}
|
||||
else
|
||||
{
|
||||
// --- Tier 2: phone dedup (when email is absent) ---
|
||||
// NormalizePhone strips to digits-only and returns the last 10, so formatting
|
||||
// differences like (423) 331-9834 vs 423-331-9834 are treated as the same number.
|
||||
// --- Tier 2: name + phone composite dedup (email absent) ---
|
||||
// Requiring both name and phone to match avoids false positives when two
|
||||
// unrelated people happen to share a phone number (e.g. a shared office line).
|
||||
var normalizedPhone = NormalizePhone(record.MobilePhone) ?? NormalizePhone(record.Phone);
|
||||
|
||||
if (normalizedPhone != null)
|
||||
{
|
||||
if (existingPhones.TryGetValue(normalizedPhone, out var existingByPhone))
|
||||
var phoneNameKey = $"{normalizedPhone}|{displayName}";
|
||||
if (existingByPhoneAndName.TryGetValue(phoneNameKey, out var existingMatch))
|
||||
{
|
||||
result.Warnings.Add($"Row {rowNumber}: Customer '{cleanCompanyName}' has no email; phone '{normalizedPhone}' already belongs to existing customer '{existingByPhone.CompanyName}'. Skipping.");
|
||||
result.Warnings.Add($"Row {rowNumber}: '{displayName}' has no email; name + phone matches existing customer '{existingMatch.CompanyName}'. Skipping.");
|
||||
result.SkippedCount++;
|
||||
continue;
|
||||
}
|
||||
if (batchPhones.Contains(normalizedPhone))
|
||||
if (batchPhoneAndName.Contains(phoneNameKey))
|
||||
{
|
||||
result.Warnings.Add($"Row {rowNumber}: Customer '{cleanCompanyName}' has no email; duplicate phone '{normalizedPhone}' found in import file. Skipping.");
|
||||
result.Warnings.Add($"Row {rowNumber}: '{displayName}' has no email; duplicate name + phone found in import file. Skipping.");
|
||||
result.SkippedCount++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// --- Tier 3: name warning (no email, no phone — import anyway, flag for review) ---
|
||||
var nameKey = $"{firstName}|{lastName}".ToLowerInvariant();
|
||||
var hasName = !string.IsNullOrWhiteSpace(firstName) || !string.IsNullOrWhiteSpace(lastName);
|
||||
if (hasName && batchNames.Contains(nameKey))
|
||||
// --- Tier 3: name + location composite warning (no email, no phone) ---
|
||||
// Location data is imprecise so we warn but still import — a name + city
|
||||
// collision across unrelated people is plausible enough not to hard-skip.
|
||||
var city = record.City?.Trim();
|
||||
var state = record.State?.Trim();
|
||||
var zip = record.ZipCode?.Trim();
|
||||
var hasLocation = !string.IsNullOrWhiteSpace(city) || !string.IsNullOrWhiteSpace(state) || !string.IsNullOrWhiteSpace(zip);
|
||||
if (hasLocation && !string.IsNullOrWhiteSpace(displayName))
|
||||
{
|
||||
result.Warnings.Add($"Row {rowNumber}: Customer '{firstName} {lastName}'.Trim() has no email or phone and shares a name with another row in the import file. Imported anyway — verify manually.");
|
||||
var locationKey = $"{displayName}|{city}|{state}|{zip}";
|
||||
if (existingByNameAndLocation.ContainsKey(locationKey) || batchNameAndLocation.Contains(locationKey))
|
||||
{
|
||||
result.Warnings.Add($"Row {rowNumber}: '{displayName}' has no email or phone; name + location matches an existing record. Imported anyway — verify manually.");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -577,12 +616,17 @@ public class CsvImportService : ICsvImportService
|
||||
{
|
||||
var normalizedPhone = NormalizePhone(record.MobilePhone) ?? NormalizePhone(record.Phone);
|
||||
if (normalizedPhone != null)
|
||||
batchPhones.Add(normalizedPhone);
|
||||
{
|
||||
batchPhoneAndName.Add($"{normalizedPhone}|{displayName}");
|
||||
}
|
||||
else
|
||||
{
|
||||
var nameKey = $"{firstName}|{lastName}".ToLowerInvariant();
|
||||
if (!string.IsNullOrWhiteSpace(nameKey.Replace("|", "")))
|
||||
batchNames.Add(nameKey);
|
||||
var city = record.City?.Trim();
|
||||
var state = record.State?.Trim();
|
||||
var zip = record.ZipCode?.Trim();
|
||||
var hasLocation = !string.IsNullOrWhiteSpace(city) || !string.IsNullOrWhiteSpace(state) || !string.IsNullOrWhiteSpace(zip);
|
||||
if (hasLocation && !string.IsNullOrWhiteSpace(displayName))
|
||||
batchNameAndLocation.Add($"{displayName}|{city}|{state}|{zip}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user