Fix customer import dedup: use composite keys, not exclusive tiers

New logic:
  Tier 1 - email present: email match -> skip (unchanged)
  Tier 2 - email absent + phone present: name + phone composite -> skip
  Tier 3 - email and phone absent: name + city/state/zip composite -> warn, import anyway

Tier 2 requires BOTH name and phone to match so two people sharing an
office line don't falsely collide. Tier 3 warns but imports because
location data is too imprecise to hard-skip on.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-27 13:02:10 -04:00
parent 8f955851e5
commit be89327c01
@@ -389,12 +389,15 @@ public class CsvImportService : ICsvImportService
/// Imports customers from a CSV stream and persists valid rows to the database for the given company.
/// The import uses a two-phase approach: all rows are parsed and validated first, then each validated
/// entity is saved individually so that a single bad row does not roll back the entire batch.
/// Duplicate detection uses a three-tier strategy:
/// Tier 1 — email address (case-insensitive): checked against DB and within the batch; row is skipped.
/// Tier 2 — normalised phone number (last 10 digits of MobilePhone, then Phone): used only when email
/// is absent; checked against DB and within the batch; row is skipped.
/// Tier 3 — FirstName + LastName: used only when both email and phone are absent; emits a warning
/// but still imports the row because name collisions across unrelated people are common.
/// Duplicate detection uses a three-tier strategy, each tier only engaged when the previous
/// identifier is absent:
/// Tier 1 — email address (case-insensitive): if email is present and matches a DB record or
/// earlier batch row the row is skipped.
/// Tier 2 — name + normalised phone composite: used when email is absent. Combining name with
/// phone prevents false positives when two people share a number (e.g. a family).
/// Row is skipped on match.
/// Tier 3 — name + city/state/zip composite: used when both email and phone are absent.
/// Location data is imprecise so this emits a warning but still imports the row.
/// Pricing tiers are resolved by tier name; an unrecognised name is demoted to a warning and the
/// customer is imported without a tier rather than being skipped entirely.
/// Contact names are split on the first space into FirstName / LastName because the CSV carries a
@@ -428,13 +431,35 @@ public class CsvImportService : ICsvImportService
var existingEmails = existingCustomers.Where(c => !string.IsNullOrEmpty(c.Email))
.ToDictionary(c => c.Email!.ToLower(), c => c, StringComparer.OrdinalIgnoreCase);
// Tier 2 lookup: normalised phone → existing customer (prefer MobilePhone, fall back to Phone)
var existingPhones = new Dictionary<string, Customer>(StringComparer.Ordinal);
// Tier 2 lookup: (normalised phone + "|" + display name) → existing customer.
// Combining name with phone avoids false positives when two people share a number.
var existingByPhoneAndName = new Dictionary<string, Customer>(StringComparer.OrdinalIgnoreCase);
foreach (var c in existingCustomers)
{
var phone = NormalizePhone(c.MobilePhone) ?? NormalizePhone(c.Phone);
if (phone != null && !existingPhones.ContainsKey(phone))
existingPhones[phone] = c;
if (phone == null) continue;
var name = string.IsNullOrWhiteSpace(c.CompanyName)
? $"{c.ContactFirstName} {c.ContactLastName}".Trim()
: c.CompanyName;
var key = $"{phone}|{name}";
if (!existingByPhoneAndName.ContainsKey(key))
existingByPhoneAndName[key] = c;
}
// Tier 3 lookup: (display name + "|" + city + "|" + state + "|" + zip) → existing customer.
// Only populated when a customer has at least one location field so the key isn't trivially blank.
var existingByNameAndLocation = new Dictionary<string, Customer>(StringComparer.OrdinalIgnoreCase);
foreach (var c in existingCustomers)
{
if (string.IsNullOrWhiteSpace(c.City) && string.IsNullOrWhiteSpace(c.State) && string.IsNullOrWhiteSpace(c.ZipCode))
continue;
var name = string.IsNullOrWhiteSpace(c.CompanyName)
? $"{c.ContactFirstName} {c.ContactLastName}".Trim()
: c.CompanyName;
if (string.IsNullOrWhiteSpace(name)) continue;
var key = $"{name}|{c.City}|{c.State}|{c.ZipCode}";
if (!existingByNameAndLocation.ContainsKey(key))
existingByNameAndLocation[key] = c;
}
// Get pricing tiers for lookup
@@ -444,9 +469,9 @@ public class CsvImportService : ICsvImportService
var customersToImport = new List<(int RowNumber, Customer Customer, string Email)>();
// Within-batch tracking sets (used to detect duplicates against rows already queued from this import file)
var batchEmails = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
var batchPhones = new HashSet<string>(StringComparer.Ordinal);
var batchNames = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
var batchEmails = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
var batchPhoneAndName = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
var batchNameAndLocation = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
foreach (var record in records)
{
@@ -472,7 +497,13 @@ public class CsvImportService : ICsvImportService
cleanCompanyName = derivedName;
}
// --- Tier 1: email dedup (primary key) ---
// Canonical display name used as part of composite keys in Tiers 2 and 3
var displayName = string.IsNullOrWhiteSpace(cleanCompanyName)
? $"{firstName} {lastName}".Trim()
: cleanCompanyName;
// --- Tier 1: email dedup ---
// Only engaged when the row has an email address.
if (!string.IsNullOrEmpty(cleanEmail))
{
if (existingEmails.ContainsKey(cleanEmail.ToLower()))
@@ -490,34 +521,42 @@ public class CsvImportService : ICsvImportService
}
else
{
// --- Tier 2: phone dedup (when email is absent) ---
// NormalizePhone strips to digits-only and returns the last 10, so formatting
// differences like (423) 331-9834 vs 423-331-9834 are treated as the same number.
// --- Tier 2: name + phone composite dedup (email absent) ---
// Requiring both name and phone to match avoids false positives when two
// unrelated people happen to share a phone number (e.g. a shared office line).
var normalizedPhone = NormalizePhone(record.MobilePhone) ?? NormalizePhone(record.Phone);
if (normalizedPhone != null)
{
if (existingPhones.TryGetValue(normalizedPhone, out var existingByPhone))
var phoneNameKey = $"{normalizedPhone}|{displayName}";
if (existingByPhoneAndName.TryGetValue(phoneNameKey, out var existingMatch))
{
result.Warnings.Add($"Row {rowNumber}: Customer '{cleanCompanyName}' has no email; phone '{normalizedPhone}' already belongs to existing customer '{existingByPhone.CompanyName}'. Skipping.");
result.Warnings.Add($"Row {rowNumber}: '{displayName}' has no email; name + phone matches existing customer '{existingMatch.CompanyName}'. Skipping.");
result.SkippedCount++;
continue;
}
if (batchPhones.Contains(normalizedPhone))
if (batchPhoneAndName.Contains(phoneNameKey))
{
result.Warnings.Add($"Row {rowNumber}: Customer '{cleanCompanyName}' has no email; duplicate phone '{normalizedPhone}' found in import file. Skipping.");
result.Warnings.Add($"Row {rowNumber}: '{displayName}' has no email; duplicate name + phone found in import file. Skipping.");
result.SkippedCount++;
continue;
}
}
else
{
// --- Tier 3: name warning (no email, no phone — import anyway, flag for review) ---
var nameKey = $"{firstName}|{lastName}".ToLowerInvariant();
var hasName = !string.IsNullOrWhiteSpace(firstName) || !string.IsNullOrWhiteSpace(lastName);
if (hasName && batchNames.Contains(nameKey))
// --- Tier 3: name + location composite warning (no email, no phone) ---
// Location data is imprecise so we warn but still import — a name + city
// collision across unrelated people is plausible enough not to hard-skip.
var city = record.City?.Trim();
var state = record.State?.Trim();
var zip = record.ZipCode?.Trim();
var hasLocation = !string.IsNullOrWhiteSpace(city) || !string.IsNullOrWhiteSpace(state) || !string.IsNullOrWhiteSpace(zip);
if (hasLocation && !string.IsNullOrWhiteSpace(displayName))
{
result.Warnings.Add($"Row {rowNumber}: Customer '{firstName} {lastName}'.Trim() has no email or phone and shares a name with another row in the import file. Imported anyway — verify manually.");
var locationKey = $"{displayName}|{city}|{state}|{zip}";
if (existingByNameAndLocation.ContainsKey(locationKey) || batchNameAndLocation.Contains(locationKey))
{
result.Warnings.Add($"Row {rowNumber}: '{displayName}' has no email or phone; name + location matches an existing record. Imported anyway — verify manually.");
}
}
}
}
@@ -577,12 +616,17 @@ public class CsvImportService : ICsvImportService
{
var normalizedPhone = NormalizePhone(record.MobilePhone) ?? NormalizePhone(record.Phone);
if (normalizedPhone != null)
batchPhones.Add(normalizedPhone);
{
batchPhoneAndName.Add($"{normalizedPhone}|{displayName}");
}
else
{
var nameKey = $"{firstName}|{lastName}".ToLowerInvariant();
if (!string.IsNullOrWhiteSpace(nameKey.Replace("|", "")))
batchNames.Add(nameKey);
var city = record.City?.Trim();
var state = record.State?.Trim();
var zip = record.ZipCode?.Trim();
var hasLocation = !string.IsNullOrWhiteSpace(city) || !string.IsNullOrWhiteSpace(state) || !string.IsNullOrWhiteSpace(zip);
if (hasLocation && !string.IsNullOrWhiteSpace(displayName))
batchNameAndLocation.Add($"{displayName}|{city}|{state}|{zip}");
}
}
}