# Fix-Encoding.ps1
# Replaces corrupted UTF-8-decoded-as-Windows-1252 characters with HTML entities
# in all .cshtml files under src/PowderCoating.Web/Views/.
#
# Each corruption pattern is built from exact Unicode code points at runtime and
# every replacement value is plain ASCII (an HTML entity or an ASCII character),
# so this script file itself never contains a literal non-ASCII character and is
# parsed identically whatever encoding PowerShell assumes for it.
# Uses [System.IO.File]::ReadAllText / WriteAllText with explicit UTF-8 (no BOM)
# to avoid PowerShell 5.1's default ANSI encoding on Get-Content / Set-Content.
#
# Usage:
#   .\tools\Fix-Encoding.ps1            # fix all .cshtml files
#   .\tools\Fix-Encoding.ps1 -DryRun    # report what would change, write nothing

param(
    # When set, only report the files that would change; nothing is written.
    [switch]$DryRun
)

# UTF-8 encoder that does not emit a byte-order mark when writing.
$utf8NoBom = New-Object System.Text.UTF8Encoding $false

# ---------------------------------------------------------------------------
# Corruption map -- each entry is the corrupt sequence (as a .NET string) and
# the ASCII text to replace it with.
#
# How the corruption happens:
#   UTF-8 bytes for a Unicode char are misread as Windows-1252, producing 3
#   garbled characters. Those garbled chars are then saved back as UTF-8,
#   so the file now contains their UTF-8 encoding instead of the original char.
#
# Pattern construction:
#   Original char   UTF-8 bytes   Win-1252 chars (Unicode)   Corrupt string
#   em dash         E2 80 94      U+00E2 U+20AC U+201D       a + euro + rdquo
#   en dash         E2 80 93      U+00E2 U+20AC U+201C       a + euro + ldquo
#   ellipsis        E2 80 A6      U+00E2 U+20AC U+00A6       a + euro + brvbar
#   lsaquo          E2 80 B9      U+00E2 U+20AC U+00B9       a + euro + sup1
#   rsaquo          E2 80 BA      U+00E2 U+20AC U+00BA       a + euro + ordm
#   box horiz       E2 94 80      U+00E2 U+201D U+20AC       a + rdquo + euro
#   lsquo           E2 80 98      U+00E2 U+20AC U+02DC       a + euro + tilde
#   rsquo           E2 80 99      U+00E2 U+20AC U+2122       a + euro + trade
#   ldquo           E2 80 9C      U+00E2 U+20AC U+0153       a + euro + oelig
#   rdquo           E2 80 9D      U+00E2 U+20AC U+009D       a + euro + C1 control
#   bullet          E2 80 A2      U+00E2 U+20AC U+00A2       a + euro + cent
#   right arrow     E2 86 92      U+00E2 U+2020 U+2019       a + dagger + rsquo
# ---------------------------------------------------------------------------

$a      = [char]0x00E2   # a with circumflex
$euro   = [char]0x20AC   # euro sign
$rdq    = [char]0x201D   # right double quotation mark
$ldq    = [char]0x201C   # left double quotation mark
$brvb   = [char]0x00A6   # broken bar
$sup1   = [char]0x00B9   # superscript one
$ordm   = [char]0x00BA   # masculine ordinal indicator
$tilde  = [char]0x02DC   # small tilde
$trade  = [char]0x2122   # trade mark sign
$oelig  = [char]0x0153   # latin small letter oe
$cent   = [char]0x00A2   # cent sign
$dagger = [char]0x2020   # dagger
$rsq    = [char]0x2019   # right single quotation mark
# Byte 0x9D is unassigned in Windows-1252; decoders that pass it through
# (as .NET's code page 1252 does) yield the C1 control U+009D.
$c1x9d  = [char]0x009D

# ORDER MATTERS: the multi-character corruption sequences must be replaced
# before the standalone smart-quote entries at the bottom, because several
# sequences contain U+201C / U+201D / U+2019 as their final character.
$replacements = @(
    # -- 3-char corruption sequences (UTF-8 bytes misread as Windows-1252) ------
    @{ Find = "$a$euro$rdq";   Replace = '&mdash;'  }   # em dash    U+2014
    @{ Find = "$a$euro$ldq";   Replace = '&ndash;'  }   # en dash    U+2013
    @{ Find = "$a$euro$brvb";  Replace = '&hellip;' }   # ellipsis   U+2026
    @{ Find = "$a$euro$sup1";  Replace = '&lsaquo;' }   # lsaquo     U+2039
    @{ Find = "$a$euro$ordm";  Replace = '&rsaquo;' }   # rsaquo     U+203A
    @{ Find = "$a$rdq$euro";   Replace = '-'        }   # box horiz  U+2500
    @{ Find = "$a$euro$tilde"; Replace = "'"        }   # lsquo      U+2018
    @{ Find = "$a$euro$trade"; Replace = "'"        }   # rsquo      U+2019
    @{ Find = "$a$euro$oelig"; Replace = '&ldquo;'  }   # ldquo      U+201C
    @{ Find = "$a$euro$c1x9d"; Replace = '&rdquo;'  }   # rdquo      U+201D
    @{ Find = "$a$euro$cent";  Replace = '&bull;'   }   # bullet     U+2022

    # -- Partially-re-corrupted variants (3rd char already normalised to ASCII) --
    # When an AI tool "fixed" the closing curly quote to ASCII before our script
    # ran, the corruption sequence ends with a plain ASCII double-quote (U+0022)
    # or single-quote (U+0027) instead of U+201C/201D. These won't match the
    # 3-char patterns above so they need their own entries.
    @{ Find = "$a$euro" + '"'; Replace = '&mdash;' }    # a + euro + ASCII double-quote
    @{ Find = "$a$euro" + "'"; Replace = '&mdash;' }    # a + euro + ASCII single-quote (rare)

    # -- Right arrow (U+2192) corruption: UTF-8 E2 86 92 read as Win-1252 --------
    # 0x86 = dagger (U+2020), 0x92 = right single quote (U+2019)
    # In JS comments we can safely render as ->
    @{ Find = "$a$dagger$rsq"; Replace = '->' }         # right arrow

    # -- Standalone smart/curly quotes introduced by AI tools or paste ----------
    # These have no place in .cshtml files. Curly double quotes break C# Razor
    # expressions; replace with ASCII. Em/en dashes and ellipses in HTML become
    # entities.
    @{ Find = [string][char]0x201C; Replace = '"'        }   # left double quote  -> ASCII "
    @{ Find = [string][char]0x201D; Replace = '"'        }   # right double quote -> ASCII "
    @{ Find = [string][char]0x2018; Replace = "'"        }   # left single quote  -> ASCII '
    @{ Find = [string][char]0x2019; Replace = "'"        }   # right single quote -> ASCII '
    @{ Find = [string][char]0x2014; Replace = '&mdash;'  }   # em dash  (bare)
    @{ Find = [string][char]0x2013; Replace = '&ndash;'  }   # en dash  (bare)
    @{ Find = [string][char]0x2026; Replace = '&hellip;' }   # ellipsis (bare)
)

# Resolve the Views directory relative to this script's own location.
$viewsPath = Join-Path $PSScriptRoot "..\src\PowderCoating.Web\Views"
$viewsPath = [System.IO.Path]::GetFullPath($viewsPath)

if (-not (Test-Path -LiteralPath $viewsPath -PathType Container)) {
    Write-Error "Views directory not found: $viewsPath"
    exit 1
}

# -File keeps directories whose name happens to end in .cshtml out of the list;
# ReadAllText would throw on them.
$files = Get-ChildItem -Path $viewsPath -Filter "*.cshtml" -Recurse -File
$changedCount = 0

foreach ($file in $files) {
    $content = [System.IO.File]::ReadAllText($file.FullName, [System.Text.Encoding]::UTF8)
    $updated = $content

    # String.Replace(string, string) is an ordinal, case-sensitive replacement.
    foreach ($r in $replacements) {
        $updated = $updated.Replace($r.Find, $r.Replace)
    }

    # Ordinal comparison: PowerShell's -ne is case-insensitive and culture-aware
    # for strings, which is the wrong tool for detecting an exact text change.
    if ([string]::Equals($updated, $content, [System.StringComparison]::Ordinal)) {
        continue
    }

    $rel = $file.FullName.Replace($viewsPath, 'Views')
    if ($DryRun) {
        Write-Host "[DRY RUN] Would fix: $rel" -ForegroundColor Yellow
    }
    else {
        [System.IO.File]::WriteAllText($file.FullName, $updated, $utf8NoBom)
        Write-Host "Fixed: $rel" -ForegroundColor Green
    }
    $changedCount++
}

$verb = if ($DryRun) { "would be updated" } else { "updated" }
Write-Host "`nDone. $changedCount file(s) $verb." -ForegroundColor Cyan