Files
spouliot a0bdd2b5b4 Sweep all .cshtml files for encoding corruption; add pre-commit guard
Replace all corruption variants with HTML entities across 226 view files:
- 3-char UTF-8-as-Win1252 sequences (ae-corruption)
- Standalone smart/curly quotes that break C# Razor expressions
- Partially re-corrupted variants where the 3rd byte was normalised to ASCII

tools/Fix-Encoding.ps1: re-runnable sweep; uses [char] code points so the
script itself never contains a literal non-ASCII character; supports -DryRun

.githooks/pre-commit: blocks commits containing the ae-corruption byte
signature (\xc3\xa2\xe2\x82\xac); git core.hooksPath = .githooks so the
hook is repo-committed and active for all future work on this machine.

Build clean; 225 unit tests pass.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-20 21:37:10 -04:00

126 lines
6.2 KiB
PowerShell

# Fix-Encoding.ps1
# Replaces corrupted UTF-8-decoded-as-Windows-1252 characters with HTML entities
# in all .cshtml files under src/PowderCoating.Web/Views/.
#
# Each corruption pattern is built from exact Unicode code points at runtime so
# this script file itself never contains a literal non-ASCII character.
# Uses [System.IO.File]::ReadAllText / WriteAllText with explicit UTF-8 (no BOM)
# to avoid PowerShell 5.1's default ANSI encoding on Get-Content / Set-Content.
#
# Usage:
# .\tools\Fix-Encoding.ps1 # fix all .cshtml files
# .\tools\Fix-Encoding.ps1 -DryRun # report what would change, write nothing
# -DryRun : report which files would change without writing anything to disk.
param(
    [switch] $DryRun
)

# Encoding object later passed to WriteAllText. The single constructor argument
# ($false) tells UTF8Encoding not to emit a byte-order mark.
$utf8NoBom = New-Object -TypeName System.Text.UTF8Encoding -ArgumentList $false
# ---------------------------------------------------------------------------
# Corruption map - each entry pairs a corrupt sequence (as a .NET string) with
# the text that replaces it. Every comment in this section is plain ASCII so
# that the script really does contain no literal non-ASCII character.
#
# How the corruption happens:
#   UTF-8 bytes for a Unicode char are misread as Windows-1252, producing 3
#   garbled characters. Those garbled chars are then saved back as UTF-8,
#   so the file now contains their UTF-8 encoding instead of the original char.
#
# Pattern construction:
#   Original char  UTF-8 bytes  Win-1252 chars (Unicode)   Corrupt string
#   em dash        E2 80 94     U+00E2 U+20AC U+201D       a + euro + rdquo
#   en dash        E2 80 93     U+00E2 U+20AC U+201C       a + euro + ldquo
#   ellipsis       E2 80 A6     U+00E2 U+20AC U+00A6       a + euro + brvbar
#   lsaquo         E2 80 B9     U+00E2 U+20AC U+00B9       a + euro + sup1
#   rsaquo         E2 80 BA     U+00E2 U+20AC U+00BA       a + euro + ordm
#   box horiz      E2 94 80     U+00E2 U+201D U+20AC       a + rdquo + euro
#   lsquo          E2 80 98     U+00E2 U+20AC U+02DC       a + euro + tilde
#   rsquo          E2 80 99     U+00E2 U+20AC U+2122       a + euro + trade
#   ldquo          E2 80 9C     U+00E2 U+20AC U+0153       a + euro + oelig
#   rdquo          E2 80 9D     U+00E2 U+20AC U+009D       a + euro + C1 control
#   bullet         E2 80 A2     U+00E2 U+20AC U+00A2       a + euro + cent
#
# Byte 0x9D has no assigned character in Windows-1252; decoders that pass it
# through unchanged (the Windows / .NET code page 1252 behaviour) produce the
# C1 control character U+009D, which is what the rdquo rule looks for.
#
# ORDER MATTERS: the rules are meant to be applied top to bottom. The multi-char
# sequences must come before the standalone curly-quote rules at the end,
# because several corrupt sequences contain U+201C / U+201D / U+2019 themselves.
# ---------------------------------------------------------------------------
$a     = [char]0x00E2   # a with circumflex
$euro  = [char]0x20AC   # euro sign
$rdq   = [char]0x201D   # right double quotation mark
$ldq   = [char]0x201C   # left double quotation mark
$brvb  = [char]0x00A6   # broken bar
$sup1  = [char]0x00B9   # superscript one
$ordm  = [char]0x00BA   # masculine ordinal indicator
$tilde = [char]0x02DC   # small tilde
$trade = [char]0x2122   # trade mark sign
$oelig = [char]0x0153   # latin small letter oe
$cent  = [char]0x00A2   # cent sign
$c1x9d = [char]0x009D   # C1 control char (pass-through of unassigned byte 0x9D)

$replacements = @(
    # -- 3-char a+euro corruption sequences (UTF-8 bytes misread as Windows-1252)
    @{ Find = "$a$euro$rdq";   Replace = '&mdash;' }    # em dash   U+2014
    @{ Find = "$a$euro$ldq";   Replace = '&ndash;' }    # en dash   U+2013
    @{ Find = "$a$euro$brvb";  Replace = '&hellip;' }   # ellipsis  U+2026
    @{ Find = "$a$euro$sup1";  Replace = '&lsaquo;' }   # lsaquo    U+2039
    @{ Find = "$a$euro$ordm";  Replace = '&rsaquo;' }   # rsaquo    U+203A
    @{ Find = "$a$rdq$euro";   Replace = '-' }          # box horiz U+2500
    @{ Find = "$a$euro$tilde"; Replace = "'" }          # lsquo     U+2018
    @{ Find = "$a$euro$trade"; Replace = "'" }          # rsquo     U+2019
    @{ Find = "$a$euro$oelig"; Replace = '&ldquo;' }    # ldquo     U+201C
    @{ Find = "$a$euro$c1x9d"; Replace = '&rdquo;' }    # rdquo     U+201D
    @{ Find = "$a$euro$cent";  Replace = '&bull;' }     # bullet    U+2022

    # -- Partially-re-corrupted variants (3rd char already normalised to ASCII)
    # When an AI tool "fixed" the closing curly quote to ASCII before this script
    # ran, the corruption sequence ends with a plain ASCII double-quote (U+0022)
    # or single-quote (U+0027) instead of U+201C/201D. These won't match the
    # 3-char patterns above so they need their own entries.
    @{ Find = "$a$euro`""; Replace = '&mdash;' }        # a+euro + ASCII double-quote
    @{ Find = "$a$euro'";  Replace = '&mdash;' }        # a+euro + ASCII single-quote (rare)

    # -- Right arrow (U+2192) corruption: UTF-8 E2 86 92 read as Win-1252
    # 0x86 = dagger (U+2020), 0x92 = right single quote (U+2019)
    # In JS comments we can safely render as ->
    @{ Find = [string][char]0x00E2 + [char]0x2020 + [char]0x2019; Replace = '->' }   # right arrow

    # -- Standalone smart/curly quotes introduced by AI tools or paste
    # These have no place in .cshtml files. Curly double quotes break C# Razor
    # expressions; replace with ASCII. Em/en dashes in HTML become entities.
    @{ Find = [string][char]0x201C; Replace = '"' }         # left double quote  -> ASCII "
    @{ Find = [string][char]0x201D; Replace = '"' }         # right double quote -> ASCII "
    @{ Find = [string][char]0x2018; Replace = "'" }         # left single quote  -> ASCII '
    @{ Find = [string][char]0x2019; Replace = "'" }         # right single quote -> ASCII '
    @{ Find = [string][char]0x2014; Replace = '&mdash;' }   # em dash (bare)
    @{ Find = [string][char]0x2013; Replace = '&ndash;' }   # en dash (bare)
    @{ Find = [string][char]0x2026; Replace = '&hellip;' }  # ellipsis (bare)
)
# Locate the Views directory relative to this script's own folder, then collapse
# the ".." segment so later messages show a clean absolute path.
$viewsPath = Join-Path $PSScriptRoot "..\src\PowderCoating.Web\Views"
$viewsPath = [System.IO.Path]::GetFullPath($viewsPath)

# -LiteralPath: a checkout folder whose name contains [ or ] must not be read as
#               a wildcard pattern.
# -PathType Container: a plain file that happens to be named "Views" is not an
#               acceptable target for the sweep.
if (-not (Test-Path -LiteralPath $viewsPath -PathType Container)) {
    Write-Error "Views directory not found: $viewsPath"
    exit 1
}
# Sweep every .cshtml file under $viewsPath, applying the $replacements rules in
# array order, and rewrite (or, with -DryRun, just report) the files that change.
#
# Strict decoder: throwOnInvalidBytes = $true. A file that is not valid UTF-8 is
# skipped instead of being decoded with U+FFFD substitutes that would then be
# written back to disk for good.
$utf8Strict = New-Object -TypeName System.Text.UTF8Encoding -ArgumentList $false, $true

# -LiteralPath keeps [ and ] in the checkout path from acting as wildcards;
# -File keeps directories out of the list handed to ReadAllText.
$files = Get-ChildItem -LiteralPath $viewsPath -Filter "*.cshtml" -Recurse -File

$changedCount = 0
$skippedCount = 0

foreach ($file in $files) {
    $rel = $file.FullName.Replace($viewsPath, 'Views')

    # A failed read must abandon this file. Without the guard the script would
    # carry on with $content still holding the PREVIOUS file's text and could
    # write that text into the current file.
    try {
        $content = [System.IO.File]::ReadAllText($file.FullName, $utf8Strict)
    } catch {
        Write-Warning "Skipped (cannot read as UTF-8): $rel - $($_.Exception.Message)"
        $skippedCount++
        continue
    }

    $updated = $content
    foreach ($r in $replacements) {
        $updated = $updated.Replace($r.Find, $r.Replace)
    }

    # Ordinal comparison: -ne on strings is case-insensitive and linguistic,
    # which is the wrong tool for "did any character change?".
    if ([string]::Equals($updated, $content, [System.StringComparison]::Ordinal)) {
        continue
    }

    if ($DryRun) {
        Write-Host "[DRY RUN] Would fix: $rel" -ForegroundColor Yellow
    } else {
        # Only report "Fixed" when the write actually succeeded.
        try {
            [System.IO.File]::WriteAllText($file.FullName, $updated, $utf8NoBom)
        } catch {
            Write-Warning "Skipped (cannot write): $rel - $($_.Exception.Message)"
            $skippedCount++
            continue
        }
        Write-Host "Fixed: $rel" -ForegroundColor Green
    }
    $changedCount++
}

$verb = if ($DryRun) { "would be updated" } else { "updated" }
Write-Host "`nDone. $changedCount file(s) $verb." -ForegroundColor Cyan
if ($skippedCount -gt 0) {
    Write-Warning "$skippedCount file(s) skipped; see the warnings above."
}