a0bdd2b5b4
Replace all corruption variants with HTML entities across 226 view files: - 3-char UTF-8-as-Win1252 sequences (ae-corruption) - Standalone smart/curly quotes that break C# Razor expressions - Partially re-corrupted variants where the 3rd byte was normalised to ASCII tools/Fix-Encoding.ps1: re-runnable sweep; uses [char] code points so the script itself never contains a literal non-ASCII character; supports -DryRun .githooks/pre-commit: blocks commits containing the ae-corruption byte signature (xc3xa2xe2x82xac); git core.hooksPath = .githooks so the hook is repo-committed and active for all future work on this machine. Build clean; 225 unit tests pass. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
126 lines
6.2 KiB
PowerShell
126 lines
6.2 KiB
PowerShell
# Fix-Encoding.ps1
# Replaces corrupted UTF-8-decoded-as-Windows-1252 characters with HTML entities
# in all .cshtml files under src/PowderCoating.Web/Views/.
#
# Each corruption pattern is built from exact Unicode code points at runtime so
# this script file itself never contains a literal non-ASCII character.
# Uses [System.IO.File]::ReadAllText / WriteAllText with explicit UTF-8 (no BOM)
# to avoid PowerShell 5.1's default ANSI encoding on Get-Content / Set-Content.
#
# Usage:
#   .\tools\Fix-Encoding.ps1            # fix all .cshtml files
#   .\tools\Fix-Encoding.ps1 -DryRun    # report what would change, write nothing

param(
    # When set, only report which files would change; nothing is written.
    [switch] $DryRun
)

# UTF-8 encoder constructed with encoderShouldEmitUTF8Identifier = $false,
# i.e. it writes no byte-order mark.
$utf8NoBom = New-Object -TypeName System.Text.UTF8Encoding -ArgumentList $false

# ---------------------------------------------------------------------------
# Corruption map - each entry is the 3-char corrupt sequence (as .NET strings)
# and the HTML entity to replace it with.
#
# How the corruption happens:
#   UTF-8 bytes for a Unicode char are misread as Windows-1252, producing 3
#   garbled characters.  Those garbled chars are then saved back as UTF-8,
#   so the file now contains their UTF-8 encoding instead of the original char.
#
# Pattern construction:
#   Original char   UTF-8 bytes   Win-1252 chars (Unicode)   Corrupt string
#   em dash         E2 80 94      U+00E2 U+20AC U+201D       a + euro + rdquo
#   en dash         E2 80 93      U+00E2 U+20AC U+201C       a + euro + ldquo
#   ellipsis        E2 80 A6      U+00E2 U+20AC U+00A6       a + euro + brvbar
#   lsaquo          E2 80 B9      U+00E2 U+20AC U+00B9       a + euro + sup1
#   rsaquo          E2 80 BA      U+00E2 U+20AC U+00BA       a + euro + ordm
#   box horiz       E2 94 80      U+00E2 U+201D U+20AC       a + rdquo + euro
#   lsquo           E2 80 98      U+00E2 U+20AC U+02DC       a + euro + tilde
#   rsquo           E2 80 99      U+00E2 U+20AC U+2122       a + euro + trade
#   ldquo           E2 80 9C      U+00E2 U+20AC U+0153       a + euro + oelig
#   bullet          E2 80 A2      U+00E2 U+20AC U+00A2       a + euro + cent
# ---------------------------------------------------------------------------

# Individual characters that make up the corrupt sequences.  Each one is built
# from its code point so this file stays pure ASCII and is parsed identically
# regardless of how PowerShell decodes the script file.
$a     = [char]0x00E2   # a with circumflex
$euro  = [char]0x20AC   # euro sign
$rdq   = [char]0x201D   # right double quotation mark
$ldq   = [char]0x201C   # left double quotation mark
$brvb  = [char]0x00A6   # broken bar
$sup1  = [char]0x00B9   # superscript one
$ordm  = [char]0x00BA   # masculine ordinal indicator
$tilde = [char]0x02DC   # small tilde
$trade = [char]0x2122   # trade mark sign
$oelig = [char]0x0153   # latin small letter oe
$cent  = [char]0x00A2   # cent sign

# Ordered list of @{ Find; Replace } pairs.  Order matters: the multi-character
# corrupt sequences must be replaced before the standalone curly-quote entries
# at the bottom, because several sequences contain those quote characters.
# Every Replace value is plain ASCII (an HTML entity or an ASCII character).
$replacements = @(
    # -- 3-char a-euro corruption sequences (UTF-8 bytes misread as Windows-1252)
    @{ Find = "${a}${euro}${rdq}";   Replace = '&mdash;'  }   # em dash    U+2014
    @{ Find = "${a}${euro}${ldq}";   Replace = '&ndash;'  }   # en dash    U+2013
    @{ Find = "${a}${euro}${brvb}";  Replace = '&hellip;' }   # ellipsis   U+2026
    @{ Find = "${a}${euro}${sup1}";  Replace = '&lsaquo;' }   # lsaquo     U+2039
    @{ Find = "${a}${euro}${ordm}";  Replace = '&rsaquo;' }   # rsaquo     U+203A
    @{ Find = "${a}${rdq}${euro}";   Replace = '-'        }   # box horiz  U+2500
    @{ Find = "${a}${euro}${tilde}"; Replace = "'"        }   # lsquo      U+2018
    @{ Find = "${a}${euro}${trade}"; Replace = "'"        }   # rsquo      U+2019
    @{ Find = "${a}${euro}${oelig}"; Replace = '&ldquo;'  }   # ldquo      U+201C
    @{ Find = "${a}${euro}${cent}";  Replace = '&bull;'   }   # bullet     U+2022

    # -- Partially-re-corrupted variants (3rd byte already normalised to ASCII) --
    # When an AI tool "fixed" the closing curly quote to ASCII before our script ran,
    # the corruption sequence ends with a plain ASCII double-quote (U+0022) or
    # single-quote (U+0027) instead of U+201C/201D.  These won't match the 3-char
    # patterns above so they need their own entries.
    @{ Find = "${a}${euro}`""; Replace = '&mdash;' }   # a-euro + ASCII double-quote
    @{ Find = "${a}${euro}'";  Replace = '&mdash;' }   # a-euro + ASCII single-quote (rare)

    # -- Right arrow (U+2192) corruption: UTF-8 E2 86 92 read as Win-1252 --------
    # 0x86 = dagger (U+2020), 0x92 = right single quote (U+2019)
    # In JS comments we can safely render as ->
    @{ Find = [string][char]0x00E2 + [char]0x2020 + [char]0x2019; Replace = '->' }   # right arrow

    # -- Standalone smart/curly quotes introduced by AI tools or paste ----------
    # These have no place in .cshtml files.  Curly double quotes break C# Razor
    # expressions; replace with ASCII.  Em/en dashes and ellipses become entities.
    @{ Find = [string][char]0x201C; Replace = '"'        }   # left double quote  -> ASCII "
    @{ Find = [string][char]0x201D; Replace = '"'        }   # right double quote -> ASCII "
    @{ Find = [string][char]0x2018; Replace = "'"        }   # left single quote  -> ASCII '
    @{ Find = [string][char]0x2019; Replace = "'"        }   # right single quote -> ASCII '
    @{ Find = [string][char]0x2014; Replace = '&mdash;'  }   # em dash (bare)
    @{ Find = [string][char]0x2013; Replace = '&ndash;'  }   # en dash (bare)
    @{ Find = [string][char]0x2026; Replace = '&hellip;' }   # ellipsis (bare)
)

# Resolve the Views directory relative to this script's own location and
# normalise it to an absolute path (collapses the "..").
$viewsPath = [System.IO.Path]::GetFullPath(
    (Join-Path $PSScriptRoot "..\src\PowderCoating.Web\Views"))

# -LiteralPath: a checkout path containing [ or ] must not be treated as a
# wildcard pattern.  -PathType Container: a file with that name is not enough.
if (-not (Test-Path -LiteralPath $viewsPath -PathType Container)) {
    Write-Error "Views directory not found: $viewsPath"
    exit 1
}

# -File skips directories whose name happens to match *.cshtml (they cannot be
# read as text); @() guarantees an array even for zero or one match.
$files = @(Get-ChildItem -LiteralPath $viewsPath -Filter "*.cshtml" -Recurse -File)
$changedCount = 0

# Apply every replacement to each view file and rewrite the files that changed.
# Reads:  $files, $replacements, $viewsPath, $DryRun, $utf8NoBom
# Writes: $changedCount (number of files changed, or that would be changed in
#         dry-run mode), plus the files themselves unless -DryRun is set.
foreach ($file in $files) {
    # A failed .NET call is only statement-terminating, so without this guard
    # $content would keep the PREVIOUS file's text and that text could be
    # written into the current file.  Skip the file instead.
    try {
        $content = [System.IO.File]::ReadAllText($file.FullName, [System.Text.Encoding]::UTF8)
    } catch {
        Write-Warning "Skipping unreadable file: $($file.FullName) ($($_.Exception.Message))"
        continue
    }

    # String.Replace(string, string) is an ordinal, case-sensitive replace.
    $updated = $content
    foreach ($r in $replacements) {
        $updated = $updated.Replace($r.Find, $r.Replace)
    }

    # Ordinal comparison: PowerShell's -ne is case-insensitive and culture-aware,
    # which is not an exact "did any character change" test.
    if ([string]::Equals($updated, $content, [System.StringComparison]::Ordinal)) {
        continue
    }

    # Shorten the path for display: absolute Views root -> "Views".
    $rel = $file.FullName.Replace($viewsPath, 'Views')
    if ($DryRun) {
        Write-Host "[DRY RUN] Would fix: $rel" -ForegroundColor Yellow
    } else {
        try {
            [System.IO.File]::WriteAllText($file.FullName, $updated, $utf8NoBom)
        } catch {
            # Do not report or count a file that could not be written.
            Write-Warning "Failed to write: $rel ($($_.Exception.Message))"
            continue
        }
        Write-Host "Fixed: $rel" -ForegroundColor Green
    }
    $changedCount++
}

# Final summary line; the wording depends on whether anything was written.
if ($DryRun) {
    $verb = "would be updated"
} else {
    $verb = "updated"
}
Write-Host "`nDone. $changedCount file(s) $verb." -ForegroundColor Cyan