#!/bin/sh
# Pre-commit hook: block commits containing corrupted Unicode in .cshtml files.
#
# All corruption variants start with the UTF-8 byte sequence for a-circumflex
# followed by euro-sign (bytes C3 A2 E2 82 AC), which is the first two chars
# of every known corruption pattern. Grep for that byte sequence in staged files.

# UTF-8 bytes for a-circumflex + euro-sign (C3 A2 E2 82 AC).
# Built with printf octal escapes: $'...' quoting is a bashism that plain
# POSIX shells such as dash pass through literally, which would make the
# pattern never match when /bin/sh is not bash.
CORRUPT_PATTERN=$(printf '\303\242\342\202\254')

# Succeeds (status 0) when the data on stdin contains the corruption marker.
# -F treats the pattern as a fixed string; LC_ALL=C makes the comparison
# byte-wise regardless of the user's locale.
has_corruption() {
    LC_ALL=C grep -q -F -e "$CORRUPT_PATTERN"
}

# Prints the staged .cshtml paths, one per line.
# --diff-filter=ACMR skips deletions (nothing left to scan), and
# core.quotePath=false stops git from wrapping non-ASCII names in quotes,
# which would otherwise defeat the '\.cshtml$' match.
list_staged_cshtml() {
    git -c core.quotePath=false diff --cached --name-only --diff-filter=ACMR |
        grep '\.cshtml$'
}

# Prints every staged .cshtml path whose *staged* content (index stage 0)
# contains the marker. Reading the blob from the index rather than the
# working tree checks exactly what is about to be committed. Paths are read
# line by line so names containing spaces are handled.
list_corrupt_staged() {
    list_staged_cshtml | while IFS= read -r staged_path; do
        if git cat-file blob ":0:$staged_path" 2>/dev/null </dev/null |
            has_corruption; then
            printf '%s\n' "$staged_path"
        fi
    done
}

# Hook entry point: returns 0 to allow the commit, 1 to block it.
main() {
    corrupt_files=$(list_corrupt_staged)

    if [ -z "$corrupt_files" ]; then
        return 0
    fi

    {
        echo ""
        echo "ERROR: Corrupted Unicode characters detected in staged .cshtml files:"
        printf '%s\n' "$corrupt_files" | sed 's/^/  /'
        echo ""
        # printf with a single-quoted argument keeps the backslashes literal;
        # some /bin/sh echo implementations would turn "\t" into a TAB.
        printf '%s\n' 'Fix by running:  .\tools\Fix-Encoding.ps1'
        echo "Then re-stage the files and commit again."
        echo ""
    } >&2
    return 1
}

# Must stay the last command: its return status is the hook's exit status.
main "$@"
