|
| 1 | +#Requires -Version 3.0 |
| 2 | + |
function Measure-UseASCII {
<#
    .SYNOPSIS
        Use ASCII characters
    .DESCRIPTION
        Validates that only ASCII characters (U+0000 - U+007F) are used and reveals the
        position of any violation, together with a suggested ASCII replacement.
    .PARAMETER ScriptBlockAst
        The script block AST whose extent text is scanned for non-ASCII characters.
    .INPUTS
        [System.Management.Automation.Language.ScriptBlockAst]
    .OUTPUTS
        [Microsoft.Windows.PowerShell.ScriptAnalyzer.Generic.DiagnosticRecord]
#>

    [CmdletBinding()]
    [OutputType([Microsoft.Windows.PowerShell.ScriptAnalyzer.Generic.DiagnosticRecord])]
    Param (
        [Parameter(Mandatory = $true)]
        [ValidateNotNullOrEmpty()]
        [System.Management.Automation.Language.ScriptBlockAst]
        $ScriptBlockAst
    )
    Begin {
        # Emits one object per character above U+007F found in $Text.
        # LineNumber and ColumnNumber are 1-based and relative to the start of $Text;
        # Offset is the 0-based index of the character and LineStart the 0-based index
        # of the first character of the line that contains it.
        function GetNonASCIIPositions ([String]$Text) {
            $LF = [Char]0x0A
            $DEL = [Char]0x7F
            $LineNumber = 1
            # The column is incremented *before* it is reported, so it has to start at 0
            # (exactly as it does after every line feed) to make the first character column 1.
            $ColumnNumber = 0
            $LineStart = 0
            for ($Offset = 0; $Offset -lt $Text.Length; $Offset++) {
                $Character = $Text[$Offset]
                if ($Character -eq $LF) {
                    $LineNumber++
                    $ColumnNumber = 0
                    $LineStart = $Offset + 1
                }
                else {
                    $ColumnNumber++
                    if ($Character -gt $DEL) {
                        [PSCustomObject]@{
                            Character    = $Character
                            Offset       = $Offset
                            LineStart    = $LineStart
                            LineNumber   = $LineNumber
                            ColumnNumber = $ColumnNumber
                        }
                    }
                }
            }
        }

        # Formats the UTF-16 code unit of $Char as four lowercase hexadecimal digits.
        function CharToHex([Char]$Char) {
            ([Int][Char]$Char).ToString('x4')
        }

        # Returns the ASCII character suggested as a replacement for $Char:
        # a fixed mapping for common typographic spaces, dashes and quotes, otherwise the
        # base character of the canonical decomposition (e.g. an accented letter without
        # its accent), or '_' when no ASCII equivalent can be derived.
        function SuggestedASCII([Char]$Char) {
            switch ([Int]$Char) {
                0x00A0 { ' ' }
                0x1806 { '-' }
                0x2010 { '-' }
                0x2011 { '-' }
                0x2012 { '-' }
                0x2013 { '-' }
                0x2014 { '-' }
                0x2015 { '-' }
                # NOTE(review): U+2016 is DOUBLE VERTICAL LINE rather than a dash - confirm this mapping.
                0x2016 { '-' }
                0x2212 { '-' }
                0x2018 { "'" }
                0x2019 { "'" }
                0x201A { "'" }
                0x201B { "'" }
                0x201C { '"' }
                0x201D { '"' }
                0x201E { '"' }
                0x201F { '"' }
                Default {
                    $ASCII = $Char.ToString().Normalize([System.Text.NormalizationForm]::FormD)[0]
                    # Compare code points explicitly instead of relying on an implicit char/int conversion.
                    if ([Int]$ASCII -le 0x7F) { $ASCII } else { '_' }
                }
            }
        }
    }

    Process {
        # Like the AST parser, Tokenize doesn't capture (smart) quotes, hence the raw text is scanned:
        # $Tokens = [System.Management.Automation.PSParser]::Tokenize($ScriptBlockAst.Extent.Text, [ref]$null)
        # $Violations = $Tokens.where{ $_.Content -cMatch '[\u0100-\uFFFF]' }
        $ScriptExtent = $ScriptBlockAst.Extent
        $Text = $ScriptExtent.Text
        $NewLine = [Char]0x0A
        $CarriageReturn = [Char]0x0D

        # The scan below counts lines and columns from the start of the extent text.
        # Translate them with the start of the extent so the reported positions stay
        # correct when the extent does not begin at line 1, column 1.
        $LineOffset = $ScriptExtent.StartLineNumber - 1
        $ColumnOffset = $ScriptExtent.StartColumnNumber - 1

        $Violations = GetNonASCIIPositions $Text
        [Collections.Generic.List[Microsoft.Windows.PowerShell.ScriptAnalyzer.Generic.DiagnosticRecord]]@(
            foreach ($Violation in $Violations) {
                # Expand to the surrounding run of word characters to give the message some context.
                for ($i = $Violation.Offset - 1; $i -ge 0; $i--) { if ($Text[$i] -notmatch '\w') { break } }
                $Start = $i + 1
                for ($i = $Violation.Offset + 1; $i -lt $Text.Length; $i++) { if ($Text[$i] -notmatch '\w') { break } }
                $Word = $Text.Substring($Start, $i - $Start)

                # ScriptPosition expects the text of the line the position is on, not the whole script.
                $LineEnd = $Text.IndexOf($NewLine, $Violation.Offset)
                if ($LineEnd -lt 0) { $LineEnd = $Text.Length }
                $Line = $Text.Substring($Violation.LineStart, $LineEnd - $Violation.LineStart).TrimEnd($CarriageReturn)

                $LineNumber = $Violation.LineNumber + $LineOffset
                $StartColumn = $Violation.ColumnNumber
                if ($Violation.LineNumber -eq 1 -and $ColumnOffset -gt 0) {
                    # The first line of the extent text starts mid-line: shift the column and
                    # use the complete source line so that column and line text agree.
                    $StartColumn += $ColumnOffset
                    $Line = $ScriptExtent.StartScriptPosition.Line
                }
                $EndColumn = $StartColumn + 1

                $StartPosition = [System.Management.Automation.Language.ScriptPosition]::new(
                    $Null,
                    $LineNumber,
                    $StartColumn,
                    $Line
                )
                $EndPosition = [System.Management.Automation.Language.ScriptPosition]::new(
                    $Null,
                    $LineNumber,
                    $EndColumn,
                    $Line
                )
                $Extent = [System.Management.Automation.Language.ScriptExtent]::new($StartPosition, $EndPosition)

                $Character = $Violation.Character
                $UniCode = "U+$(CharToHex $Character)"
                $Replacement = SuggestedASCII $Character
                $AscCode = "U+$(CharToHex $Replacement)"
                [Microsoft.Windows.PowerShell.ScriptAnalyzer.Generic.DiagnosticRecord]@{
                    Message              = "Non-ASCII character $UniCode found in: $Word"
                    Extent               = $Extent
                    RuleName             = 'PSUseASCII'
                    Severity             = 'Information'
                    RuleSuppressionID    = $Word
                    SuggestedCorrections = [System.Collections.ObjectModel.Collection[Microsoft.Windows.PowerShell.ScriptAnalyzer.Generic.CorrectionExtent]](
                        # Arguments: start line, end line, start column, end column, text, file, description.
                        # The six-argument overload takes the *file* last, so the description
                        # has to be passed as a seventh argument.
                        [Microsoft.Windows.PowerShell.ScriptAnalyzer.Generic.CorrectionExtent]::New(
                            $LineNumber,
                            $LineNumber,
                            $StartColumn,
                            $EndColumn,
                            "$Replacement",
                            $ScriptExtent.File,
                            "Replace '$Character' ($UniCode) with '$Replacement' ($AscCode)"
                        )
                    )
                }
            }
        )
    }
}
# Export every rule function of this module (all functions named Measure-*).
Export-ModuleMember -Function 'Measure-*'
0 commit comments