Skip to content

Commit ceb9fe3

Browse files
Drop UTF-32 & “UTF-16”; use UTF-16BE and UTF-16LE
This change drops all handling for UTF-32 (which is a completely invalid/unsupported encoding per the Encoding spec) and replaces handling for “UTF-16” (which also isn’t a valid/supported encoding) with handling for the valid/supported encodings UTF-16BE and UTF-16LE.
1 parent c536a4a commit ceb9fe3

File tree

3 files changed

+5
-10
lines changed

3 files changed

+5
-10
lines changed

src/nu/validator/htmlparser/io/Driver.java

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -337,9 +337,8 @@ public boolean internalEncodingDeclaration(String internalCharset)
337337
throws SAXException {
338338
try {
339339
internalCharset = internalCharset.toLowerCase();
340-
Encoding cs;
341-
if ("utf-16".equals(internalCharset)
342-
|| "utf-16be".equals(internalCharset)
340+
Encoding cs = Encoding.forName(internalCharset);
341+
if ("utf-16be".equals(internalCharset)
343342
|| "utf-16le".equals(internalCharset)) {
344343
tokenizer.errTreeBuilder("Internal encoding declaration specified \u201C"
345344
+ internalCharset
@@ -431,8 +430,8 @@ protected Encoding encodingFromExternalDeclaration(String encoding)
431430
encoding = encoding.toLowerCase();
432431
try {
433432
Encoding cs = Encoding.forName(encoding);
434-
if ("utf-16".equals(cs.getCanonName())
435-
|| "utf-32".equals(cs.getCanonName())) {
433+
if ("utf-16be".equals(cs.getCanonName())
434+
|| "utf-16le".equals(cs.getCanonName())) {
436435
swallowBom = false;
437436
}
438437
return whineAboutEncodingAndReturnCanonical(encoding, cs);

src/nu/validator/htmlparser/io/Encoding.java

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -44,8 +44,6 @@ public class Encoding {
4444

4545
public static final Encoding UTF8;
4646

47-
public static final Encoding UTF16;
48-
4947
public static final Encoding UTF16LE;
5048

5149
public static final Encoding UTF16BE;
@@ -391,7 +389,6 @@ private static void createEncoding(String name, String[] labels) {
391389

392390
static {
393391
UTF8 = forName("utf-8");
394-
UTF16 = forName("utf-16");
395392
UTF16BE = forName("utf-16be");
396393
UTF16LE = forName("utf-16le");
397394
WINDOWS1252 = forName("windows-1252");

src/nu/validator/htmlparser/io/MetaSniffer.java

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -161,8 +161,7 @@ public String getEncoding() {
161161
protected boolean tryCharset(String encoding) throws SAXException {
162162
encoding = encoding.toLowerCase();
163163
try {
164-
// XXX spec says only UTF-16
165-
if ("utf-16".equals(encoding) || "utf-16be".equals(encoding) || "utf-16le".equals(encoding) || "utf-32".equals(encoding) || "utf-32be".equals(encoding) || "utf-32le".equals(encoding)) {
164+
if ("utf-16be".equals(encoding) || "utf-16le".equals(encoding)) {
166165
this.characterEncoding = Encoding.UTF8;
167166
err("The internal character encoding declaration specified \u201C" + encoding + "\u201D which is not a rough superset of ASCII. Using \u201CUTF-8\u201D instead.");
168167
return true;

0 commit comments

Comments
 (0)