Skip to content

Commit 7ffae07

Browse files
committed
Fix character reference parsing
Ignore leading zeros and match hexadecimal digits case-insensitively. Fixes: NaturalIntelligence#568
1 parent c7b3cea commit 7ffae07

File tree

2 files changed

+46
-18
lines changed

2 files changed

+46
-18
lines changed

spec/entities_spec.js

+28
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,34 @@ describe("XMLParser Entities", function() {
4646
expect(result).toEqual(expected);
4747
});
4848

49+
it("should parse different entity character reference variants", function() {
50+
const xmlData = `<?xml version="1.0"?>
51+
<tests>
52+
<test>&lt;</test>
53+
<test>&#60;</test>
54+
<test>&#060;</test>
55+
<test>&#0060;</test>
56+
<test>&#x3C;</test>
57+
<test>&#x03C;</test>
58+
<test>&#x003C;</test>
59+
<test>&#x3c;</test>
60+
<test>&#x03c;</test>
61+
<test>&#x003c;</test>
62+
</tests>`;
63+
64+
const expected = {
65+
"?xml": "",
66+
"tests": {
67+
"test": ["<", "<", "<", "<", "<", "<", "<", "<", "<", "<"]
68+
}
69+
};
70+
71+
const parser = new XMLParser();
72+
let result = parser.parse(xmlData, true);
73+
74+
expect(result).toEqual(expected);
75+
});
76+
4977
it("should parse XML with DOCTYPE without internal DTD", function() {
5078
const xmlData = "<?xml version='1.0' standalone='no'?><!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\" ><svg><metadata>test</metadata></svg>";
5179
const expected = {

src/xmlparser/OrderedObjParser.js

+18-18
Original file line numberDiff line numberDiff line change
@@ -20,26 +20,26 @@ class OrderedObjParser{
2020
this.tagsNodeStack = [];
2121
this.docTypeEntities = {};
2222
this.lastEntities = {
23-
"apos" : { regex: /&(apos|#39|#x27);/g, val : "'"},
24-
"gt" : { regex: /&(gt|#62|#x3E);/g, val : ">"},
25-
"lt" : { regex: /&(lt|#60|#x3C);/g, val : "<"},
26-
"quot" : { regex: /&(quot|#34|#x22);/g, val : "\""},
23+
"apos" : { regex: /&(apos|#0*39|#x0*27);/gi, val : "'"},
24+
"gt" : { regex: /&(gt|#0*62|#x0*3E);/gi, val : ">"},
25+
"lt" : { regex: /&(lt|#0*60|#x0*3C);/gi, val : "<"},
26+
"quot" : { regex: /&(quot|#0*34|#x0*22);/gi, val : "\""},
2727
};
28-
this.ampEntity = { regex: /&(amp|#38|#x26);/g, val : "&"};
28+
this.ampEntity = { regex: /&(amp|#0*38|#x0*26);/gi, val : "&"};
2929
this.htmlEntities = {
30-
"space": { regex: /&(nbsp|#160);/g, val: " " },
31-
// "lt" : { regex: /&(lt|#60);/g, val: "<" },
32-
// "gt" : { regex: /&(gt|#62);/g, val: ">" },
33-
// "amp" : { regex: /&(amp|#38);/g, val: "&" },
34-
// "quot" : { regex: /&(quot|#34);/g, val: "\"" },
35-
// "apos" : { regex: /&(apos|#39);/g, val: "'" },
36-
"cent" : { regex: /&(cent|#162);/g, val: "¢" },
37-
"pound" : { regex: /&(pound|#163);/g, val: "£" },
38-
"yen" : { regex: /&(yen|#165);/g, val: "¥" },
39-
"euro" : { regex: /&(euro|#8364);/g, val: "€" },
40-
"copyright" : { regex: /&(copy|#169);/g, val: "©" },
41-
"reg" : { regex: /&(reg|#174);/g, val: "®" },
42-
"inr" : { regex: /&(inr|#8377);/g, val: "₹" },
30+
"space": { regex: /&(nbsp|#0*160);/gi, val: " " },
31+
// "lt" : { regex: /&(lt|#0*60);/gi, val: "<" },
32+
// "gt" : { regex: /&(gt|#0*62);/gi, val: ">" },
33+
// "amp" : { regex: /&(amp|#0*38);/gi, val: "&" },
34+
// "quot" : { regex: /&(quot|#0*34);/gi, val: "\"" },
35+
// "apos" : { regex: /&(apos|#0*39);/gi, val: "'" },
36+
"cent" : { regex: /&(cent|#0*162);/gi, val: "¢" },
37+
"pound" : { regex: /&(pound|#0*163);/gi, val: "£" },
38+
"yen" : { regex: /&(yen|#0*165);/gi, val: "¥" },
39+
"euro" : { regex: /&(euro|#0*8364);/gi, val: "€" },
40+
"copyright" : { regex: /&(copy|#0*169);/gi, val: "©" },
41+
"reg" : { regex: /&(reg|#0*174);/gi, val: "®" },
42+
"inr" : { regex: /&(inr|#0*8377);/gi, val: "₹" },
4343
};
4444
this.addExternalEntities = addExternalEntities;
4545
this.parseXml = parseXml;

0 commit comments

Comments
 (0)