Skip to content

Commit 4b733b1

Browse files
committed
Updated InScope search basetypes to be namespace aware
Implements the todo added in b875b92. Case found by the fuzzer.
1 parent d89d757 commit 4b733b1

File tree

4 files changed

+29
-13
lines changed

4 files changed

+29
-13
lines changed

src/main/java/org/jsoup/parser/HtmlTreeBuilder.java

+26-10
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,17 @@
2828
*/
2929
public class HtmlTreeBuilder extends TreeBuilder {
3030
// tag searches. must be sorted, used in inSorted. HtmlTreeBuilderTest validates they're sorted.
31-
// todo - tag search in scope might need to be properly namespace aware - https://html.spec.whatwg.org/#has-an-element-in-scope
32-
static final String[] TagsSearchInScope = new String[]{
33-
"annotation-xml", "applet", "caption", "desc", "foreignObject", "html", "marquee", "mi", "mn", "mo", "ms", "mtext", "object", "table", "td", "template", "th", "title" // <- svg title
31+
static final String[] TagsSearchInScope = new String[]{ // a particular element in scope
32+
"applet", "caption", "html", "marquee", "object", "table", "td", "template", "th"
3433
};
34+
// math and svg namespaces for particular element in scope
35+
static final String[]TagSearchInScopeMath = new String[] {
36+
"annotation-xml", "mi", "mn", "mo", "ms", "mtext"
37+
};
38+
static final String[]TagSearchInScopeSvg = new String[] {
39+
"desc", "foreignObject", "title"
40+
};
41+
3542
static final String[] TagSearchList = new String[]{"ol", "ul"};
3643
static final String[] TagSearchButton = new String[]{"button"};
3744
static final String[] TagSearchTableScope = new String[]{"html", "table"};
@@ -681,13 +688,22 @@ private boolean inSpecificScope(String[] targetNames, String[] baseTypes, @Nulla
681688
// don't walk too far up the tree
682689
for (int pos = bottom; pos >= top; pos--) {
683690
Element el = stack.get(pos);
684-
final String elName = el.normalName();
685-
if (inSorted(elName, targetNames))
686-
return true;
687-
if (inSorted(elName, baseTypes))
688-
return false;
689-
if (extraTypes != null && inSorted(elName, extraTypes))
690-
return false;
691+
String elName = el.normalName();
692+
// namespace checks - arguments provided are always in html ns, with this bolt-on for math and svg:
693+
String ns = el.tag().namespace();
694+
if (ns.equals(NamespaceHtml)) {
695+
if (inSorted(elName, targetNames))
696+
return true;
697+
if (inSorted(elName, baseTypes))
698+
return false;
699+
if (extraTypes != null && inSorted(elName, extraTypes))
700+
return false;
701+
} else if (baseTypes == TagsSearchInScope) {
702+
if (ns.equals(NamespaceMathml) && inSorted(elName, TagSearchInScopeMath))
703+
return false;
704+
if (ns.equals(NamespaceSvg) && inSorted(elName, TagSearchInScopeSvg))
705+
return false;
706+
}
691707
}
692708
//Validate.fail("Should not be reachable"); // would end up false because hitting 'html' at root (basetypes)
693709
return false;

src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -1323,7 +1323,7 @@ private boolean anythingElse(Token t, HtmlTreeBuilder tb) {
13231323
Token.StartTag startTag = t.asStartTag();
13241324
String name = startTag.normalName();
13251325

1326-
if (inSorted(name, InCellNames)) { // th, th
1326+
if (inSorted(name, InCellNames)) { // td, th
13271327
tb.clearStackToTableRowContext();
13281328
tb.insertElementFor(startTag);
13291329
tb.transition(InCell);
@@ -1396,7 +1396,7 @@ private boolean anythingElse(Token t, HtmlTreeBuilder tb) {
13961396
Token.EndTag endTag = t.asEndTag();
13971397
String name = endTag.normalName();
13981398

1399-
if (inSorted(name, Constants.InCellNames)) {
1399+
if (inSorted(name, Constants.InCellNames)) { // td, th
14001400
if (!tb.inTableScope(name)) {
14011401
tb.error(this);
14021402
tb.transition(InRow); // might not be in scope if empty: <td /> and processing fake end tag

src/test/java/org/jsoup/parser/HtmlTreeBuilderTest.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ public class HtmlTreeBuilderTest {
1717
public void ensureSearchArraysAreSorted() {
1818
List<Object[]> constants = HtmlTreeBuilderStateTest.findConstantArrays(HtmlTreeBuilder.class);
1919
HtmlTreeBuilderStateTest.ensureSorted(constants);
20-
assertEquals(11, constants.size());
20+
assertEquals(13, constants.size());
2121
}
2222

2323
@Test
945 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)