Skip to content

Commit 0feac5a

Browse files
committed
fix(dep): HTML parsing of processing instructions
Added test coverage to describe the behavior of our parsers, and updated nekohtml to 1.9.22.noko2.
1 parent db72b90 commit 0feac5a

File tree

3 files changed

+35
-8
lines changed

3 files changed

+35
-8
lines changed

lib/nekohtml.jar

2.04 KB
Binary file not shown.

test/html4/test_document.rb

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -727,6 +727,17 @@ def test_silencing_nonparse_errors_during_attribute_insertion_1262
727727
assert_equal(0, doc.errors.length)
728728
end
729729

730+
def test_leaking_dtd_nodes_after_internal_subset_removal
731+
# see https://github.com/sparklemotion/nokogiri/issues/1784
732+
#
733+
# just checking that this doesn't raise a valgrind error. we
734+
# don't otherwise have any test coverage for removing DTDs.
735+
#
736+
100.times do |_i|
737+
Nokogiri::HTML::Document.new.internal_subset.remove
738+
end
739+
end
740+
730741
it "skips encoding for script tags" do
731742
html = Nokogiri::HTML(<<~EOHTML)
732743
<html>
@@ -763,15 +774,17 @@ def test_silencing_nonparse_errors_during_attribute_insertion_1262
763774
assert_equal "ISO-8859-1", html.encoding.name
764775
end
765776

766-
def test_leaking_dtd_nodes_after_internal_subset_removal
767-
# see https://github.com/sparklemotion/nokogiri/issues/1784
768-
#
769-
# just checking that this doesn't raise a valgrind error. we
770-
# don't otherwise have any test coverage for removing DTDs.
771-
#
772-
100.times do |_i|
773-
Nokogiri::HTML::Document.new.internal_subset.remove
777+
it "handles ill-formed processing instructions" do
778+
html = %{<html><body><!--><?a/}
779+
doc = Nokogiri::HTML4::Document.parse(html)
780+
expected = if Nokogiri.jruby?
781+
[Nokogiri::XML::Node::COMMENT_NODE, Nokogiri::XML::Node::PI_NODE]
782+
elsif Nokogiri.libxml2_patches.include?("0008-htmlParseComment-handle-abruptly-closed-comments.patch")
783+
[Nokogiri::XML::Node::COMMENT_NODE]
784+
else
785+
[]
774786
end
787+
assert_equal(expected, doc.at_css("body").children.map(&:type))
775788
end
776789

777790
describe ".parse" do

test/html5/test_nokogumbo.rb

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,20 @@ def test_line_cdata
322322
assert_equal(3, node.line)
323323
end
324324

325+
it "handles ill-formed processing instructions in a document" do
326+
html = %{<html><body><!--><?a/}
327+
doc = Nokogiri::HTML5::Document.parse(html)
328+
expected = [Nokogiri::XML::Node::COMMENT_NODE, Nokogiri::XML::Node::COMMENT_NODE]
329+
assert_equal(expected, doc.at_css("body").children.map(&:type))
330+
end
331+
332+
it "handles ill-formed processing instructions in a fragment" do
333+
html = %{<div><!--><?a/}
334+
frag = Nokogiri::HTML5::DocumentFragment.parse(html)
335+
expected = [Nokogiri::XML::Node::COMMENT_NODE, Nokogiri::XML::Node::COMMENT_NODE]
336+
assert_equal(expected, frag.at_css("div").children.map(&:type))
337+
end
338+
325339
private
326340

327341
def buffer

0 commit comments

Comments
 (0)