Skip to content

Commit 0af55fa

Browse files
authored
Fix ReDoS caused by very large character references using repeated 0s (#169)
This patch will fix the ReDoS that is caused by large string of 0s on a character reference (like `&#00000000...`). This is occurred in Ruby 3.1 or earlier.
1 parent b8a5f4c commit 0af55fa

File tree

2 files changed

+51
-14
lines changed

2 files changed

+51
-14
lines changed

lib/rexml/text.rb

Lines changed: 34 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -151,25 +151,45 @@ def Text.check string, pattern, doctype
151151
end
152152
end
153153

154-
# context sensitive
155-
string.scan(pattern) do
156-
if $1[-1] != ?;
157-
raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
158-
elsif $1[0] == ?&
159-
if $5 and $5[0] == ?#
160-
case ($5[1] == ?x ? $5[2..-1].to_i(16) : $5[1..-1].to_i)
161-
when *VALID_CHAR
154+
pos = 0
155+
while (index = string.index(/<|&/, pos))
156+
if string[index] == "<"
157+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
158+
end
159+
160+
unless (end_index = string.index(/[^\s];/, index + 1))
161+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
162+
end
163+
164+
value = string[(index + 1)..end_index]
165+
if /\s/.match?(value)
166+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
167+
end
168+
169+
if value[0] == "#"
170+
character_reference = value[1..-1]
171+
172+
unless (/\A(\d+|x[0-9a-fA-F]+)\z/.match?(character_reference))
173+
if character_reference[0] == "x" || character_reference[-1] == "x"
174+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
162175
else
163-
raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
176+
raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
164177
end
165-
# FIXME: below can't work but this needs API change.
166-
# elsif @parent and $3 and !SUBSTITUTES.include?($1)
167-
# if !doctype or !doctype.entities.has_key?($3)
168-
# raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
169-
# end
170178
end
179+
180+
case (character_reference[0] == "x" ? character_reference[1..-1].to_i(16) : character_reference[0..-1].to_i)
181+
when *VALID_CHAR
182+
else
183+
raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
184+
end
185+
elsif !(/\A#{Entity::NAME}\z/um.match?(value))
186+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
171187
end
188+
189+
pos = end_index + 1
172190
end
191+
192+
string
173193
end
174194

175195
def node_type
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
require "test/unit"
2+
require "core_assertions"
3+
4+
require "rexml/document"
5+
6+
module REXMLTests
7+
class TestParseCharacterReference < Test::Unit::TestCase
8+
include Test::Unit::CoreAssertions
9+
10+
def test_gt_linear_performance_many_preceding_zeros
11+
seq = [10000, 50000, 100000, 150000, 200000]
12+
assert_linear_performance(seq, rehearsal: 10) do |n|
13+
REXML::Document.new('<test testing="&#' + "0" * n + '97;"/>')
14+
end
15+
end
16+
end
17+
end

0 commit comments

Comments
 (0)