@@ -151,25 +151,45 @@ def Text.check string, pattern, doctype
151
151
end
152
152
end
153
153
154
- # context sensitive
155
- string . scan ( pattern ) do
156
- if $1[ -1 ] != ?;
157
- raise "Illegal character #{ $1. inspect } in raw string #{ string . inspect } "
158
- elsif $1[ 0 ] == ?&
159
- if $5 and $5[ 0 ] == ?#
160
- case ( $5[ 1 ] == ?x ? $5[ 2 ..-1 ] . to_i ( 16 ) : $5[ 1 ..-1 ] . to_i )
161
- when *VALID_CHAR
154
+ pos = 0
155
+ while ( index = string . index ( /<|&/ , pos ) )
156
+ if string [ index ] == "<"
157
+ raise "Illegal character \" #{ string [ index ] } \" in raw string #{ string . inspect } "
158
+ end
159
+
160
+ unless ( end_index = string . index ( /[^\s ];/ , index + 1 ) )
161
+ raise "Illegal character \" #{ string [ index ] } \" in raw string #{ string . inspect } "
162
+ end
163
+
164
+ value = string [ ( index + 1 ) ..end_index ]
165
+ if /\s / . match? ( value )
166
+ raise "Illegal character \" #{ string [ index ] } \" in raw string #{ string . inspect } "
167
+ end
168
+
169
+ if value [ 0 ] == "#"
170
+ character_reference = value [ 1 ..-1 ]
171
+
172
+ unless ( /\A (\d +|x[0-9a-fA-F]+)\z / . match? ( character_reference ) )
173
+ if character_reference [ 0 ] == "x" || character_reference [ -1 ] == "x"
174
+ raise "Illegal character \" #{ string [ index ] } \" in raw string #{ string . inspect } "
162
175
else
163
- raise "Illegal character #{ $1 . inspect } in raw string #{ string . inspect } "
176
+ raise "Illegal character #{ string . inspect } in raw string #{ string . inspect } "
164
177
end
165
- # FIXME: below can't work but this needs API change.
166
- # elsif @parent and $3 and !SUBSTITUTES.include?($1)
167
- # if !doctype or !doctype.entities.has_key?($3)
168
- # raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
169
- # end
170
178
end
179
+
180
+ case ( character_reference [ 0 ] == "x" ? character_reference [ 1 ..-1 ] . to_i ( 16 ) : character_reference [ 0 ..-1 ] . to_i )
181
+ when *VALID_CHAR
182
+ else
183
+ raise "Illegal character #{ string . inspect } in raw string #{ string . inspect } "
184
+ end
185
+ elsif !( /\A #{ Entity ::NAME } \z /um . match? ( value ) )
186
+ raise "Illegal character \" #{ string [ index ] } \" in raw string #{ string . inspect } "
171
187
end
188
+
189
+ pos = end_index + 1
172
190
end
191
+
192
+ string
173
193
end
174
194
175
195
def node_type
0 commit comments