@@ -92,6 +92,9 @@ pub fn decode_utf8(src: &[u8]) -> Option<(char, usize)> {
92
92
return None ;
93
93
}
94
94
let b1 = src[ 1 ] ;
95
+ if 0b11_000000 & b1 != TAG_CONT {
96
+ return None ;
97
+ }
95
98
let cp = ( ( b0 & !TAG_TWO ) as u32 ) << 6
96
99
| ( ( b1 & !TAG_CONT ) as u32 ) ;
97
100
match cp {
@@ -104,6 +107,12 @@ pub fn decode_utf8(src: &[u8]) -> Option<(char, usize)> {
104
107
return None ;
105
108
}
106
109
let ( b1, b2) = ( src[ 1 ] , src[ 2 ] ) ;
110
+ if 0b11_000000 & b1 != TAG_CONT {
111
+ return None ;
112
+ }
113
+ if 0b11_000000 & b2 != TAG_CONT {
114
+ return None ;
115
+ }
107
116
let cp = ( ( b0 & !TAG_THREE ) as u32 ) << 12
108
117
| ( ( b1 & !TAG_CONT ) as u32 ) << 6
109
118
| ( ( b2 & !TAG_CONT ) as u32 ) ;
@@ -118,6 +127,15 @@ pub fn decode_utf8(src: &[u8]) -> Option<(char, usize)> {
118
127
return None ;
119
128
}
120
129
let ( b1, b2, b3) = ( src[ 1 ] , src[ 2 ] , src[ 3 ] ) ;
130
+ if 0b11_000000 & b1 != TAG_CONT {
131
+ return None ;
132
+ }
133
+ if 0b11_000000 & b2 != TAG_CONT {
134
+ return None ;
135
+ }
136
+ if 0b11_000000 & b3 != TAG_CONT {
137
+ return None ;
138
+ }
121
139
let cp = ( ( b0 & !TAG_FOUR ) as u32 ) << 18
122
140
| ( ( b1 & !TAG_CONT ) as u32 ) << 12
123
141
| ( ( b2 & !TAG_CONT ) as u32 ) << 6
@@ -236,6 +254,8 @@ mod tests {
236
254
assert_eq ! ( decode_utf8( & [ 0xFF ] ) , None ) ;
237
255
// Lone surrogate code point (U+D801), which is not a valid `char`
238
256
assert_eq ! ( decode_utf8( & [ 0xED , 0xA0 , 0x81 ] ) , None ) ;
257
+ // Invalid continuation byte.
258
+ assert_eq ! ( decode_utf8( & [ 0xD4 , 0xC2 ] ) , None ) ;
239
259
// Bad lengths
240
260
assert_eq ! ( decode_utf8( & [ 0xC3 ] ) , None ) ; // 2 bytes
241
261
assert_eq ! ( decode_utf8( & [ 0xEF , 0xBF ] ) , None ) ; // 3 bytes
0 commit comments