@@ -101,32 +101,45 @@ where
101
101
}
102
102
}
103
103
104
- /// A unit within CStr. Must not be a nul character.
105
- pub enum CStrUnit {
106
- Byte ( u8 ) ,
104
+ /// Used for mixed UTF-8 string literals, i.e. those that allow both Unicode
105
+ /// chars and high bytes.
106
+ pub enum MixedUnit {
107
+ /// Used for ASCII chars (written directly or via `\x00`..`\x7f` escapes)
108
+ /// and Unicode chars (written directly or via `\u` escapes).
109
+ ///
110
+ /// For example, if '¥' appears in a string it is represented here as
111
+ /// `MixedUnit::Char('¥')`, and it will be appended to the relevant byte
112
+ /// string as the two-byte UTF-8 sequence `[0xc2, 0xa5]`.
107
113
Char ( char ) ,
114
+
115
+ /// Used for high bytes (`\x80`..`\xff`).
116
+ ///
117
+ /// For example, if `\xa5` appears in a string it is represented here as
118
+ /// `MixedUnit::HighByte(0xa5)`, and it will be appended to the relevant
119
+ /// byte string as the single byte `0xa5`.
120
+ HighByte ( u8 ) ,
108
121
}
109
122
110
- impl From < u8 > for CStrUnit {
111
- fn from ( value : u8 ) -> Self {
112
- CStrUnit :: Byte ( value )
123
+ impl From < char > for MixedUnit {
124
+ fn from ( c : char ) -> Self {
125
+ MixedUnit :: Char ( c ) // Any `char` is wrapped unchanged in the `Char` variant.
113
126
}
114
127
}
115
128
116
- impl From < char > for CStrUnit {
117
- fn from ( value : char ) -> Self {
118
- CStrUnit :: Char ( value )
129
+ impl From < u8 > for MixedUnit {
130
+ fn from ( n : u8 ) -> Self { // ASCII bytes (`0x00..=0x7f`) become `Char`; any other byte is kept as a `HighByte`.
131
+ if n . is_ascii ( ) { MixedUnit :: Char ( n as char ) } else { MixedUnit :: HighByte ( n ) }
119
132
}
120
133
}
121
134
122
135
pub fn unescape_c_string < F > ( src : & str , mode : Mode , callback : & mut F )
123
136
where
124
- F : FnMut ( Range < usize > , Result < CStrUnit , EscapeError > ) ,
137
+ F : FnMut ( Range < usize > , Result < MixedUnit , EscapeError > ) ,
125
138
{
126
139
match mode {
127
140
CStr => {
128
141
unescape_non_raw_common ( src, mode, & mut |r, mut result| {
129
- if let Ok ( CStrUnit :: Byte ( 0 ) | CStrUnit :: Char ( '\0' ) ) = result {
142
+ if let Ok ( MixedUnit :: Char ( '\0' ) ) = result {
130
143
result = Err ( EscapeError :: NulInCStr ) ;
131
144
}
132
145
callback ( r, result)
@@ -137,7 +150,8 @@ where
137
150
if let Ok ( '\0' ) = result {
138
151
result = Err ( EscapeError :: NulInCStr ) ;
139
152
}
140
- callback ( r, result. map ( CStrUnit :: Char ) )
153
+ // High bytes aren't possible in raw strings.
154
+ callback ( r, result. map ( MixedUnit :: Char ) )
141
155
} ) ;
142
156
}
143
157
Char | Byte | Str | RawStr | ByteStr | RawByteStr => unreachable ! ( ) ,
@@ -217,20 +231,19 @@ impl Mode {
217
231
}
218
232
}
219
233
220
- fn scan_escape < T : From < u8 > + From < char > > (
234
+ fn scan_escape < T : From < char > + From < u8 > > (
221
235
chars : & mut Chars < ' _ > ,
222
236
mode : Mode ,
223
237
) -> Result < T , EscapeError > {
224
238
// Previous character was '\\', unescape what follows.
225
- let res: u8 = match chars. next ( ) . ok_or ( EscapeError :: LoneSlash ) ? {
226
- '"' => b'"' ,
227
- 'n' => b'\n' ,
228
- 'r' => b'\r' ,
229
- 't' => b'\t' ,
230
- '\\' => b'\\' ,
231
- '\'' => b'\'' ,
232
- '0' => b'\0' ,
233
-
239
+ let res: char = match chars. next ( ) . ok_or ( EscapeError :: LoneSlash ) ? {
240
+ '"' => '"' ,
241
+ 'n' => '\n' ,
242
+ 'r' => '\r' ,
243
+ 't' => '\t' ,
244
+ '\\' => '\\' ,
245
+ '\'' => '\'' ,
246
+ '0' => '\0' ,
234
247
'x' => {
235
248
// Parse hexadecimal character code.
236
249
@@ -240,15 +253,17 @@ fn scan_escape<T: From<u8> + From<char>>(
240
253
let lo = chars. next ( ) . ok_or ( EscapeError :: TooShortHexEscape ) ?;
241
254
let lo = lo. to_digit ( 16 ) . ok_or ( EscapeError :: InvalidCharInHexEscape ) ?;
242
255
243
- let value = hi * 16 + lo;
244
-
245
- if mode. ascii_escapes_should_be_ascii ( ) && !is_ascii ( value) {
246
- return Err ( EscapeError :: OutOfRangeHexEscape ) ;
247
- }
256
+ let value = ( hi * 16 + lo) as u8 ;
248
257
249
- value as u8
258
+ return if mode. ascii_escapes_should_be_ascii ( ) && !value. is_ascii ( ) {
259
+ Err ( EscapeError :: OutOfRangeHexEscape )
260
+ } else {
261
+ // This may be a high byte, but that will only happen if `T` is
262
+ // `MixedUnit`, because of the `ascii_escapes_should_be_ascii`
263
+ // check above.
264
+ Ok ( T :: from ( value as u8 ) )
265
+ } ;
250
266
}
251
-
252
267
'u' => return scan_unicode ( chars, mode. is_unicode_escape_disallowed ( ) ) . map ( T :: from) ,
253
268
_ => return Err ( EscapeError :: InvalidEscape ) ,
254
269
} ;
@@ -336,7 +351,7 @@ fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, Esca
336
351
337
352
/// Takes a contents of a string literal (without quotes) and produces a
338
353
/// sequence of escaped characters or errors.
339
- fn unescape_non_raw_common < F , T : From < u8 > + From < char > > ( src : & str , mode : Mode , callback : & mut F )
354
+ fn unescape_non_raw_common < F , T : From < char > + From < u8 > > ( src : & str , mode : Mode , callback : & mut F )
340
355
where
341
356
F : FnMut ( Range < usize > , Result < T , EscapeError > ) ,
342
357
{
@@ -430,7 +445,3 @@ pub fn byte_from_char(c: char) -> u8 {
430
445
debug_assert ! ( res <= u8 :: MAX as u32 , "guaranteed because of ByteStr" ) ;
431
446
res as u8
432
447
}
433
-
434
- fn is_ascii ( x : u32 ) -> bool {
435
- x <= 0x7F
436
- }
0 commit comments