Skip to content

Commit 9085079

Browse files
committed
Prohibit bare CRs in raw byte strings
1 parent 8d7f254 commit 9085079

File tree

5 files changed

+65
-74
lines changed

5 files changed

+65
-74
lines changed

src/libsyntax/parse/lexer/mod.rs

+24-70
Original file line numberDiff line numberDiff line change
@@ -292,15 +292,6 @@ impl<'a> StringReader<'a> {
292292
self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), &m[..])
293293
}
294294

295-
/// Report a lexical error spanning [`from_pos`, `to_pos`), appending an
296-
/// escaped character to the error message
297-
fn err_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) {
298-
let mut m = m.to_string();
299-
m.push_str(": ");
300-
push_escaped_char(&mut m, c);
301-
self.err_span_(from_pos, to_pos, &m[..]);
302-
}
303-
304295
/// Advance peek_token to refer to the next token, and
305296
/// possibly update the interner.
306297
fn advance_token(&mut self) -> Result<(), ()> {
@@ -1070,7 +1061,13 @@ impl<'a> StringReader<'a> {
10701061
self.validate_byte_str_escape(start_with_quote);
10711062
(token::ByteStr, symbol)
10721063
},
1073-
Some('r') => self.scan_raw_byte_string(),
1064+
Some('r') => {
1065+
let (start, end, hash_count) = self.scan_raw_string();
1066+
let symbol = self.name_from_to(start, end);
1067+
self.validate_raw_byte_str_escape(start, end);
1068+
1069+
(token::ByteStrRaw(hash_count), symbol)
1070+
}
10741071
_ => unreachable!(), // Should have been a token::Ident above.
10751072
};
10761073
let suffix = self.scan_optional_raw_name();
@@ -1300,66 +1297,6 @@ impl<'a> StringReader<'a> {
13001297
(content_start_bpos, content_end_bpos, hash_count)
13011298
}
13021299

1303-
/// Scans a raw byte string literal and returns its token kind and contents.
///
/// The first character is skipped unconditionally (presumably the `r` of
/// `br"…"`, judging by the `Some('r')` call site — confirm against caller).
/// The scanner then counts the opening `#` symbols, expects a `"`, and reads
/// until it finds a `"` followed by the same number of `#` symbols.
///
/// Returns `token::ByteStrRaw(hash_count)` together with the symbol for the
/// text strictly between the opening and closing quotes.
///
/// Diagnostics emitted along the way:
/// * more than 65535 `#` symbols: reported through `fatal_span_(..).raise()`;
/// * end of input before the opening quote or before the terminator:
///   `fail_unterminated_raw_string`;
/// * any character other than `#` or `"` in the opening delimiter:
///   reported through `fatal_span_char(..).raise()`;
/// * a character above `'\x7F'` in the body: `err_span_char` with
///   "raw byte string must be ASCII"; scanning continues afterwards.
fn scan_raw_byte_string(&mut self) -> (token::LitKind, Symbol) {
    let start_bpos = self.pos;
    self.bump();

    // Count the `#` symbols of the opening delimiter.
    let mut hash_count = 0;
    while self.ch_is('#') {
        if hash_count == 65535 {
            let bpos = self.next_pos;
            self.fatal_span_(start_bpos,
                             bpos,
                             "too many `#` symbols: raw strings may be \
                              delimited by up to 65535 `#` symbols").raise();
        }
        self.bump();
        hash_count += 1;
    }

    if self.is_eof() {
        self.fail_unterminated_raw_string(start_bpos, hash_count);
    } else if !self.ch_is('"') {
        // Capture the offending character and its position, and report
        // exactly those. The previous code bound these two locals but then
        // passed the undefined names `pos` and `ch` to `fatal_span_char`.
        let last_bpos = self.pos;
        let curr_char = self.ch.unwrap();
        self.fatal_span_char(start_bpos,
                             last_bpos,
                             "found invalid character; only `#` is allowed in raw \
                              string delimitation",
                             curr_char).raise();
    }

    // Step over the opening quote; the literal's contents start here.
    self.bump();
    let content_start_bpos = self.pos;
    let mut content_end_bpos;
    'outer: loop {
        match self.ch {
            None => {
                self.fail_unterminated_raw_string(start_bpos, hash_count);
            }
            Some('"') => {
                // Tentative end of the contents: it only counts if the quote
                // is followed by `hash_count` `#` symbols. Otherwise the
                // quote was part of the body and scanning resumes at the
                // first character that was not a `#`.
                content_end_bpos = self.pos;
                for _ in 0..hash_count {
                    self.bump();
                    if !self.ch_is('#') {
                        continue 'outer;
                    }
                }
                break;
            }
            Some(c) => {
                if c > '\x7F' {
                    let pos = self.pos;
                    self.err_span_char(pos, pos, "raw byte string must be ASCII", c);
                }
            }
        }
        self.bump();
    }

    // Step over the final character of the closing delimiter.
    self.bump();

    (token::ByteStrRaw(hash_count), self.name_from_to(content_start_bpos, content_end_bpos))
}
1362-
13631300
fn validate_char_escape(&self, start_with_quote: BytePos) {
13641301
self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| {
13651302
if let Err((off, err)) = unescape::unescape_char(lit) {
@@ -1424,6 +1361,23 @@ impl<'a> StringReader<'a> {
14241361
});
14251362
}
14261363

1364+
fn validate_raw_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) {
1365+
self.with_str_from_to(content_start, content_end, |lit: &str| {
1366+
unescape::unescape_raw_byte_str(lit, &mut |range, c| {
1367+
if let Err(err) = c {
1368+
emit_unescape_error(
1369+
&self.sess.span_diagnostic,
1370+
lit,
1371+
self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
1372+
unescape::Mode::ByteStr,
1373+
range,
1374+
err,
1375+
)
1376+
}
1377+
})
1378+
});
1379+
}
1380+
14271381
fn validate_byte_str_escape(&self, start_with_quote: BytePos) {
14281382
self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| {
14291383
unescape::unescape_byte_str(lit, &mut |range, c| {

src/libsyntax/parse/unescape.rs

+24
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ pub(crate) enum EscapeError {
2929

3030
UnicodeEscapeInByte,
3131
NonAsciiCharInByte,
32+
NonAsciiCharInByteString,
3233
}
3334

3435
/// Takes a contents of a char literal (without quotes), and returns an
@@ -88,6 +89,29 @@ where
8889
}
8990
}
9091

92+
/// Takes a contents of a string literal (without quotes) and produces a
93+
/// sequence of characters or errors.
94+
/// NOTE: Raw strings do not perform any explicit character escaping, here we
95+
/// only translate CRLF to LF and produce errors on bare CR.
96+
pub(crate) fn unescape_raw_byte_str<F>(literal_text: &str, callback: &mut F)
97+
where
98+
F: FnMut(Range<usize>, Result<char, EscapeError>),
99+
{
100+
let mut byte_offset: usize = 0;
101+
102+
let mut chars = literal_text.chars().peekable();
103+
while let Some(curr) = chars.next() {
104+
let result = match (curr, chars.peek()) {
105+
('\r', Some('\n')) => Ok(curr),
106+
('\r', _) => Err(EscapeError::BareCarriageReturn),
107+
(c, _) if c > '\x7F' => Err(EscapeError::NonAsciiCharInByteString),
108+
_ => Ok(curr),
109+
};
110+
callback(byte_offset..(byte_offset + curr.len_utf8()), result);
111+
byte_offset += curr.len_utf8();
112+
}
113+
}
114+
91115
#[derive(Debug, Clone, Copy)]
92116
pub(crate) enum Mode {
93117
Char,

src/libsyntax/parse/unescape_error_reporting.rs

+5
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,11 @@ pub(crate) fn emit_unescape_error(
124124
handler.span_err(span, "byte constant must be ASCII. \
125125
Use a \\xHH escape for a non-ASCII byte")
126126
}
127+
EscapeError::NonAsciiCharInByteString => {
128+
assert!(mode.is_bytes());
129+
let (_c, span) = last_char();
130+
handler.span_err(span, "raw byte string must be ASCII")
131+
}
127132
EscapeError::OutOfRangeHexEscape => {
128133
handler.span_err(span, "this form of character escape may only be used \
129134
with characters in the range [\\x00-\\x7f]")

src/test/ui/parser/raw-byte-string-literals.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
pub fn main() {
2+
br"a"; //~ ERROR bare CR not allowed in string
23
br"é"; //~ ERROR raw byte string must be ASCII
34
br##~"a"~##; //~ ERROR only `#` is allowed in raw string delimitation
45
}
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,21 @@
1-
error: raw byte string must be ASCII: \u{e9}
2-
--> $DIR/raw-byte-string-literals.rs:2:8
1+
error: bare CR not allowed in string, use \r instead
2+
--> $DIR/raw-byte-string-literals.rs:5:9
3+
|
4+
LL | br"a
5+
";
6+
| ^
7+
8+
error: raw byte string must be ASCII
9+
--> $DIR/raw-byte-string-literals.rs:6:8
310
|
411
LL | br"é";
512
| ^
613

714
error: found invalid character; only `#` is allowed in raw string delimitation: ~
8-
--> $DIR/raw-byte-string-literals.rs:3:6
15+
--> $DIR/raw-byte-string-literals.rs:7:6
916
|
1017
LL | br##~"a"~##;
1118
| ^^^
1219

13-
error: aborting due to 2 previous errors
20+
error: aborting due to 3 previous errors
1421

0 commit comments

Comments
 (0)