Skip to content

Commit 8e0a7a8

Browse files
Rollup merge of rust-lang#103651 - Alexendoo:parse-format-unicode-escapes, r=wesleywiser
Fix `rustc_parse_format` spans following escaped utf-8 multibyte chars

Currently too many skips are created for char escapes that encode to more than 1 byte in UTF-8 ([playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2021&gist=c77a9dc669b69b167271b59ed2c8d88c)):

```rust
fn main() {
    format!("\u{df}{a}");
    format!("\u{211d}{a}");
    format!("\u{1f4a3}{a}");
}
```

```
error[[E0425]](https://doc.rust-lang.org/stable/error-index.html#E0425): cannot find value `a` in this scope
 --> src/main.rs:2:22
  |
2 |     format!("\u{df}{a}");
  |                      ^ not found in this scope

error[[E0425]](https://doc.rust-lang.org/stable/error-index.html#E0425): cannot find value `a` in this scope
 --> src/main.rs:3:25
  |
3 |     format!("\u{211d}{a}");
  |                         ^ not found in this scope

error[[E0425]](https://doc.rust-lang.org/stable/error-index.html#E0425): cannot find value `a` in this scope
 --> src/main.rs:4:27
  |
4 |     format!("\u{1f4a3}{a}");
  |                           ^ not found in this scope
```

This reduces the number of skips to account for that.

Fixes rust-lang/rust-clippy#9727
2 parents 976973f + f5e390e commit 8e0a7a8

File tree

3 files changed

+111
-18
lines changed

3 files changed

+111
-18
lines changed

compiler/rustc_parse_format/src/lib.rs

+29-18
Original file line numberDiff line numberDiff line change
@@ -819,27 +819,27 @@ fn find_skips_from_snippet(
819819
};
820820

821821
fn find_skips(snippet: &str, is_raw: bool) -> Vec<usize> {
822-
let mut s = snippet.char_indices().peekable();
822+
let mut s = snippet.char_indices();
823823
let mut skips = vec![];
824824
while let Some((pos, c)) = s.next() {
825-
match (c, s.peek()) {
825+
match (c, s.clone().next()) {
826826
// skip whitespace and empty lines ending in '\\'
827827
('\\', Some((next_pos, '\n'))) if !is_raw => {
828828
skips.push(pos);
829-
skips.push(*next_pos);
829+
skips.push(next_pos);
830830
let _ = s.next();
831831

832-
while let Some((pos, c)) = s.peek() {
832+
while let Some((pos, c)) = s.clone().next() {
833833
if matches!(c, ' ' | '\n' | '\t') {
834-
skips.push(*pos);
834+
skips.push(pos);
835835
let _ = s.next();
836836
} else {
837837
break;
838838
}
839839
}
840840
}
841841
('\\', Some((next_pos, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => {
842-
skips.push(*next_pos);
842+
skips.push(next_pos);
843843
let _ = s.next();
844844
}
845845
('\\', Some((_, 'x'))) if !is_raw => {
@@ -858,19 +858,30 @@ fn find_skips_from_snippet(
858858
}
859859
if let Some((next_pos, next_c)) = s.next() {
860860
if next_c == '{' {
861-
skips.push(next_pos);
862-
let mut i = 0; // consume up to 6 hexanumeric chars + closing `}`
863-
while let (Some((next_pos, c)), true) = (s.next(), i < 7) {
864-
if c.is_digit(16) {
865-
skips.push(next_pos);
866-
} else if c == '}' {
867-
skips.push(next_pos);
868-
break;
869-
} else {
870-
break;
871-
}
872-
i += 1;
861+
// consume up to 6 hexadecimal chars
862+
let digits_len =
863+
s.clone().take(6).take_while(|(_, c)| c.is_digit(16)).count();
864+
865+
let len_utf8 = s
866+
.as_str()
867+
.get(..digits_len)
868+
.and_then(|digits| u32::from_str_radix(digits, 16).ok())
869+
.and_then(char::from_u32)
870+
.map_or(1, char::len_utf8);
871+
872+
// Skip the digits; for chars that encode to more than 1 UTF-8 byte,
873+
// exclude one digit from the skips for every byte beyond the first
874+
//
875+
// So for a 3 byte character, exclude 2 digits
876+
let required_skips =
877+
digits_len.saturating_sub(len_utf8.saturating_sub(1));
878+
879+
// skip '{' and '}' also
880+
for pos in (next_pos..).take(required_skips + 2) {
881+
skips.push(pos)
873882
}
883+
884+
s.nth(digits_len);
874885
} else if next_c.is_digit(16) {
875886
skips.push(next_pos);
876887
// We suggest adding `{` and `}` when appropriate, accept it here as if
+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
fn main() {
2+
// 1 byte in UTF-8
3+
format!("\u{000041}{a}"); //~ ERROR cannot find value
4+
format!("\u{0041}{a}"); //~ ERROR cannot find value
5+
format!("\u{41}{a}"); //~ ERROR cannot find value
6+
format!("\u{0}{a}"); //~ ERROR cannot find value
7+
8+
// 2 bytes
9+
format!("\u{0df}{a}"); //~ ERROR cannot find value
10+
format!("\u{df}{a}"); //~ ERROR cannot find value
11+
12+
// 3 bytes
13+
format!("\u{00211d}{a}"); //~ ERROR cannot find value
14+
format!("\u{211d}{a}"); //~ ERROR cannot find value
15+
16+
// 4 bytes
17+
format!("\u{1f4a3}{a}"); //~ ERROR cannot find value
18+
format!("\u{10ffff}{a}"); //~ ERROR cannot find value
19+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
error[E0425]: cannot find value `a` in this scope
2+
--> $DIR/unicode-escape-spans.rs:3:25
3+
|
4+
LL | format!("\u{000041}{a}");
5+
| ^ not found in this scope
6+
7+
error[E0425]: cannot find value `a` in this scope
8+
--> $DIR/unicode-escape-spans.rs:4:23
9+
|
10+
LL | format!("\u{0041}{a}");
11+
| ^ not found in this scope
12+
13+
error[E0425]: cannot find value `a` in this scope
14+
--> $DIR/unicode-escape-spans.rs:5:21
15+
|
16+
LL | format!("\u{41}{a}");
17+
| ^ not found in this scope
18+
19+
error[E0425]: cannot find value `a` in this scope
20+
--> $DIR/unicode-escape-spans.rs:6:20
21+
|
22+
LL | format!("\u{0}{a}");
23+
| ^ not found in this scope
24+
25+
error[E0425]: cannot find value `a` in this scope
26+
--> $DIR/unicode-escape-spans.rs:9:22
27+
|
28+
LL | format!("\u{0df}{a}");
29+
| ^ not found in this scope
30+
31+
error[E0425]: cannot find value `a` in this scope
32+
--> $DIR/unicode-escape-spans.rs:10:21
33+
|
34+
LL | format!("\u{df}{a}");
35+
| ^ not found in this scope
36+
37+
error[E0425]: cannot find value `a` in this scope
38+
--> $DIR/unicode-escape-spans.rs:13:25
39+
|
40+
LL | format!("\u{00211d}{a}");
41+
| ^ not found in this scope
42+
43+
error[E0425]: cannot find value `a` in this scope
44+
--> $DIR/unicode-escape-spans.rs:14:23
45+
|
46+
LL | format!("\u{211d}{a}");
47+
| ^ not found in this scope
48+
49+
error[E0425]: cannot find value `a` in this scope
50+
--> $DIR/unicode-escape-spans.rs:17:24
51+
|
52+
LL | format!("\u{1f4a3}{a}");
53+
| ^ not found in this scope
54+
55+
error[E0425]: cannot find value `a` in this scope
56+
--> $DIR/unicode-escape-spans.rs:18:25
57+
|
58+
LL | format!("\u{10ffff}{a}");
59+
| ^ not found in this scope
60+
61+
error: aborting due to 10 previous errors
62+
63+
For more information about this error, try `rustc --explain E0425`.

0 commit comments

Comments
 (0)