Skip to content

Commit f156d3b

Browse files
committed
Improve invalid UTF-8 lint by finding the expression initializer
1 parent 6192690 commit f156d3b

File tree

3 files changed

+137
-50
lines changed

3 files changed

+137
-50
lines changed

compiler/rustc_lint/src/invalid_from_utf8.rs

+14-11
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use std::str::Utf8Error;
22

3-
use rustc_ast::{BorrowKind, LitKind};
3+
use rustc_ast::LitKind;
44
use rustc_hir::{Expr, ExprKind};
55
use rustc_span::source_map::Spanned;
66
use rustc_span::sym;
@@ -11,7 +11,7 @@ use crate::{LateContext, LateLintPass, LintContext};
1111
declare_lint! {
1212
/// The `invalid_from_utf8_unchecked` lint checks for calls to
1313
/// `std::str::from_utf8_unchecked` and `std::str::from_utf8_unchecked_mut`
14-
/// with an invalid UTF-8 literal.
14+
/// with a known invalid UTF-8 value.
1515
///
1616
/// ### Example
1717
///
@@ -36,7 +36,7 @@ declare_lint! {
3636
declare_lint! {
3737
/// The `invalid_from_utf8` lint checks for calls to
3838
/// `std::str::from_utf8` and `std::str::from_utf8_mut`
39-
/// with an invalid UTF-8 literal.
39+
/// with a known invalid UTF-8 value.
4040
///
4141
/// ### Example
4242
///
@@ -67,8 +67,7 @@ impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 {
6767
&& [sym::str_from_utf8, sym::str_from_utf8_mut,
6868
sym::str_from_utf8_unchecked, sym::str_from_utf8_unchecked_mut].contains(&diag_item)
6969
{
70-
let lint = |utf8_error: Utf8Error| {
71-
let label = arg.span;
70+
let lint = |label, utf8_error: Utf8Error| {
7271
let method = diag_item.as_str().strip_prefix("str_").unwrap();
7372
let method = format!("std::str::{method}");
7473
let valid_up_to = utf8_error.valid_up_to();
@@ -78,22 +77,26 @@ impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 {
7877
if is_unchecked_variant { INVALID_FROM_UTF8_UNCHECKED } else { INVALID_FROM_UTF8 },
7978
expr.span,
8079
if is_unchecked_variant {
81-
InvalidFromUtf8Diag::Unchecked { method, valid_up_to, label }
80+
InvalidFromUtf8Diag::Unchecked { method, valid_up_to, label }
8281
} else {
83-
InvalidFromUtf8Diag::Checked { method, valid_up_to, label }
82+
InvalidFromUtf8Diag::Checked { method, valid_up_to, label }
8483
}
8584
)
8685
};
8786

88-
match &arg.kind {
87+
let mut init = cx.expr_or_init(arg);
88+
while let ExprKind::AddrOf(.., inner) = init.kind {
89+
init = cx.expr_or_init(inner);
90+
}
91+
match init.kind {
8992
ExprKind::Lit(Spanned { node: lit, .. }) => {
9093
if let LitKind::ByteStr(bytes, _) = &lit
9194
&& let Err(utf8_error) = std::str::from_utf8(bytes)
9295
{
93-
lint(utf8_error);
96+
lint(init.span, utf8_error);
9497
}
9598
},
96-
ExprKind::AddrOf(BorrowKind::Ref, _, Expr { kind: ExprKind::Array(args), .. }) => {
99+
ExprKind::Array(args) => {
97100
let elements = args.iter().map(|e|{
98101
match &e.kind {
99102
ExprKind::Lit(Spanned { node: lit, .. }) => match lit {
@@ -108,7 +111,7 @@ impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 {
108111
if let Some(elements) = elements
109112
&& let Err(utf8_error) = std::str::from_utf8(&elements)
110113
{
111-
lint(utf8_error);
114+
lint(init.span, utf8_error);
112115
}
113116
}
114117
_ => {}

tests/ui/lint/invalid_from_utf8.rs

+27
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
// check-pass
22

3+
#![feature(inline_const)]
34
#![feature(concat_bytes)]
5+
46
#![warn(invalid_from_utf8_unchecked)]
57
#![warn(invalid_from_utf8)]
68

@@ -90,4 +92,29 @@ pub fn from_utf8() {
9092
}
9193
}
9294

95+
pub fn from_utf8_with_indirections() {
96+
let mut a = [99, 108, 130, 105, 112, 112, 121];
97+
std::str::from_utf8_mut(&mut a);
98+
//~^ WARN calls to `std::str::from_utf8_mut`
99+
let mut b = &mut a;
100+
let mut c = b;
101+
std::str::from_utf8_mut(c);
102+
//~^ WARN calls to `std::str::from_utf8_mut`
103+
let mut c = &[99, 108, 130, 105, 112, 112, 121];
104+
std::str::from_utf8(c);
105+
//~^ WARN calls to `std::str::from_utf8`
106+
const INVALID_1: [u8; 7] = [99, 108, 130, 105, 112, 112, 121];
107+
std::str::from_utf8(&INVALID_1);
108+
//~^ WARN calls to `std::str::from_utf8`
109+
static INVALID_2: [u8; 7] = [99, 108, 130, 105, 112, 112, 121];
110+
std::str::from_utf8(&INVALID_2);
111+
//~^ WARN calls to `std::str::from_utf8`
112+
const INVALID_3: &'static [u8; 7] = &[99, 108, 130, 105, 112, 112, 121];
113+
std::str::from_utf8(INVALID_3);
114+
//~^ WARN calls to `std::str::from_utf8`
115+
const INVALID_4: &'static [u8; 7] = { &[99, 108, 130, 105, 112, 112, 121] };
116+
std::str::from_utf8(INVALID_4);
117+
//~^ WARN calls to `std::str::from_utf8`
118+
}
119+
93120
fn main() {}
tests/ui/lint/invalid_from_utf8.stderr

+96-39
Original file line numberDiff line numberDiff line change
@@ -1,110 +1,167 @@
11
warning: calls to `std::str::from_utf8_unchecked_mut` with a invalid literal are undefined behavior
2-
--> $DIR/invalid_from_utf8.rs:19:9
2+
--> $DIR/invalid_from_utf8.rs:21:9
33
|
44
LL | std::str::from_utf8_unchecked_mut(&mut [99, 108, 130, 105, 112, 112, 121]);
5-
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------^
6-
| |
7-
| the literal was valid UTF-8 up to the 2 bytes
5+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^----------------------------------^
6+
| |
7+
| the literal was valid UTF-8 up to the 2 bytes
88
|
99
note: the lint level is defined here
10-
--> $DIR/invalid_from_utf8.rs:4:9
10+
--> $DIR/invalid_from_utf8.rs:6:9
1111
|
1212
LL | #![warn(invalid_from_utf8_unchecked)]
1313
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^
1414

1515
warning: calls to `std::str::from_utf8_unchecked_mut` with a invalid literal are undefined behavior
16-
--> $DIR/invalid_from_utf8.rs:21:9
16+
--> $DIR/invalid_from_utf8.rs:23:9
1717
|
1818
LL | std::str::from_utf8_unchecked_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
19-
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^--------------------------------------------------^
20-
| |
21-
| the literal was valid UTF-8 up to the 2 bytes
19+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------------^
20+
| |
21+
| the literal was valid UTF-8 up to the 2 bytes
2222

2323
warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
24-
--> $DIR/invalid_from_utf8.rs:39:9
24+
--> $DIR/invalid_from_utf8.rs:41:9
2525
|
2626
LL | std::str::from_utf8_unchecked(&[99, 108, 130, 105, 112, 112, 121]);
27-
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-----------------------------------^
28-
| |
29-
| the literal was valid UTF-8 up to the 2 bytes
27+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^----------------------------------^
28+
| |
29+
| the literal was valid UTF-8 up to the 2 bytes
3030

3131
warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
32-
--> $DIR/invalid_from_utf8.rs:41:9
32+
--> $DIR/invalid_from_utf8.rs:43:9
3333
|
3434
LL | std::str::from_utf8_unchecked(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
35-
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^----------------------------------------------^
36-
| |
37-
| the literal was valid UTF-8 up to the 2 bytes
35+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------------^
36+
| |
37+
| the literal was valid UTF-8 up to the 2 bytes
3838

3939
warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
40-
--> $DIR/invalid_from_utf8.rs:43:9
40+
--> $DIR/invalid_from_utf8.rs:45:9
4141
|
4242
LL | std::str::from_utf8_unchecked(b"cl\x82ippy");
4343
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-------------^
4444
| |
4545
| the literal was valid UTF-8 up to the 2 bytes
4646

4747
warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
48-
--> $DIR/invalid_from_utf8.rs:45:9
48+
--> $DIR/invalid_from_utf8.rs:47:9
4949
|
5050
LL | std::str::from_utf8_unchecked(concat_bytes!(b"cl", b"\x82ippy"));
5151
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------^
5252
| |
5353
| the literal was valid UTF-8 up to the 2 bytes
5454

5555
warning: calls to `std::str::from_utf8_mut` with a invalid literal always return an error
56-
--> $DIR/invalid_from_utf8.rs:62:9
56+
--> $DIR/invalid_from_utf8.rs:64:9
5757
|
5858
LL | std::str::from_utf8_mut(&mut [99, 108, 130, 105, 112, 112, 121]);
59-
| ^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------^
60-
| |
61-
| the literal was valid UTF-8 up to the 2 bytes
59+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^----------------------------------^
60+
| |
61+
| the literal was valid UTF-8 up to the 2 bytes
6262
|
6363
note: the lint level is defined here
64-
--> $DIR/invalid_from_utf8.rs:5:9
64+
--> $DIR/invalid_from_utf8.rs:7:9
6565
|
6666
LL | #![warn(invalid_from_utf8)]
6767
| ^^^^^^^^^^^^^^^^^
6868

6969
warning: calls to `std::str::from_utf8_mut` with a invalid literal always return an error
70-
--> $DIR/invalid_from_utf8.rs:64:9
70+
--> $DIR/invalid_from_utf8.rs:66:9
7171
|
7272
LL | std::str::from_utf8_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
73-
| ^^^^^^^^^^^^^^^^^^^^^^^^--------------------------------------------------^
74-
| |
75-
| the literal was valid UTF-8 up to the 2 bytes
73+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------------^
74+
| |
75+
| the literal was valid UTF-8 up to the 2 bytes
7676

7777
warning: calls to `std::str::from_utf8` with a invalid literal always return an error
78-
--> $DIR/invalid_from_utf8.rs:82:9
78+
--> $DIR/invalid_from_utf8.rs:84:9
7979
|
8080
LL | std::str::from_utf8(&[99, 108, 130, 105, 112, 112, 121]);
81-
| ^^^^^^^^^^^^^^^^^^^^-----------------------------------^
82-
| |
83-
| the literal was valid UTF-8 up to the 2 bytes
81+
| ^^^^^^^^^^^^^^^^^^^^^----------------------------------^
82+
| |
83+
| the literal was valid UTF-8 up to the 2 bytes
8484

8585
warning: calls to `std::str::from_utf8` with a invalid literal always return an error
86-
--> $DIR/invalid_from_utf8.rs:84:9
86+
--> $DIR/invalid_from_utf8.rs:86:9
8787
|
8888
LL | std::str::from_utf8(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
89-
| ^^^^^^^^^^^^^^^^^^^^----------------------------------------------^
90-
| |
91-
| the literal was valid UTF-8 up to the 2 bytes
89+
| ^^^^^^^^^^^^^^^^^^^^^---------------------------------------------^
90+
| |
91+
| the literal was valid UTF-8 up to the 2 bytes
9292

9393
warning: calls to `std::str::from_utf8` with a invalid literal always return an error
94-
--> $DIR/invalid_from_utf8.rs:86:9
94+
--> $DIR/invalid_from_utf8.rs:88:9
9595
|
9696
LL | std::str::from_utf8(b"cl\x82ippy");
9797
| ^^^^^^^^^^^^^^^^^^^^-------------^
9898
| |
9999
| the literal was valid UTF-8 up to the 2 bytes
100100

101101
warning: calls to `std::str::from_utf8` with a invalid literal always return an error
102-
--> $DIR/invalid_from_utf8.rs:88:9
102+
--> $DIR/invalid_from_utf8.rs:90:9
103103
|
104104
LL | std::str::from_utf8(concat_bytes!(b"cl", b"\x82ippy"));
105105
| ^^^^^^^^^^^^^^^^^^^^---------------------------------^
106106
| |
107107
| the literal was valid UTF-8 up to the 2 bytes
108108

109-
warning: 12 warnings emitted
109+
warning: calls to `std::str::from_utf8_mut` with a invalid literal always return an error
110+
--> $DIR/invalid_from_utf8.rs:97:5
111+
|
112+
LL | let mut a = [99, 108, 130, 105, 112, 112, 121];
113+
| ---------------------------------- the literal was valid UTF-8 up to the 2 bytes
114+
LL | std::str::from_utf8_mut(&mut a);
115+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
116+
117+
warning: calls to `std::str::from_utf8_mut` with a invalid literal always return an error
118+
--> $DIR/invalid_from_utf8.rs:101:5
119+
|
120+
LL | let mut a = [99, 108, 130, 105, 112, 112, 121];
121+
| ---------------------------------- the literal was valid UTF-8 up to the 2 bytes
122+
...
123+
LL | std::str::from_utf8_mut(c);
124+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^
125+
126+
warning: calls to `std::str::from_utf8` with a invalid literal always return an error
127+
--> $DIR/invalid_from_utf8.rs:104:5
128+
|
129+
LL | let mut c = &[99, 108, 130, 105, 112, 112, 121];
130+
| ---------------------------------- the literal was valid UTF-8 up to the 2 bytes
131+
LL | std::str::from_utf8(c);
132+
| ^^^^^^^^^^^^^^^^^^^^^^
133+
134+
warning: calls to `std::str::from_utf8` with a invalid literal always return an error
135+
--> $DIR/invalid_from_utf8.rs:107:5
136+
|
137+
LL | const INVALID_1: [u8; 7] = [99, 108, 130, 105, 112, 112, 121];
138+
| ---------------------------------- the literal was valid UTF-8 up to the 2 bytes
139+
LL | std::str::from_utf8(&INVALID_1);
140+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
141+
142+
warning: calls to `std::str::from_utf8` with a invalid literal always return an error
143+
--> $DIR/invalid_from_utf8.rs:110:5
144+
|
145+
LL | static INVALID_2: [u8; 7] = [99, 108, 130, 105, 112, 112, 121];
146+
| ---------------------------------- the literal was valid UTF-8 up to the 2 bytes
147+
LL | std::str::from_utf8(&INVALID_2);
148+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
149+
150+
warning: calls to `std::str::from_utf8` with a invalid literal always return an error
151+
--> $DIR/invalid_from_utf8.rs:113:5
152+
|
153+
LL | const INVALID_3: &'static [u8; 7] = &[99, 108, 130, 105, 112, 112, 121];
154+
| ---------------------------------- the literal was valid UTF-8 up to the 2 bytes
155+
LL | std::str::from_utf8(INVALID_3);
156+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
157+
158+
warning: calls to `std::str::from_utf8` with a invalid literal always return an error
159+
--> $DIR/invalid_from_utf8.rs:116:5
160+
|
161+
LL | const INVALID_4: &'static [u8; 7] = { &[99, 108, 130, 105, 112, 112, 121] };
162+
| ---------------------------------- the literal was valid UTF-8 up to the 2 bytes
163+
LL | std::str::from_utf8(INVALID_4);
164+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
165+
166+
warning: 19 warnings emitted
110167

0 commit comments

Comments
 (0)