Skip to content

Commit ed207ae

Browse files
authored
Rollup merge of rust-lang#120259 - HTGAzureX1212:HTGAzureX1212/split-diagnostics-uncommon-codepoints, r=Manishearth
Split Diagnostics for Uncommon Codepoints: Add List to Display Characters Involved

This Pull Request adds a list of the uncommon codepoints involved in the `uncommon_codepoints` lint, as outlined as a first step in rust-lang#120228.

Example rendered diagnostic:

```
error: identifier contains an uncommon Unicode codepoint: 'µ'
  --> $DIR/lint-uncommon-codepoints.rs:3:7
   |
LL | const µ: f64 = 0.000001;
   |       ^
   |
note: the lint level is defined here
  --> $DIR/lint-uncommon-codepoints.rs:1:9
   |
LL | #![deny(uncommon_codepoints)]
   |         ^^^^^^^^^^^^^^^^^^^
```

(Retrying rust-lang#120258.)
2 parents b0267be + da1d0c4 commit ed207ae

File tree

8 files changed

+34
-10
lines changed

8 files changed

+34
-10
lines changed

Diff for: compiler/rustc_errors/src/diagnostic_impls.rs

+8
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,14 @@ impl IntoDiagnosticArg for char {
110110
}
111111
}
112112

113+
impl IntoDiagnosticArg for Vec<char> {
114+
fn into_diagnostic_arg(self) -> DiagnosticArgValue<'static> {
115+
DiagnosticArgValue::StrListSepByAnd(
116+
self.into_iter().map(|c| Cow::Owned(format!("{c:?}"))).collect(),
117+
)
118+
}
119+
}
120+
113121
impl IntoDiagnosticArg for Symbol {
114122
fn into_diagnostic_arg(self) -> DiagnosticArgValue<'static> {
115123
self.to_ident_string().into_diagnostic_arg()

Diff for: compiler/rustc_lint/messages.ftl

+4-1
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,10 @@ lint_hidden_unicode_codepoints = unicode codepoint changing visible direction of
240240
241241
lint_identifier_non_ascii_char = identifier contains non-ASCII characters
242242
243-
lint_identifier_uncommon_codepoints = identifier contains uncommon Unicode codepoints
243+
lint_identifier_uncommon_codepoints = identifier contains {$codepoints_len ->
244+
[one] an uncommon Unicode codepoint
245+
*[other] uncommon Unicode codepoints
246+
}: {$codepoints}
244247
245248
lint_ignored_unless_crate_specified = {$level}({$name}) is ignored unless specified at crate level
246249

Diff for: compiler/rustc_lint/src/lints.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -1107,7 +1107,10 @@ pub struct IdentifierNonAsciiChar;
11071107

11081108
#[derive(LintDiagnostic)]
11091109
#[diag(lint_identifier_uncommon_codepoints)]
1110-
pub struct IdentifierUncommonCodepoints;
1110+
pub struct IdentifierUncommonCodepoints {
1111+
pub codepoints: Vec<char>,
1112+
pub codepoints_len: usize,
1113+
}
11111114

11121115
#[derive(LintDiagnostic)]
11131116
#[diag(lint_confusable_identifier_pair)]

Diff for: compiler/rustc_lint/src/non_ascii_idents.rs

+11-1
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,17 @@ impl EarlyLintPass for NonAsciiIdents {
190190
if check_uncommon_codepoints
191191
&& !symbol_str.chars().all(GeneralSecurityProfile::identifier_allowed)
192192
{
193-
cx.emit_span_lint(UNCOMMON_CODEPOINTS, sp, IdentifierUncommonCodepoints);
193+
let codepoints: Vec<_> = symbol_str
194+
.chars()
195+
.filter(|c| !GeneralSecurityProfile::identifier_allowed(*c))
196+
.collect();
197+
let codepoints_len = codepoints.len();
198+
199+
cx.emit_span_lint(
200+
UNCOMMON_CODEPOINTS,
201+
sp,
202+
IdentifierUncommonCodepoints { codepoints, codepoints_len },
203+
);
194204
}
195205
}
196206

Diff for: tests/ui/lexer/lex-emoji-identifiers.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ fn invalid_emoji_usages() {
44
let wireless🛜 = "basic emoji"; //~ ERROR: identifiers cannot contain emoji
55
// FIXME
66
let key1️⃣ = "keycap sequence"; //~ ERROR: unknown start of token
7-
//~^ WARN: identifier contains uncommon Unicode codepoints
7+
//~^ WARN: identifier contains an uncommon Unicode codepoint
88
let flag🇺🇳 = "flag sequence"; //~ ERROR: identifiers cannot contain emoji
99
let wales🏴 = "tag sequence"; //~ ERROR: identifiers cannot contain emoji
1010
let folded🙏🏿 = "modifier sequence"; //~ ERROR: identifiers cannot contain emoji

Diff for: tests/ui/lexer/lex-emoji-identifiers.stderr

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ error: identifiers cannot contain emoji: `folded🙏🏿`
4040
LL | let folded🙏🏿 = "modifier sequence";
4141
| ^^^^^^^^^^
4242

43-
warning: identifier contains uncommon Unicode codepoints
43+
warning: identifier contains an uncommon Unicode codepoint: '\u{fe0f}'
4444
--> $DIR/lex-emoji-identifiers.rs:6:9
4545
|
4646
LL | let key1️⃣ = "keycap sequence";

Diff for: tests/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
#![deny(uncommon_codepoints)]
22

3-
const µ: f64 = 0.000001; //~ ERROR identifier contains uncommon Unicode codepoints
3+
const µ: f64 = 0.000001; //~ ERROR identifier contains an uncommon Unicode codepoint
44
//~| WARNING should have an upper case name
55

6-
fn dĳkstra() {} //~ ERROR identifier contains uncommon Unicode codepoints
6+
fn dĳkstra() {} //~ ERROR identifier contains an uncommon Unicode codepoint
77

88
fn main() {
99
let ㇻㇲㇳ = "rust"; //~ ERROR identifier contains uncommon Unicode codepoints

Diff for: tests/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.stderr

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
error: identifier contains uncommon Unicode codepoints
1+
error: identifier contains an uncommon Unicode codepoint: 'µ'
22
--> $DIR/lint-uncommon-codepoints.rs:3:7
33
|
44
LL | const µ: f64 = 0.000001;
@@ -10,13 +10,13 @@ note: the lint level is defined here
1010
LL | #![deny(uncommon_codepoints)]
1111
| ^^^^^^^^^^^^^^^^^^^
1212

13-
error: identifier contains uncommon Unicode codepoints
13+
error: identifier contains an uncommon Unicode codepoint: 'ĳ'
1414
--> $DIR/lint-uncommon-codepoints.rs:6:4
1515
|
1616
LL | fn dĳkstra() {}
1717
| ^^^^^^^
1818

19-
error: identifier contains uncommon Unicode codepoints
19+
error: identifier contains uncommon Unicode codepoints: 'ㇻ', 'ㇲ', and 'ㇳ'
2020
--> $DIR/lint-uncommon-codepoints.rs:9:9
2121
|
2222
LL | let ㇻㇲㇳ = "rust";

0 commit comments

Comments
 (0)