Skip to content

Commit 770bd3d

Browse files
committed
Auto merge of #75349 - nnethercote:tweak-confusable-idents-checking, r=petrochenkov
Tweak confusable idents checking The confusable idents checking does some sub-optimal things with symbols. r? @petrochenkov cc @crlf0710
2 parents d495ef5 + 0a597bd commit 770bd3d

File tree

4 files changed

+43
-65
lines changed

4 files changed

+43
-65
lines changed

src/librustc_lint/non_ascii_idents.rs

+29-61
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use crate::{EarlyContext, EarlyLintPass, LintContext};
22
use rustc_ast::ast;
33
use rustc_data_structures::fx::FxHashMap;
4-
use rustc_span::symbol::SymbolStr;
4+
use rustc_span::symbol::Symbol;
55

66
declare_lint! {
77
pub NON_ASCII_IDENTS,
@@ -39,7 +39,6 @@ impl EarlyLintPass for NonAsciiIdents {
3939
use rustc_span::Span;
4040
use std::collections::BTreeMap;
4141
use unicode_security::GeneralSecurityProfile;
42-
use utils::CowBoxSymStr;
4342

4443
let check_non_ascii_idents = cx.builder.lint_level(NON_ASCII_IDENTS).0 != Level::Allow;
4544
let check_uncommon_codepoints =
@@ -58,6 +57,12 @@ impl EarlyLintPass for NonAsciiIdents {
5857

5958
let mut has_non_ascii_idents = false;
6059
let symbols = cx.sess.parse_sess.symbol_gallery.symbols.lock();
60+
61+
// Sort by `Span` so that error messages make sense with respect to the
62+
// order of identifier locations in the code.
63+
let mut symbols: Vec<_> = symbols.iter().collect();
64+
symbols.sort_by_key(|k| k.1);
65+
6166
for (symbol, &sp) in symbols.iter() {
6267
let symbol_str = symbol.as_str();
6368
if symbol_str.is_ascii() {
@@ -77,33 +82,34 @@ impl EarlyLintPass for NonAsciiIdents {
7782
}
7883

7984
if has_non_ascii_idents && check_confusable_idents {
80-
let mut skeleton_map: FxHashMap<CowBoxSymStr, (SymbolStr, Span, bool)> =
85+
let mut skeleton_map: FxHashMap<Symbol, (Symbol, Span, bool)> =
8186
FxHashMap::with_capacity_and_hasher(symbols.len(), Default::default());
82-
let mut str_buf = String::new();
83-
for (symbol, &sp) in symbols.iter() {
84-
fn calc_skeleton(symbol_str: &SymbolStr, buffer: &mut String) -> CowBoxSymStr {
85-
use std::mem::replace;
86-
use unicode_security::confusable_detection::skeleton;
87-
buffer.clear();
88-
buffer.extend(skeleton(symbol_str));
89-
if *symbol_str == *buffer {
90-
CowBoxSymStr::Interned(symbol_str.clone())
91-
} else {
92-
let owned = replace(buffer, String::new());
93-
CowBoxSymStr::Owned(owned.into_boxed_str())
94-
}
95-
}
87+
let mut skeleton_buf = String::new();
88+
89+
for (&symbol, &sp) in symbols.iter() {
90+
use unicode_security::confusable_detection::skeleton;
91+
9692
let symbol_str = symbol.as_str();
9793
let is_ascii = symbol_str.is_ascii();
98-
let skeleton = calc_skeleton(&symbol_str, &mut str_buf);
94+
95+
// Get the skeleton as a `Symbol`.
96+
skeleton_buf.clear();
97+
skeleton_buf.extend(skeleton(&symbol_str));
98+
let skeleton_sym = if *symbol_str == *skeleton_buf {
99+
symbol
100+
} else {
101+
Symbol::intern(&skeleton_buf)
102+
};
103+
99104
skeleton_map
100-
.entry(skeleton)
101-
.and_modify(|(existing_symbolstr, existing_span, existing_is_ascii)| {
105+
.entry(skeleton_sym)
106+
.and_modify(|(existing_symbol, existing_span, existing_is_ascii)| {
102107
if !*existing_is_ascii || !is_ascii {
103108
cx.struct_span_lint(CONFUSABLE_IDENTS, sp, |lint| {
104109
lint.build(&format!(
105110
"identifier pair considered confusable between `{}` and `{}`",
106-
existing_symbolstr, symbol_str
111+
existing_symbol.as_str(),
112+
symbol.as_str()
107113
))
108114
.span_label(
109115
*existing_span,
@@ -113,12 +119,12 @@ impl EarlyLintPass for NonAsciiIdents {
113119
});
114120
}
115121
if *existing_is_ascii && !is_ascii {
116-
*existing_symbolstr = symbol_str.clone();
122+
*existing_symbol = symbol;
117123
*existing_span = sp;
118124
*existing_is_ascii = is_ascii;
119125
}
120126
})
121-
.or_insert((symbol_str, sp, is_ascii));
127+
.or_insert((symbol, sp, is_ascii));
122128
}
123129
}
124130

@@ -232,41 +238,3 @@ impl EarlyLintPass for NonAsciiIdents {
232238
}
233239
}
234240
}
235-
236-
mod utils {
237-
use rustc_span::symbol::SymbolStr;
238-
use std::hash::{Hash, Hasher};
239-
use std::ops::Deref;
240-
241-
pub(super) enum CowBoxSymStr {
242-
Interned(SymbolStr),
243-
Owned(Box<str>),
244-
}
245-
246-
impl Deref for CowBoxSymStr {
247-
type Target = str;
248-
249-
fn deref(&self) -> &str {
250-
match self {
251-
CowBoxSymStr::Interned(interned) => interned,
252-
CowBoxSymStr::Owned(ref owned) => owned,
253-
}
254-
}
255-
}
256-
257-
impl Hash for CowBoxSymStr {
258-
#[inline]
259-
fn hash<H: Hasher>(&self, state: &mut H) {
260-
Hash::hash(&**self, state)
261-
}
262-
}
263-
264-
impl PartialEq<CowBoxSymStr> for CowBoxSymStr {
265-
#[inline]
266-
fn eq(&self, other: &CowBoxSymStr) -> bool {
267-
PartialEq::eq(&**self, &**other)
268-
}
269-
}
270-
271-
impl Eq for CowBoxSymStr {}
272-
}

src/librustc_session/parse.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ use rustc_span::hygiene::ExpnId;
1313
use rustc_span::source_map::{FilePathMapping, SourceMap};
1414
use rustc_span::{MultiSpan, Span, Symbol};
1515

16-
use std::collections::BTreeMap;
1716
use std::path::PathBuf;
1817
use std::str;
1918

@@ -64,7 +63,7 @@ impl GatedSpans {
6463
#[derive(Default)]
6564
pub struct SymbolGallery {
6665
/// All symbols occurred and their first occurrence span.
67-
pub symbols: Lock<BTreeMap<Symbol, Span>>,
66+
pub symbols: Lock<FxHashMap<Symbol, Span>>,
6867
}
6968

7069
impl SymbolGallery {

src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.rs

+2
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@
33
#![allow(uncommon_codepoints, non_upper_case_globals)]
44

55
const: usize = 42;
6+
const s_s: usize = 42;
67

78
fn main() {
89
let s = "rust"; //~ ERROR identifier pair considered confusable
10+
let s_s = "rust2"; //~ ERROR identifier pair considered confusable
911
not_affected();
1012
}
1113

src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.stderr

+11-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
error: identifier pair considered confusable between `s` and `s`
2-
--> $DIR/lint-confusable-idents.rs:8:9
2+
--> $DIR/lint-confusable-idents.rs:9:9
33
|
44
LL | const s: usize = 42;
55
| -- this is where the previous identifier occurred
@@ -13,5 +13,14 @@ note: the lint level is defined here
1313
LL | #![deny(confusable_idents)]
1414
| ^^^^^^^^^^^^^^^^^
1515

16-
error: aborting due to previous error
16+
error: identifier pair considered confusable between `s_s` and `s_s`
17+
--> $DIR/lint-confusable-idents.rs:10:9
18+
|
19+
LL | const s_s: usize = 42;
20+
| --- this is where the previous identifier occurred
21+
...
22+
LL | let s_s = "rust2";
23+
| ^^^^^
24+
25+
error: aborting due to 2 previous errors
1726

0 commit comments

Comments
 (0)