|
| 1 | +use clippy_utils::diagnostics::span_lint; |
| 2 | +use rustc_ast::ast; |
| 3 | +use rustc_data_structures::fx::FxHashSet; |
| 4 | +use rustc_lint::{EarlyContext, EarlyLintPass, Level}; |
| 5 | +use rustc_session::{declare_tool_lint, impl_lint_pass}; |
| 6 | +use unicode_script::{Script, UnicodeScript}; |
| 7 | + |
| 8 | +declare_clippy_lint! { |
| 9 | + /// **What it does:** Checks for usage of unicode scripts other than those explicitly allowed |
| 10 | + /// by the lint config. |
| 11 | + /// |
| 12 | + /// This lint doesn't take into account non-text scripts such as `Unknown` and `Linear_A`. |
| 13 | + /// It also ignores the `Common` script type. |
| 14 | + /// While configuring, be sure to use official script name [aliases] from |
| 15 | + /// [the list of supported scripts][supported_scripts]. |
| 16 | + /// |
| 17 | + /// See also: [`non_ascii_idents`]. |
| 18 | + /// |
| 19 | + /// [aliases]: http://www.unicode.org/reports/tr24/tr24-31.html#Script_Value_Aliases |
| 20 | + /// [supported_scripts]: https://www.unicode.org/iso15924/iso15924-codes.html |
| 21 | + /// |
| 22 | + /// **Why is this bad?** It may be not desired to have many different scripts for |
| 23 | + /// identifiers in the codebase. |
| 24 | + /// |
| 25 | + /// Note that if you only want to allow plain English, you might want to use |
| 26 | + /// built-in [`non_ascii_idents`] lint instead. |
| 27 | + /// |
| 28 | + /// [`non_ascii_idents`]: https://doc.rust-lang.org/rustc/lints/listing/allowed-by-default.html#non-ascii-idents |
| 29 | + /// |
| 30 | + /// **Known problems:** None. |
| 31 | + /// |
| 32 | + /// **Example:** |
| 33 | + /// ```rust |
| 34 | + /// // Assuming that `clippy.toml` contains the following line: |
| 35 | + /// // allowed-locales = ["Latin", "Cyrillic"] |
| 36 | + /// let counter = 10; // OK, latin is allowed. |
| 37 | + /// let счётчик = 10; // OK, cyrillic is allowed. |
| 38 | + /// let zähler = 10; // OK, it's still latin. |
| 39 | + /// let カウンタ = 10; // Will spawn the lint. |
| 40 | + /// ``` |
| 41 | + pub DISALLOWED_SCRIPT_IDENTS, |
| 42 | + restriction, |
| 43 | + "usage of non-allowed Unicode scripts" |
| 44 | +} |
| 45 | + |
| 46 | +#[derive(Clone, Debug)] |
| 47 | +pub struct DisallowedScriptIdents { |
| 48 | + whitelist: FxHashSet<Script>, |
| 49 | +} |
| 50 | + |
| 51 | +impl DisallowedScriptIdents { |
| 52 | + pub fn new(whitelist: &[String]) -> Self { |
| 53 | + let whitelist = whitelist |
| 54 | + .iter() |
| 55 | + .map(String::as_str) |
| 56 | + .filter_map(Script::from_full_name) |
| 57 | + .collect(); |
| 58 | + Self { whitelist } |
| 59 | + } |
| 60 | +} |
| 61 | + |
| 62 | +impl_lint_pass!(DisallowedScriptIdents => [DISALLOWED_SCRIPT_IDENTS]); |
| 63 | + |
| 64 | +impl EarlyLintPass for DisallowedScriptIdents { |
| 65 | + fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) { |
| 66 | + // Implementation is heavily inspired by the implementation of [`non_ascii_idents`] lint: |
| 67 | + // https://github.com/rust-lang/rust/blob/master/compiler/rustc_lint/src/non_ascii_idents.rs |
| 68 | + |
| 69 | + let check_disallowed_script_idents = cx.builder.lint_level(DISALLOWED_SCRIPT_IDENTS).0 != Level::Allow; |
| 70 | + if !check_disallowed_script_idents { |
| 71 | + return; |
| 72 | + } |
| 73 | + |
| 74 | + let symbols = cx.sess.parse_sess.symbol_gallery.symbols.lock(); |
| 75 | + // Sort by `Span` so that error messages make sense with respect to the |
| 76 | + // order of identifier locations in the code. |
| 77 | + let mut symbols: Vec<_> = symbols.iter().collect(); |
| 78 | + symbols.sort_unstable_by_key(|k| k.1); |
| 79 | + |
| 80 | + for (symbol, &span) in &symbols { |
| 81 | + // Note: `symbol.as_str()` is an expensive operation, thus should not be called |
| 82 | + // more than once for a single symbol. |
| 83 | + let symbol_str = symbol.as_str(); |
| 84 | + if symbol_str.is_ascii() { |
| 85 | + continue; |
| 86 | + } |
| 87 | + |
| 88 | + for c in symbol_str.chars() { |
| 89 | + // We want to iterate through all the scripts associated with this character |
| 90 | + // and check whether at least of one scripts is in the whitelist. |
| 91 | + let forbidden_script = c |
| 92 | + .script_extension() |
| 93 | + .iter() |
| 94 | + .find(|script| !self.whitelist.contains(script)); |
| 95 | + if let Some(script) = forbidden_script { |
| 96 | + span_lint( |
| 97 | + cx, |
| 98 | + DISALLOWED_SCRIPT_IDENTS, |
| 99 | + span, |
| 100 | + &format!( |
| 101 | + "identifier `{}` has a Unicode script that is not allowed by configuration: {}", |
| 102 | + symbol_str, |
| 103 | + script.full_name() |
| 104 | + ), |
| 105 | + ); |
| 106 | + // We don't want to spawn warning multiple times over a single identifier. |
| 107 | + break; |
| 108 | + } |
| 109 | + } |
| 110 | + } |
| 111 | + } |
| 112 | +} |
0 commit comments