From 1e33dd17115ca948c6e5ebf319695ed64490b2bf Mon Sep 17 00:00:00 2001 From: Harrison Kaiser Date: Sun, 15 Dec 2024 20:06:02 -0500 Subject: [PATCH] Fix logical error with what text is considered whitespace. There is a logical issue around what counts as leading white-space. There is code which does a subtraction assuming that no errors will be reported inside the leading whitespace. However we compute the length of that whitespace with std::char::is_whitespace and not rustc_lexer::is_whitespace. The former will include a no-break space while later will excluded it. We can only safely make the assumption that no errors will be reported in whitespace if it is all "Rust Standard" whitespace. Indeed an error does occur in unicode whitespace if it contains a no-break space. --- Cargo.lock | 1 + compiler/rustc_errors/Cargo.toml | 1 + compiler/rustc_errors/src/emitter.rs | 10 ++++++++-- tests/ui/errors/emitter-overflow-bad-whitespace.rs | 11 +++++++++++ .../errors/emitter-overflow-bad-whitespace.stderr | 13 +++++++++++++ 5 files changed, 34 insertions(+), 2 deletions(-) create mode 100644 tests/ui/errors/emitter-overflow-bad-whitespace.rs create mode 100644 tests/ui/errors/emitter-overflow-bad-whitespace.stderr diff --git a/Cargo.lock b/Cargo.lock index 5eeb855aacb87..b53d89ba88640 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3703,6 +3703,7 @@ dependencies = [ "rustc_fluent_macro", "rustc_hir", "rustc_index", + "rustc_lexer", "rustc_lint_defs", "rustc_macros", "rustc_serialize", diff --git a/compiler/rustc_errors/Cargo.toml b/compiler/rustc_errors/Cargo.toml index 06bae57638f3c..66b9adbead0b6 100644 --- a/compiler/rustc_errors/Cargo.toml +++ b/compiler/rustc_errors/Cargo.toml @@ -16,6 +16,7 @@ rustc_error_messages = { path = "../rustc_error_messages" } rustc_fluent_macro = { path = "../rustc_fluent_macro" } rustc_hir = { path = "../rustc_hir" } rustc_index = { path = "../rustc_index" } +rustc_lexer = { path = "../rustc_lexer" } rustc_lint_defs = { path = "../rustc_lint_defs" } rustc_macros = { path = "../rustc_macros" } rustc_serialize = { path = "../rustc_serialize" } diff --git a/compiler/rustc_errors/src/emitter.rs b/compiler/rustc_errors/src/emitter.rs index ac2f91cdeb3fe..17908bca26afc 100644 --- a/compiler/rustc_errors/src/emitter.rs +++ b/compiler/rustc_errors/src/emitter.rs @@ -19,6 +19,7 @@ use derive_setters::Setters; use rustc_data_structures::fx::{FxHashMap, FxIndexMap, FxIndexSet}; use rustc_data_structures::sync::{DynSend, IntoDynSyncSend, Lrc}; use rustc_error_messages::{FluentArgs, SpanLabel}; +use rustc_lexer; use rustc_lint_defs::pluralize; use rustc_span::hygiene::{ExpnKind, MacroKind}; use rustc_span::source_map::SourceMap; @@ -1698,9 +1699,14 @@ impl HumanEmitter { if let Some(source_string) = line.line_index.checked_sub(1).and_then(|l| file.get_line(l)) { + // Whitespace can only be removed (aka considered leading) + // if the lexer considers it whitespace. + // non-rustc_lexer::is_whitespace() chars are reported as an + // error (ex. no-break-spaces \u{a0}), and thus can't be considered + // for removal during error reporting. let leading_whitespace = source_string .chars() - .take_while(|c| c.is_whitespace()) + .take_while(|c| rustc_lexer::is_whitespace(*c)) .map(|c| { match c { // Tabs are displayed as 4 spaces @@ -1709,7 +1715,7 @@ impl HumanEmitter { } }) .sum(); - if source_string.chars().any(|c| !c.is_whitespace()) { + if source_string.chars().any(|c| !rustc_lexer::is_whitespace(c)) { whitespace_margin = min(whitespace_margin, leading_whitespace); } } diff --git a/tests/ui/errors/emitter-overflow-bad-whitespace.rs b/tests/ui/errors/emitter-overflow-bad-whitespace.rs new file mode 100644 index 0000000000000..d024d1f3921b2 --- /dev/null +++ b/tests/ui/errors/emitter-overflow-bad-whitespace.rs @@ -0,0 +1,11 @@ +// Invalid whitespace (not listed here: https://doc.rust-lang.org/reference/whitespace.html +// e.g. \u{a0}) before any other syntax on the line should not cause any integer overflow +// in the emitter, even when the terminal width causes the line to be truncated. +// +// issue #132918 + +//@ check-fail +//@ needs-rustc-debug-assertions +//@ compile-flags: --diagnostic-width=1 +                                        fn main() { return; } +//~^ ERROR unknown start of token: \u{a0} diff --git a/tests/ui/errors/emitter-overflow-bad-whitespace.stderr b/tests/ui/errors/emitter-overflow-bad-whitespace.stderr new file mode 100644 index 0000000000000..8c0c097bcb9c4 --- /dev/null +++ b/tests/ui/errors/emitter-overflow-bad-whitespace.stderr @@ -0,0 +1,13 @@ +error: unknown start of token: \u{a0} + --> $DIR/emitter-overflow-bad-whitespace.rs:10:1 + | +LL |     ... + | ^ + | +help: Unicode character ' ' (No-Break Space) looks like ' ' (Space), but it is not + | +LL |                                       fn main() { return; } + | + + +error: aborting due to 1 previous error +