Skip to content

Commit 42ed400

Browse files
committed
Be more accurate about calculating display_col from a BytePos
No longer track "zero-width" chars in `SourceMap`; instead, read directly from the line when calculating the `display_col` of a `BytePos`. Move `char_width` to `rustc_span` and use it from the emitter.

This change allows the following to align properly in terminals. Depending on the font, the replaced control codepoints are rendered with a width of 1 or 2: on my terminal they are rendered as 1, while in VSCode text they are rendered as 2.

```
error: this file contains an unclosed delimiter
  --> $DIR/issue-68629.rs:5:17
   |
LL | ␜␟ts␀![{i
   |       -- unclosed delimiter
   |       |
   |       unclosed delimiter
LL | ␀␀ fn rݻoa>rݻm
   |               ^
```
1 parent c60b38c commit 42ed400

File tree

60 files changed

+141
-285
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

60 files changed

+141
-285
lines changed

Cargo.lock

-1
Original file line numberDiff line numberDiff line change
@@ -3982,7 +3982,6 @@ dependencies = [
39823982
"termcolor",
39833983
"termize",
39843984
"tracing",
3985-
"unicode-width",
39863985
"windows",
39873986
]
39883987

compiler/rustc_errors/Cargo.toml

-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ serde_json = "1.0.59"
2626
termcolor = "1.2.0"
2727
termize = "0.1.1"
2828
tracing = "0.1"
29-
unicode-width = "0.1.4"
3029
# tidy-alphabetical-end
3130

3231
[target.'cfg(windows)'.dependencies.windows]

compiler/rustc_errors/src/emitter.rs

+1-16
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
//! The output types are defined in `rustc_session::config::ErrorOutputType`.
99
1010
use rustc_span::source_map::SourceMap;
11-
use rustc_span::{FileLines, FileName, SourceFile, Span};
11+
use rustc_span::{char_width, FileLines, FileName, SourceFile, Span};
1212

1313
use crate::snippet::{
1414
Annotation, AnnotationColumn, AnnotationType, Line, MultilineAnnotation, Style, StyledString,
@@ -2614,21 +2614,6 @@ fn normalize_whitespace(str: &str) -> String {
26142614
s
26152615
}
26162616

2617-
fn char_width(ch: char) -> usize {
2618-
// FIXME: `unicode_width` sometimes disagrees with terminals on how wide a `char` is. For now,
2619-
// just accept that sometimes the code line will be longer than desired.
2620-
match ch {
2621-
'\t' => 4,
2622-
'\u{0000}' | '\u{0001}' | '\u{0002}' | '\u{0003}' | '\u{0004}' | '\u{0005}'
2623-
| '\u{0006}' | '\u{0007}' | '\u{0008}' | '\u{000B}' | '\u{000C}' | '\u{000D}'
2624-
| '\u{000E}' | '\u{000F}' | '\u{0010}' | '\u{0011}' | '\u{0012}' | '\u{0013}'
2625-
| '\u{0014}' | '\u{0015}' | '\u{0016}' | '\u{0017}' | '\u{0018}' | '\u{0019}'
2626-
| '\u{001A}' | '\u{001B}' | '\u{001C}' | '\u{001D}' | '\u{001E}' | '\u{001F}'
2627-
| '\u{007F}' => 1,
2628-
_ => unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1),
2629-
}
2630-
}
2631-
26322617
fn draw_col_separator(buffer: &mut StyledBuffer, line: usize, col: usize) {
26332618
buffer.puts(line, col, "| ", Style::LineNumber);
26342619
}

compiler/rustc_metadata/src/rmeta/decoder.rs

-2
Original file line numberDiff line numberDiff line change
@@ -1727,7 +1727,6 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
17271727
source_len,
17281728
lines,
17291729
multibyte_chars,
1730-
non_narrow_chars,
17311730
normalized_pos,
17321731
stable_id,
17331732
..
@@ -1779,7 +1778,6 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
17791778
self.cnum,
17801779
lines,
17811780
multibyte_chars,
1782-
non_narrow_chars,
17831781
normalized_pos,
17841782
source_file_index,
17851783
);

compiler/rustc_query_system/src/ich/impls_syntax.rs

-6
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,6 @@ impl<'a> HashStable<StableHashingContext<'a>> for SourceFile {
7373
source_len: _,
7474
lines: _,
7575
ref multibyte_chars,
76-
ref non_narrow_chars,
7776
ref normalized_pos,
7877
} = *self;
7978

@@ -98,11 +97,6 @@ impl<'a> HashStable<StableHashingContext<'a>> for SourceFile {
9897
char_pos.hash_stable(hcx, hasher);
9998
}
10099

101-
non_narrow_chars.len().hash_stable(hcx, hasher);
102-
for &char_pos in non_narrow_chars.iter() {
103-
char_pos.hash_stable(hcx, hasher);
104-
}
105-
106100
normalized_pos.len().hash_stable(hcx, hasher);
107101
for &char_pos in normalized_pos.iter() {
108102
char_pos.hash_stable(hcx, hasher);

compiler/rustc_span/src/analyze_source_file.rs

+12-41
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
use super::*;
2-
use unicode_width::UnicodeWidthChar;
32

43
#[cfg(test)]
54
mod tests;
@@ -9,15 +8,12 @@ mod tests;
98
///
109
/// This function will use an SSE2 enhanced implementation if hardware support
1110
/// is detected at runtime.
12-
pub fn analyze_source_file(
13-
src: &str,
14-
) -> (Vec<RelativeBytePos>, Vec<MultiByteChar>, Vec<NonNarrowChar>) {
11+
pub fn analyze_source_file(src: &str) -> (Vec<RelativeBytePos>, Vec<MultiByteChar>) {
1512
let mut lines = vec![RelativeBytePos::from_u32(0)];
1613
let mut multi_byte_chars = vec![];
17-
let mut non_narrow_chars = vec![];
1814

1915
// Calls the right implementation, depending on hardware support available.
20-
analyze_source_file_dispatch(src, &mut lines, &mut multi_byte_chars, &mut non_narrow_chars);
16+
analyze_source_file_dispatch(src, &mut lines, &mut multi_byte_chars);
2117

2218
// The code above optimistically registers a new line *after* each \n
2319
// it encounters. If that point is already outside the source_file, remove
@@ -30,29 +26,26 @@ pub fn analyze_source_file(
3026
}
3127
}
3228

33-
(lines, multi_byte_chars, non_narrow_chars)
29+
(lines, multi_byte_chars)
3430
}
3531

3632
cfg_match! {
3733
cfg(any(target_arch = "x86", target_arch = "x86_64")) => {
3834
fn analyze_source_file_dispatch(src: &str,
3935
lines: &mut Vec<RelativeBytePos>,
40-
multi_byte_chars: &mut Vec<MultiByteChar>,
41-
non_narrow_chars: &mut Vec<NonNarrowChar>) {
36+
multi_byte_chars: &mut Vec<MultiByteChar>) {
4237
if is_x86_feature_detected!("sse2") {
4338
unsafe {
4439
analyze_source_file_sse2(src,
4540
lines,
46-
multi_byte_chars,
47-
non_narrow_chars);
41+
multi_byte_chars);
4842
}
4943
} else {
5044
analyze_source_file_generic(src,
5145
src.len(),
5246
RelativeBytePos::from_u32(0),
5347
lines,
54-
multi_byte_chars,
55-
non_narrow_chars);
48+
multi_byte_chars);
5649

5750
}
5851
}
@@ -64,8 +57,7 @@ cfg_match! {
6457
#[target_feature(enable = "sse2")]
6558
unsafe fn analyze_source_file_sse2(src: &str,
6659
lines: &mut Vec<RelativeBytePos>,
67-
multi_byte_chars: &mut Vec<MultiByteChar>,
68-
non_narrow_chars: &mut Vec<NonNarrowChar>) {
60+
multi_byte_chars: &mut Vec<MultiByteChar>) {
6961
#[cfg(target_arch = "x86")]
7062
use std::arch::x86::*;
7163
#[cfg(target_arch = "x86_64")]
@@ -157,7 +149,6 @@ cfg_match! {
157149
RelativeBytePos::from_usize(scan_start),
158150
lines,
159151
multi_byte_chars,
160-
non_narrow_chars
161152
);
162153
}
163154

@@ -168,23 +159,20 @@ cfg_match! {
168159
src.len() - tail_start,
169160
RelativeBytePos::from_usize(tail_start),
170161
lines,
171-
multi_byte_chars,
172-
non_narrow_chars);
162+
multi_byte_chars);
173163
}
174164
}
175165
}
176166
_ => {
177167
// The target (or compiler version) does not support SSE2 ...
178168
fn analyze_source_file_dispatch(src: &str,
179169
lines: &mut Vec<RelativeBytePos>,
180-
multi_byte_chars: &mut Vec<MultiByteChar>,
181-
non_narrow_chars: &mut Vec<NonNarrowChar>) {
170+
multi_byte_chars: &mut Vec<MultiByteChar>) {
182171
analyze_source_file_generic(src,
183172
src.len(),
184173
RelativeBytePos::from_u32(0),
185174
lines,
186-
multi_byte_chars,
187-
non_narrow_chars);
175+
multi_byte_chars);
188176
}
189177
}
190178
}
@@ -198,7 +186,6 @@ fn analyze_source_file_generic(
198186
output_offset: RelativeBytePos,
199187
lines: &mut Vec<RelativeBytePos>,
200188
multi_byte_chars: &mut Vec<MultiByteChar>,
201-
non_narrow_chars: &mut Vec<NonNarrowChar>,
202189
) -> usize {
203190
assert!(src.len() >= scan_len);
204191
let mut i = 0;
@@ -220,16 +207,8 @@ fn analyze_source_file_generic(
220207

221208
let pos = RelativeBytePos::from_usize(i) + output_offset;
222209

223-
match byte {
224-
b'\n' => {
225-
lines.push(pos + RelativeBytePos(1));
226-
}
227-
b'\t' => {
228-
non_narrow_chars.push(NonNarrowChar::Tab(pos));
229-
}
230-
_ => {
231-
non_narrow_chars.push(NonNarrowChar::ZeroWidth(pos));
232-
}
210+
if let b'\n' = byte {
211+
lines.push(pos + RelativeBytePos(1));
233212
}
234213
} else if byte >= 127 {
235214
// The slow path:
@@ -245,14 +224,6 @@ fn analyze_source_file_generic(
245224
let mbc = MultiByteChar { pos, bytes: char_len as u8 };
246225
multi_byte_chars.push(mbc);
247226
}
248-
249-
// Assume control characters are zero width.
250-
// FIXME: How can we decide between `width` and `width_cjk`?
251-
let char_width = UnicodeWidthChar::width(c).unwrap_or(0);
252-
253-
if char_width != 1 {
254-
non_narrow_chars.push(NonNarrowChar::new(pos, char_width));
255-
}
256227
}
257228

258229
i += char_len;

compiler/rustc_span/src/analyze_source_file/tests.rs

+2-22
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,10 @@ macro_rules! test {
44
(case: $test_name:ident,
55
text: $text:expr,
66
lines: $lines:expr,
7-
multi_byte_chars: $multi_byte_chars:expr,
8-
non_narrow_chars: $non_narrow_chars:expr,) => {
7+
multi_byte_chars: $multi_byte_chars:expr,) => {
98
#[test]
109
fn $test_name() {
11-
let (lines, multi_byte_chars, non_narrow_chars) = analyze_source_file($text);
10+
let (lines, multi_byte_chars) = analyze_source_file($text);
1211

1312
let expected_lines: Vec<RelativeBytePos> =
1413
$lines.into_iter().map(RelativeBytePos).collect();
@@ -21,13 +20,6 @@ macro_rules! test {
2120
.collect();
2221

2322
assert_eq!(multi_byte_chars, expected_mbcs);
24-
25-
let expected_nncs: Vec<NonNarrowChar> = $non_narrow_chars
26-
.into_iter()
27-
.map(|(pos, width)| NonNarrowChar::new(RelativeBytePos(pos), width))
28-
.collect();
29-
30-
assert_eq!(non_narrow_chars, expected_nncs);
3123
}
3224
};
3325
}
@@ -37,93 +29,81 @@ test!(
3729
text: "",
3830
lines: vec![],
3931
multi_byte_chars: vec![],
40-
non_narrow_chars: vec![],
4132
);
4233

4334
test!(
4435
case: newlines_short,
4536
text: "a\nc",
4637
lines: vec![0, 2],
4738
multi_byte_chars: vec![],
48-
non_narrow_chars: vec![],
4939
);
5040

5141
test!(
5242
case: newlines_long,
5343
text: "012345678\nabcdef012345678\na",
5444
lines: vec![0, 10, 26],
5545
multi_byte_chars: vec![],
56-
non_narrow_chars: vec![],
5746
);
5847

5948
test!(
6049
case: newline_and_multi_byte_char_in_same_chunk,
6150
text: "01234β789\nbcdef0123456789abcdef",
6251
lines: vec![0, 11],
6352
multi_byte_chars: vec![(5, 2)],
64-
non_narrow_chars: vec![],
6553
);
6654

6755
test!(
6856
case: newline_and_control_char_in_same_chunk,
6957
text: "01234\u{07}6789\nbcdef0123456789abcdef",
7058
lines: vec![0, 11],
7159
multi_byte_chars: vec![],
72-
non_narrow_chars: vec![(5, 0)],
7360
);
7461

7562
test!(
7663
case: multi_byte_char_short,
7764
text: "aβc",
7865
lines: vec![0],
7966
multi_byte_chars: vec![(1, 2)],
80-
non_narrow_chars: vec![],
8167
);
8268

8369
test!(
8470
case: multi_byte_char_long,
8571
text: "0123456789abcΔf012345β",
8672
lines: vec![0],
8773
multi_byte_chars: vec![(13, 2), (22, 2)],
88-
non_narrow_chars: vec![],
8974
);
9075

9176
test!(
9277
case: multi_byte_char_across_chunk_boundary,
9378
text: "0123456789abcdeΔ123456789abcdef01234",
9479
lines: vec![0],
9580
multi_byte_chars: vec![(15, 2)],
96-
non_narrow_chars: vec![],
9781
);
9882

9983
test!(
10084
case: multi_byte_char_across_chunk_boundary_tail,
10185
text: "0123456789abcdeΔ....",
10286
lines: vec![0],
10387
multi_byte_chars: vec![(15, 2)],
104-
non_narrow_chars: vec![],
10588
);
10689

10790
test!(
10891
case: non_narrow_short,
10992
text: "0\t2",
11093
lines: vec![0],
11194
multi_byte_chars: vec![],
112-
non_narrow_chars: vec![(1, 4)],
11395
);
11496

11597
test!(
11698
case: non_narrow_long,
11799
text: "01\t3456789abcdef01234567\u{07}9",
118100
lines: vec![0],
119101
multi_byte_chars: vec![],
120-
non_narrow_chars: vec![(2, 4), (24, 0)],
121102
);
122103

123104
test!(
124105
case: output_offset_all,
125106
text: "01\t345\n789abcΔf01234567\u{07}9\nbcΔf",
126107
lines: vec![0, 7, 27],
127108
multi_byte_chars: vec![(13, 2), (29, 2)],
128-
non_narrow_chars: vec![(2, 4), (24, 0)],
129109
);

0 commit comments

Comments
 (0)