Skip to content

Commit b7ee702

Browse files
committed
Replace all uses of unescape_unicode: no more unreachable!
1 parent 009fe25 commit b7ee702

File tree

10 files changed

+136
-165
lines changed

10 files changed

+136
-165
lines changed

Diff for: compiler/rustc_ast/src/util/literal.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ impl LitKind {
8989
// programs with many long strings containing escapes.
9090
unescape_str(
9191
s,
92-
&mut #[inline(always)]
92+
#[inline(always)]
9393
|_, res| match res {
9494
Ok(c) => buf.push(c),
9595
Err(err) => {
@@ -110,7 +110,7 @@ impl LitKind {
110110
token::ByteStr => {
111111
let s = symbol.as_str();
112112
let mut buf = Vec::with_capacity(s.len());
113-
unescape_byte_str(s, &mut |_, res| match res {
113+
unescape_byte_str(s, |_, res| match res {
114114
Ok(b) => buf.push(b),
115115
Err(err) => {
116116
assert!(!err.is_fatal(), "failed to unescape string literal")
@@ -127,7 +127,7 @@ impl LitKind {
127127
token::CStr => {
128128
let s = symbol.as_str();
129129
let mut buf = Vec::with_capacity(s.len());
130-
unescape_cstr(s, &mut |_span, c| match c {
130+
unescape_cstr(s, |_span, c| match c {
131131
Ok(MixedUnit::Char(c)) => {
132132
buf.extend_from_slice(c.get().encode_utf8(&mut [0; 4]).as_bytes())
133133
}

Diff for: compiler/rustc_lexer/src/unescape.rs

+44-23
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ macro_rules! check {
139139
" literal (without quotes) and produce a sequence of results of ",
140140
stringify!($unit_ty), " or error (returned via `callback`).",
141141
"\nNB: Raw strings don't do any unescaping, but do produce errors on bare CR.")]
142-
pub fn $check(src: &str, callback: &mut impl FnMut(Range<usize>, Result<$unit, EscapeError>))
142+
pub fn $check(src: &str, mut callback: impl FnMut(Range<usize>, Result<$unit, EscapeError>))
143143
{
144144
src.char_indices().for_each(|(pos, c)| {
145145
callback(
@@ -162,7 +162,7 @@ macro_rules! unescape {
162162
#[doc = concat!("Take the contents of a ", stringify!($string_ty),
163163
" literal (without quotes) and produce a sequence of results of escaped ",
164164
stringify!($unit_ty), " or error (returned via `callback`).")]
165-
pub fn $unescape(src: &str, callback: &mut impl FnMut(Range<usize>, Result<$unit, EscapeError>))
165+
pub fn $unescape(src: &str, mut callback: impl FnMut(Range<usize>, Result<$unit, EscapeError>))
166166
{
167167
let mut chars = src.chars();
168168
while let Some(c) = chars.next() {
@@ -356,36 +356,57 @@ fn unicode_escape(chars: &mut impl Iterator<Item = char>) -> Result<u32, EscapeE
356356
}
357357
}
358358

359-
/// Takes the contents of a unicode-only (non-mixed-utf8) literal (without quotes)
360-
/// and produces a sequence of unescaped characters or errors,
359+
/// Takes the contents of a literal (without quotes)
360+
/// and produces a sequence of errors,
361361
/// which are returned by invoking `error_callback`.
362-
///
363-
/// For `Char` and `Byte` modes, the callback will be called exactly once.
364-
pub fn unescape_unicode<F>(src: &str, mode: Mode, callback: &mut F)
365-
where
366-
F: FnMut(Range<usize>, Result<char, EscapeError>),
367-
{
368-
let mut byte_callback =
369-
|range, res: Result<u8, EscapeError>| callback(range, res.map(char::from));
362+
pub fn unescape_for_errors(
363+
src: &str,
364+
mode: Mode,
365+
mut error_callback: impl FnMut(Range<usize>, EscapeError),
366+
) {
370367
match mode {
371368
Char => {
372369
let mut chars = src.chars();
373-
let res = unescape_char_iter(&mut chars);
374-
callback(0..(src.len() - chars.as_str().len()), res);
370+
if let Err(e) = unescape_char_iter(&mut chars) {
371+
error_callback(0..(src.len() - chars.as_str().len()), e);
372+
}
375373
}
376374
Byte => {
377375
let mut chars = src.chars();
378-
let res = unescape_byte_iter(&mut chars).map(char::from);
379-
callback(0..(src.len() - chars.as_str().len()), res);
376+
if let Err(e) = unescape_byte_iter(&mut chars) {
377+
error_callback(0..(src.len() - chars.as_str().len()), e);
378+
}
380379
}
381-
Str => unescape_str(src, callback),
382-
ByteStr => unescape_byte_str(src, &mut byte_callback),
383-
RawStr => check_raw_str(src, callback),
384-
RawByteStr => check_raw_byte_str(src, &mut byte_callback),
385-
RawCStr => check_raw_cstr(src, &mut |r, res: Result<NonZero<char>, EscapeError>| {
386-
callback(r, res.map(|c| c.get()))
380+
Str => unescape_str(src, |range, res| {
381+
if let Err(e) = res {
382+
error_callback(range, e);
383+
}
384+
}),
385+
ByteStr => unescape_byte_str(src, |range, res| {
386+
if let Err(e) = res {
387+
error_callback(range, e);
388+
}
389+
}),
390+
CStr => unescape_cstr(src, |range, res| {
391+
if let Err(e) = res {
392+
error_callback(range, e);
393+
}
394+
}),
395+
RawStr => check_raw_str(src, |range, res| {
396+
if let Err(e) = res {
397+
error_callback(range, e);
398+
}
399+
}),
400+
RawByteStr => check_raw_byte_str(src, |range, res| {
401+
if let Err(e) = res {
402+
error_callback(range, e);
403+
}
404+
}),
405+
RawCStr => check_raw_cstr(src, |range, res| {
406+
if let Err(e) = res {
407+
error_callback(range, e);
408+
}
387409
}),
388-
CStr => unreachable!(),
389410
}
390411
}
391412

Diff for: compiler/rustc_lexer/src/unescape/tests.rs

+7-7
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ fn test_unescape_char_good() {
100100
fn test_unescape_str_warn() {
101101
fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) {
102102
let mut unescaped = Vec::with_capacity(literal.len());
103-
unescape_unicode(literal, Mode::Str, &mut |range, res| unescaped.push((range, res)));
103+
unescape_str(literal, |range, res| unescaped.push((range, res)));
104104
assert_eq!(unescaped, expected);
105105
}
106106

@@ -124,7 +124,7 @@ fn test_unescape_str_warn() {
124124
fn test_unescape_str_good() {
125125
fn check(literal_text: &str, expected: &str) {
126126
let mut buf = Ok(String::with_capacity(literal_text.len()));
127-
unescape_unicode(literal_text, Mode::Str, &mut |range, c| {
127+
unescape_str(literal_text, |range, c| {
128128
if let Ok(b) = &mut buf {
129129
match c {
130130
Ok(c) => b.push(c),
@@ -241,7 +241,7 @@ fn test_unescape_byte_good() {
241241
fn test_unescape_byte_str_good() {
242242
fn check(literal_text: &str, expected: &[u8]) {
243243
let mut buf = Ok(Vec::with_capacity(literal_text.len()));
244-
unescape_unicode(literal_text, Mode::ByteStr, &mut |range, c| {
244+
unescape_byte_str(literal_text, |range, c| {
245245
if let Ok(b) = &mut buf {
246246
match c {
247247
Ok(c) => b.push(c as u8),
@@ -264,7 +264,7 @@ fn test_unescape_byte_str_good() {
264264
fn test_unescape_raw_str() {
265265
fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) {
266266
let mut unescaped = Vec::with_capacity(literal.len());
267-
unescape_unicode(literal, Mode::RawStr, &mut |range, res| unescaped.push((range, res)));
267+
check_raw_str(literal, |range, res| unescaped.push((range, res)));
268268
assert_eq!(unescaped, expected);
269269
}
270270

@@ -274,13 +274,13 @@ fn test_unescape_raw_str() {
274274

275275
#[test]
276276
fn test_unescape_raw_byte_str() {
277-
fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) {
277+
fn check(literal: &str, expected: &[(Range<usize>, Result<u8, EscapeError>)]) {
278278
let mut unescaped = Vec::with_capacity(literal.len());
279-
unescape_unicode(literal, Mode::RawByteStr, &mut |range, res| unescaped.push((range, res)));
279+
check_raw_byte_str(literal, |range, res| unescaped.push((range, res)));
280280
assert_eq!(unescaped, expected);
281281
}
282282

283283
check("\r", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString))]);
284284
check("🦀", &[(0..4, Err(EscapeError::NonAsciiCharInByte))]);
285-
check("🦀a", &[(0..4, Err(EscapeError::NonAsciiCharInByte)), (4..5, Ok('a'))]);
285+
check("🦀a", &[(0..4, Err(EscapeError::NonAsciiCharInByte)), (4..5, Ok(b'a'))]);
286286
}

Diff for: compiler/rustc_parse/src/lexer/mod.rs

+27-63
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
use std::ops::Range;
2-
31
use rustc_ast::ast::{self, AttrStyle};
42
use rustc_ast::token::{self, CommentKind, Delimiter, IdentIsRaw, Token, TokenKind};
53
use rustc_ast::tokenstream::TokenStream;
@@ -525,7 +523,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
525523
}
526524
err.emit()
527525
}
528-
self.cook_unicode(token::Char, Mode::Char, start, end, 1, 1) // ' '
526+
self.cook_quoted(token::Char, Mode::Char, start, end, 1, 1) // ' '
529527
}
530528
rustc_lexer::LiteralKind::Byte { terminated } => {
531529
if !terminated {
@@ -537,7 +535,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
537535
.with_code(E0763)
538536
.emit()
539537
}
540-
self.cook_unicode(token::Byte, Mode::Byte, start, end, 2, 1) // b' '
538+
self.cook_quoted(token::Byte, Mode::Byte, start, end, 2, 1) // b' '
541539
}
542540
rustc_lexer::LiteralKind::Str { terminated } => {
543541
if !terminated {
@@ -549,7 +547,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
549547
.with_code(E0765)
550548
.emit()
551549
}
552-
self.cook_unicode(token::Str, Mode::Str, start, end, 1, 1) // " "
550+
self.cook_quoted(token::Str, Mode::Str, start, end, 1, 1) // " "
553551
}
554552
rustc_lexer::LiteralKind::ByteStr { terminated } => {
555553
if !terminated {
@@ -561,7 +559,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
561559
.with_code(E0766)
562560
.emit()
563561
}
564-
self.cook_unicode(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" "
562+
self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" "
565563
}
566564
rustc_lexer::LiteralKind::CStr { terminated } => {
567565
if !terminated {
@@ -573,13 +571,13 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
573571
.with_code(E0767)
574572
.emit()
575573
}
576-
self.cook_mixed(token::CStr, Mode::CStr, start, end, 2, 1) // c" "
574+
self.cook_quoted(token::CStr, Mode::CStr, start, end, 2, 1) // c" "
577575
}
578576
rustc_lexer::LiteralKind::RawStr { n_hashes } => {
579577
if let Some(n_hashes) = n_hashes {
580578
let n = u32::from(n_hashes);
581579
let kind = token::StrRaw(n_hashes);
582-
self.cook_unicode(kind, Mode::RawStr, start, end, 2 + n, 1 + n) // r##" "##
580+
self.cook_quoted(kind, Mode::RawStr, start, end, 2 + n, 1 + n) // r##" "##
583581
} else {
584582
self.report_raw_str_error(start, 1);
585583
}
@@ -588,7 +586,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
588586
if let Some(n_hashes) = n_hashes {
589587
let n = u32::from(n_hashes);
590588
let kind = token::ByteStrRaw(n_hashes);
591-
self.cook_unicode(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n) // br##" "##
589+
self.cook_quoted(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n) // br##" "##
592590
} else {
593591
self.report_raw_str_error(start, 2);
594592
}
@@ -597,7 +595,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
597595
if let Some(n_hashes) = n_hashes {
598596
let n = u32::from(n_hashes);
599597
let kind = token::CStrRaw(n_hashes);
600-
self.cook_unicode(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "##
598+
self.cook_quoted(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "##
601599
} else {
602600
self.report_raw_str_error(start, 2);
603601
}
@@ -913,40 +911,36 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
913911
self.dcx().emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num });
914912
}
915913

916-
fn cook_common(
914+
fn cook_quoted(
917915
&self,
918916
mut kind: token::LitKind,
919917
mode: Mode,
920918
start: BytePos,
921919
end: BytePos,
922920
prefix_len: u32,
923921
postfix_len: u32,
924-
unescape: fn(&str, Mode, &mut dyn FnMut(Range<usize>, Result<(), EscapeError>)),
925922
) -> (token::LitKind, Symbol) {
926923
let content_start = start + BytePos(prefix_len);
927924
let content_end = end - BytePos(postfix_len);
928925
let lit_content = self.str_from_to(content_start, content_end);
929-
unescape(lit_content, mode, &mut |range, result| {
930-
// Here we only check for errors. The actual unescaping is done later.
931-
if let Err(err) = result {
932-
let span_with_quotes = self.mk_sp(start, end);
933-
let (start, end) = (range.start as u32, range.end as u32);
934-
let lo = content_start + BytePos(start);
935-
let hi = lo + BytePos(end - start);
936-
let span = self.mk_sp(lo, hi);
937-
let is_fatal = err.is_fatal();
938-
if let Some(guar) = emit_unescape_error(
939-
self.dcx(),
940-
lit_content,
941-
span_with_quotes,
942-
span,
943-
mode,
944-
range,
945-
err,
946-
) {
947-
assert!(is_fatal);
948-
kind = token::Err(guar);
949-
}
926+
unescape::unescape_for_errors(lit_content, mode, |range, err| {
927+
let span_with_quotes = self.mk_sp(start, end);
928+
let (start, end) = (range.start as u32, range.end as u32);
929+
let lo = content_start + BytePos(start);
930+
let hi = lo + BytePos(end - start);
931+
let span = self.mk_sp(lo, hi);
932+
let is_fatal = err.is_fatal();
933+
if let Some(guar) = emit_unescape_error(
934+
self.dcx(),
935+
lit_content,
936+
span_with_quotes,
937+
span,
938+
mode,
939+
range,
940+
err,
941+
) {
942+
assert!(is_fatal);
943+
kind = token::Err(guar);
950944
}
951945
});
952946

@@ -959,36 +953,6 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
959953
};
960954
(kind, sym)
961955
}
962-
963-
fn cook_unicode(
964-
&self,
965-
kind: token::LitKind,
966-
mode: Mode,
967-
start: BytePos,
968-
end: BytePos,
969-
prefix_len: u32,
970-
postfix_len: u32,
971-
) -> (token::LitKind, Symbol) {
972-
self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
973-
unescape::unescape_unicode(src, mode, &mut |span, result| {
974-
callback(span, result.map(drop))
975-
})
976-
})
977-
}
978-
979-
fn cook_mixed(
980-
&self,
981-
kind: token::LitKind,
982-
mode: Mode,
983-
start: BytePos,
984-
end: BytePos,
985-
prefix_len: u32,
986-
postfix_len: u32,
987-
) -> (token::LitKind, Symbol) {
988-
self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, _mode, callback| {
989-
unescape::unescape_cstr(src, &mut |span, result| callback(span, result.map(drop)))
990-
})
991-
}
992956
}
993957

994958
pub fn nfc_normalize(string: &str) -> Symbol {

Diff for: compiler/rustc_parse_format/src/lib.rs

+3-5
Original file line numberDiff line numberDiff line change
@@ -1094,11 +1094,9 @@ fn find_width_map_from_snippet(
10941094
fn unescape_string(string: &str) -> Option<String> {
10951095
let mut buf = String::new();
10961096
let mut ok = true;
1097-
unescape::unescape_unicode(string, unescape::Mode::Str, &mut |_, unescaped_char| {
1098-
match unescaped_char {
1099-
Ok(c) => buf.push(c),
1100-
Err(_) => ok = false,
1101-
}
1097+
unescape::unescape_str(string, &mut |_, unescaped_char| match unescaped_char {
1098+
Ok(c) => buf.push(c),
1099+
Err(_) => ok = false,
11021100
});
11031101

11041102
ok.then_some(buf)

Diff for: src/tools/clippy/clippy_dev/src/update_lints.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -830,7 +830,7 @@ fn remove_line_splices(s: &str) -> String {
830830
.and_then(|s| s.strip_suffix('"'))
831831
.unwrap_or_else(|| panic!("expected quoted string, found `{s}`"));
832832
let mut res = String::with_capacity(s.len());
833-
unescape::unescape_unicode(s, unescape::Mode::Str, &mut |range, ch| {
833+
unescape::unescape_str(s, |range, ch| {
834834
if ch.is_ok() {
835835
res.push_str(&s[range]);
836836
}

Diff for: src/tools/rust-analyzer/crates/hir-expand/src/attrs.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -415,7 +415,7 @@ fn unescape(s: &str) -> Option<Cow<'_, str>> {
415415
let mut buf = String::new();
416416
let mut prev_end = 0;
417417
let mut has_error = false;
418-
unescape::unescape_unicode(s, unescape::Mode::Str, &mut |char_range, unescaped_char| match (
418+
unescape::unescape_str(s, |char_range, unescaped_char| match (
419419
unescaped_char,
420420
buf.capacity() == 0,
421421
) {

0 commit comments

Comments
 (0)