From df73b268e5f66b9fc55974a78586b8fa9f956c31 Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Sun, 18 Aug 2019 12:16:57 +0300
Subject: [PATCH] convert \r\n -> \n in include_str! macro

Ideally, the meaning of a program should be independent of the line
endings used, because, for example, git can change line endings during a
checkout. We currently do line-ending conversion in almost all cases,
with `include_str!` being an exception. This commit removes that
exception, bringing `include_str!` closer in behavior to string literals.

Note that this is technically a breaking change. If you really mean to
include a string with DOS line endings, you can use the `include_bytes!`
macro, which is guaranteed not to do any translation, like this:

    pub fn my_text() -> &'static str {
        unsafe { std::str::from_utf8_unchecked(MY_TEXT_BYTES) }
    }

    const MY_TEXT_BYTES: &[u8] = include_bytes!("my_text.bin");

    #[test]
    fn test_encoding() {
        std::str::from_utf8(MY_TEXT_BYTES).unwrap();
    }
---
 src/libcore/macros.rs                            |  4 +++-
 src/libsyntax_ext/source_util.rs                 | 16 ++++++----------
 src/test/ui/include-macros/normalization.rs      |  2 +-
 ...lf-line-endings-string-literal-doc-comment.rs |  3 ++-
 4 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/src/libcore/macros.rs b/src/libcore/macros.rs
index 6c88a766a2f10..b048546b2104e 100644
--- a/src/libcore/macros.rs
+++ b/src/libcore/macros.rs
@@ -978,7 +978,9 @@ pub(crate) mod builtin {
     /// modules are found)
     ///
     /// This macro will yield an expression of type `&'static str` which is the
-    /// contents of the file.
+    /// contents of the file. The string is normalized:
+    /// * Byte Order Mark (BOM), if any, is removed,
+    /// * DOS line endings (`\r\n`) are converted to `\n`.
     ///
     /// # Examples
     ///
diff --git a/src/libsyntax_ext/source_util.rs b/src/libsyntax_ext/source_util.rs
index e008ed710e4d0..67780e68794e8 100644
--- a/src/libsyntax_ext/source_util.rs
+++ b/src/libsyntax_ext/source_util.rs
@@ -112,16 +112,12 @@ pub fn expand_include_str(cx: &mut ExtCtxt<'_>, sp: Span, tts: &[tokenstream::To
         None => return DummyResult::any(sp)
     };
     let file = cx.resolve_path(file, sp);
-    match cx.source_map().load_binary_file(&file) {
-        Ok(bytes) => match std::str::from_utf8(&bytes) {
-            Ok(src) => {
-                let interned_src = Symbol::intern(&src);
-                base::MacEager::expr(cx.expr_str(sp, interned_src))
-            }
-            Err(_) => {
-                cx.span_err(sp, &format!("{} wasn't a utf-8 file", file.display()));
-                DummyResult::any(sp)
-            }
+    match cx.source_map().load_file(&file) {
+        Ok(source_file) => {
+            let src = source_file.src.as_ref()
+                .expect("freshly loaded file should have a source");
+            let interned_src = Symbol::intern(src.as_str());
+            base::MacEager::expr(cx.expr_str(sp, interned_src))
         },
         Err(e) => {
             cx.span_err(sp, &format!("couldn't read {}: {}", file.display(), e));
diff --git a/src/test/ui/include-macros/normalization.rs b/src/test/ui/include-macros/normalization.rs
index 889f08e606ec9..b2e486b89ee16 100644
--- a/src/test/ui/include-macros/normalization.rs
+++ b/src/test/ui/include-macros/normalization.rs
@@ -7,6 +7,6 @@ fn main() {
     );
     assert_eq!(
         include_str!("data.bin"),
-        "\u{FEFF}This file starts with BOM.\r\nLines are separated by \\r\\n.\r\n",
+        "This file starts with BOM.\nLines are separated by \\r\\n.\n",
     );
 }
diff --git a/src/test/ui/lexer-crlf-line-endings-string-literal-doc-comment.rs b/src/test/ui/lexer-crlf-line-endings-string-literal-doc-comment.rs
index ada253aacfb90..97eed2b458596 100644
--- a/src/test/ui/lexer-crlf-line-endings-string-literal-doc-comment.rs
+++ b/src/test/ui/lexer-crlf-line-endings-string-literal-doc-comment.rs
@@ -36,6 +36,7 @@ literal";
     assert_eq!(s, "byte string\nliteral".as_bytes());
 
     // validate that our source file has CRLF endings
-    let source = include_str!("lexer-crlf-line-endings-string-literal-doc-comment.rs");
+    let source = include_bytes!("lexer-crlf-line-endings-string-literal-doc-comment.rs");
+    let source = std::str::from_utf8(&source[..]).unwrap();
     assert!(source.contains("string\r\nliteral"));
 }
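
For reference, the normalization that `load_file` performs, and that
`include_str!` now inherits, is roughly the following. This is a minimal,
self-contained sketch under the assumptions described in the commit
message; `normalize_source` is an illustrative name, not an actual
compiler API:

    // Sketch of the normalization applied by the source map loader:
    // strip a leading UTF-8 BOM, then convert DOS line endings to \n.
    fn normalize_source(src: &str) -> String {
        let src = src.strip_prefix('\u{FEFF}').unwrap_or(src);
        src.replace("\r\n", "\n")
    }

    #[test]
    fn normalizes_bom_and_crlf() {
        assert_eq!(
            normalize_source("\u{FEFF}first\r\nsecond\r\n"),
            "first\nsecond\n",
        );
    }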