From df73b268e5f66b9fc55974a78586b8fa9f956c31 Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Sun, 18 Aug 2019 12:16:57 +0300
Subject: [PATCH] convert \r\n -> \n in include_str! macro

Ideally, the meaning of a program should be independent of the line
endings used, because, for example, git can change line endings during a
checkout. We currently do line-ending conversion in almost all cases,
with `include_str!` being an exception. This commit removes that
exception, bringing `include_str!` closer in behavior to string literals.

Note that this is technically a breaking change. If you really mean to
include a string with DOS line endings, you can use the `include_bytes!`
macro, which is guaranteed not to do any translation, like this:

    pub fn my_text() -> &'static str {
        unsafe { std::str::from_utf8_unchecked(MY_TEXT_BYTES) }
    }

    const MY_TEXT_BYTES: &[u8] = include_bytes!("my_text.bin");

    #[test]
    fn test_encoding() {
        std::str::from_utf8(MY_TEXT_BYTES).unwrap();
    }
---
 src/libcore/macros.rs                            |  4 +++-
 src/libsyntax_ext/source_util.rs                 | 16 ++++++----------
 src/test/ui/include-macros/normalization.rs      |  2 +-
 ...lf-line-endings-string-literal-doc-comment.rs |  3 ++-
 4 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/src/libcore/macros.rs b/src/libcore/macros.rs
index 6c88a766a2f10..b048546b2104e 100644
--- a/src/libcore/macros.rs
+++ b/src/libcore/macros.rs
@@ -978,7 +978,9 @@ pub(crate) mod builtin {
     /// modules are found)
     ///
     /// This macro will yield an expression of type `&'static str` which is the
-    /// contents of the file.
+    /// contents of the file. The string is normalized:
+    /// * Byte Order Mark (BOM), if any, is removed,
+    /// * DOS line endings (`\r\n`) are converted to `\n`.
     ///
     /// # Examples
     ///
diff --git a/src/libsyntax_ext/source_util.rs b/src/libsyntax_ext/source_util.rs
index e008ed710e4d0..67780e68794e8 100644
--- a/src/libsyntax_ext/source_util.rs
+++ b/src/libsyntax_ext/source_util.rs
@@ -112,16 +112,12 @@ pub fn expand_include_str(cx: &mut ExtCtxt<'_>, sp: Span, tts: &[tokenstream::To
         None => return DummyResult::any(sp)
     };
     let file = cx.resolve_path(file, sp);
-    match cx.source_map().load_binary_file(&file) {
-        Ok(bytes) => match std::str::from_utf8(&bytes) {
-            Ok(src) => {
-                let interned_src = Symbol::intern(&src);
-                base::MacEager::expr(cx.expr_str(sp, interned_src))
-            }
-            Err(_) => {
-                cx.span_err(sp, &format!("{} wasn't a utf-8 file", file.display()));
-                DummyResult::any(sp)
-            }
+    match cx.source_map().load_file(&file) {
+        Ok(source_file) => {
+            let src = source_file.src.as_ref()
+                .expect("freshly loaded file should have a source");
+            let interned_src = Symbol::intern(src.as_str());
+            base::MacEager::expr(cx.expr_str(sp, interned_src))
         },
         Err(e) => {
             cx.span_err(sp, &format!("couldn't read {}: {}", file.display(), e));
diff --git a/src/test/ui/include-macros/normalization.rs b/src/test/ui/include-macros/normalization.rs
index 889f08e606ec9..b2e486b89ee16 100644
--- a/src/test/ui/include-macros/normalization.rs
+++ b/src/test/ui/include-macros/normalization.rs
@@ -7,6 +7,6 @@ fn main() {
     );
     assert_eq!(
         include_str!("data.bin"),
-        "\u{FEFF}This file starts with BOM.\r\nLines are separated by \\r\\n.\r\n",
+        "This file starts with BOM.\nLines are separated by \\r\\n.\n",
     );
 }
diff --git a/src/test/ui/lexer-crlf-line-endings-string-literal-doc-comment.rs b/src/test/ui/lexer-crlf-line-endings-string-literal-doc-comment.rs
index ada253aacfb90..97eed2b458596 100644
--- a/src/test/ui/lexer-crlf-line-endings-string-literal-doc-comment.rs
+++ b/src/test/ui/lexer-crlf-line-endings-string-literal-doc-comment.rs
@@ -36,6 +36,7 @@ literal";
     assert_eq!(s, "byte string\nliteral".as_bytes());
 
     // validate that our source file has CRLF endings
-    let source = include_str!("lexer-crlf-line-endings-string-literal-doc-comment.rs");
+    let source = include_bytes!("lexer-crlf-line-endings-string-literal-doc-comment.rs");
+    let source = std::str::from_utf8(&source[..]).unwrap();
     assert!(source.contains("string\r\nliteral"));
 }
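
For reference, the normalization that `load_file` performs, and that
`include_str!` now inherits, is roughly the following. This is a minimal,
self-contained sketch under the assumptions described in the commit
message; `normalize_source` is an illustrative name, not an actual
compiler API:

    // Sketch of the normalization applied by the source map loader:
    // strip a leading UTF-8 BOM, then convert DOS line endings to \n.
    fn normalize_source(src: &str) -> String {
        let src = src.strip_prefix('\u{FEFF}').unwrap_or(src);
        src.replace("\r\n", "\n")
    }

    #[test]
    fn normalizes_bom_and_crlf() {
        assert_eq!(
            normalize_source("\u{FEFF}first\r\nsecond\r\n"),
            "first\nsecond\n",
        );
    }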