Implement RFC 1054: add split_whitespace() fn, deprecate words()

kwantam · kwantam · commit a085352b7e21 · 2015-04-21T14:43:43.000-04:00
For now, words() is left in (but deprecated), and Words is a type alias for struct SplitWhitespace. Also replaced references to s.words() with s.split_whitespace() throughout the codebase. Closes #15628.
diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs
@@ -78,7 +78,7 @@ pub use core::str::{Matches, RMatches};
 pub use core::str::{MatchIndices, RMatchIndices};
 pub use core::str::{from_utf8, Chars, CharIndices, Bytes};
 pub use core::str::{from_utf8_unchecked, ParseBoolError};
-pub use rustc_unicode::str::{Words, Graphemes, GraphemeIndices};
+pub use rustc_unicode::str::{SplitWhitespace, Words, Graphemes, GraphemeIndices};
 pub use core::str::pattern;
 
 /*
@@ -1739,27 +1739,44 @@ impl str {
         UnicodeStr::grapheme_indices(&self[..], is_extended)
     }
 
-    /// An iterator over the non-empty words of `self`.
-    ///
-    /// A 'word' is a subsequence separated by any sequence of whitespace.
-    /// Sequences of whitespace
-    /// are collapsed, so empty "words" are not included.
+    /// An iterator over the non-empty substrings of `self` which contain no whitespace,
+    /// and which are separated by any amount of whitespace.
     ///
     /// # Examples
     ///
     /// ```
     /// # #![feature(str_words)]
+    /// # #![allow(deprecated)]
     /// let some_words = " Mary   had\ta little  \n\t lamb";
     /// let v: Vec<&str> = some_words.words().collect();
     ///
     /// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]);
     /// ```
+    #[deprecated(reason = "words() will be removed. Use split_whitespace() instead",
+                 since = "1.1.0")]
     #[unstable(feature = "str_words",
                reason = "the precise algorithm to use is unclear")]
+    #[allow(deprecated)]
     pub fn words(&self) -> Words {
         UnicodeStr::words(&self[..])
     }
 
+    /// An iterator over the non-empty substrings of `self` which contain no whitespace,
+    /// and which are separated by any amount of whitespace.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// let some_words = " Mary   had\ta little  \n\t lamb";
+    /// let v: Vec<&str> = some_words.split_whitespace().collect();
+    ///
+    /// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]);
+    /// ```
+    #[stable(feature = "split_whitespace", since = "1.1.0")]
+    pub fn split_whitespace(&self) -> SplitWhitespace {
+        UnicodeStr::split_whitespace(&self[..])
+    }
+
     /// Returns a string's displayed width in columns.
     ///
     /// Control characters have zero width.
diff --git a/src/libcollectionstest/lib.rs b/src/libcollectionstest/lib.rs
@@ -14,7 +14,6 @@
 #![feature(hash)]
 #![feature(rand)]
 #![feature(rustc_private)]
-#![feature(str_words)]
 #![feature(test)]
 #![feature(unboxed_closures)]
 #![feature(unicode)]
diff --git a/src/libcollectionstest/str.rs b/src/libcollectionstest/str.rs
@@ -939,9 +939,9 @@ fn test_rsplitn() {
 }
 
 #[test]
-fn test_words() {
+fn test_split_whitespace() {
     let data = "\n \tMäry   häd\tä  little lämb\nLittle lämb\n";
-    let words: Vec<&str> = data.words().collect();
+    let words: Vec<&str> = data.split_whitespace().collect();
     assert_eq!(words, ["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
 }
 
diff --git a/src/libgetopts/lib.rs b/src/libgetopts/lib.rs
@@ -93,7 +93,6 @@
 
 #![deny(missing_docs)]
 #![feature(staged_api)]
-#![feature(str_words)]
 #![feature(str_char)]
 #![cfg_attr(test, feature(rustc_private))]
 
@@ -773,7 +772,7 @@ pub fn usage(brief: &str, opts: &[OptGroup]) -> String {
 
         // Normalize desc to contain words separated by one space character
         let mut desc_normalized_whitespace = String::new();
-        for word in desc.words() {
+        for word in desc.split_whitespace() {
             desc_normalized_whitespace.push_str(word);
             desc_normalized_whitespace.push(' ');
         }
diff --git a/src/librustc/lib.rs b/src/librustc/lib.rs
@@ -38,7 +38,6 @@
 #![feature(staged_api)]
 #![feature(std_misc)]
 #![feature(path_ext)]
-#![feature(str_words)]
 #![feature(str_char)]
 #![feature(into_cow)]
 #![feature(slice_patterns)]
diff --git a/src/librustc/session/config.rs b/src/librustc/session/config.rs
@@ -418,7 +418,7 @@ macro_rules! options {
                       -> bool {
             match v {
                 Some(s) => {
-                    for s in s.words() {
+                    for s in s.split_whitespace() {
                         slot.push(s.to_string());
                     }
                     true
@@ -431,7 +431,7 @@ macro_rules! options {
                       -> bool {
             match v {
                 Some(s) => {
-                    let v = s.words().map(|s| s.to_string()).collect();
+                    let v = s.split_whitespace().map(|s| s.to_string()).collect();
                     *slot = Some(v);
                     true
                 },
diff --git a/src/librustc_unicode/lib.rs b/src/librustc_unicode/lib.rs
@@ -45,7 +45,7 @@ mod u_str;
 pub mod char;
 
 pub mod str {
-    pub use u_str::{UnicodeStr, Words, Graphemes, GraphemeIndices};
+    pub use u_str::{UnicodeStr, SplitWhitespace, Words, Graphemes, GraphemeIndices};
     pub use u_str::{utf8_char_width, is_utf16, Utf16Items, Utf16Item};
     pub use u_str::{utf16_items, Utf16Encoder};
 }
diff --git a/src/librustc_unicode/u_str.rs b/src/librustc_unicode/u_str.rs
@@ -27,9 +27,16 @@ use core::str::Split;
 
 use tables::grapheme::GraphemeCat;
 
-/// An iterator over the words of a string, separated by a sequence of whitespace
-#[stable(feature = "rust1", since = "1.0.0")]
-pub struct Words<'a> {
+#[deprecated(reason = "struct Words is being replaced by struct SplitWhitespace",
+             since = "1.1.0")]
+#[unstable(feature = "unicode",
+           reason = "per RFC 1054, deprecating in favor of SplitWhitespace")]
+pub type Words<'a> = SplitWhitespace<'a>;
+
+/// An iterator over the non-whitespace substrings of a string,
+/// separated by any amount of whitespace.
+#[stable(feature = "split_whitespace", since = "1.1.0")]
+pub struct SplitWhitespace<'a> {
     inner: Filter<Split<'a, fn(char) -> bool>, fn(&&str) -> bool>,
 }
 
@@ -38,7 +45,9 @@ pub struct Words<'a> {
 pub trait UnicodeStr {
     fn graphemes<'a>(&'a self, is_extended: bool) -> Graphemes<'a>;
     fn grapheme_indices<'a>(&'a self, is_extended: bool) -> GraphemeIndices<'a>;
+    #[allow(deprecated)]
     fn words<'a>(&'a self) -> Words<'a>;
+    fn split_whitespace<'a>(&'a self) -> SplitWhitespace<'a>;
     fn is_whitespace(&self) -> bool;
     fn is_alphanumeric(&self) -> bool;
     fn width(&self, is_cjk: bool) -> usize;
@@ -58,15 +67,21 @@ impl UnicodeStr for str {
         GraphemeIndices { start_offset: self.as_ptr() as usize, iter: self.graphemes(is_extended) }
     }
 
+    #[allow(deprecated)]
     #[inline]
     fn words(&self) -> Words {
+        self.split_whitespace()
+    }
+
+    #[inline]
+    fn split_whitespace(&self) -> SplitWhitespace {
         fn is_not_empty(s: &&str) -> bool { !s.is_empty() }
         let is_not_empty: fn(&&str) -> bool = is_not_empty; // coerce to fn pointer
 
         fn is_whitespace(c: char) -> bool { c.is_whitespace() }
         let is_whitespace: fn(char) -> bool = is_whitespace; // coerce to fn pointer
 
-        Words { inner: self.split(is_whitespace).filter(is_not_empty) }
+        SplitWhitespace { inner: self.split(is_whitespace).filter(is_not_empty) }
     }
 
     #[inline]
@@ -547,11 +562,11 @@ impl<I> Iterator for Utf16Encoder<I> where I: Iterator<Item=char> {
     }
 }
 
-impl<'a> Iterator for Words<'a> {
+impl<'a> Iterator for SplitWhitespace<'a> {
     type Item = &'a str;
 
     fn next(&mut self) -> Option<&'a str> { self.inner.next() }
 }
-impl<'a> DoubleEndedIterator for Words<'a> {
+impl<'a> DoubleEndedIterator for SplitWhitespace<'a> {
     fn next_back(&mut self) -> Option<&'a str> { self.inner.next_back() }
 }
diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs
@@ -274,7 +274,7 @@ pub fn render(w: &mut fmt::Formatter, s: &str, print_toc: bool) -> fmt::Result {
         };
 
         // Transform the contents of the header into a hyphenated string
-        let id = s.words().map(|s| s.to_ascii_lowercase())
+        let id = s.split_whitespace().map(|s| s.to_ascii_lowercase())
             .collect::<Vec<String>>().connect("-");
 
         // This is a terrible hack working around how hoedown gives us rendered
diff --git a/src/librustdoc/lib.rs b/src/librustdoc/lib.rs
@@ -31,7 +31,6 @@
 #![feature(std_misc)]
 #![feature(test)]
 #![feature(unicode)]
-#![feature(str_words)]
 #![feature(path_ext)]
 #![feature(path_relative_from)]
 #![feature(slice_patterns)]
@@ -240,7 +239,7 @@ pub fn main_args(args: &[String]) -> isize {
 
     let test_args = matches.opt_strs("test-args");
     let test_args: Vec<String> = test_args.iter()
-                                          .flat_map(|s| s.words())
+                                          .flat_map(|s| s.split_whitespace())
                                           .map(|s| s.to_string())
                                           .collect();
 
@@ -404,13 +403,13 @@ fn rust_input(cratefile: &str, externs: core::Externs, matches: &getopts::Matche
                     }
                     clean::NameValue(ref x, ref value)
                             if "passes" == *x => {
-                        for pass in value.words() {
+                        for pass in value.split_whitespace() {
                             passes.push(pass.to_string());
                         }
                     }
                     clean::NameValue(ref x, ref value)
                             if "plugins" == *x => {
-                        for p in value.words() {
+                        for p in value.split_whitespace() {
                             plugins.push(p.to_string());
                         }
                     }
diff --git a/src/test/run-pass/drop-with-type-ascription-1.rs b/src/test/run-pass/drop-with-type-ascription-1.rs
@@ -9,11 +9,9 @@
 // except according to those terms.
 
 
-#![feature(str_words)]
-
 fn main() {
     let foo = "hello".to_string();
-    let foo: Vec<&str> = foo.words().collect();
+    let foo: Vec<&str> = foo.split_whitespace().collect();
     let invalid_string = &foo[0];
     assert_eq!(*invalid_string, "hello");
 }

Original file line number	Diff line number	Diff line change
`@@ -939,9 +939,9 @@ fn test_rsplitn() {`
`939`	`939`	`}`
`940`	`940`
`941`	`941`	`#[test]`
`942`		`-fn test_words() {`
	`942`	`+fn test_split_whitespace() {`
`943`	`943`	`let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";`
`944`		`- let words: Vec<&str> = data.words().collect();`
	`944`	`+ let words: Vec<&str> = data.split_whitespace().collect();`
`945`	`945`	`assert_eq!(words, ["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])`
`946`	`946`	`}`
`947`	`947`
Original file line number	Diff line number	Diff line change
`@@ -45,7 +45,7 @@ mod u_str;`
`45`	`45`	`pub mod char;`
`46`	`46`
`47`	`47`	`pub mod str {`
`48`		`- pub use u_str::{UnicodeStr, Words, Graphemes, GraphemeIndices};`
	`48`	`+ pub use u_str::{UnicodeStr, SplitWhitespace, Words, Graphemes, GraphemeIndices};`
`49`	`49`	`pub use u_str::{utf8_char_width, is_utf16, Utf16Items, Utf16Item};`
`50`	`50`	`pub use u_str::{utf16_items, Utf16Encoder};`
`51`	`51`	`}`
Original file line number	Diff line number	Diff line change
`@@ -31,7 +31,6 @@`
`31`	`31`	`#![feature(std_misc)]`
`32`	`32`	`#![feature(test)]`
`33`	`33`	`#![feature(unicode)]`
`34`		`-#![feature(str_words)]`
`35`	`34`	`#![feature(path_ext)]`
`36`	`35`	`#![feature(path_relative_from)]`
`37`	`36`	`#![feature(slice_patterns)]`
`@@ -240,7 +239,7 @@ pub fn main_args(args: &[String]) -> isize {`
`240`	`239`
`241`	`240`	`let test_args = matches.opt_strs("test-args");`
`242`	`241`	`let test_args: Vec<String> = test_args.iter()`
`243`		`- .flat_map(\|s\| s.words())`
	`242`	`+ .flat_map(\|s\| s.split_whitespace())`
`244`	`243`	`.map(\|s\| s.to_string())`
`245`	`244`	`.collect();`
`246`	`245`
`@@ -404,13 +403,13 @@ fn rust_input(cratefile: &str, externs: core::Externs, matches: &getopts::Matche`
`404`	`403`	`}`
`405`	`404`	`clean::NameValue(ref x, ref value)`
`406`	`405`	`if "passes" == *x => {`
`407`		`- for pass in value.words() {`
	`406`	`+ for pass in value.split_whitespace() {`
`408`	`407`	`passes.push(pass.to_string());`
`409`	`408`	`}`
`410`	`409`	`}`
`411`	`410`	`clean::NameValue(ref x, ref value)`
`412`	`411`	`if "plugins" == *x => {`
`413`		`- for p in value.words() {`
	`412`	`+ for p in value.split_whitespace() {`
`414`	`413`	`plugins.push(p.to_string());`
`415`	`414`	`}`
`416`	`415`	`}`