Skip to content

Commit a085352

Browse files
committed
Implement RFC 1054: split_whitespace() fn, deprecate words()
For now, words() is left in (but deprecated), and Words is a type alias for the struct SplitWhitespace. Also cleaned up references to s.words() throughout the codebase. Closes #15628.
1 parent e959fab commit a085352

File tree

11 files changed

+55
-29
lines changed

11 files changed

+55
-29
lines changed

Diff for: src/libcollections/str.rs

+23-6
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ pub use core::str::{Matches, RMatches};
7878
pub use core::str::{MatchIndices, RMatchIndices};
7979
pub use core::str::{from_utf8, Chars, CharIndices, Bytes};
8080
pub use core::str::{from_utf8_unchecked, ParseBoolError};
81-
pub use rustc_unicode::str::{Words, Graphemes, GraphemeIndices};
81+
pub use rustc_unicode::str::{SplitWhitespace, Words, Graphemes, GraphemeIndices};
8282
pub use core::str::pattern;
8383

8484
/*
@@ -1739,27 +1739,44 @@ impl str {
17391739
UnicodeStr::grapheme_indices(&self[..], is_extended)
17401740
}
17411741

1742-
/// An iterator over the non-empty words of `self`.
1743-
///
1744-
/// A 'word' is a subsequence separated by any sequence of whitespace.
1745-
/// Sequences of whitespace
1746-
/// are collapsed, so empty "words" are not included.
1742+
/// An iterator over the non-empty substrings of `self` which contain no whitespace,
1743+
/// and which are separated by any amount of whitespace.
17471744
///
17481745
/// # Examples
17491746
///
17501747
/// ```
17511748
/// # #![feature(str_words)]
1749+
/// # #![allow(deprecated)]
17521750
/// let some_words = " Mary had\ta little \n\t lamb";
17531751
/// let v: Vec<&str> = some_words.words().collect();
17541752
///
17551753
/// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]);
17561754
/// ```
1755+
#[deprecated(reason = "words() will be removed. Use split_whitespace() instead",
1756+
since = "1.1.0")]
17571757
#[unstable(feature = "str_words",
17581758
reason = "the precise algorithm to use is unclear")]
1759+
#[allow(deprecated)]
17591760
pub fn words(&self) -> Words {
17601761
UnicodeStr::words(&self[..])
17611762
}
17621763

1764+
/// An iterator over the non-empty substrings of `self` which contain no whitespace,
1765+
/// and which are separated by any amount of whitespace.
1766+
///
1767+
/// # Examples
1768+
///
1769+
/// ```
1770+
/// let some_words = " Mary had\ta little \n\t lamb";
1771+
/// let v: Vec<&str> = some_words.split_whitespace().collect();
1772+
///
1773+
/// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]);
1774+
/// ```
1775+
#[stable(feature = "split_whitespace", since = "1.1.0")]
1776+
pub fn split_whitespace(&self) -> SplitWhitespace {
1777+
UnicodeStr::split_whitespace(&self[..])
1778+
}
1779+
17631780
/// Returns a string's displayed width in columns.
17641781
///
17651782
/// Control characters have zero width.

Diff for: src/libcollectionstest/lib.rs

-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
#![feature(hash)]
1515
#![feature(rand)]
1616
#![feature(rustc_private)]
17-
#![feature(str_words)]
1817
#![feature(test)]
1918
#![feature(unboxed_closures)]
2019
#![feature(unicode)]

Diff for: src/libcollectionstest/str.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -939,9 +939,9 @@ fn test_rsplitn() {
939939
}
940940

941941
#[test]
942-
fn test_words() {
942+
fn test_split_whitespace() {
943943
let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
944-
let words: Vec<&str> = data.words().collect();
944+
let words: Vec<&str> = data.split_whitespace().collect();
945945
assert_eq!(words, ["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
946946
}
947947

Diff for: src/libgetopts/lib.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,6 @@
9393

9494
#![deny(missing_docs)]
9595
#![feature(staged_api)]
96-
#![feature(str_words)]
9796
#![feature(str_char)]
9897
#![cfg_attr(test, feature(rustc_private))]
9998

@@ -773,7 +772,7 @@ pub fn usage(brief: &str, opts: &[OptGroup]) -> String {
773772

774773
// Normalize desc to contain words separated by one space character
775774
let mut desc_normalized_whitespace = String::new();
776-
for word in desc.words() {
775+
for word in desc.split_whitespace() {
777776
desc_normalized_whitespace.push_str(word);
778777
desc_normalized_whitespace.push(' ');
779778
}

Diff for: src/librustc/lib.rs

-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@
3838
#![feature(staged_api)]
3939
#![feature(std_misc)]
4040
#![feature(path_ext)]
41-
#![feature(str_words)]
4241
#![feature(str_char)]
4342
#![feature(into_cow)]
4443
#![feature(slice_patterns)]

Diff for: src/librustc/session/config.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -418,7 +418,7 @@ macro_rules! options {
418418
-> bool {
419419
match v {
420420
Some(s) => {
421-
for s in s.words() {
421+
for s in s.split_whitespace() {
422422
slot.push(s.to_string());
423423
}
424424
true
@@ -431,7 +431,7 @@ macro_rules! options {
431431
-> bool {
432432
match v {
433433
Some(s) => {
434-
let v = s.words().map(|s| s.to_string()).collect();
434+
let v = s.split_whitespace().map(|s| s.to_string()).collect();
435435
*slot = Some(v);
436436
true
437437
},

Diff for: src/librustc_unicode/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ mod u_str;
4545
pub mod char;
4646

4747
pub mod str {
48-
pub use u_str::{UnicodeStr, Words, Graphemes, GraphemeIndices};
48+
pub use u_str::{UnicodeStr, SplitWhitespace, Words, Graphemes, GraphemeIndices};
4949
pub use u_str::{utf8_char_width, is_utf16, Utf16Items, Utf16Item};
5050
pub use u_str::{utf16_items, Utf16Encoder};
5151
}

Diff for: src/librustc_unicode/u_str.rs

+21-6
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,16 @@ use core::str::Split;
2727

2828
use tables::grapheme::GraphemeCat;
2929

30-
/// An iterator over the words of a string, separated by a sequence of whitespace
31-
#[stable(feature = "rust1", since = "1.0.0")]
32-
pub struct Words<'a> {
30+
#[deprecated(reason = "struct Words is being replaced by struct SplitWhitespace",
31+
since = "1.1.0")]
32+
#[unstable(feature = "unicode",
33+
reason = "per RFC 1054, deprecating in favor of SplitWhitespace")]
34+
pub type Words<'a> = SplitWhitespace<'a>;
35+
36+
/// An iterator over the non-whitespace substrings of a string,
37+
/// separated by any amount of whitespace.
38+
#[stable(feature = "split_whitespace", since = "1.1.0")]
39+
pub struct SplitWhitespace<'a> {
3340
inner: Filter<Split<'a, fn(char) -> bool>, fn(&&str) -> bool>,
3441
}
3542

@@ -38,7 +45,9 @@ pub struct Words<'a> {
3845
pub trait UnicodeStr {
3946
fn graphemes<'a>(&'a self, is_extended: bool) -> Graphemes<'a>;
4047
fn grapheme_indices<'a>(&'a self, is_extended: bool) -> GraphemeIndices<'a>;
48+
#[allow(deprecated)]
4149
fn words<'a>(&'a self) -> Words<'a>;
50+
fn split_whitespace<'a>(&'a self) -> SplitWhitespace<'a>;
4251
fn is_whitespace(&self) -> bool;
4352
fn is_alphanumeric(&self) -> bool;
4453
fn width(&self, is_cjk: bool) -> usize;
@@ -58,15 +67,21 @@ impl UnicodeStr for str {
5867
GraphemeIndices { start_offset: self.as_ptr() as usize, iter: self.graphemes(is_extended) }
5968
}
6069

70+
#[allow(deprecated)]
6171
#[inline]
6272
fn words(&self) -> Words {
73+
self.split_whitespace()
74+
}
75+
76+
#[inline]
77+
fn split_whitespace(&self) -> SplitWhitespace {
6378
fn is_not_empty(s: &&str) -> bool { !s.is_empty() }
6479
let is_not_empty: fn(&&str) -> bool = is_not_empty; // coerce to fn pointer
6580

6681
fn is_whitespace(c: char) -> bool { c.is_whitespace() }
6782
let is_whitespace: fn(char) -> bool = is_whitespace; // coerce to fn pointer
6883

69-
Words { inner: self.split(is_whitespace).filter(is_not_empty) }
84+
SplitWhitespace { inner: self.split(is_whitespace).filter(is_not_empty) }
7085
}
7186

7287
#[inline]
@@ -547,11 +562,11 @@ impl<I> Iterator for Utf16Encoder<I> where I: Iterator<Item=char> {
547562
}
548563
}
549564

550-
impl<'a> Iterator for Words<'a> {
565+
impl<'a> Iterator for SplitWhitespace<'a> {
551566
type Item = &'a str;
552567

553568
fn next(&mut self) -> Option<&'a str> { self.inner.next() }
554569
}
555-
impl<'a> DoubleEndedIterator for Words<'a> {
570+
impl<'a> DoubleEndedIterator for SplitWhitespace<'a> {
556571
fn next_back(&mut self) -> Option<&'a str> { self.inner.next_back() }
557572
}

Diff for: src/librustdoc/html/markdown.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,7 @@ pub fn render(w: &mut fmt::Formatter, s: &str, print_toc: bool) -> fmt::Result {
274274
};
275275

276276
// Transform the contents of the header into a hyphenated string
277-
let id = s.words().map(|s| s.to_ascii_lowercase())
277+
let id = s.split_whitespace().map(|s| s.to_ascii_lowercase())
278278
.collect::<Vec<String>>().connect("-");
279279

280280
// This is a terrible hack working around how hoedown gives us rendered

Diff for: src/librustdoc/lib.rs

+3-4
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@
3131
#![feature(std_misc)]
3232
#![feature(test)]
3333
#![feature(unicode)]
34-
#![feature(str_words)]
3534
#![feature(path_ext)]
3635
#![feature(path_relative_from)]
3736
#![feature(slice_patterns)]
@@ -240,7 +239,7 @@ pub fn main_args(args: &[String]) -> isize {
240239

241240
let test_args = matches.opt_strs("test-args");
242241
let test_args: Vec<String> = test_args.iter()
243-
.flat_map(|s| s.words())
242+
.flat_map(|s| s.split_whitespace())
244243
.map(|s| s.to_string())
245244
.collect();
246245

@@ -404,13 +403,13 @@ fn rust_input(cratefile: &str, externs: core::Externs, matches: &getopts::Matche
404403
}
405404
clean::NameValue(ref x, ref value)
406405
if "passes" == *x => {
407-
for pass in value.words() {
406+
for pass in value.split_whitespace() {
408407
passes.push(pass.to_string());
409408
}
410409
}
411410
clean::NameValue(ref x, ref value)
412411
if "plugins" == *x => {
413-
for p in value.words() {
412+
for p in value.split_whitespace() {
414413
plugins.push(p.to_string());
415414
}
416415
}

Diff for: src/test/run-pass/drop-with-type-ascription-1.rs

+1-3
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,9 @@
99
// except according to those terms.
1010

1111

12-
#![feature(str_words)]
13-
1412
fn main() {
1513
let foo = "hello".to_string();
16-
let foo: Vec<&str> = foo.words().collect();
14+
let foo: Vec<&str> = foo.split_whitespace().collect();
1715
let invalid_string = &foo[0];
1816
assert_eq!(*invalid_string, "hello");
1917
}

0 commit comments

Comments
 (0)