Skip to content

Commit 67eb38e

Browse files
committed
Auto merge of #22466 - Kimundi:str_pattern_ai_safe, r=aturon
This is not a complete implementation of the RFC:

- only existing methods got updated, no new ones added
- doc comments are not extensive enough yet
- optimizations got lost and need to be reimplemented

See rust-lang/rfcs#528

Technically a [breaking-change]
2 parents dcc6ce2 + c8dd2d0 commit 67eb38e

File tree

20 files changed

+1076
-350
lines changed

20 files changed

+1076
-350
lines changed

Diff for: src/compiletest/errors.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ pub fn load_errors(testfile: &Path) -> Vec<ExpectedError> {
5858
fn parse_expected(last_nonfollow_error: Option<uint>,
5959
line_num: uint,
6060
line: &str) -> Option<(WhichLine, ExpectedError)> {
61-
let start = match line.find_str("//~") { Some(i) => i, None => return None };
61+
let start = match line.find("//~") { Some(i) => i, None => return None };
6262
let (follow, adjusts) = if line.char_at(start + 3) == '|' {
6363
(true, 0)
6464
} else {

Diff for: src/compiletest/header.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,7 @@ fn parse_name_directive(line: &str, directive: &str) -> bool {
330330
pub fn parse_name_value_directive(line: &str, directive: &str)
331331
-> Option<String> {
332332
let keycolon = format!("{}:", directive);
333-
match line.find_str(&keycolon) {
333+
match line.find(&keycolon) {
334334
Some(colon) => {
335335
let value = line[(colon + keycolon.len()) .. line.len()].to_string();
336336
debug!("{}: {}", directive, value);

Diff for: src/compiletest/runtest.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -847,7 +847,7 @@ fn check_debugger_output(debugger_run_result: &ProcRes, check_lines: &[String])
847847
check_lines.iter().map(|s| {
848848
s
849849
.trim()
850-
.split_str("[...]")
850+
.split("[...]")
851851
.map(|x| x.to_string())
852852
.collect()
853853
}).collect();
@@ -866,7 +866,7 @@ fn check_debugger_output(debugger_run_result: &ProcRes, check_lines: &[String])
866866
None
867867
}
868868
} else {
869-
rest.find_str(frag)
869+
rest.find(frag)
870870
};
871871
match found {
872872
None => {

Diff for: src/libcollections/str.rs

+35-56
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@ pub use core::str::{SplitN, RSplitN};
8282
pub use core::str::{from_utf8, CharEq, Chars, CharIndices, Bytes};
8383
pub use core::str::{from_utf8_unchecked, from_c_str, ParseBoolError};
8484
pub use unicode::str::{Words, Graphemes, GraphemeIndices};
85+
pub use core::str::Pattern;
86+
pub use core::str::{Searcher, ReverseSearcher, DoubleEndedSearcher, SearchStep};
8587

8688
/*
8789
Section: Creating a string
@@ -530,7 +532,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
530532
/// assert!("bananas".contains("nana"));
531533
/// ```
532534
#[stable(feature = "rust1", since = "1.0.0")]
533-
fn contains(&self, pat: &str) -> bool {
535+
fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
534536
core_str::StrExt::contains(&self[..], pat)
535537
}
536538

@@ -545,9 +547,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
545547
/// ```rust
546548
/// assert!("hello".contains_char('e'));
547549
/// ```
548-
#[unstable(feature = "collections",
549-
reason = "might get removed in favour of a more generic contains()")]
550-
fn contains_char<P: CharEq>(&self, pat: P) -> bool {
550+
#[unstable(feature = "collections")]
551+
#[deprecated(since = "1.0.0", reason = "use `contains()` with a char")]
552+
fn contains_char<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
551553
core_str::StrExt::contains_char(&self[..], pat)
552554
}
553555

@@ -603,7 +605,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
603605
/// assert_eq!(v, vec![""]);
604606
/// ```
605607
#[stable(feature = "rust1", since = "1.0.0")]
606-
fn split<P: CharEq>(&self, pat: P) -> Split<P> {
608+
fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> {
607609
core_str::StrExt::split(&self[..], pat)
608610
}
609611

@@ -630,7 +632,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
630632
/// assert_eq!(v, vec![""]);
631633
/// ```
632634
#[stable(feature = "rust1", since = "1.0.0")]
633-
fn splitn<P: CharEq>(&self, count: usize, pat: P) -> SplitN<P> {
635+
fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> {
634636
core_str::StrExt::splitn(&self[..], count, pat)
635637
}
636638

@@ -658,8 +660,8 @@ pub trait StrExt: Index<RangeFull, Output = str> {
658660
/// let v: Vec<&str> = "lionXXtigerXleopard".split('X').rev().collect();
659661
/// assert_eq!(v, vec!["leopard", "tiger", "", "lion"]);
660662
/// ```
661-
#[unstable(feature = "collections", reason = "might get removed")]
662-
fn split_terminator<P: CharEq>(&self, pat: P) -> SplitTerminator<P> {
663+
#[stable(feature = "rust1", since = "1.0.0")]
664+
fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> {
663665
core_str::StrExt::split_terminator(&self[..], pat)
664666
}
665667

@@ -680,7 +682,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
680682
/// assert_eq!(v, vec!["leopard", "tiger", "lionX"]);
681683
/// ```
682684
#[stable(feature = "rust1", since = "1.0.0")]
683-
fn rsplitn<P: CharEq>(&self, count: usize, pat: P) -> RSplitN<P> {
685+
fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P> {
684686
core_str::StrExt::rsplitn(&self[..], count, pat)
685687
}
686688

@@ -706,7 +708,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
706708
/// ```
707709
#[unstable(feature = "collections",
708710
reason = "might have its iterator type changed")]
709-
fn match_indices<'a>(&'a self, pat: &'a str) -> MatchIndices<'a> {
711+
// NB: Right now MatchIndices yields `(usize, usize)`,
712+
// but it would be more consistent and useful to return `(usize, &str)`
713+
fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> {
710714
core_str::StrExt::match_indices(&self[..], pat)
711715
}
712716

@@ -721,9 +725,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
721725
/// let v: Vec<&str> = "1abcabc2".split_str("abc").collect();
722726
/// assert_eq!(v, vec!["1", "", "2"]);
723727
/// ```
724-
#[unstable(feature = "collections",
725-
reason = "might get removed in the future in favor of a more generic split()")]
726-
fn split_str<'a>(&'a self, pat: &'a str) -> SplitStr<'a> {
728+
#[unstable(feature = "collections")]
729+
#[deprecated(since = "1.0.0", reason = "use `split()` with a `&str`")]
730+
fn split_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitStr<'a, P> {
727731
core_str::StrExt::split_str(&self[..], pat)
728732
}
729733

@@ -825,7 +829,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
825829
/// assert!("banana".starts_with("ba"));
826830
/// ```
827831
#[stable(feature = "rust1", since = "1.0.0")]
828-
fn starts_with(&self, pat: &str) -> bool {
832+
fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
829833
core_str::StrExt::starts_with(&self[..], pat)
830834
}
831835

@@ -837,7 +841,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
837841
/// assert!("banana".ends_with("nana"));
838842
/// ```
839843
#[stable(feature = "rust1", since = "1.0.0")]
840-
fn ends_with(&self, pat: &str) -> bool {
844+
fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool
845+
where P::Searcher: ReverseSearcher<'a>
846+
{
841847
core_str::StrExt::ends_with(&self[..], pat)
842848
}
843849

@@ -857,7 +863,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
857863
/// assert_eq!("123foo1bar123".trim_matches(|c: char| c.is_numeric()), "foo1bar");
858864
/// ```
859865
#[stable(feature = "rust1", since = "1.0.0")]
860-
fn trim_matches<P: CharEq>(&self, pat: P) -> &str {
866+
fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
867+
where P::Searcher: DoubleEndedSearcher<'a>
868+
{
861869
core_str::StrExt::trim_matches(&self[..], pat)
862870
}
863871

@@ -877,7 +885,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
877885
/// assert_eq!("123foo1bar123".trim_left_matches(|c: char| c.is_numeric()), "foo1bar123");
878886
/// ```
879887
#[stable(feature = "rust1", since = "1.0.0")]
880-
fn trim_left_matches<P: CharEq>(&self, pat: P) -> &str {
888+
fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
881889
core_str::StrExt::trim_left_matches(&self[..], pat)
882890
}
883891

@@ -897,7 +905,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
897905
/// assert_eq!("123foo1bar123".trim_right_matches(|c: char| c.is_numeric()), "123foo1bar");
898906
/// ```
899907
#[stable(feature = "rust1", since = "1.0.0")]
900-
fn trim_right_matches<P: CharEq>(&self, pat: P) -> &str {
908+
fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
909+
where P::Searcher: ReverseSearcher<'a>
910+
{
901911
core_str::StrExt::trim_right_matches(&self[..], pat)
902912
}
903913

@@ -1074,7 +1084,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
10741084
/// assert_eq!(s.find(x), None);
10751085
/// ```
10761086
#[stable(feature = "rust1", since = "1.0.0")]
1077-
fn find<P: CharEq>(&self, pat: P) -> Option<usize> {
1087+
fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
10781088
core_str::StrExt::find(&self[..], pat)
10791089
}
10801090

@@ -1102,7 +1112,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
11021112
/// assert_eq!(s.rfind(x), None);
11031113
/// ```
11041114
#[stable(feature = "rust1", since = "1.0.0")]
1105-
fn rfind<P: CharEq>(&self, pat: P) -> Option<usize> {
1115+
fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>
1116+
where P::Searcher: ReverseSearcher<'a>
1117+
{
11061118
core_str::StrExt::rfind(&self[..], pat)
11071119
}
11081120

@@ -1125,9 +1137,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
11251137
/// assert_eq!(s.find_str("老虎 L"), Some(6));
11261138
/// assert_eq!(s.find_str("muffin man"), None);
11271139
/// ```
1128-
#[unstable(feature = "collections",
1129-
reason = "might get removed in favor of a more generic find in the future")]
1130-
fn find_str(&self, needle: &str) -> Option<usize> {
1140+
#[unstable(feature = "collections")]
1141+
#[deprecated(since = "1.0.0", reason = "use `find()` with a `&str`")]
1142+
fn find_str<'a, P: Pattern<'a>>(&'a self, needle: P) -> Option<usize> {
11311143
core_str::StrExt::find_str(&self[..], needle)
11321144
}
11331145

@@ -2887,22 +2899,6 @@ mod bench {
28872899
b.iter(|| assert_eq!(s.split('V').count(), 3));
28882900
}
28892901

2890-
#[bench]
2891-
fn split_unicode_not_ascii(b: &mut Bencher) {
2892-
struct NotAscii(char);
2893-
impl CharEq for NotAscii {
2894-
fn matches(&mut self, c: char) -> bool {
2895-
let NotAscii(cc) = *self;
2896-
cc == c
2897-
}
2898-
fn only_ascii(&self) -> bool { false }
2899-
}
2900-
let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2901-
2902-
b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
2903-
}
2904-
2905-
29062902
#[bench]
29072903
fn split_ascii(b: &mut Bencher) {
29082904
let s = "Mary had a little lamb, Little lamb, little-lamb.";
@@ -2911,23 +2907,6 @@ mod bench {
29112907
b.iter(|| assert_eq!(s.split(' ').count(), len));
29122908
}
29132909

2914-
#[bench]
2915-
fn split_not_ascii(b: &mut Bencher) {
2916-
struct NotAscii(char);
2917-
impl CharEq for NotAscii {
2918-
#[inline]
2919-
fn matches(&mut self, c: char) -> bool {
2920-
let NotAscii(cc) = *self;
2921-
cc == c
2922-
}
2923-
fn only_ascii(&self) -> bool { false }
2924-
}
2925-
let s = "Mary had a little lamb, Little lamb, little-lamb.";
2926-
let len = s.split(' ').count();
2927-
2928-
b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
2929-
}
2930-
29312910
#[bench]
29322911
fn split_extern_fn(b: &mut Bencher) {
29332912
let s = "Mary had a little lamb, Little lamb, little-lamb.";

Diff for: src/libcore/char.rs

+15-12
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,13 @@ use option::Option;
2222
use slice::SliceExt;
2323

2424
// UTF-8 ranges and tags for encoding characters
25-
static TAG_CONT: u8 = 0b1000_0000u8;
26-
static TAG_TWO_B: u8 = 0b1100_0000u8;
27-
static TAG_THREE_B: u8 = 0b1110_0000u8;
28-
static TAG_FOUR_B: u8 = 0b1111_0000u8;
29-
static MAX_ONE_B: u32 = 0x80u32;
30-
static MAX_TWO_B: u32 = 0x800u32;
31-
static MAX_THREE_B: u32 = 0x10000u32;
25+
const TAG_CONT: u8 = 0b1000_0000u8;
26+
const TAG_TWO_B: u8 = 0b1100_0000u8;
27+
const TAG_THREE_B: u8 = 0b1110_0000u8;
28+
const TAG_FOUR_B: u8 = 0b1111_0000u8;
29+
const MAX_ONE_B: u32 = 0x80u32;
30+
const MAX_TWO_B: u32 = 0x800u32;
31+
const MAX_THREE_B: u32 = 0x10000u32;
3232

3333
/*
3434
Lu Uppercase_Letter an uppercase letter
@@ -398,11 +398,14 @@ impl CharExt for char {
398398
#[stable(feature = "rust1", since = "1.0.0")]
399399
fn len_utf8(self) -> usize {
400400
let code = self as u32;
401-
match () {
402-
_ if code < MAX_ONE_B => 1,
403-
_ if code < MAX_TWO_B => 2,
404-
_ if code < MAX_THREE_B => 3,
405-
_ => 4,
401+
if code < MAX_ONE_B {
402+
1
403+
} else if code < MAX_TWO_B {
404+
2
405+
} else if code < MAX_THREE_B {
406+
3
407+
} else {
408+
4
406409
}
407410
}
408411

Diff for: src/libcore/slice.rs

+4
Original file line numberDiff line numberDiff line change
@@ -657,6 +657,8 @@ macro_rules! iterator {
657657
fn next(&mut self) -> Option<$elem> {
658658
// could be implemented with slices, but this avoids bounds checks
659659
unsafe {
660+
::intrinsics::assume(!self.ptr.is_null());
661+
::intrinsics::assume(!self.end.is_null());
660662
if self.ptr == self.end {
661663
None
662664
} else {
@@ -693,6 +695,8 @@ macro_rules! iterator {
693695
fn next_back(&mut self) -> Option<$elem> {
694696
// could be implemented with slices, but this avoids bounds checks
695697
unsafe {
698+
::intrinsics::assume(!self.ptr.is_null());
699+
::intrinsics::assume(!self.end.is_null());
696700
if self.end == self.ptr {
697701
None
698702
} else {

0 commit comments

Comments
 (0)