Skip to content

Commit a641996

Browse files
committed
Fix tidy and rebase fallout
Added a few bugfixes and additional testcases
1 parent c1de0a0 commit a641996

File tree

4 files changed, with 171 additions and 50 deletions.

src/libcollections/str.rs

Lines changed: 0 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -2893,22 +2893,6 @@ mod bench {
28932893
b.iter(|| assert_eq!(s.split('V').count(), 3));
28942894
}
28952895

2896-
#[bench]
2897-
fn split_unicode_not_ascii(b: &mut Bencher) {
2898-
struct NotAscii(char);
2899-
impl CharEq for NotAscii {
2900-
fn matches(&mut self, c: char) -> bool {
2901-
let NotAscii(cc) = *self;
2902-
cc == c
2903-
}
2904-
fn only_ascii(&self) -> bool { false }
2905-
}
2906-
let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2907-
2908-
b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
2909-
}
2910-
2911-
29122896
#[bench]
29132897
fn split_ascii(b: &mut Bencher) {
29142898
let s = "Mary had a little lamb, Little lamb, little-lamb.";
@@ -2917,23 +2901,6 @@ mod bench {
29172901
b.iter(|| assert_eq!(s.split(' ').count(), len));
29182902
}
29192903

2920-
#[bench]
2921-
fn split_not_ascii(b: &mut Bencher) {
2922-
struct NotAscii(char);
2923-
impl CharEq for NotAscii {
2924-
#[inline]
2925-
fn matches(&mut self, c: char) -> bool {
2926-
let NotAscii(cc) = *self;
2927-
cc == c
2928-
}
2929-
fn only_ascii(&self) -> bool { false }
2930-
}
2931-
let s = "Mary had a little lamb, Little lamb, little-lamb.";
2932-
let len = s.split(' ').count();
2933-
2934-
b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
2935-
}
2936-
29372904
#[bench]
29382905
fn split_extern_fn(b: &mut Bencher) {
29392906
let s = "Mary had a little lamb, Little lamb, little-lamb.";

src/libcore/str/mod.rs

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -156,7 +156,6 @@ impl FromStr for bool {
156156

157157
/// An error returned when parsing a `bool` from a string fails.
158158
#[derive(Debug, Clone, PartialEq)]
159-
#[allow(missing_copy_implementations)]
160159
#[stable(feature = "rust1", since = "1.0.0")]
161160
pub struct ParseBoolError { _priv: () }
162161

@@ -235,7 +234,7 @@ pub unsafe fn from_utf8_unchecked<'a>(v: &'a [u8]) -> &'a str {
235234
pub unsafe fn from_c_str(s: *const i8) -> &'static str {
236235
let s = s as *const u8;
237236
let mut len = 0;
238-
while *s.offset(len as int) != 0 {
237+
while *s.offset(len as isize) != 0 {
239238
len += 1;
240239
}
241240
let v: &'static [u8] = ::mem::transmute(Slice { data: s, len: len });
@@ -258,7 +257,7 @@ impl CharEq for char {
258257
fn matches(&mut self, c: char) -> bool { *self == c }
259258

260259
#[inline]
261-
fn only_ascii(&self) -> bool { (*self as usize) < 128 }
260+
fn only_ascii(&self) -> bool { (*self as u32) < 128 }
262261
}
263262

264263
impl<F> CharEq for F where F: FnMut(char) -> bool {
@@ -764,7 +763,8 @@ impl TwoWaySearcher {
764763
// How far we can jump when we encounter a mismatch is all based on the fact
765764
// that (u, v) is a critical factorization for the needle.
766765
#[inline]
767-
fn next(&mut self, haystack: &[u8], needle: &[u8], long_period: bool) -> Option<(usize, usize)> {
766+
fn next(&mut self, haystack: &[u8], needle: &[u8], long_period: bool)
767+
-> Option<(usize, usize)> {
768768
'search: loop {
769769
// Check that we have room to search in
770770
if self.position + needle.len() > haystack.len() {
@@ -955,6 +955,7 @@ Section: Comparing strings
955955
/// to compare &[u8] byte slices that are not necessarily valid UTF-8.
956956
#[inline]
957957
fn eq_slice_(a: &str, b: &str) -> bool {
958+
// NOTE: In theory n should be libc::size_t and not usize, but libc is not available here
958959
#[allow(improper_ctypes)]
959960
extern { fn memcmp(s1: *const i8, s2: *const i8, n: usize) -> i32; }
960961
a.len() == b.len() && unsafe {
@@ -1489,7 +1490,7 @@ impl StrExt for str {
14891490
fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
14901491
where P::Searcher: DoubleEndedSearcher<'a> {
14911492
let mut i = 0;
1492-
let mut j = self.len();
1493+
let mut j = 0;
14931494
let mut matcher = pat.into_searcher(self);
14941495
if let Some((a, b)) = matcher.next_reject() {
14951496
i = a;
@@ -1507,7 +1508,7 @@ impl StrExt for str {
15071508

15081509
#[inline]
15091510
fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
1510-
let mut i = 0;
1511+
let mut i = self.len();
15111512
let mut matcher = pat.into_searcher(self);
15121513
if let Some((a, _)) = matcher.next_reject() {
15131514
i = a;
@@ -1521,7 +1522,7 @@ impl StrExt for str {
15211522
#[inline]
15221523
fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
15231524
where P::Searcher: ReverseSearcher<'a> {
1524-
let mut j = self.len();
1525+
let mut j = 0;
15251526
let mut matcher = pat.into_searcher(self);
15261527
if let Some((_, b)) = matcher.next_reject_back() {
15271528
j = b;

src/libcore/str/pattern.rs

Lines changed: 19 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,7 @@ pub trait Pattern<'a>: Sized {
5858

5959
// Searcher
6060

61+
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
6162
pub enum SearchStep {
6263
Match(usize, usize),
6364
Reject(usize, usize),
@@ -190,7 +191,7 @@ impl<'a, C: CharEq> DoubleEndedSearcher<'a> for CharEqSearcher<'a, C> {}
190191

191192
// Impl for &str
192193

193-
// TODO: Optimize the naive implementation here
194+
// Todo: Optimize the naive implementation here
194195

195196
#[derive(Clone)]
196197
pub struct StrSearcher<'a, 'b> {
@@ -235,13 +236,16 @@ unsafe impl<'a, 'b> Searcher<'a> for StrSearcher<'a, 'b> {
235236
},
236237
|m: &mut StrSearcher| {
237238
// Forward step for nonempty needle
238-
let possible_match = &m.haystack[m.start .. m.start + m.needle.len()];
239+
// Compare if bytes are equal
240+
let possible_match = &m.haystack.as_bytes()[m.start .. m.start + m.needle.len()];
239241
let current_start = m.start;
240-
if possible_match == m.needle {
242+
if possible_match == m.needle.as_bytes() {
241243
m.start += m.needle.len();
242244
SearchStep::Match(current_start, m.start)
243245
} else {
244-
m.start += possible_match.chars().next().unwrap().len_utf8();
246+
// Skip a char
247+
let haystack_suffix = &m.haystack[m.start..];
248+
m.start += haystack_suffix.chars().next().unwrap().len_utf8();
245249
SearchStep::Reject(current_start, m.start)
246250
}
247251
})
@@ -262,13 +266,16 @@ unsafe impl<'a, 'b> ReverseSearcher<'a> for StrSearcher<'a, 'b> {
262266
},
263267
|m: &mut StrSearcher| {
264268
// Backward step for nonempty needle
265-
let possible_match = &m.haystack[m.end - m.needle.len() .. m.end];
269+
// Compare if bytes are equal
270+
let possible_match = &m.haystack.as_bytes()[m.end - m.needle.len() .. m.end];
266271
let current_end = m.end;
267-
if possible_match == m.needle {
272+
if possible_match == m.needle.as_bytes() {
268273
m.end -= m.needle.len();
269274
SearchStep::Match(m.end, current_end)
270275
} else {
271-
m.end -= possible_match.chars().rev().next().unwrap().len_utf8();
276+
// Skip a char
277+
let haystack_prefix = &m.haystack[..m.end];
278+
m.end -= haystack_prefix.chars().rev().next().unwrap().len_utf8();
272279
SearchStep::Reject(m.end, current_end)
273280
}
274281
})
@@ -290,6 +297,9 @@ where F: FnOnce(&mut StrSearcher) -> SearchStep,
290297
} else if m.start + m.needle.len() <= m.end {
291298
// Case for needle != ""
292299
g(&mut m)
300+
} else if m.start < m.end {
301+
m.done = true;
302+
SearchStep::Reject(m.start, m.end)
293303
} else {
294304
m.done = true;
295305
SearchStep::Done
@@ -352,7 +362,8 @@ impl<'a, F> Pattern<'a> for F where F: FnMut(char) -> bool {
352362

353363
use ops::Deref;
354364

355-
impl<'a, 'b, P: 'b + ?Sized, T: Deref<Target = P> + ?Sized> Pattern<'a> for &'b T where &'b P: Pattern<'a> {
365+
impl<'a, 'b, P: 'b + ?Sized, T: Deref<Target = P> + ?Sized> Pattern<'a> for &'b T
366+
where &'b P: Pattern<'a> {
356367
type Searcher = <&'b P as Pattern<'a>>::Searcher;
357368
associated_items!(<&'b P as Pattern<'a>>::Searcher,
358369
s, (&**s));

src/libcoretest/str.rs

Lines changed: 144 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
1+
// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
22
// file at the top-level directory of this distribution and at
33
// http://rust-lang.org/COPYRIGHT.
44
//
@@ -139,8 +139,150 @@ fn test_utf16_code_units() {
139139
vec![0xE9, 0xD83D, 0xDCA9])
140140
}
141141

142+
#[test]
143+
fn starts_with_in_unicode() {
144+
assert!(!"├── Cargo.toml".starts_with("# "));
145+
}
142146

143-
// rm x86_64-unknown-linux-gnu/stage1/test/coretesttest-x86_64-unknown-linux-gnu; env PLEASE_BENCH=1 make check-stage1-coretest TESTNAME=str::bench
147+
#[test]
148+
fn starts_short_long() {
149+
assert!(!"".starts_with("##"));
150+
assert!(!"##".starts_with("####"));
151+
assert!("####".starts_with("##"));
152+
assert!(!"##ä".starts_with("####"));
153+
assert!("####ä".starts_with("##"));
154+
assert!(!"##".starts_with("####ä"));
155+
assert!("##ä##".starts_with("##ä"));
156+
157+
assert!("".starts_with(""));
158+
assert!("ä".starts_with(""));
159+
assert!("#ä".starts_with(""));
160+
assert!("##ä".starts_with(""));
161+
assert!("ä###".starts_with(""));
162+
assert!("#ä##".starts_with(""));
163+
assert!("##ä#".starts_with(""));
164+
}
165+
166+
#[test]
167+
fn contains_weird_cases() {
168+
assert!("* \t".contains_char(' '));
169+
assert!(!"* \t".contains_char('?'));
170+
assert!(!"* \t".contains_char('\u{1F4A9}'));
171+
}
172+
173+
#[test]
174+
fn trim_ws() {
175+
assert_eq!(" \t a \t ".trim_left_matches(|c: char| c.is_whitespace()),
176+
"a \t ");
177+
assert_eq!(" \t a \t ".trim_right_matches(|c: char| c.is_whitespace()),
178+
" \t a");
179+
assert_eq!(" \t a \t ".trim_matches(|c: char| c.is_whitespace()),
180+
"a");
181+
assert_eq!(" \t \t ".trim_left_matches(|c: char| c.is_whitespace()),
182+
"");
183+
assert_eq!(" \t \t ".trim_right_matches(|c: char| c.is_whitespace()),
184+
"");
185+
assert_eq!(" \t \t ".trim_matches(|c: char| c.is_whitespace()),
186+
"");
187+
}
188+
189+
mod pattern {
190+
use std::str::Pattern;
191+
use std::str::{Searcher, ReverseSearcher, DoubleEndedSearcher};
192+
use std::str::SearchStep::{self, Match, Reject, Done};
193+
194+
macro_rules! make_test {
195+
($name:ident, $p:expr, $h:expr, [$($e:expr,)*]) => {
196+
mod $name {
197+
use std::str::Pattern;
198+
use std::str::{Searcher, ReverseSearcher, DoubleEndedSearcher};
199+
use std::str::SearchStep::{self, Match, Reject, Done};
200+
use super::{cmp_search_to_vec};
201+
#[test]
202+
fn fwd() {
203+
cmp_search_to_vec(false, $p, $h, vec![$($e),*]);
204+
}
205+
#[test]
206+
fn bwd() {
207+
cmp_search_to_vec(true, $p, $h, vec![$($e),*]);
208+
}
209+
}
210+
}
211+
}
212+
213+
fn cmp_search_to_vec<'a, P: Pattern<'a>>(rev: bool, pat: P, haystack: &'a str,
214+
right: Vec<SearchStep>)
215+
where P::Searcher: ReverseSearcher<'a>
216+
{
217+
let mut searcher = pat.into_searcher(haystack);
218+
let mut v = vec![];
219+
loop {
220+
match if !rev {searcher.next()} else {searcher.next_back()} {
221+
Match(a, b) => v.push(Match(a, b)),
222+
Reject(a, b) => v.push(Reject(a, b)),
223+
Done => break,
224+
}
225+
}
226+
if rev {
227+
v.reverse();
228+
}
229+
assert_eq!(v, right);
230+
}
231+
232+
make_test!(str_searcher_ascii_haystack, "bb", "abbcbbd", [
233+
Reject(0, 1),
234+
Match (1, 3),
235+
Reject(3, 4),
236+
Match (4, 6),
237+
Reject(6, 7),
238+
]);
239+
make_test!(str_searcher_empty_needle_ascii_haystack, "", "abbcbbd", [
240+
Match(0, 0),
241+
Match(1, 1),
242+
Match(2, 2),
243+
Match(3, 3),
244+
Match(4, 4),
245+
Match(5, 5),
246+
Match(6, 6),
247+
Match(7, 7),
248+
]);
249+
make_test!(str_searcher_mulibyte_haystack, " ", "├──", [
250+
Reject(0, 3),
251+
Reject(3, 6),
252+
Reject(6, 9),
253+
]);
254+
make_test!(str_searcher_empty_needle_mulibyte_haystack, "", "├──", [
255+
Match(0, 0),
256+
Match(3, 3),
257+
Match(6, 6),
258+
Match(9, 9),
259+
]);
260+
make_test!(str_searcher_empty_needle_empty_haystack, "", "", [
261+
Match(0, 0),
262+
]);
263+
make_test!(str_searcher_nonempty_needle_empty_haystack, "├", "", [
264+
]);
265+
make_test!(char_searcher_ascii_haystack, 'b', "abbcbbd", [
266+
Reject(0, 1),
267+
Match (1, 2),
268+
Match (2, 3),
269+
Reject(3, 4),
270+
Match (4, 5),
271+
Match (5, 6),
272+
Reject(6, 7),
273+
]);
274+
make_test!(char_searcher_mulibyte_haystack, ' ', "├──", [
275+
Reject(0, 3),
276+
Reject(3, 6),
277+
Reject(6, 9),
278+
]);
279+
make_test!(char_searcher_short_haystack, '\u{1F4A9}', "* \t", [
280+
Reject(0, 1),
281+
Reject(1, 2),
282+
Reject(2, 3),
283+
]);
284+
285+
}
144286

145287
mod bench {
146288
macro_rules! make_test_inner {

0 commit comments

Comments
 (0)