@@ -463,7 +463,7 @@ pub fn each_split_char_nonempty(s: &str, sep: char, it: &fn(&str) -> bool) {
463
463
}
464
464
465
465
fn each_split_char_inner ( s : & str , sep : char , count : uint , allow_empty : bool ,
466
- allow_trailing_empty : bool ) , it: & fn ( & str ) -> bool ) {
466
+ allow_trailing_empty : bool , it : & fn ( & str ) -> bool ) {
467
467
if sep < 128 u as char {
468
468
let b = sep as u8 , l = len ( s) ;
469
469
let mut done = 0 u;
@@ -513,8 +513,8 @@ pub fn each_split_nonempty(s: &str, sepfn: &fn(char) -> bool, it: &fn(&str) -> b
513
513
each_split_inner ( s, sepfn, len ( s) , false , false , it)
514
514
}
515
515
516
- pure fn each_split_inner ( s : & str , sepfn : & fn ( cc : char ) -> bool , count : uint ,
517
- allow_empty : bool , allow_trailing_empty : bool ) , it: & fn ( & str ) -> bool ) {
516
+ fn each_split_inner ( s : & str , sepfn : & fn ( cc : char ) -> bool , count : uint ,
517
+ allow_empty : bool , allow_trailing_empty : bool , it : & fn ( & str ) -> bool ) {
518
518
let l = len ( s) ;
519
519
let mut i = 0 u, start = 0 u, done = 0 u;
520
520
while i < l && done < count {
@@ -534,7 +534,7 @@ pure fn each_split_inner(s: &str, sepfn: &fn(cc: char) -> bool, count: uint,
534
534
}
535
535
536
536
// See Issue #1932 for why this is a naive search
537
- fn iter_matches ( s : & ' a str , sep : & ' b str , f : & fn ( uint , uint ) ) {
537
+ fn iter_matches ( s : & ' a str , sep : & ' b str , f : & fn ( uint , uint ) -> bool ) {
538
538
let sep_len = len ( sep) , l = len ( s) ;
539
539
fail_unless ! ( sep_len > 0 u) ;
540
540
let mut i = 0 u, match_start = 0 u, match_i = 0 u;
@@ -545,7 +545,7 @@ fn iter_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint)) {
545
545
match_i += 1 u;
546
546
// Found a match
547
547
if match_i == sep_len {
548
- f ( match_start, i + 1 u) ;
548
+ if ! f ( match_start, i + 1 u) { return ; }
549
549
match_i = 0 u;
550
550
}
551
551
i += 1 u;
@@ -561,10 +561,10 @@ fn iter_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint)) {
561
561
}
562
562
}
563
563
564
- fn iter_between_matches ( s : & ' a str , sep : & ' b str , f : & fn ( uint , uint ) ) {
564
+ fn iter_between_matches ( s : & ' a str , sep : & ' b str , f : & fn ( uint , uint ) -> bool ) {
565
565
let mut last_end = 0 u;
566
- do iter_matches ( s, sep) |from, to| {
567
- f ( last_end, from) ;
566
+ for iter_matches( s, sep) |from, to| {
567
+ if ! f ( last_end, from) { return ; }
568
568
last_end = to;
569
569
}
570
570
f ( last_end, len ( s) ) ;
@@ -580,13 +580,13 @@ fn iter_between_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint)) {
580
580
* ~~~
581
581
*/
582
582
pub fn each_split_str ( s : & ' a str , sep : & ' b str , it : & fn ( & str ) -> bool ) {
583
- do iter_between_matches ( s, sep) |from, to| {
583
+ for iter_between_matches( s, sep) |from, to| {
584
584
if !it ( unsafe { raw:: slice_bytes ( s, from, to) } ) { return ; }
585
585
}
586
586
}
587
587
588
588
pub fn each_split_str_nonempty ( s : & ' a str , sep : & ' b str , it : & fn ( & str ) -> bool ) {
589
- do iter_between_matches ( s, sep) |from, to| {
589
+ for iter_between_matches( s, sep) |from, to| {
590
590
if to > from {
591
591
if !it ( unsafe { raw:: slice_bytes ( s, from, to) } ) { return ; }
592
592
}
@@ -630,7 +630,7 @@ pub fn levdistance(s: &str, t: &str) -> uint {
630
630
/**
631
631
* Splits a string into a vector of the substrings separated by LF ('\n').
632
632
*/
633
- pub fn each_line ( s : & str , it : & fn ( & str ) -> bool ) { each_split_char ( s, '\n' , it) }
633
+ pub fn each_line ( s : & str , it : & fn ( & str ) -> bool ) { each_split_char_no_trailing ( s, '\n' , it) }
634
634
635
635
/**
636
636
* Splits a string into a vector of the substrings separated by LF ('\n')
@@ -656,52 +656,56 @@ pub fn each_word(s: &str, it: &fn(&str) -> bool) {
656
656
* each of which is less bytes long than a limit
657
657
*/
658
658
pub fn each_split_within( ss : & str , lim : uint , it : & fn ( & str ) -> bool ) {
659
- let words = str:: words ( ss) ;
660
-
661
- // empty?
662
- if words == ~[ ] { return ~[ ] ; }
663
-
664
- let mut rows : ~[ ~str ] = ~[ ] ;
665
- let mut row : ~str = ~"";
666
-
667
- for words. each |wptr| {
668
- let word = copy * wptr;
669
-
670
- // if adding this word to the row would go over the limit,
671
- // then start a new row
672
- if row. len ( ) + word. len ( ) + 1 > lim {
673
- rows. push ( copy row) ; // save previous row
674
- row = word; // start a new one
675
- } else {
676
- if row. len ( ) > 0 { row += ~" " } // separate words
677
- row += word; // append to this row
678
- }
659
+ // Just for fun, let's write this as an automaton
660
+ enum SplitWithinState {
661
+ A , // Leading whitespace, initial state
662
+ B , // Words
663
+ C , // Internal and trailing whitespace
679
664
}
665
+ enum Whitespace { Ws , Cr }
666
+ enum LengthLimit { UnderLim , OverLim }
680
667
681
- // save the last row
682
- if row != ~" " { rows. push ( row) ; }
668
+ let mut slice_start = 0 ;
669
+ let mut last_start = 0 ;
670
+ let mut last_end = 0 ;
671
+ let mut state = A ;
683
672
684
- rows
685
- // NOTE: Finish change here
673
+ let mut cont = true ;
674
+ let slice = || { cont = it ( ss . slice ( slice_start , last_end ) ) } ;
686
675
687
- let mut last_slice_i = 0 , last_word_i = 0 , word_start = true ;
688
- for each_chari( s) |i, c| {
689
- if ( i - last_slice_i) <= lim {
690
- if char:: is_whitespace ( c) {
676
+ let machine = |i : uint , c : char | {
677
+ let whitespace = if char:: is_whitespace ( c) { Ws } else { Cr } ;
678
+ let limit = if ( i - slice_start + 1 ) <= lim { UnderLim } else { OverLim } ;
691
679
692
- } else {
680
+ state = match ( state, whitespace, limit) {
681
+ ( A , Ws , _) => { A }
682
+ ( A , Cr , _) => { slice_start = i; last_start = i; B }
693
683
694
- }
695
- } else {
684
+ ( B , Cr , UnderLim ) => { B }
685
+ ( B , Cr , OverLim ) if ( i - last_start + 1 ) > lim
686
+ => { fail ! ( ~"word longer than limit!") }
687
+ (B, Cr, OverLim) => { slice(); slice_start = last_start; B }
688
+ (B, Ws, UnderLim) => { last_end = i; C }
689
+ (B, Ws, OverLim) => { last_end = i; slice(); A }
696
690
697
- }
691
+ (C, Cr, UnderLim) => { last_start = i; B }
692
+ (C, Cr, OverLim) => { slice(); slice_start = i; last_start = i; last_end = i; B }
693
+ (C, Ws, OverLim) => { slice(); A }
694
+ (C, Ws, UnderLim) => { C }
695
+ };
696
+ cont
697
+ };
698
698
699
+ str::each_chari(ss, machine);
699
700
701
+ // Let the automaton 'run out'
702
+ let mut fake_i = ss.len();
703
+ while cont && match state { B | C => true, A => false } {
704
+ machine(fake_i, ' ');
705
+ fake_i += 1;
700
706
}
701
707
}
702
708
703
-
704
-
705
709
/// Convert a string to lowercase. ASCII only
706
710
pub fn to_lower(s: &str) -> ~str {
707
711
map(s,
@@ -731,7 +735,7 @@ pub fn to_upper(s: &str) -> ~str {
731
735
*/
732
736
pub fn replace(s: &str, from: &str, to: &str) -> ~str {
733
737
let mut result = ~" ", first = true;
734
- do iter_between_matches ( s, from) |start, end| {
738
+ for iter_between_matches(s, from) |start, end| {
735
739
if first {
736
740
first = false;
737
741
} else {
@@ -2286,9 +2290,9 @@ pub trait StrSlice {
2286
2290
fn len(&self) -> uint;
2287
2291
fn char_len(&self) -> uint;
2288
2292
fn slice(&self, begin: uint, end: uint) -> &'self str;
2289
- fn split (&self, sepfn: &fn(char) -> bool) -> ~[~str] ;
2290
- fn split_char (&self, sep: char) -> ~[~str] ;
2291
- fn split_str (&self, sep: &'a str) -> ~[~str] ;
2293
+ fn each_split (&self, sepfn: &fn(char) -> bool, it: &fn(&str ) -> bool) ;
2294
+ fn each_split_char (&self, sep: char, it: &fn(&str ) -> bool) ;
2295
+ fn each_split_str (&self, sep: &'a str, it: &fn(&str ) -> bool) ;
2292
2296
fn starts_with(&self, needle: &'a str) -> bool;
2293
2297
fn substr(&self, begin: uint, n: uint) -> &'self str;
2294
2298
fn to_lower(&self) -> ~str;
@@ -2408,20 +2412,24 @@ impl StrSlice for &'self str {
2408
2412
}
2409
2413
/// Splits a string into substrings using a character function
2410
2414
#[inline]
2411
- fn split (&self, sepfn: &fn(char) -> bool) -> ~[~str] {
2412
- split (*self, sepfn)
2415
+ fn each_split (&self, sepfn: &fn(char) -> bool, it: &fn(&str ) -> bool) {
2416
+ each_split (*self, sepfn, it )
2413
2417
}
2414
2418
/**
2415
2419
* Splits a string into substrings at each occurrence of a given character
2416
2420
*/
2417
2421
#[inline]
2418
- fn split_char(&self, sep: char) -> ~[~str] { split_char(*self, sep) }
2422
+ fn each_split_char(&self, sep: char, it: &fn(&str) -> bool) {
2423
+ each_split_char(*self, sep, it)
2424
+ }
2419
2425
/**
2420
2426
* Splits a string into a vector of the substrings separated by a given
2421
2427
* string
2422
2428
*/
2423
2429
#[inline]
2424
- fn split_str(&self, sep: &'a str) -> ~[~str] { split_str(*self, sep) }
2430
+ fn each_split_str(&self, sep: &'a str, it: &fn(&str) -> bool) {
2431
+ each_split_str(*self, sep, it)
2432
+ }
2425
2433
/// Returns true if one string starts with another
2426
2434
#[inline]
2427
2435
fn starts_with(&self, needle: &'a str) -> bool {
0 commit comments