@@ -437,28 +437,37 @@ pub pure fn slice(s: &'a str, begin: uint, end: uint) -> &'a str {
437
437
unsafe { raw:: slice_bytes ( s, begin, end) }
438
438
}
439
439
440
- /// Splits a string into substrings at each occurrence of a given character
440
+ /// Splits a string into substrings at each occurrence of a given
441
+ /// character.
441
442
pub pure fn split_char ( s : & str , sep : char ) -> ~[ ~str ] {
442
- split_char_inner ( s, sep, len ( s) , true )
443
+ split_char_inner ( s, sep, len ( s) , true , true )
443
444
}
444
445
445
446
/**
446
447
* Splits a string into substrings at each occurrence of a given
447
- * character up to 'count' times
448
+ * character up to 'count' times.
448
449
*
449
450
* The separator may be any `char`; it need not be a single ASCII byte
450
451
*/
451
452
pub pure fn splitn_char ( s : & str , sep : char , count : uint ) -> ~[ ~str ] {
452
- split_char_inner ( s, sep, count, true )
453
+ split_char_inner ( s, sep, count, true , true )
453
454
}
454
455
455
456
/// Like `split_char`, but omits empty strings from the returned vector
456
457
pub pure fn split_char_nonempty ( s : & str , sep : char ) -> ~[ ~str ] {
457
- split_char_inner ( s, sep, len ( s) , false )
458
+ split_char_inner ( s, sep, len ( s) , false , false )
458
459
}
459
460
460
- pure fn split_char_inner ( s : & str , sep : char , count : uint , allow_empty : bool )
461
- -> ~[ ~str ] {
461
+ /**
462
+ * Like `split_char`, but a trailing empty string is omitted
463
+ * (e.g. `split_char_no_trailing("A B ",' ') == ~[~"A",~"B"]`)
464
+ */
465
+ pub pure fn split_char_no_trailing ( s : & str , sep : char ) -> ~[ ~str ] {
466
+ split_char_inner ( s, sep, len ( s) , true , false )
467
+ }
468
+
469
+ pure fn split_char_inner ( s : & str , sep : char , count : uint , allow_empty : bool ,
470
+ allow_trailing_empty : bool ) -> ~[ ~str ] {
462
471
if sep < 128 u as char {
463
472
let b = sep as u8 , l = len ( s) ;
464
473
let mut result = ~[ ] , done = 0 u;
@@ -475,19 +484,20 @@ pure fn split_char_inner(s: &str, sep: char, count: uint, allow_empty: bool)
475
484
}
476
485
i += 1 u;
477
486
}
478
- if allow_empty || start < l {
487
+ // push the trailing substring unless it is empty and trailing empties are disallowed
488
+ if allow_trailing_empty || start < l {
479
489
unsafe { result. push ( raw:: slice_bytes_unique ( s, start, l) ) } ;
480
490
}
481
491
result
482
492
} else {
483
- splitn ( s, |cur| cur == sep, count)
493
+ split_inner ( s, |cur| cur == sep, count, allow_empty , allow_trailing_empty )
484
494
}
485
495
}
486
496
487
497
488
498
/// Splits a string into substrings using a character function
489
499
pub pure fn split ( s : & str , sepfn : & fn ( char ) -> bool ) -> ~[ ~str ] {
490
- split_inner ( s, sepfn, len ( s) , true )
500
+ split_inner ( s, sepfn, len ( s) , true , true )
491
501
}
492
502
493
503
/**
@@ -498,16 +508,25 @@ pub pure fn splitn(s: &str,
498
508
sepfn : & fn ( char ) -> bool ,
499
509
count : uint )
500
510
-> ~[ ~str ] {
501
- split_inner ( s, sepfn, count, true )
511
+ split_inner ( s, sepfn, count, true , true )
502
512
}
503
513
504
514
/// Like `split`, but omits empty strings from the returned vector
505
515
pub pure fn split_nonempty ( s : & str , sepfn : & fn ( char ) -> bool ) -> ~[ ~str ] {
506
- split_inner ( s, sepfn, len ( s) , false )
516
+ split_inner ( s, sepfn, len ( s) , false , false )
517
+ }
518
+
519
+
520
+ /**
521
+ * Like `split`, but a trailing empty string is omitted
522
+ * (e.g. `split_no_trailing("A B ", |c| c == ' ') == ~[~"A",~"B"]`)
523
+ */
524
+ pub pure fn split_no_trailing ( s : & str , sepfn : & fn ( char ) -> bool ) -> ~[ ~str ] {
525
+ split_inner ( s, sepfn, len ( s) , true , false )
507
526
}
508
527
509
528
pure fn split_inner ( s : & str , sepfn : & fn ( cc : char ) -> bool , count : uint ,
510
- allow_empty : bool ) -> ~[ ~str ] {
529
+ allow_empty : bool , allow_trailing_empty : bool ) -> ~[ ~str ] {
511
530
let l = len ( s) ;
512
531
let mut result = ~[ ] , i = 0 u, start = 0 u, done = 0 u;
513
532
while i < l && done < count {
@@ -523,7 +542,7 @@ pure fn split_inner(s: &str, sepfn: &fn(cc: char) -> bool, count: uint,
523
542
}
524
543
i = next;
525
544
}
526
- if allow_empty || start < l {
545
+ if allow_trailing_empty || start < l {
527
546
unsafe {
528
547
result. push ( raw:: slice_bytes_unique ( s, start, l) ) ;
529
548
}
@@ -630,9 +649,11 @@ pub fn levdistance(s: &str, t: &str) -> uint {
630
649
}
631
650
632
651
/**
633
- * Splits a string into a vector of the substrings separated by LF ('\n')
652
+ * Splits a string into a vector of the substrings separated by LF ('\n').
634
653
*/
635
- pub pure fn lines ( s : & str ) -> ~[ ~str ] { split_char ( s, '\n' ) }
654
+ pub pure fn lines ( s : & str ) -> ~[ ~str ] {
655
+ split_char_no_trailing ( s, '\n' )
656
+ }
636
657
637
658
/**
638
659
* Splits a string into a vector of the substrings separated by LF ('\n')
@@ -651,7 +672,7 @@ pub pure fn lines_any(s: &str) -> ~[~str] {
651
672
652
673
/// Splits a string into a vector of the substrings separated by whitespace
653
674
pub pure fn words ( s : & str ) -> ~[ ~str ] {
654
- split_nonempty ( s, |c| char:: is_whitespace ( c ) )
675
+ split_nonempty ( s, char:: is_whitespace)
655
676
}
656
677
657
678
/** Split a string into a vector of substrings,
@@ -2669,6 +2690,35 @@ mod tests {
2669
2690
2670
2691
}
2671
2692
2693
+ #[ test]
2694
+ fn test_split_char_no_trailing( ) {
2695
+ fn t( s: & str , c: char , u: & [ ~str ] ) {
2696
+ debug!( ~"split_byte: " + s) ;
2697
+ let v = split_char_no_trailing( s, c) ;
2698
+ debug!( "split_byte to: %?", v) ;
2699
+ fail_unless!( vec:: all2( v, u, |a, b| a == b) ) ;
2700
+ }
2701
+ t( ~"abc. hello. there", '.' , ~[ ~"abc", ~"hello", ~"there"] ) ;
2702
+ t( ~". hello. there", '.' , ~[ ~"", ~"hello", ~"there"] ) ;
2703
+ t( ~"...hello. there. ", '.' , ~[ ~"", ~"", ~"", ~"hello", ~"there"] ) ;
2704
+
2705
+ fail_unless!( ~[ ~"", ~"", ~"", ~"hello", ~"there"]
2706
+ == split_char_no_trailing( ~"...hello. there. ", '.' ) ) ;
2707
+
2708
+ fail_unless!( ~[ ] == split_char_no_trailing( ~"", 'z' ) ) ;
2709
+ fail_unless!( ~[ ~""] == split_char_no_trailing( ~"z", 'z' ) ) ;
2710
+ fail_unless!( ~[ ~"ok"] == split_char_no_trailing( ~"ok", 'z' ) ) ;
2711
+ }
2712
+
2713
+ #[ test]
2714
+ fn test_split_char_no_trailing_2( ) {
2715
+ let data = ~"ประเทศไทย中华Việt Nam ";
2716
+ fail_unless!( ~[ ~"ประเทศไทย中华", ~"iệt Nam "]
2717
+ == split_char_no_trailing( data, 'V' ) ) ;
2718
+ fail_unless!( ~[ ~"ประเ", ~"ศไ", ~"ย中华Việt Nam "]
2719
+ == split_char_no_trailing( data, 'ท' ) ) ;
2720
+ }
2721
+
2672
2722
#[ test]
2673
2723
fn test_split_str( ) {
2674
2724
fn t( s: & str , sep: & ' a str , i: int, k: & str ) {
@@ -2722,28 +2772,45 @@ mod tests {
2722
2772
fail_unless!( ~[ ~"ok"] == split( ~"ok", |cc| cc == 'z' ) ) ;
2723
2773
}
2724
2774
2775
+ #[ test]
2776
+ fn test_split_no_trailing( ) {
2777
+ let data = ~"ประเทศไทย中华Việt Nam ";
2778
+ fail_unless!( ~[ ~"ประเทศไทย中", ~"Việt Nam "]
2779
+ == split_no_trailing ( data, |cc| cc == '华' ) ) ;
2780
+
2781
+ fail_unless!( ~[ ~"", ~"", ~"XXX ", ~"YYY "]
2782
+ == split_no_trailing( ~"zzXXXzYYYz", char :: is_lowercase) ) ;
2783
+
2784
+ fail_unless!( ~[ ~"zz", ~"", ~"", ~"z", ~"", ~"", ~"z"]
2785
+ == split_no_trailing( ~"zzXXXzYYYz", char :: is_uppercase) ) ;
2786
+
2787
+ fail_unless!( ~[ ~""] == split_no_trailing( ~"z", |cc| cc == 'z' ) ) ;
2788
+ fail_unless!( ~[ ] == split_no_trailing( ~"", |cc| cc == 'z' ) ) ;
2789
+ fail_unless!( ~[ ~"ok"] == split_no_trailing( ~"ok", |cc| cc == 'z' ) ) ;
2790
+ }
2791
+
2725
2792
#[ test]
2726
2793
fn test_lines( ) {
2727
2794
let lf = ~"\n Mary had a little lamb\n Little lamb\n ";
2728
2795
let crlf = ~"\r \n Mary had a little lamb\r \n Little lamb\r \n ";
2729
2796
2730
- fail_unless!( ~[ ~"", ~"Mary had a little lamb", ~"Little lamb", ~"" ]
2797
+ fail_unless!( ~[ ~"", ~"Mary had a little lamb", ~"Little lamb"]
2731
2798
== lines( lf) ) ;
2732
2799
2733
- fail_unless!( ~[ ~"", ~"Mary had a little lamb", ~"Little lamb", ~"" ]
2800
+ fail_unless!( ~[ ~"", ~"Mary had a little lamb", ~"Little lamb"]
2734
2801
== lines_any( lf) ) ;
2735
2802
2736
2803
fail_unless!( ~[ ~"\r ", ~"Mary had a little lamb\r ",
2737
- ~"Little lamb\r ", ~"" ]
2804
+ ~"Little lamb\r "]
2738
2805
== lines( crlf) ) ;
2739
2806
2740
- fail_unless!( ~[ ~"", ~"Mary had a little lamb", ~"Little lamb", ~"" ]
2807
+ fail_unless!( ~[ ~"", ~"Mary had a little lamb", ~"Little lamb"]
2741
2808
== lines_any( crlf) ) ;
2742
2809
2743
- fail_unless!( ~[ ~"" ] == lines ( ~"") ) ;
2744
- fail_unless!( ~[ ~"" ] == lines_any( ~"") ) ;
2745
- fail_unless!( ~[ ~"", ~"" ] == lines ( ~"\n ") ) ;
2746
- fail_unless!( ~[ ~"", ~"" ] == lines_any( ~"\n ") ) ;
2810
+ fail_unless!( ~[ ] == lines ( ~"") ) ;
2811
+ fail_unless!( ~[ ] == lines_any( ~"") ) ;
2812
+ fail_unless!( ~[ ~""] == lines ( ~"\n ") ) ;
2813
+ fail_unless!( ~[ ~""] == lines_any( ~"\n ") ) ;
2747
2814
fail_unless!( ~[ ~"banana"] == lines ( ~"banana") ) ;
2748
2815
fail_unless!( ~[ ~"banana"] == lines_any( ~"banana") ) ;
2749
2816
}
@@ -3359,7 +3426,6 @@ mod tests {
3359
3426
0 => fail_unless!( "" == x) ,
3360
3427
1 => fail_unless!( "Mary had a little lamb" == x) ,
3361
3428
2 => fail_unless!( "Little lamb" == x) ,
3362
- 3 => fail_unless!( "" == x) ,
3363
3429
_ => ( )
3364
3430
}
3365
3431
ii += 1 ;
0 commit comments