Skip to content

Commit b9de2b5

Browse files
committed
Switched over a bunch of splitting functions to non-allocating iterators
1 parent d74606e commit b9de2b5

File tree

13 files changed

+135
-98
lines changed

13 files changed

+135
-98
lines changed

src/libcore/num/strconv.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -132,10 +132,10 @@ impl_NumStrConv_Integer!(u64)
132132

133133

134134
// Special value strings as [u8] consts.
135-
const inf_buf: [u8*3] = ['i' as u8, 'n' as u8, 'f' as u8];
136-
const positive_inf_buf: [u8*4] = ['+' as u8, 'i' as u8, 'n' as u8, 'f' as u8];
137-
const negative_inf_buf: [u8*4] = ['-' as u8, 'i' as u8, 'n' as u8, 'f' as u8];
138-
const nan_buf: [u8*3] = ['N' as u8, 'a' as u8, 'N' as u8];
135+
static inf_buf: [u8*3] = ['i' as u8, 'n' as u8, 'f' as u8];
136+
static positive_inf_buf: [u8*4] = ['+' as u8, 'i' as u8, 'n' as u8, 'f' as u8];
137+
static negative_inf_buf: [u8*4] = ['-' as u8, 'i' as u8, 'n' as u8, 'f' as u8];
138+
static nan_buf: [u8*3] = ['N' as u8, 'a' as u8, 'N' as u8];
139139

140140
/**
141141
* Converts a number to its string representation as a byte vector.

src/libcore/os.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,8 @@ pub fn env() -> ~[(~str,~str)] {
218218
fn env_convert(input: ~[~str]) -> ~[(~str, ~str)] {
219219
let mut pairs = ~[];
220220
for input.each |p| {
221-
let vs = str::splitn_char(*p, '=', 1);
221+
let mut vs = ~[];
222+
for str::each_splitn_char(*p, '=', 1) |s| { vs.push(s.to_owned()) }
222223
debug!("splitting: len: %u",
223224
vs.len());
224225
fail_unless!(vs.len() == 2);

src/libcore/path.rs

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,8 @@ impl ToStr for PosixPath {
381381
impl GenericPath for PosixPath {
382382

383383
fn from_str(s: &str) -> PosixPath {
384-
let mut components = str::split_nonempty(s, |c| c == '/');
384+
let mut components = ~[];
385+
for str::each_split_nonempty(s, |c| c == '/') |s| { components.push(s.to_owned()) }
385386
let is_absolute = (s.len() != 0 && s[0] == '/' as u8);
386387
return PosixPath { is_absolute: is_absolute,
387388
components: components }
@@ -504,9 +505,10 @@ impl GenericPath for PosixPath {
504505
fn push_many(&self, cs: &[~str]) -> PosixPath {
505506
let mut v = copy self.components;
506507
for cs.each |e| {
507-
let mut ss = str::split_nonempty(
508-
*e,
509-
|c| windows::is_sep(c as u8));
508+
let mut ss = ~[];
509+
for str::each_split_nonempty(*e, |c| windows::is_sep(c as u8)) |s| {
510+
ss.push(s.to_owned())
511+
}
510512
unsafe { v.push_all_move(ss); }
511513
}
512514
PosixPath { is_absolute: self.is_absolute,
@@ -515,7 +517,10 @@ impl GenericPath for PosixPath {
515517

516518
fn push(&self, s: &str) -> PosixPath {
517519
let mut v = copy self.components;
518-
let mut ss = str::split_nonempty(s, |c| windows::is_sep(c as u8));
520+
let mut ss = ~[];
521+
for str::each_split_nonempty(s, |c| windows::is_sep(c as u8)) |s| {
522+
ss.push(s.to_owned())
523+
}
519524
unsafe { v.push_all_move(ss); }
520525
PosixPath { components: v, ..copy *self }
521526
}
@@ -590,8 +595,10 @@ impl GenericPath for WindowsPath {
590595
}
591596
}
592597

593-
let mut components =
594-
str::split_nonempty(rest, |c| windows::is_sep(c as u8));
598+
let mut components = ~[];
599+
for str::each_split_nonempty(rest, |c| windows::is_sep(c as u8)) |s| {
600+
components.push(s.to_owned())
601+
}
595602
let is_absolute = (rest.len() != 0 && windows::is_sep(rest[0]));
596603
return WindowsPath { host: host,
597604
device: device,
@@ -759,9 +766,10 @@ impl GenericPath for WindowsPath {
759766
fn push_many(&self, cs: &[~str]) -> WindowsPath {
760767
let mut v = copy self.components;
761768
for cs.each |e| {
762-
let mut ss = str::split_nonempty(
763-
*e,
764-
|c| windows::is_sep(c as u8));
769+
let mut ss = ~[];
770+
for str::each_split_nonempty(*e, |c| windows::is_sep(c as u8)) |s| {
771+
ss.push(s.to_owned())
772+
}
765773
unsafe { v.push_all_move(ss); }
766774
}
767775
// tedious, but as-is, we can't use ..self
@@ -775,7 +783,10 @@ impl GenericPath for WindowsPath {
775783

776784
fn push(&self, s: &str) -> WindowsPath {
777785
let mut v = copy self.components;
778-
let mut ss = str::split_nonempty(s, |c| windows::is_sep(c as u8));
786+
let mut ss = ~[];
787+
for str::each_split_nonempty(s, |c| windows::is_sep(c as u8)) |s| {
788+
ss.push(s.to_owned())
789+
}
779790
unsafe { v.push_all_move(ss); }
780791
return WindowsPath { components: v, ..copy *self }
781792
}

src/libcore/rand.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,9 @@ impl RngUtil for @Rng {
327327
*/
328328
fn gen_char_from(&self, chars: &str) -> char {
329329
fail_unless!(!chars.is_empty());
330-
self.choose(str::chars(chars))
330+
let mut cs = ~[];
331+
for str::each_char(chars) |c| { cs.push(c) }
332+
self.choose(cs)
331333
}
332334

333335
/// Return a random bool

src/libcore/str.rs

Lines changed: 61 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -463,7 +463,7 @@ pub fn each_split_char_nonempty(s: &str, sep: char, it: &fn(&str) -> bool) {
463463
}
464464

465465
fn each_split_char_inner(s: &str, sep: char, count: uint, allow_empty: bool,
466-
allow_trailing_empty: bool), it: &fn(&str) -> bool) {
466+
allow_trailing_empty: bool, it: &fn(&str) -> bool) {
467467
if sep < 128u as char {
468468
let b = sep as u8, l = len(s);
469469
let mut done = 0u;
@@ -513,8 +513,8 @@ pub fn each_split_nonempty(s: &str, sepfn: &fn(char) -> bool, it: &fn(&str) -> b
513513
each_split_inner(s, sepfn, len(s), false, false, it)
514514
}
515515

516-
pure fn each_split_inner(s: &str, sepfn: &fn(cc: char) -> bool, count: uint,
517-
allow_empty: bool, allow_trailing_empty: bool), it: &fn(&str) -> bool) {
516+
fn each_split_inner(s: &str, sepfn: &fn(cc: char) -> bool, count: uint,
517+
allow_empty: bool, allow_trailing_empty: bool, it: &fn(&str) -> bool) {
518518
let l = len(s);
519519
let mut i = 0u, start = 0u, done = 0u;
520520
while i < l && done < count {
@@ -534,7 +534,7 @@ pure fn each_split_inner(s: &str, sepfn: &fn(cc: char) -> bool, count: uint,
534534
}
535535

536536
// See Issue #1932 for why this is a naive search
537-
fn iter_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint)) {
537+
fn iter_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint) -> bool) {
538538
let sep_len = len(sep), l = len(s);
539539
fail_unless!(sep_len > 0u);
540540
let mut i = 0u, match_start = 0u, match_i = 0u;
@@ -545,7 +545,7 @@ fn iter_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint)) {
545545
match_i += 1u;
546546
// Found a match
547547
if match_i == sep_len {
548-
f(match_start, i + 1u);
548+
if !f(match_start, i + 1u) { return; }
549549
match_i = 0u;
550550
}
551551
i += 1u;
@@ -561,10 +561,10 @@ fn iter_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint)) {
561561
}
562562
}
563563

564-
fn iter_between_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint)) {
564+
fn iter_between_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint) -> bool) {
565565
let mut last_end = 0u;
566-
do iter_matches(s, sep) |from, to| {
567-
f(last_end, from);
566+
for iter_matches(s, sep) |from, to| {
567+
if !f(last_end, from) { return; }
568568
last_end = to;
569569
}
570570
f(last_end, len(s));
@@ -580,13 +580,13 @@ fn iter_between_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint)) {
580580
* ~~~
581581
*/
582582
pub fn each_split_str(s: &'a str, sep: &'b str, it: &fn(&str) -> bool) {
583-
do iter_between_matches(s, sep) |from, to| {
583+
for iter_between_matches(s, sep) |from, to| {
584584
if !it( unsafe { raw::slice_bytes(s, from, to) } ) { return; }
585585
}
586586
}
587587

588588
pub fn each_split_str_nonempty(s: &'a str, sep: &'b str, it: &fn(&str) -> bool) {
589-
do iter_between_matches(s, sep) |from, to| {
589+
for iter_between_matches(s, sep) |from, to| {
590590
if to > from {
591591
if !it( unsafe { raw::slice_bytes(s, from, to) } ) { return; }
592592
}
@@ -630,7 +630,7 @@ pub fn levdistance(s: &str, t: &str) -> uint {
630630
/**
631631
* Splits a string into a vector of the substrings separated by LF ('\n').
632632
*/
633-
pub fn each_line(s: &str, it: &fn(&str) -> bool) { each_split_char(s, '\n', it) }
633+
pub fn each_line(s: &str, it: &fn(&str) -> bool) { each_split_char_no_trailing(s, '\n', it) }
634634

635635
/**
636636
* Splits a string into a vector of the substrings separated by LF ('\n')
@@ -656,52 +656,56 @@ pub fn each_word(s: &str, it: &fn(&str) -> bool) {
656656
* each of which is less bytes long than a limit
657657
*/
658658
pub fn each_split_within(ss: &str, lim: uint, it: &fn(&str) -> bool) {
659-
let words = str::words(ss);
660-
661-
// empty?
662-
if words == ~[] { return ~[]; }
663-
664-
let mut rows : ~[~str] = ~[];
665-
let mut row : ~str = ~"";
666-
667-
for words.each |wptr| {
668-
let word = copy *wptr;
669-
670-
// if adding this word to the row would go over the limit,
671-
// then start a new row
672-
if row.len() + word.len() + 1 > lim {
673-
rows.push(copy row); // save previous row
674-
row = word; // start a new one
675-
} else {
676-
if row.len() > 0 { row += ~" " } // separate words
677-
row += word; // append to this row
678-
}
659+
// Just for fun, let's write this as an automaton
660+
enum SplitWithinState {
661+
A, // Leading whitespace, initial state
662+
B, // Words
663+
C, // Internal and trailing whitespace
679664
}
665+
enum Whitespace { Ws, Cr }
666+
enum LengthLimit { UnderLim, OverLim }
680667

681-
// save the last row
682-
if row != ~"" { rows.push(row); }
668+
let mut slice_start = 0;
669+
let mut last_start = 0;
670+
let mut last_end = 0;
671+
let mut state = A;
683672

684-
rows
685-
// NOTE: Finish change here
673+
let mut cont = true;
674+
let slice = || { cont = it(ss.slice(slice_start, last_end)) };
686675

687-
let mut last_slice_i = 0, last_word_i = 0, word_start = true;
688-
for each_chari(s) |i, c| {
689-
if (i - last_slice_i) <= lim {
690-
if char::is_whitespace(c) {
676+
let machine = |i: uint, c: char| {
677+
let whitespace = if char::is_whitespace(c) { Ws } else { Cr };
678+
let limit = if (i - slice_start + 1) <= lim { UnderLim } else { OverLim };
691679

692-
} else {
680+
state = match (state, whitespace, limit) {
681+
(A, Ws, _) => { A }
682+
(A, Cr, _) => { slice_start = i; last_start = i; B }
693683

694-
}
695-
} else {
684+
(B, Cr, UnderLim) => { B }
685+
(B, Cr, OverLim) if (i - last_start + 1) > lim
686+
=> { fail!(~"word longer than limit!") }
687+
(B, Cr, OverLim) => { slice(); slice_start = last_start; B }
688+
(B, Ws, UnderLim) => { last_end = i; C }
689+
(B, Ws, OverLim) => { last_end = i; slice(); A }
696690
697-
}
691+
(C, Cr, UnderLim) => { last_start = i; B }
692+
(C, Cr, OverLim) => { slice(); slice_start = i; last_start = i; last_end = i; B }
693+
(C, Ws, OverLim) => { slice(); A }
694+
(C, Ws, UnderLim) => { C }
695+
};
696+
cont
697+
};
698698
699+
str::each_chari(ss, machine);
699700
701+
// Let the automaton 'run out'
702+
let mut fake_i = ss.len();
703+
while cont && match state { B | C => true, A => false } {
704+
machine(fake_i, ' ');
705+
fake_i += 1;
700706
}
701707
}
702708
703-
704-
705709
/// Convert a string to lowercase. ASCII only
706710
pub fn to_lower(s: &str) -> ~str {
707711
map(s,
@@ -731,7 +735,7 @@ pub fn to_upper(s: &str) -> ~str {
731735
*/
732736
pub fn replace(s: &str, from: &str, to: &str) -> ~str {
733737
let mut result = ~"", first = true;
734-
do iter_between_matches(s, from) |start, end| {
738+
for iter_between_matches(s, from) |start, end| {
735739
if first {
736740
first = false;
737741
} else {
@@ -2286,9 +2290,9 @@ pub trait StrSlice {
22862290
fn len(&self) -> uint;
22872291
fn char_len(&self) -> uint;
22882292
fn slice(&self, begin: uint, end: uint) -> &'self str;
2289-
fn split(&self, sepfn: &fn(char) -> bool) -> ~[~str];
2290-
fn split_char(&self, sep: char) -> ~[~str];
2291-
fn split_str(&self, sep: &'a str) -> ~[~str];
2293+
fn each_split(&self, sepfn: &fn(char) -> bool, it: &fn(&str) -> bool);
2294+
fn each_split_char(&self, sep: char, it: &fn(&str) -> bool);
2295+
fn each_split_str(&self, sep: &'a str, it: &fn(&str) -> bool);
22922296
fn starts_with(&self, needle: &'a str) -> bool;
22932297
fn substr(&self, begin: uint, n: uint) -> &'self str;
22942298
fn to_lower(&self) -> ~str;
@@ -2408,20 +2412,24 @@ impl StrSlice for &'self str {
24082412
}
24092413
/// Splits a string into substrings using a character function
24102414
#[inline]
2411-
fn split(&self, sepfn: &fn(char) -> bool) -> ~[~str] {
2412-
split(*self, sepfn)
2415+
fn each_split(&self, sepfn: &fn(char) -> bool, it: &fn(&str) -> bool) {
2416+
each_split(*self, sepfn, it)
24132417
}
24142418
/**
24152419
* Splits a string into substrings at each occurrence of a given character
24162420
*/
24172421
#[inline]
2418-
fn split_char(&self, sep: char) -> ~[~str] { split_char(*self, sep) }
2422+
fn each_split_char(&self, sep: char, it: &fn(&str) -> bool) {
2423+
each_split_char(*self, sep, it)
2424+
}
24192425
/**
24202426
* Splits a string into a vector of the substrings separated by a given
24212427
* string
24222428
*/
24232429
#[inline]
2424-
fn split_str(&self, sep: &'a str) -> ~[~str] { split_str(*self, sep) }
2430+
fn each_split_str(&self, sep: &'a str, it: &fn(&str) -> bool) {
2431+
each_split_str(*self, sep, it)
2432+
}
24252433
/// Returns true if one string starts with another
24262434
#[inline]
24272435
fn starts_with(&self, needle: &'a str) -> bool {

src/librustc/metadata/cstore.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,9 @@ pub fn get_used_libraries(cstore: @mut CStore) -> ~[~str] {
120120
}
121121

122122
pub fn add_used_link_args(cstore: @mut CStore, args: &str) {
123-
cstore.used_link_args.push_all(args.split_char(' '));
123+
for args.each_split_char(' ') |s| {
124+
cstore.used_link_args.push(s.to_owned());
125+
}
124126
}
125127

126128
pub fn get_used_link_args(cstore: @mut CStore) -> ~[~str] {

src/librustc/middle/resolve.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ use syntax::visit::{visit_mod, visit_ty, vt};
7676
use syntax::opt_vec::OptVec;
7777

7878
use core::option::{Some, get, is_some, is_none};
79-
use core::str::{connect, split_str};
79+
use core::str::{connect, each_split_str};
8080
use core::hashmap::linear::LinearMap;
8181
use std::oldmap::HashMap;
8282

@@ -1696,7 +1696,8 @@ pub impl Resolver {
16961696
entry: %s (%?)",
16971697
path_string, def_like);
16981698

1699-
let mut pieces = split_str(path_string, ~"::");
1699+
let mut pieces = ~[];
1700+
for each_split_str(path_string, "::") |s| { pieces.push(s.to_owned()) }
17001701
let final_ident_str = pieces.pop();
17011702
let final_ident = self.session.ident_of(final_ident_str);
17021703

0 commit comments

Comments
 (0)