Skip to content

Commit efdc846

Browse files
committed
auto merge of #14613 : schmee/rust/utf16-iterator, r=huonw
Closes #14358. The tests are not yet moved to `utf16_iter`, so this probably won't compile. I'm submitting this PR anyway so it can be reviewed and since it was mentioned in #14611. This deprecates `.to_utf16`. `x.to_utf16()` should be replaced by either `x.utf16_iter().collect::<Vec<u16>>()` (the type annotation may be optional), or just `x.utf16_iter()` directly, if it can be used in an iterator context. [breaking-change] cc @huonw
2 parents feb294c + f48693c commit efdc846

File tree

3 files changed

+53
-13
lines changed

3 files changed

+53
-13
lines changed

src/libcollections/str.rs

+9-12
Original file line numberDiff line numberDiff line change
@@ -803,15 +803,9 @@ pub trait StrAllocating: Str {
803803
}
804804

805805
/// Converts to a vector of `u16` encoded as UTF-16.
806+
#[deprecated = "use `utf16_iter` instead"]
806807
fn to_utf16(&self) -> Vec<u16> {
807-
let me = self.as_slice();
808-
let mut u = Vec::new();
809-
for ch in me.chars() {
810-
let mut buf = [0u16, ..2];
811-
let n = ch.encode_utf16(buf /* as mut slice! */);
812-
u.push_all(buf.slice_to(n));
813-
}
814-
u
808+
self.as_slice().utf16_iter().collect::<Vec<u16>>()
815809
}
816810

817811
/// Given a string, make a new string with repeated copies of it.
@@ -1619,14 +1613,17 @@ mod tests {
16191613

16201614
for p in pairs.iter() {
16211615
let (s, u) = (*p).clone();
1616+
let s_as_utf16 = s.as_slice().utf16_iter().collect::<Vec<u16>>();
1617+
let u_as_string = from_utf16(u.as_slice()).unwrap();
1618+
16221619
assert!(is_utf16(u.as_slice()));
1623-
assert_eq!(s.to_utf16(), u);
1620+
assert_eq!(s_as_utf16, u);
16241621

1625-
assert_eq!(from_utf16(u.as_slice()).unwrap(), s);
1622+
assert_eq!(u_as_string, s);
16261623
assert_eq!(from_utf16_lossy(u.as_slice()), s);
16271624

1628-
assert_eq!(from_utf16(s.to_utf16().as_slice()).unwrap(), s);
1629-
assert_eq!(from_utf16(u.as_slice()).unwrap().to_utf16(), u);
1625+
assert_eq!(from_utf16(s_as_utf16.as_slice()).unwrap(), s);
1626+
assert_eq!(u_as_string.as_slice().utf16_iter().collect::<Vec<u16>>(), u);
16301627
}
16311628
}
16321629

src/libcore/str.rs

+43
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
use mem;
1818
use char;
19+
use char::Char;
1920
use clone::Clone;
2021
use cmp;
2122
use cmp::{PartialEq, Eq};
@@ -554,6 +555,40 @@ impl<'a> Iterator<&'a str> for StrSplits<'a> {
554555
}
555556
}
556557

558+
/// External iterator for a string's UTF16 codeunits.
559+
/// Use with the `std::iter` module.
560+
#[deriving(Clone)]
561+
pub struct Utf16CodeUnits<'a> {
562+
chars: Chars<'a>,
563+
extra: u16
564+
}
565+
566+
impl<'a> Iterator<u16> for Utf16CodeUnits<'a> {
567+
#[inline]
568+
fn next(&mut self) -> Option<u16> {
569+
if self.extra != 0 {
570+
let tmp = self.extra;
571+
self.extra = 0;
572+
return Some(tmp);
573+
}
574+
575+
let mut buf = [0u16, ..2];
576+
self.chars.next().map(|ch| {
577+
let n = ch.encode_utf16(buf /* as mut slice! */);
578+
if n == 2 { self.extra = buf[1]; }
579+
buf[0]
580+
})
581+
}
582+
583+
#[inline]
584+
fn size_hint(&self) -> (uint, Option<uint>) {
585+
let (low, high) = self.chars.size_hint();
586+
// we could be entirely valid surrogates (2 elements per
587+
// char), or entirely non-surrogates (1 element per char)
588+
(low / 2, high)
589+
}
590+
}
591+
557592
/*
558593
Section: Comparing strings
559594
*/
@@ -1612,6 +1647,9 @@ pub trait StrSlice<'a> {
16121647
/// and that it is not reallocated (e.g. by pushing to the
16131648
/// string).
16141649
fn as_ptr(&self) -> *u8;
1650+
1651+
/// Returns an iterator over the `u16` code units of the string encoded as UTF-16.
1652+
fn utf16_iter(&self) -> Utf16CodeUnits<'a>;
16151653
}
16161654

16171655
impl<'a> StrSlice<'a> for &'a str {
@@ -1960,6 +1998,11 @@ impl<'a> StrSlice<'a> for &'a str {
19601998
fn as_ptr(&self) -> *u8 {
19611999
self.repr().data
19622000
}
2001+
2002+
#[inline]
2003+
fn utf16_iter(&self) -> Utf16CodeUnits<'a> {
2004+
Utf16CodeUnits{ chars: self.chars(), extra: 0}
2005+
}
19632006
}
19642007

19652008
impl<'a> Default for &'a str {

src/libnative/io/process.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -763,7 +763,7 @@ fn with_envp<T>(env: Option<&[(CString, CString)]>, cb: |*mut c_void| -> T) -> T
763763
let kv = format!("{}={}",
764764
pair.ref0().as_str().unwrap(),
765765
pair.ref1().as_str().unwrap());
766-
blk.push_all(kv.to_utf16().as_slice());
766+
blk.extend(kv.utf16_iter());
767767
blk.push(0);
768768
}
769769

0 commit comments

Comments
 (0)