Skip to content

Commit a3c8f52

Browse files
committed
auto merge of #5823 : Kimundi/rust/str_from_bytes_slice, r=erickt
Added str::from_bytes_with_null. Added str::subslice_offset. Misc cleanup. Closes #5422.
2 parents 2f8b36f + 4ad76e6 commit a3c8f52

File tree

1 file changed

+147
-8
lines changed

1 file changed

+147
-8
lines changed

src/libcore/str.rs

+147-8
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ Section: Creating a string
3838
*/
3939

4040
/**
41-
* Convert a vector of bytes to a UTF-8 string
41+
* Convert a vector of bytes to a new UTF-8 string
4242
*
4343
* # Failure
4444
*
@@ -49,9 +49,26 @@ pub fn from_bytes(vv: &const [u8]) -> ~str {
4949
return unsafe { raw::from_bytes(vv) };
5050
}
5151

52+
/**
53+
* Reinterpret a null-terminated vector of bytes as a borrowed UTF-8 string slice.
54+
* The vector needs to be one byte longer than the string, and its last byte must be 0.
55+
*
56+
* Compared to `from_bytes()`, this fn doesn't need to allocate a new owned str;
* the returned slice borrows `vv` for the lifetime `'a`.
57+
*
58+
* # Failure
59+
*
60+
* Fails if `vv` is not valid UTF-8 (the check covers the whole vector, including the last byte)
61+
* Fails if the last byte of `vv` is not 0
* NOTE(review): for an empty `vv`, `vv.len() - 1` is computed from a length of 0 -- confirm this still fails cleanly
62+
*/
63+
pub fn from_bytes_with_null<'a>(vv: &'a [u8]) -> &'a str {
64+
assert!(vv[vv.len() - 1] == 0); // terminator check runs first
65+
assert!(is_utf8(vv));
66+
return unsafe { raw::from_bytes_with_null(vv) }; // unchecked conversion; both preconditions were asserted above
67+
}
68+
5269
/// Copy a slice into a new owned str (`~str`), covering all of `s` from byte 0 up to `len(s)`
5370
pub fn from_slice(s: &str) -> ~str {
54-
unsafe { raw::slice_bytes_unique(s, 0, len(s)) }
71+
unsafe { raw::slice_bytes_owned(s, 0, len(s)) }
5572
}
5673

5774
impl ToStr for ~str {
@@ -279,7 +296,7 @@ pub fn pop_char(s: &mut ~str) -> char {
279296
*/
280297
pub fn shift_char(s: &mut ~str) -> char {
281298
let CharRange {ch, next} = char_range_at(*s, 0u); // `ch` is returned; `next` is where the remaining bytes start
282-
*s = unsafe { raw::slice_bytes_unique(*s, next, len(*s)) };
299+
*s = unsafe { raw::slice_bytes_owned(*s, next, len(*s)) }; // replace `*s` with an owned copy of bytes `next` up to `len(*s)`
283300
return ch;
284301
}
285302

@@ -784,9 +801,9 @@ pub fn replace(s: &str, from: &str, to: &str) -> ~str {
784801
if first {
785802
first = false;
786803
} else {
787-
unsafe { push_str(&mut result, to); }
804+
push_str(&mut result, to);
788805
}
789-
unsafe { push_str(&mut result, raw::slice_bytes_unique(s, start, end)); }
806+
push_str(&mut result, unsafe{raw::slice_bytes(s, start, end)});
790807
}
791808
result
792809
}
@@ -2037,6 +2054,37 @@ pub fn as_buf<T>(s: &str, f: &fn(*u8, uint) -> T) -> T {
20372054
}
20382055
}
20392056

2057+
/**
2058+
* Returns the byte offset of an inner slice relative to an enclosing outer slice
*
* The offset is computed from the buffer addresses of the two slices, so
* `inner` must be a slice into the same buffer as `outer`; equal contents
* are not enough.
*
* # Failure
*
* Fails if `inner` starts before `outer` starts
* Fails if `inner` ends after `outer` ends
2059+
*
2060+
* # Example
2061+
*
2062+
* ~~~
2063+
* let string = "a\nb\nc";
2064+
* let mut lines = ~[];
2065+
* for each_line(string) |line| { lines.push(line) }
2066+
*
2067+
* assert!(subslice_offset(string, lines[0]) == 0); // &"a"
2068+
* assert!(subslice_offset(string, lines[1]) == 2); // &"b"
2069+
* assert!(subslice_offset(string, lines[2]) == 4); // &"c"
2070+
* ~~~
2071+
*/
2072+
#[inline(always)]
2073+
pub fn subslice_offset(outer: &str, inner: &str) -> uint {
2074+
do as_buf(outer) |a, a_len| {
2075+
do as_buf(inner) |b, b_len| {
2076+
let a_start: uint, a_end: uint, b_start: uint, b_end: uint;
2077+
unsafe {
2078+
a_start = cast::transmute(a); a_end = a_len + cast::transmute(a); // outer buffer as the address range [a_start, a_end]
2079+
b_start = cast::transmute(b); b_end = b_len + cast::transmute(b); // inner buffer as the address range [b_start, b_end]
2080+
}
2081+
assert!(a_start <= b_start); // inner must not begin before outer
2082+
assert!(b_end <= a_end); // inner must not run past the end of outer
2083+
b_start - a_start // distance in bytes between the two start addresses
2084+
}
2085+
}
2086+
}
2087+
20402088
/**
20412089
* Reserves capacity for exactly `n` bytes in the given string, not including
20422090
* the null terminator.
@@ -2158,13 +2206,20 @@ pub mod raw {
21582206
from_buf_len(::cast::reinterpret_cast(&c_str), len)
21592207
}
21602208

2161-
/// Converts a vector of bytes to a string.
2209+
/// Converts a vector of bytes to a new owned string.
// NOTE(review): no UTF-8 check is visible in this body -- presumably callers must validate `v` first; confirm.
21622210
pub unsafe fn from_bytes(v: &const [u8]) -> ~str {
21632211
do vec::as_const_buf(v) |buf, len| { // raw pointer and length of `v`
21642212
from_buf_len(buf, len) // the owned string is produced from `buf` and `len`
21652213
}
21662214
}
21672215

2216+
/// Reinterprets a vector of bytes as a string slice borrowing the same data; nothing is checked here.
2217+
/// The byte slice needs to contain valid utf8 and needs to be one byte longer than
2218+
/// the string, if possible ending in a 0 byte. Upholding this is the caller's responsibility.
2219+
pub unsafe fn from_bytes_with_null<'a>(v: &'a [u8]) -> &'a str {
2220+
cast::transmute(v) // the whole body: a plain reinterpretation of `v`
2221+
}
2222+
21682223
/// Converts a single byte to a new owned string by passing the one-element vector `[u]` to `raw::from_bytes`.
21692224
pub unsafe fn from_byte(u: u8) -> ~str { raw::from_bytes([u]) }
21702225

@@ -2186,7 +2241,7 @@ pub mod raw {
21862241
* If begin is greater than end.
21872242
* If end is greater than the length of the string.
21882243
*/
2189-
pub unsafe fn slice_bytes_unique(s: &str, begin: uint, end: uint) -> ~str {
2244+
pub unsafe fn slice_bytes_owned(s: &str, begin: uint, end: uint) -> ~str {
21902245
do as_buf(s) |sbuf, n| {
21912246
assert!((begin <= end));
21922247
assert!((end <= n));
@@ -2258,7 +2313,7 @@ pub mod raw {
22582313
let len = len(*s);
22592314
assert!((len > 0u));
22602315
let b = s[0];
2261-
*s = unsafe { raw::slice_bytes_unique(*s, 1u, len) };
2316+
*s = unsafe { raw::slice_bytes_owned(*s, 1u, len) };
22622317
return b;
22632318
}
22642319

@@ -3289,6 +3344,66 @@ mod tests {
32893344
let _x = from_bytes(bb);
32903345
}
32913346
3347+
#[test]
3348+
fn test_unsafe_from_bytes_with_null() {
3349+
let a = [65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8]; // seven 'A' bytes (65) followed by a terminating 0
3350+
let b = unsafe { raw::from_bytes_with_null(a) };
3351+
assert_eq!(b, "AAAAAAA"); // the resulting string is the seven characters, without the trailing 0 byte
3352+
}
3353+
3354+
#[test]
3355+
fn test_from_bytes_with_null() {
3356+
let ss = "ศไทย中华Việt Nam";
// `bb` is the UTF-8 encoding of `ss` followed by a terminating 0 byte.
3357+
let bb = [0xe0_u8, 0xb8_u8, 0xa8_u8,
3358+
0xe0_u8, 0xb9_u8, 0x84_u8,
3359+
0xe0_u8, 0xb8_u8, 0x97_u8,
3360+
0xe0_u8, 0xb8_u8, 0xa2_u8,
3361+
0xe4_u8, 0xb8_u8, 0xad_u8,
3362+
0xe5_u8, 0x8d_u8, 0x8e_u8,
3363+
0x56_u8, 0x69_u8, 0xe1_u8,
3364+
0xbb_u8, 0x87_u8, 0x74_u8,
3365+
0x20_u8, 0x4e_u8, 0x61_u8,
3366+
0x6d_u8, 0x0_u8];
3367+
3368+
assert_eq!(ss, from_bytes_with_null(bb)); // checked conversion must yield exactly `ss`
3369+
}
3370+
3371+
#[test]
3372+
#[should_fail]
3373+
#[ignore(cfg(windows))]
3374+
fn test_from_bytes_with_null_fail() {
// Same fixture as `test_from_bytes_with_null` except for the first byte:
// 0xff never occurs in valid UTF-8, so the conversion is expected to fail.
3375+
let bb = [0xff_u8, 0xb8_u8, 0xa8_u8,
3376+
0xe0_u8, 0xb9_u8, 0x84_u8,
3377+
0xe0_u8, 0xb8_u8, 0x97_u8,
3378+
0xe0_u8, 0xb8_u8, 0xa2_u8,
3379+
0xe4_u8, 0xb8_u8, 0xad_u8,
3380+
0xe5_u8, 0x8d_u8, 0x8e_u8,
3381+
0x56_u8, 0x69_u8, 0xe1_u8,
3382+
0xbb_u8, 0x87_u8, 0x74_u8,
3383+
0x20_u8, 0x4e_u8, 0x61_u8,
3384+
0x6d_u8, 0x0_u8]; // still ends in 0, so the terminator check is satisfied
3385+
3386+
let _x = from_bytes_with_null(bb);
3387+
}
3388+
3389+
#[test]
3390+
#[should_fail]
3391+
#[ignore(cfg(windows))]
3392+
fn test_from_bytes_with_null_fail_2() {
// Valid UTF-8 throughout (first byte is 0xe0, not 0xff), but the last byte
// is 0x60 instead of 0: the missing terminator is the only reason the
// conversion can fail, so this test really exercises that check.
3393+
let bb = [0xe0_u8, 0xb8_u8, 0xa8_u8,
3394+
0xe0_u8, 0xb9_u8, 0x84_u8,
3395+
0xe0_u8, 0xb8_u8, 0x97_u8,
3396+
0xe0_u8, 0xb8_u8, 0xa2_u8,
3397+
0xe4_u8, 0xb8_u8, 0xad_u8,
3398+
0xe5_u8, 0x8d_u8, 0x8e_u8,
3399+
0x56_u8, 0x69_u8, 0xe1_u8,
3400+
0xbb_u8, 0x87_u8, 0x74_u8,
3401+
0x20_u8, 0x4e_u8, 0x61_u8,
3402+
0x6d_u8, 0x60_u8];
3403+
3404+
let _x = from_bytes_with_null(bb);
3405+
}
3406+
32923407
#[test]
32933408
fn test_from_buf() {
32943409
unsafe {
@@ -3351,6 +3466,30 @@ mod tests {
33513466
}
33523467
}
33533468
3469+
#[test]
3470+
fn test_subslice_offset() {
3471+
let a = "kernelsprite";
3472+
let b = slice(a, 7, len(a)); // tail of `a`, starting at byte 7
3473+
let c = slice(a, 0, len(a) - 6); // head of `a`, starting at byte 0
3474+
assert!(subslice_offset(a, b) == 7);
3475+
assert!(subslice_offset(a, c) == 0);
3476+
3477+
let string = "a\nb\nc";
3478+
let mut lines = ~[];
3479+
for each_line(string) |line| { lines.push(line) } // collect the line slices of `string`
3480+
assert!(subslice_offset(string, lines[0]) == 0);
3481+
assert!(subslice_offset(string, lines[1]) == 2);
3482+
assert!(subslice_offset(string, lines[2]) == 4);
3483+
}
3484+
3485+
#[test]
3486+
#[should_fail]
#[ignore(cfg(windows))]
3487+
fn test_subslice_offset_2() {
// `a` and `b` are two distinct literals of equal length, so `b` cannot lie
// inside `a`'s buffer and one of the range assertions must fail.
3488+
let a = "alchemiter";
3489+
let b = "cruxtruder";
3490+
subslice_offset(a, b);
3491+
}
3492+
33543493
#[test]
33553494
fn vec_str_conversions() {
33563495
let s1: ~str = ~"All mimsy were the borogoves";

0 commit comments

Comments
 (0)