@@ -38,7 +38,7 @@ Section: Creating a string
38
38
*/
39
39
40
40
/**
41
- * Convert a vector of bytes to a UTF-8 string
41
+ * Convert a vector of bytes to a new UTF-8 string
42
42
*
43
43
* # Failure
44
44
*
@@ -49,9 +49,26 @@ pub fn from_bytes(vv: &const [u8]) -> ~str {
49
49
return unsafe { raw:: from_bytes ( vv) } ;
50
50
}
51
51
52
+ /**
53
+ * Convert a vector of bytes to a UTF-8 string slice that borrows the vector.
54
+ * The vector needs to be one byte longer than the string, and end with a 0 byte.
55
+ *
56
+ * Compared to `from_bytes()`, this fn doesn't need to allocate a new owned str.
57
+ *
58
+ * # Failure
59
+ *
60
+ * Fails if the vector is empty or not null terminated
61
+ * Fails if invalid UTF-8
62
+ */
63
+ pub fn from_bytes_with_null < ' a > ( vv : & ' a [ u8 ] ) -> & ' a str {
64
+ assert ! ( vv. len( ) > 0 && vv[ vv. len( ) - 1 ] == 0 ) ; // length guard first: `len() - 1` would underflow on an empty vector
65
+ assert ! ( is_utf8( vv) ) ;
66
+ return unsafe { raw:: from_bytes_with_null ( vv) } ;
67
+ }
68
+
52
69
/// Copy a slice into a new owned str (`~str`) covering all of `s`
53
70
pub fn from_slice ( s : & str ) -> ~str {
54
- unsafe { raw:: slice_bytes_unique ( s, 0 , len ( s) ) }
71
+ unsafe { raw:: slice_bytes_owned ( s, 0 , len ( s) ) }
55
72
}
56
73
57
74
impl ToStr for ~str {
@@ -279,7 +296,7 @@ pub fn pop_char(s: &mut ~str) -> char {
279
296
*/
280
297
pub fn shift_char ( s : & mut ~str ) -> char {
281
298
let CharRange { ch, next} = char_range_at ( * s, 0 u) ;
282
- * s = unsafe { raw:: slice_bytes_unique ( * s, next, len ( * s) ) } ;
299
+ * s = unsafe { raw:: slice_bytes_owned ( * s, next, len ( * s) ) } ;
283
300
return ch;
284
301
}
285
302
@@ -784,9 +801,9 @@ pub fn replace(s: &str, from: &str, to: &str) -> ~str {
784
801
if first {
785
802
first = false ;
786
803
} else {
787
- unsafe { push_str ( & mut result, to) ; }
804
+ push_str ( & mut result, to) ;
788
805
}
789
- unsafe { push_str ( & mut result, raw:: slice_bytes_unique ( s, start, end) ) ; }
806
+ push_str ( & mut result, unsafe { raw:: slice_bytes ( s, start, end) } ) ;
790
807
}
791
808
result
792
809
}
@@ -2037,6 +2054,37 @@ pub fn as_buf<T>(s: &str, f: &fn(*u8, uint) -> T) -> T {
2037
2054
}
2038
2055
}
2039
2056
2057
+ /**
2058
+ * Returns the byte offset of an inner slice relative to an enclosing outer slice; fails if `inner` does not lie entirely within `outer`.
2059
+ *
2060
+ * # Example
2061
+ *
2062
+ * ~~~
2063
+ * let string = "a\nb\nc";
2064
+ * let mut lines = ~[];
2065
+ * for each_line(string) |line| { lines.push(line) }
2066
+ *
2067
+ * assert!(subslice_offset(string, lines[0]) == 0); // &"a"
2068
+ * assert!(subslice_offset(string, lines[1]) == 2); // &"b"
2069
+ * assert!(subslice_offset(string, lines[2]) == 4); // &"c"
2070
+ * ~~~
2071
+ */
2072
+ #[ inline( always) ]
2073
+ pub fn subslice_offset ( outer : & str , inner : & str ) -> uint {
2074
+ do as_buf ( outer) |a, a_len| {
2075
+ do as_buf ( inner) |b, b_len| {
2076
+ let a_start: uint , a_end : uint , b_start : uint , b_end : uint ;
2077
+ unsafe {
2078
+ a_start = cast:: transmute ( a) ; a_end = a_len + cast:: transmute ( a) ; // buffer pointer reinterpreted as an integer address
2079
+ b_start = cast:: transmute ( b) ; b_end = b_len + cast:: transmute ( b) ;
2080
+ }
2081
+ assert ! ( a_start <= b_start) ; // inner must not begin before outer
2082
+ assert ! ( b_end <= a_end) ; // inner must not extend past the end of outer
2083
+ b_start - a_start
2084
+ }
2085
+ }
2086
+ }
2087
+
2040
2088
/**
2041
2089
* Reserves capacity for exactly `n` bytes in the given string, not including
2042
2090
* the null terminator.
@@ -2158,13 +2206,20 @@ pub mod raw {
2158
2206
from_buf_len ( :: cast:: reinterpret_cast ( & c_str) , len)
2159
2207
}
2160
2208
2161
- /// Converts a vector of bytes to a string.
2209
+ /// Converts a vector of bytes to a new owned string by passing the vector's buffer and length to `from_buf_len`. NOTE(review): no UTF-8 check is visible in this body; presumably the caller must guarantee validity.
2162
2210
pub unsafe fn from_bytes ( v : & const [ u8 ] ) -> ~str {
2163
2211
do vec:: as_const_buf ( v) |buf, len| {
2164
2212
from_buf_len ( buf, len)
2165
2213
}
2166
2214
}
2167
2215
2216
+ /// Reinterprets a vector of bytes as a string slice without copying or checking it.
2217
+ /// The byte slice needs to contain valid UTF-8 and needs to be one byte longer than
2218
+ /// the string; that extra final byte should be 0. Neither requirement is verified here.
2219
+ pub unsafe fn from_bytes_with_null < ' a > ( v : & ' a [ u8 ] ) -> & ' a str {
2220
+ cast:: transmute ( v)
2221
+ }
2222
+
2168
2223
/// Converts a single byte to a new owned string by passing a one-element vector to `raw::from_bytes`.
2169
2224
pub unsafe fn from_byte ( u : u8 ) -> ~str { raw:: from_bytes ( [ u] ) }
2170
2225
@@ -2186,7 +2241,7 @@ pub mod raw {
2186
2241
* If begin is greater than end.
2187
2242
* If end is greater than the length of the string.
2188
2243
*/
2189
- pub unsafe fn slice_bytes_unique ( s : & str , begin : uint , end : uint ) -> ~str {
2244
+ pub unsafe fn slice_bytes_owned ( s : & str , begin : uint , end : uint ) -> ~str {
2190
2245
do as_buf ( s) |sbuf, n| {
2191
2246
assert ! ( ( begin <= end) ) ;
2192
2247
assert ! ( ( end <= n) ) ;
@@ -2258,7 +2313,7 @@ pub mod raw {
2258
2313
let len = len ( * s) ;
2259
2314
assert ! ( ( len > 0 u) ) ;
2260
2315
let b = s[ 0 ] ;
2261
- * s = unsafe { raw:: slice_bytes_unique ( * s, 1 u, len) } ;
2316
+ * s = unsafe { raw:: slice_bytes_owned ( * s, 1 u, len) } ;
2262
2317
return b;
2263
2318
}
2264
2319
@@ -3289,6 +3344,66 @@ mod tests {
3289
3344
let _x = from_bytes(bb);
3290
3345
}
3291
3346
3347
+ #[test]
3348
+ fn test_unsafe_from_bytes_with_null() {
3349
+ let a = [65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8]; // seven 'A' bytes (65) followed by the terminating 0
3350
+ let b = unsafe { raw::from_bytes_with_null(a) };
3351
+ assert_eq!(b, "AAAAAAA");
3352
+ }
3353
+
3354
+ #[test]
3355
+ fn test_from_bytes_with_null() {
3356
+ let ss = "ศไทย中华Việt Nam"; // must match the bytes below exactly: no leading or trailing space
3357
+ let bb = [0xe0_u8, 0xb8_u8, 0xa8_u8,
3358
+ 0xe0_u8, 0xb9_u8, 0x84_u8,
3359
+ 0xe0_u8, 0xb8_u8, 0x97_u8,
3360
+ 0xe0_u8, 0xb8_u8, 0xa2_u8,
3361
+ 0xe4_u8, 0xb8_u8, 0xad_u8,
3362
+ 0xe5_u8, 0x8d_u8, 0x8e_u8,
3363
+ 0x56_u8, 0x69_u8, 0xe1_u8,
3364
+ 0xbb_u8, 0x87_u8, 0x74_u8,
3365
+ 0x20_u8, 0x4e_u8, 0x61_u8,
3366
+ 0x6d_u8, 0x0_u8];
3367
+
3368
+ assert_eq!(ss, from_bytes_with_null(bb));
3369
+ }
3370
+
3371
+ #[test]
3372
+ #[should_fail]
3373
+ #[ignore(cfg(windows))]
3374
+ fn test_from_bytes_with_null_fail() {
3375
+ let bb = [0xff_u8, 0xb8_u8, 0xa8_u8, // 0xff can never appear in well-formed UTF-8
3376
+ 0xe0_u8, 0xb9_u8, 0x84_u8,
3377
+ 0xe0_u8, 0xb8_u8, 0x97_u8,
3378
+ 0xe0_u8, 0xb8_u8, 0xa2_u8,
3379
+ 0xe4_u8, 0xb8_u8, 0xad_u8,
3380
+ 0xe5_u8, 0x8d_u8, 0x8e_u8,
3381
+ 0x56_u8, 0x69_u8, 0xe1_u8,
3382
+ 0xbb_u8, 0x87_u8, 0x74_u8,
3383
+ 0x20_u8, 0x4e_u8, 0x61_u8,
3384
+ 0x6d_u8, 0x0_u8]; // last byte is 0, so only the UTF-8 check can reject this input
3385
+
3386
+ let _x = from_bytes_with_null(bb);
3387
+ }
3388
+
3389
+ #[test]
3390
+ #[should_fail]
3391
+ #[ignore(cfg(windows))]
3392
+ fn test_from_bytes_with_null_fail_2() {
3393
+ let bb = [0xe0_u8, 0xb8_u8, 0xa8_u8, // valid UTF-8 throughout, so the failure can only come from the missing terminator
3394
+ 0xe0_u8, 0xb9_u8, 0x84_u8,
3395
+ 0xe0_u8, 0xb8_u8, 0x97_u8,
3396
+ 0xe0_u8, 0xb8_u8, 0xa2_u8,
3397
+ 0xe4_u8, 0xb8_u8, 0xad_u8,
3398
+ 0xe5_u8, 0x8d_u8, 0x8e_u8,
3399
+ 0x56_u8, 0x69_u8, 0xe1_u8,
3400
+ 0xbb_u8, 0x87_u8, 0x74_u8,
3401
+ 0x20_u8, 0x4e_u8, 0x61_u8,
3402
+ 0x6d_u8, 0x60_u8]; // final byte is 0x60 instead of 0: not null terminated
3403
+
3404
+ let _x = from_bytes_with_null(bb);
3405
+ }
3406
+
3292
3407
#[test]
3293
3408
fn test_from_buf() {
3294
3409
unsafe {
@@ -3351,6 +3466,30 @@ mod tests {
3351
3466
}
3352
3467
}
3353
3468
3469
+ #[test]
3470
+ fn test_subslice_offset() {
3471
+ let a = "kernelsprite";
3472
+ let b = slice(a, 7, len(a));
3473
+ let c = slice(a, 0, len(a) - 6);
3474
+ assert!(subslice_offset(a, b) == 7);
3475
+ assert!(subslice_offset(a, c) == 0);
3476
+
3477
+ let string = "a\nb\nc"; // one-byte lines separated by '\n', so the lines start at offsets 0, 2 and 4
3478
+ let mut lines = ~[];
3479
+ for each_line(string) |line| { lines.push(line) }
3480
+ assert!(subslice_offset(string, lines[0]) == 0);
3481
+ assert!(subslice_offset(string, lines[1]) == 2);
3482
+ assert!(subslice_offset(string, lines[2]) == 4);
3483
+ }
3484
+
3485
+ #[test]
3486
+ #[should_fail]
3487
+ fn test_subslice_offset_2() {
3488
+ let a = " alchemiter";
3489
+ let b = " cruxtruder";
3490
+ subslice_offset(a, b); // b is a separate string, not a slice of a, so one of the range asserts must fail
3491
+ }
3492
+
3354
3493
#[test]
3355
3494
fn vec_str_conversions() {
3356
3495
let s1: ~str = ~" All mimsy were the borogoves";
0 commit comments