@@ -92,26 +92,61 @@ impl String {
92
92
panic ! ( "not available with cfg(test)" ) ;
93
93
}
94
94
95
- /// Returns the vector as a string buffer, if possible, taking care not to
96
- /// copy it.
95
+ /// Converts a vector of bytes to a `String`.
96
+ ///
97
+ /// A string slice (`&str`) is made of bytes (`u8`), and a vector of bytes
98
+ /// (`Vec<u8>`) is made of bytes, so this function converts between the
99
+ /// two. Not all byte slices are valid `String`s, however: `String`
100
+ /// requires that it is valid UTF-8. `from_utf8()` checks to ensure that
101
+ /// the bytes are valid UTF-8, and then does the conversion.
102
+ ///
103
+ /// If you are sure that the byte slice is valid UTF-8, and you don't want
104
+ /// to incur the overhead of the validity check, there is an unsafe version
105
+ /// of this function, [`from_utf8_unchecked()`][fromutf8], which has the
106
+ /// same behavior but skips the check.
107
+ ///
108
+ /// [fromutf8]: struct.String.html#method.from_utf8_unchecked
109
+ ///
110
+ /// This method will take care to not copy the vector, for efficiency's
111
+ /// sake.
112
+ ///
113
+ /// If you need a `&str` instead of a `String`, consider
114
+ /// [`str::from_utf8()`][str].
115
+ ///
116
+ /// [str]: ../str/fn.from_utf8.html
97
117
///
98
118
/// # Failure
99
119
///
100
- /// If the given vector is not valid UTF-8, then the original vector and the
101
- /// corresponding error is returned .
120
+ /// Returns `Err` if the vector is not valid UTF-8, with a description as to
121
+ /// why the provided bytes are not UTF-8. The vector you moved in is also included.
102
122
///
103
123
/// # Examples
104
124
///
125
+ /// Basic usage:
126
+ ///
105
127
/// ```
106
- /// let hello_vec = vec![104, 101, 108, 108, 111];
107
- /// let s = String::from_utf8(hello_vec).unwrap();
108
- /// assert_eq!(s, "hello");
109
- ///
110
- /// let invalid_vec = vec![240, 144, 128];
111
- /// let s = String::from_utf8(invalid_vec).err().unwrap();
112
- /// let err = s.utf8_error();
113
- /// assert_eq!(s.into_bytes(), [240, 144, 128]);
128
+ /// // some bytes, in a vector
129
+ /// let sparkle_heart = vec![240, 159, 146, 150];
130
+ ///
131
+ /// // We know these bytes are valid, so just use `unwrap()`.
132
+ /// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
133
+ ///
134
+ /// assert_eq!("💖", sparkle_heart);
114
135
/// ```
136
+ ///
137
+ /// Incorrect bytes:
138
+ ///
139
+ /// ```
140
+ /// // some invalid bytes, in a vector
141
+ /// let sparkle_heart = vec![0, 159, 146, 150];
142
+ ///
143
+ /// assert!(String::from_utf8(sparkle_heart).is_err());
144
+ /// ```
145
+ ///
146
+ /// See the docs for [`FromUtf8Error`][error] for more details on what you
147
+ /// can do with this error.
148
+ ///
149
+ /// [error]: struct.FromUtf8Error.html
115
150
#[ inline]
116
151
#[ stable( feature = "rust1" , since = "1.0.0" ) ]
117
152
pub fn from_utf8 ( vec : Vec < u8 > ) -> Result < String , FromUtf8Error > {
@@ -121,15 +156,49 @@ impl String {
121
156
}
122
157
}
123
158
124
- /// Converts a vector of bytes to a new UTF-8 string.
125
- /// Any invalid UTF-8 sequences are replaced with U+FFFD REPLACEMENT CHARACTER.
159
+ /// Converts a slice of bytes to a `String`, including invalid characters.
160
+ ///
161
+ /// A string slice (`&str`) is made of bytes (`u8`), and a slice of bytes
162
+ /// (`&[u8]`) is made of bytes, so this function converts between the two.
163
+ /// Not all byte slices are valid string slices, however: `&str` requires
164
+ /// that it is valid UTF-8. During this conversion, `from_utf8_lossy()`
165
+ /// will replace any invalid UTF-8 sequences with
166
+ /// `U+FFFD REPLACEMENT CHARACTER`, which looks like this: �
167
+ ///
168
+ /// If you are sure that the byte slice is valid UTF-8, and you don't want
169
+ /// to incur the overhead of the conversion, there is an unsafe version
170
+ /// of this function, [`from_utf8_unchecked()`][fromutf8], which has the
171
+ /// same behavior but skips the checks.
172
+ ///
173
+ /// [fromutf8]: struct.String.html#method.from_utf8_unchecked
174
+ ///
175
+ /// If you need a `&str` instead of a `String`, consider
176
+ /// [`str::from_utf8()`][str].
177
+ ///
178
+ /// [str]: ../str/fn.from_utf8.html
126
179
///
127
180
/// # Examples
128
181
///
182
+ /// Basic usage:
183
+ ///
184
+ /// ```
185
+ /// // some bytes, in a vector
186
+ /// let sparkle_heart = vec![240, 159, 146, 150];
187
+ ///
188
+ /// // These bytes are valid UTF-8, so no replacement characters are inserted.
189
+ /// let sparkle_heart = String::from_utf8_lossy(&sparkle_heart);
190
+ ///
191
+ /// assert_eq!("💖", sparkle_heart);
129
192
/// ```
193
+ ///
194
+ /// Incorrect bytes:
195
+ ///
196
+ /// ```
197
+ /// // some invalid bytes
130
198
/// let input = b"Hello \xF0\x90\x80World";
131
199
/// let output = String::from_utf8_lossy(input);
132
- /// assert_eq!(output, "Hello \u{FFFD}World");
200
+ ///
201
+ /// assert_eq!("Hello �World", output);
133
202
/// ```
134
203
#[ stable( feature = "rust1" , since = "1.0.0" ) ]
135
204
pub fn from_utf8_lossy < ' a > ( v : & ' a [ u8 ] ) -> Cow < ' a , str > {
@@ -309,9 +378,33 @@ impl String {
309
378
}
310
379
}
311
380
312
- /// Converts a vector of bytes to a new `String` without checking if
313
- /// it contains valid UTF-8. This is unsafe because it assumes that
314
- /// the UTF-8-ness of the vector has already been validated.
381
+ /// Converts a vector of bytes to a `String` without checking that the
382
+ /// string contains valid UTF-8.
383
+ ///
384
+ /// See the safe version, [`from_utf8()`][fromutf8], for more.
385
+ ///
386
+ /// [fromutf8]: struct.String.html#method.from_utf8
387
+ ///
388
+ /// # Unsafety
389
+ ///
390
+ /// This function is unsafe because it does not check that the bytes passed to
391
+ /// it are valid UTF-8. If this constraint is violated, undefined behavior
392
+ /// results, as the rest of Rust assumes that `String`s are valid UTF-8.
393
+ ///
394
+ /// # Examples
395
+ ///
396
+ /// Basic usage:
397
+ ///
398
+ /// ```
399
+ /// // some bytes, in a vector
400
+ /// let sparkle_heart = vec![240, 159, 146, 150];
401
+ ///
402
+ /// let sparkle_heart = unsafe {
403
+ /// String::from_utf8_unchecked(sparkle_heart)
404
+ /// };
405
+ ///
406
+ /// assert_eq!("💖", sparkle_heart);
407
+ /// ```
315
408
#[ inline]
316
409
#[ stable( feature = "rust1" , since = "1.0.0" ) ]
317
410
pub unsafe fn from_utf8_unchecked ( bytes : Vec < u8 > ) -> String {
0 commit comments