Skip to content

Commit 763c6d3

Browse files
committed
Rollup merge of rust-lang#28812 - steveklabnik:improve_str_from_utf8_docs, r=brson
Our docs were very basic for the various versions of from_utf8, so this commit beefs them up. It also improves docs for the &str variant's error, Utf8Error.
2 parents 114bf59 + 4d73da9 commit 763c6d3

File tree

2 files changed

+230
-23
lines changed

2 files changed

+230
-23
lines changed

src/libcollections/string.rs

Lines changed: 111 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -92,26 +92,61 @@ impl String {
9292
panic!("not available with cfg(test)");
9393
}
9494

95-
/// Returns the vector as a string buffer, if possible, taking care not to
96-
/// copy it.
95+
/// Converts a vector of bytes to a `String`.
96+
///
97+
/// A string (`String`) is made of bytes (`u8`), and a vector of bytes
98+
/// (`Vec<u8>`) is made of bytes, so this function converts between the
99+
/// two. Not all byte slices are valid `String`s, however: `String`
100+
/// requires that it is valid UTF-8. `from_utf8()` checks to ensure that
101+
/// the bytes are valid UTF-8, and then does the conversion.
102+
///
103+
/// If you are sure that the byte slice is valid UTF-8, and you don't want
104+
/// to incur the overhead of the validity check, there is an unsafe version
105+
/// of this function, [`from_utf8_unchecked()`][fromutf8], which has the
106+
/// same behavior but skips the check.
107+
///
108+
/// [fromutf8]: struct.String.html#method.from_utf8_unchecked
109+
///
110+
/// This method will take care to not copy the vector, for efficiency's
111+
/// sake.
112+
///
113+
/// If you need a `&str` instead of a `String`, consider
114+
/// [`str::from_utf8()`][str].
115+
///
116+
/// [str]: ../str/fn.from_utf8.html
97117
///
98118
/// # Failure
99119
///
100-
/// If the given vector is not valid UTF-8, then the original vector and the
101-
/// corresponding error is returned.
120+
/// Returns `Err` if the vector is not valid UTF-8, with a description as to why the
121+
/// provided bytes are not UTF-8. The vector you moved in is also included.
102122
///
103123
/// # Examples
104124
///
125+
/// Basic usage:
126+
///
105127
/// ```
106-
/// let hello_vec = vec![104, 101, 108, 108, 111];
107-
/// let s = String::from_utf8(hello_vec).unwrap();
108-
/// assert_eq!(s, "hello");
109-
///
110-
/// let invalid_vec = vec![240, 144, 128];
111-
/// let s = String::from_utf8(invalid_vec).err().unwrap();
112-
/// let err = s.utf8_error();
113-
/// assert_eq!(s.into_bytes(), [240, 144, 128]);
128+
/// // some bytes, in a vector
129+
/// let sparkle_heart = vec![240, 159, 146, 150];
130+
///
131+
/// // We know these bytes are valid, so just use `unwrap()`.
132+
/// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
133+
///
134+
/// assert_eq!("💖", sparkle_heart);
114135
/// ```
136+
///
137+
/// Incorrect bytes:
138+
///
139+
/// ```
140+
/// // some invalid bytes, in a vector
141+
/// let sparkle_heart = vec![0, 159, 146, 150];
142+
///
143+
/// assert!(String::from_utf8(sparkle_heart).is_err());
144+
/// ```
145+
///
146+
/// See the docs for [`FromUtf8Error`][error] for more details on what you
147+
/// can do with this error.
148+
///
149+
/// [error]: struct.FromUtf8Error.html
115150
#[inline]
116151
#[stable(feature = "rust1", since = "1.0.0")]
117152
pub fn from_utf8(vec: Vec<u8>) -> Result<String, FromUtf8Error> {
@@ -121,15 +156,49 @@ impl String {
121156
}
122157
}
123158

124-
/// Converts a vector of bytes to a new UTF-8 string.
125-
/// Any invalid UTF-8 sequences are replaced with U+FFFD REPLACEMENT CHARACTER.
159+
/// Converts a slice of bytes to a `String`, including invalid characters.
160+
///
161+
/// A string slice (`&str`) is made of bytes (`u8`), and a slice of bytes
162+
/// (`&[u8]`) is made of bytes, so this function converts between the two.
163+
/// Not all byte slices are valid string slices, however: `&str` requires
164+
/// that it is valid UTF-8. During this conversion, `from_utf8_lossy()`
165+
/// will replace any invalid UTF-8 sequences with
166+
/// `U+FFFD REPLACEMENT CHARACTER`, which looks like this: �
167+
///
168+
/// If you are sure that the byte slice is valid UTF-8, and you don't want
169+
/// to incur the overhead of the conversion, there is an unsafe version
170+
/// of this function, [`from_utf8_unchecked()`][fromutf8], which has the
171+
/// same behavior but skips the checks.
172+
///
173+
/// [fromutf8]: struct.String.html#method.from_utf8_unchecked
174+
///
175+
/// If you need a `&str` instead of a `String`, consider
176+
/// [`str::from_utf8()`][str].
177+
///
178+
/// [str]: ../str/fn.from_utf8.html
126179
///
127180
/// # Examples
128181
///
182+
/// Basic usage:
183+
///
184+
/// ```
185+
/// // some bytes, in a vector
186+
/// let sparkle_heart = vec![240, 159, 146, 150];
187+
///
188+
/// // These bytes are valid UTF-8, so nothing gets replaced.
188+
/// let sparkle_heart = String::from_utf8_lossy(&sparkle_heart);
190+
///
191+
/// assert_eq!("💖", sparkle_heart);
129192
/// ```
193+
///
194+
/// Incorrect bytes:
195+
///
196+
/// ```
197+
/// // some invalid bytes
130198
/// let input = b"Hello \xF0\x90\x80World";
131199
/// let output = String::from_utf8_lossy(input);
132-
/// assert_eq!(output, "Hello \u{FFFD}World");
200+
///
201+
/// assert_eq!("Hello �World", output);
133202
/// ```
134203
#[stable(feature = "rust1", since = "1.0.0")]
135204
pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> Cow<'a, str> {
@@ -309,9 +378,33 @@ impl String {
309378
}
310379
}
311380

312-
/// Converts a vector of bytes to a new `String` without checking if
313-
/// it contains valid UTF-8. This is unsafe because it assumes that
314-
/// the UTF-8-ness of the vector has already been validated.
381+
/// Converts a vector of bytes to a `String` without checking that the
382+
/// string contains valid UTF-8.
383+
///
384+
/// See the safe version, [`from_utf8()`][fromutf8], for more.
385+
///
386+
/// [fromutf8]: struct.String.html#method.from_utf8
387+
///
388+
/// # Unsafety
389+
///
390+
/// This function is unsafe because it does not check that the bytes passed to
391+
/// it are valid UTF-8. If this constraint is violated, undefined behavior
392+
/// results, as the rest of Rust assumes that `String`s are valid UTF-8.
393+
///
394+
/// # Examples
395+
///
396+
/// Basic usage:
397+
///
398+
/// ```
399+
/// // some bytes, in a vector
400+
/// let sparkle_heart = vec![240, 159, 146, 150];
401+
///
402+
/// let sparkle_heart = unsafe {
403+
/// String::from_utf8_unchecked(sparkle_heart)
404+
/// };
405+
///
406+
/// assert_eq!("💖", sparkle_heart);
407+
/// ```
315408
#[inline]
316409
#[stable(feature = "rust1", since = "1.0.0")]
317410
pub unsafe fn from_utf8_unchecked(bytes: Vec<u8>) -> String {

src/libcore/str/mod.rs

Lines changed: 119 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,11 @@ impl fmt::Display for ParseBoolError {
119119
Section: Creating a string
120120
*/
121121

122-
/// Errors which can occur when attempting to interpret a byte slice as a `str`.
122+
/// Errors which can occur when attempting to interpret a sequence of `u8`
123+
/// as a string.
124+
///
125+
/// As such, the `from_utf8` family of functions and methods for both `String`s
126+
/// and `&str`s make use of this error, for example.
123127
#[derive(Copy, Eq, PartialEq, Clone, Debug)]
124128
#[stable(feature = "rust1", since = "1.0.0")]
125129
pub struct Utf8Error {
@@ -132,21 +136,104 @@ impl Utf8Error {
132136
///
133137
/// It is the maximum index such that `from_utf8(&input[..index])`
134138
/// would return `Ok(_)`.
139+
///
140+
/// # Examples
141+
///
142+
/// Basic usage:
143+
///
144+
/// ```
145+
/// #![feature(utf8_error)]
146+
///
147+
/// use std::str;
148+
///
149+
/// // some invalid bytes, in a vector
150+
/// let sparkle_heart = vec![0, 159, 146, 150];
151+
///
152+
/// // std::str::from_utf8 returns a Utf8Error
153+
/// let error = str::from_utf8(&sparkle_heart).unwrap_err();
154+
///
155+
/// // the second byte is invalid here
156+
/// assert_eq!(1, error.valid_up_to());
157+
/// ```
135158
#[unstable(feature = "utf8_error", reason = "method just added",
136159
issue = "27734")]
137160
pub fn valid_up_to(&self) -> usize { self.valid_up_to }
138161
}
139162

140-
/// Converts a slice of bytes to a string slice without performing any
141-
/// allocations.
163+
/// Converts a slice of bytes to a string slice.
142164
///
143-
/// Once the slice has been validated as UTF-8, it is transmuted in-place and
144-
/// returned as a '&str' instead of a '&[u8]'
165+
/// A string slice (`&str`) is made of bytes (`u8`), and a byte slice (`&[u8]`)
166+
/// is made of bytes, so this function converts between the two. Not all byte
167+
/// slices are valid string slices, however: `&str` requires that it is valid
168+
/// UTF-8. `from_utf8()` checks to ensure that the bytes are valid UTF-8, and
169+
/// then does the conversion.
170+
///
171+
/// If you are sure that the byte slice is valid UTF-8, and you don't want to
172+
/// incur the overhead of the validity check, there is an unsafe version of
173+
/// this function, [`from_utf8_unchecked()`][fromutf8], which has the same
174+
/// behavior but skips the check.
175+
///
176+
/// [fromutf8]: fn.from_utf8_unchecked.html
177+
///
178+
/// If you need a `String` instead of a `&str`, consider
179+
/// [`String::from_utf8()`][string].
180+
///
181+
/// [string]: ../string/struct.String.html#method.from_utf8
182+
///
183+
/// Because you can stack-allocate a `[u8; N]`, and you can take a `&[u8]` of
184+
/// it, this function is one way to have a stack-allocated string. There is
185+
/// an example of this in the examples section below.
145186
///
146187
/// # Failure
147188
///
148189
/// Returns `Err` if the slice is not UTF-8 with a description as to why the
149190
/// provided slice is not UTF-8.
191+
///
192+
/// # Examples
193+
///
194+
/// Basic usage:
195+
///
196+
/// ```
197+
/// use std::str;
198+
///
199+
/// // some bytes, in a vector
200+
/// let sparkle_heart = vec![240, 159, 146, 150];
201+
///
202+
/// // We know these bytes are valid, so just use `unwrap()`.
203+
/// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap();
204+
///
205+
/// assert_eq!("💖", sparkle_heart);
206+
/// ```
207+
///
208+
/// Incorrect bytes:
209+
///
210+
/// ```
211+
/// use std::str;
212+
///
213+
/// // some invalid bytes, in a vector
214+
/// let sparkle_heart = vec![0, 159, 146, 150];
215+
///
216+
/// assert!(str::from_utf8(&sparkle_heart).is_err());
217+
/// ```
218+
///
219+
/// See the docs for [`Utf8Error`][error] for more details on the kinds of
220+
/// errors that can be returned.
221+
///
222+
/// [error]: struct.Utf8Error.html
223+
///
224+
/// A "stack allocated string":
225+
///
226+
/// ```
227+
/// use std::str;
228+
///
229+
/// // some bytes, in a stack-allocated array
230+
/// let sparkle_heart = [240, 159, 146, 150];
231+
///
232+
/// // We know these bytes are valid, so just use `unwrap()`.
233+
/// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap();
234+
///
235+
/// assert_eq!("💖", sparkle_heart);
236+
/// ```
150237
#[stable(feature = "rust1", since = "1.0.0")]
151238
pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
152239
try!(run_utf8_validation_iterator(&mut v.iter()));
@@ -155,6 +242,33 @@ pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
155242

156243
/// Converts a slice of bytes to a string slice without checking
157244
/// that the string contains valid UTF-8.
245+
///
246+
/// See the safe version, [`from_utf8()`][fromutf8], for more.
247+
///
248+
/// [fromutf8]: fn.from_utf8.html
249+
///
250+
/// # Unsafety
251+
///
252+
/// This function is unsafe because it does not check that the bytes passed to
253+
/// it are valid UTF-8. If this constraint is violated, undefined behavior
254+
/// results, as the rest of Rust assumes that `&str`s are valid UTF-8.
255+
///
256+
/// # Examples
257+
///
258+
/// Basic usage:
259+
///
260+
/// ```
261+
/// use std::str;
262+
///
263+
/// // some bytes, in a vector
264+
/// let sparkle_heart = vec![240, 159, 146, 150];
265+
///
266+
/// let sparkle_heart = unsafe {
267+
/// str::from_utf8_unchecked(&sparkle_heart)
268+
/// };
269+
///
270+
/// assert_eq!("💖", sparkle_heart);
271+
/// ```
158272
#[inline(always)]
159273
#[stable(feature = "rust1", since = "1.0.0")]
160274
pub unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {

0 commit comments

Comments
 (0)