Skip to content

Commit 5a56885

Browse files
Introduce StrWriter to allow ESW to wrap both a String and a &mut String
1 parent 24ca190 commit 5a56885

File tree

3 files changed

+103
-17
lines changed

3 files changed

+103
-17
lines changed

benches/benchmarks.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,20 @@ fn do_encode_bench_string_stream(b: &mut Bencher, &size: &usize) {
129129

130130
b.iter(|| {
131131
let mut stream_enc = write::EncoderStringWriter::new(TEST_CONFIG);
132+
stream_enc.write_all(&v).unwrap();
133+
stream_enc.flush().unwrap();
134+
let _ = stream_enc.into_inner();
135+
});
136+
}
137+
138+
fn do_encode_bench_string_reuse_buf_stream(b: &mut Bencher, &size: &usize) {
139+
let mut v: Vec<u8> = Vec::with_capacity(size);
140+
fill(&mut v);
141+
142+
let mut buf = String::new();
143+
b.iter(|| {
144+
buf.clear();
145+
let mut stream_enc = write::EncoderStringWriter::from(&mut buf, TEST_CONFIG);
132146
stream_enc.write_all(&v).unwrap();
133147
stream_enc.flush().unwrap();
134148
let _ = stream_enc.into_inner();
@@ -160,6 +174,7 @@ fn encode_benchmarks(byte_sizes: &[usize]) -> ParameterizedBenchmark<usize> {
160174
.with_function("encode_slice", do_encode_bench_slice)
161175
.with_function("encode_reuse_buf_stream", do_encode_bench_stream)
162176
.with_function("encode_string_stream", do_encode_bench_string_stream)
177+
.with_function("encode_string_reuse_buf_stream", do_encode_bench_string_reuse_buf_stream)
163178
}
164179

165180
fn decode_benchmarks(byte_sizes: &[usize]) -> ParameterizedBenchmark<usize> {

examples/make_tables.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,8 +164,14 @@ fn print_decode_table(alphabet: &[u8], const_name: &str, indent_depth: usize) {
164164
}
165165

166166
fn check_alphabet(alphabet: &[u8]) {
167+
// ensure all characters are distinct
167168
assert_eq!(64, alphabet.len());
168169
let mut set: HashSet<u8> = HashSet::new();
169170
set.extend(alphabet);
170171
assert_eq!(64, set.len());
172+
173+
// must be ASCII to be valid as single UTF-8 bytes
174+
for &b in alphabet {
175+
assert!(b <= 0x7F_u8);
176+
}
171177
}

src/write/encoder_string_writer.rs

Lines changed: 82 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ use super::encoder::EncoderWriter;
88
///
99
/// # Examples
1010
///
11+
/// Buffer base64 in a new String:
12+
///
1113
/// ```
1214
/// use std::io::Write;
1315
///
@@ -21,27 +23,40 @@ use super::encoder::EncoderWriter;
2123
/// assert_eq!("YXNkZg==", &b64_string);
2224
/// ```
2325
///
26+
/// Or, append to an existing String:
27+
///
28+
/// ```
29+
/// use std::io::Write;
30+
///
31+
/// let mut buf = String::from("base64: ");
32+
///
33+
/// let mut enc = base64::write::EncoderStringWriter::from(&mut buf, base64::STANDARD);
34+
///
35+
/// enc.write_all(b"asdf").unwrap();
36+
///
37+
/// // release the &mut reference on buf
38+
/// let _ = enc.into_inner();
39+
///
40+
/// assert_eq!("base64: YXNkZg==", &buf);
41+
/// ```
42+
///
2443
/// # Panics
2544
///
2645
/// Calling `write()` (or related methods) or `finish()` after `finish()` has completed without
2746
/// error is invalid and will panic.
2847
///
2948
/// # Performance
3049
///
31-
/// B64-encoded data is buffered in the heap since the point is to collect it in a String.
32-
pub struct EncoderStringWriter {
33-
encoder: EncoderWriter<Vec<u8>>,
50+
/// Because it has to validate that the base64 is UTF-8, it is about 80% as fast as writing plain
51+
/// bytes to an `io::Write`.
52+
pub struct EncoderStringWriter<S: StrWrite> {
53+
encoder: EncoderWriter<Utf8SingleCodeUnitWriter<S>>,
3454
}
3555

36-
impl EncoderStringWriter {
37-
/// Create a new EncoderStringWriter that will encode with the provided config.
38-
pub fn new(config: Config) -> EncoderStringWriter {
39-
EncoderStringWriter::from(String::new(), config)
40-
}
41-
42-
/// Create a new EncoderStringWriter that will append to the provided string.
43-
pub fn from(s: String, config: Config) -> EncoderStringWriter {
44-
EncoderStringWriter { encoder: EncoderWriter::new(s.into_bytes(), config) }
56+
impl<S: StrWrite> EncoderStringWriter<S> {
57+
/// Create an EncoderStringWriter that will append to the provided `StrWrite`.
58+
pub fn from(str_writer: S, config: Config) -> Self {
59+
EncoderStringWriter { encoder: EncoderWriter::new(Utf8SingleCodeUnitWriter { str_writer }, config) }
4560
}
4661

4762
/// Encode all remaining buffered data, including any trailing incomplete input triples and
@@ -50,15 +65,21 @@ impl EncoderStringWriter {
5065
/// Once this succeeds, no further writes or calls to this method are allowed.
5166
///
5267
/// Returns the base64-encoded form of the accumulated written data.
53-
pub fn into_inner(mut self) -> String {
54-
let buf = self.encoder.finish()
55-
.expect("Writing to a Vec<u8> should never fail");
68+
pub fn into_inner(mut self) -> S {
69+
self.encoder.finish()
70+
.expect("Writing to a Vec<u8> should never fail")
71+
.str_writer
72+
}
73+
}
5674

57-
String::from_utf8(buf).expect("Base64 should always be valid UTF-8")
75+
impl EncoderStringWriter<String> {
76+
/// Create an EncoderStringWriter that will encode into a new String with the provided config.
77+
pub fn new(config: Config) -> Self {
78+
EncoderStringWriter::from(String::new(), config)
5879
}
5980
}
6081

61-
impl<'a> Write for EncoderStringWriter {
82+
impl <S: StrWrite> Write for EncoderStringWriter<S> {
6283
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
6384
self.encoder.write(buf)
6485
}
@@ -68,6 +89,50 @@ impl<'a> Write for EncoderStringWriter {
6889
}
6990
}
7091

92+
/// An abstraction around infallible writes of `str`s.
93+
///
94+
/// Typically, this will just be String.
95+
pub trait StrWrite {
96+
/// The write must succeed, and must write the entire `buf`.
97+
fn write(&mut self, buf: &str);
98+
}
99+
100+
/// As for io::Write, StrWrite is implemented automatically for `&mut S`.
101+
impl<S: StrWrite + ?Sized> StrWrite for &mut S {
102+
fn write(&mut self, buf: &str) {
103+
(**self).write(buf)
104+
}
105+
}
106+
107+
impl StrWrite for String {
108+
fn write(&mut self, buf: &str) {
109+
self.push_str(buf)
110+
}
111+
}
112+
113+
/// A `Write` that can only handle bytes that are valid single-byte UTF-8 code units.
114+
///
115+
/// This is safe because we only use it when writing base64, which is always valid UTF-8.
116+
struct Utf8SingleCodeUnitWriter<S: StrWrite> {
117+
str_writer: S
118+
}
119+
120+
impl<S: StrWrite> io::Write for Utf8SingleCodeUnitWriter<S> {
121+
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
122+
let s = std::str::from_utf8(buf)
123+
.expect("Input must be valid UTF-8");
124+
125+
self.str_writer.write(s);
126+
127+
Ok(buf.len())
128+
}
129+
130+
fn flush(&mut self) -> io::Result<()> {
131+
// no op
132+
Ok(())
133+
}
134+
}
135+
71136
#[cfg(test)]
72137
mod tests {
73138
use crate::encode_config_buf;

0 commit comments

Comments
 (0)