Skip to content

Commit 4296732

Browse files
Add docs and other cleanup
1 parent 6bb3556 commit 4296732

File tree

8 files changed

+55
-40
lines changed

8 files changed

+55
-40
lines changed

RELEASE-NOTES.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1-
# Next
1+
# 0.13.0
22

33
- Config methods are const
44
- Added `EncoderStringWriter` to allow encoding directly to a String
55
- `EncoderWriter` now owns its delegate writer rather than keeping a reference to it (though refs still work)
6-
- As a consequence, it is now possible to extract the delegate writer from an `EncoderWriter` via `finish()`, which returns `Result<W>` instead of `Result<()>`.
6+
- As a consequence, it is now possible to extract the delegate writer from an `EncoderWriter` via `finish()`, which returns `Result<W>` instead of `Result<()>`. If you were calling `finish()` explicitly, you will now need to use `let _ = foo.finish()` instead of just `foo.finish()` to avoid a warning about the unused value.
7+
- When decoding input that has both an invalid length and an invalid symbol as the last byte, `InvalidByte` will be emitted instead of `InvalidLength` to make the problem more obvious.
78

89
# 0.12.2
910

benches/benchmarks.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ fn do_encode_bench_string_stream(b: &mut Bencher, &size: &usize) {
130130
b.iter(|| {
131131
let mut stream_enc = write::EncoderStringWriter::new(TEST_CONFIG);
132132
stream_enc.write_all(&v).unwrap();
133-
stream_enc.flush().unwrap();
133+
stream_enc.flush().unwrap();
134134
let _ = stream_enc.into_inner();
135135
});
136136
}
@@ -174,7 +174,10 @@ fn encode_benchmarks(byte_sizes: &[usize]) -> ParameterizedBenchmark<usize> {
174174
.with_function("encode_slice", do_encode_bench_slice)
175175
.with_function("encode_reuse_buf_stream", do_encode_bench_stream)
176176
.with_function("encode_string_stream", do_encode_bench_string_stream)
177-
.with_function("encode_string_reuse_buf_stream", do_encode_bench_string_reuse_buf_stream)
177+
.with_function(
178+
"encode_string_reuse_buf_stream",
179+
do_encode_bench_string_reuse_buf_stream,
180+
)
178181
}
179182

180183
fn decode_benchmarks(byte_sizes: &[usize]) -> ParameterizedBenchmark<usize> {

examples/make_tables.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,5 +173,7 @@ fn check_alphabet(alphabet: &[u8]) {
173173
// must be ASCII to be valid as single UTF-8 bytes
174174
for &b in alphabet {
175175
assert!(b <= 0x7F_u8);
176+
// = is assumed to be padding, so cannot be used as a symbol
177+
assert_ne!(b'=', b);
176178
}
177179
}

src/decode.rs

Lines changed: 11 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use crate::{tables, Config};
1+
use crate::{tables, Config, PAD_BYTE};
22

33
#[cfg(any(feature = "alloc", feature = "std", test))]
44
use crate::STANDARD;
@@ -30,6 +30,9 @@ pub enum DecodeError {
3030
InvalidByte(usize, u8),
3131
/// The length of the input is invalid.
3232
/// A typical cause of this is stray trailing whitespace or other separator bytes.
33+
/// In the case where excess trailing bytes have produced an invalid length *and* the last byte
34+
/// is also an invalid base64 symbol (as would be the case for whitespace, etc.), `InvalidByte`
35+
/// will be emitted instead of `InvalidLength` to make the issue easier to debug.
3336
InvalidLength,
3437
/// The last non-padding input symbol's encoded 6 bits have nonzero bits that will be discarded.
3538
/// This is indicative of corrupted or truncated Base64.
@@ -44,7 +47,7 @@ impl fmt::Display for DecodeError {
4447
DecodeError::InvalidByte(index, byte) => {
4548
write!(f, "Invalid byte {}, offset {}.", byte, index)
4649
}
47-
DecodeError::InvalidLength => write!(f, "Encoded text cannot have a 6-bit remainder. Trailing whitespace or other bytes?"),
50+
DecodeError::InvalidLength => write!(f, "Encoded text cannot have a 6-bit remainder."),
4851
DecodeError::InvalidLastSymbol(index, byte) => {
4952
write!(f, "Invalid last symbol {}, offset {}.", byte, index)
5053
}
@@ -216,7 +219,7 @@ fn decode_helper(
216219
// trailing whitespace is so common that it's worth it to check the last byte to
217220
// possibly return a better error message
218221
if let Some(b) = input.last() {
219-
if *b != b'=' && decode_table[*b as usize] == tables::INVALID_VALUE {
222+
if *b != PAD_BYTE && decode_table[*b as usize] == tables::INVALID_VALUE {
220223
return Err(DecodeError::InvalidByte(input.len() - 1, *b));
221224
}
222225
}
@@ -340,7 +343,7 @@ fn decode_helper(
340343
let start_of_leftovers = input_index;
341344
for (i, b) in input[start_of_leftovers..].iter().enumerate() {
342345
// '=' padding
343-
if *b == 0x3D {
346+
if *b == PAD_BYTE {
344347
// There can be bad padding in a few ways:
345348
// 1 - Padding with non-padding characters after it
346349
// 2 - Padding after zero or one non-padding characters before it
@@ -381,7 +384,7 @@ fn decode_helper(
381384
if padding_bytes > 0 {
382385
return Err(DecodeError::InvalidByte(
383386
start_of_leftovers + first_padding_index,
384-
0x3D,
387+
PAD_BYTE,
385388
));
386389
}
387390
last_symbol = *b;
@@ -816,7 +819,7 @@ mod tests {
816819
symbols[1] = s2;
817820
for &s3 in STANDARD.char_set.encode_table().iter() {
818821
symbols[2] = s3;
819-
symbols[3] = b'=';
822+
symbols[3] = PAD_BYTE;
820823

821824
match base64_to_bytes.get(&symbols[..]) {
822825
Some(bytes) => {
@@ -852,8 +855,8 @@ mod tests {
852855
symbols[0] = s1;
853856
for &s2 in STANDARD.char_set.encode_table().iter() {
854857
symbols[1] = s2;
855-
symbols[2] = b'=';
856-
symbols[3] = b'=';
858+
symbols[2] = PAD_BYTE;
859+
symbols[3] = PAD_BYTE;
857860

858861
match base64_to_bytes.get(&symbols[..]) {
859862
Some(bytes) => {
@@ -867,22 +870,4 @@ mod tests {
867870
}
868871
}
869872
}
870-
871-
#[test]
872-
fn decode_imap() {
873-
assert_eq!(
874-
decode_config(b"+,,+", crate::IMAP_MUTF7),
875-
decode_config(b"+//+", crate::STANDARD_NO_PAD)
876-
);
877-
}
878-
879-
#[test]
880-
fn decode_invalid_trailing_bytes() {
881-
// The case of trailing newlines is common enough to warrant a test for a good error
882-
// message.
883-
assert_eq!(
884-
decode(b"Zm9vCg==\n"),
885-
Err(DecodeError::InvalidByte(8, b'\n'))
886-
);
887-
}
888873
}

src/encode.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use crate::Config;
1+
use crate::{Config, PAD_BYTE};
22
#[cfg(any(feature = "alloc", feature = "std", test))]
33
use crate::{chunked_encoder, STANDARD};
44
#[cfg(any(feature = "alloc", feature = "std", test))]
@@ -312,7 +312,7 @@ pub fn add_padding(input_len: usize, output: &mut [u8]) -> usize {
312312
let rem = input_len % 3;
313313
let mut bytes_written = 0;
314314
for _ in 0..((3 - rem) % 3) {
315-
output[bytes_written] = b'=';
315+
output[bytes_written] = PAD_BYTE;
316316
bytes_written += 1;
317317
}
318318

src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,3 +241,5 @@ pub const BINHEX: Config = Config {
241241
pad: false,
242242
decode_allow_trailing_bits: false,
243243
};
244+
245+
const PAD_BYTE: u8 = b'=';

src/write/encoder_string_writer.rs

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1+
use super::encoder::EncoderWriter;
12
use crate::Config;
23
use std::io;
34
use std::io::Write;
4-
use super::encoder::EncoderWriter;
55

66
/// A `Write` implementation that base64-encodes data using the provided config and accumulates the
77
/// resulting base64 in memory, which is then exposed as a String via `into_inner()`.
@@ -56,7 +56,9 @@ pub struct EncoderStringWriter<S: StrConsumer> {
5656
impl<S: StrConsumer> EncoderStringWriter<S> {
5757
/// Create an EncoderStringWriter that will append to the provided `StrConsumer`.
5858
pub fn from(str_consumer: S, config: Config) -> Self {
59-
EncoderStringWriter { encoder: EncoderWriter::new(Utf8SingleCodeUnitWriter { str_consumer }, config) }
59+
EncoderStringWriter {
60+
encoder: EncoderWriter::new(Utf8SingleCodeUnitWriter { str_consumer }, config),
61+
}
6062
}
6163

6264
/// Encode all remaining buffered data, including any trailing incomplete input triples and
@@ -66,7 +68,8 @@ impl<S: StrConsumer> EncoderStringWriter<S> {
6668
///
6769
/// Returns the base64-encoded form of the accumulated written data.
6870
pub fn into_inner(mut self) -> S {
69-
self.encoder.finish()
71+
self.encoder
72+
.finish()
7073
.expect("Writing to a Vec<u8> should never fail")
7174
.str_consumer
7275
}
@@ -79,7 +82,7 @@ impl EncoderStringWriter<String> {
7982
}
8083
}
8184

82-
impl <S: StrConsumer> Write for EncoderStringWriter<S> {
85+
impl<S: StrConsumer> Write for EncoderStringWriter<S> {
8386
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
8487
self.encoder.write(buf)
8588
}
@@ -113,15 +116,14 @@ impl StrConsumer for String {
113116
///
114117
/// This is safe because we only use it when writing base64, which is always valid UTF-8.
115118
struct Utf8SingleCodeUnitWriter<S: StrConsumer> {
116-
str_consumer: S
119+
str_consumer: S,
117120
}
118121

119122
impl<S: StrConsumer> io::Write for Utf8SingleCodeUnitWriter<S> {
120123
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
121124
// Because we expect all input to be valid utf-8 individual bytes, we can encode any buffer
122125
// length
123-
let s = std::str::from_utf8(buf)
124-
.expect("Input must be valid UTF-8");
126+
let s = std::str::from_utf8(buf).expect("Input must be valid UTF-8");
125127

126128
self.str_consumer.consume(s);
127129

@@ -138,9 +140,9 @@ impl<S: StrConsumer> io::Write for Utf8SingleCodeUnitWriter<S> {
138140
mod tests {
139141
use crate::encode_config_buf;
140142
use crate::tests::random_config;
143+
use crate::write::encoder_string_writer::EncoderStringWriter;
141144
use rand::Rng;
142145
use std::io::Write;
143-
use crate::write::encoder_string_writer::EncoderStringWriter;
144146

145147
#[test]
146148
fn every_possible_split_of_input() {

tests/decode.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,26 @@ fn decode_reject_invalid_bytes_with_correct_error() {
305305
}
306306
}
307307

308+
#[test]
309+
fn decode_imap() {
310+
assert_eq!(
311+
decode_config(b"+,,+", crate::IMAP_MUTF7),
312+
decode_config(b"+//+", crate::STANDARD_NO_PAD)
313+
);
314+
}
315+
316+
#[test]
317+
fn decode_invalid_trailing_bytes() {
318+
// The case of trailing newlines is common enough to warrant a test for a good error
319+
// message.
320+
assert_eq!(
321+
Err(DecodeError::InvalidByte(8, b'\n')),
322+
decode(b"Zm9vCg==\n")
323+
);
324+
// extra padding, however, is still InvalidLength
325+
assert_eq!(Err(DecodeError::InvalidLength), decode(b"Zm9vCg==="));
326+
}
327+
308328
fn config_std_pad() -> Config {
309329
Config::new(CharacterSet::Standard, true)
310330
}

0 commit comments

Comments
 (0)