Add docs and other cleanup

marshallpierce · marshallpierce · commit 42967320b3c6 · 2020-09-30T08:30:45.000-06:00
diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
@@ -1,9 +1,10 @@
-# Next
+# 0.13.0
 
 - Config methods are const
 - Added `EncoderStringWriter` to allow encoding directly to a String
 - `EncoderWriter` now owns its delegate writer rather than keeping a reference to it (though refs still work)
-    - As a consequence, it is now possible to extract the delegate writer from an `EncoderWriter` via `finish()`, which returns `Result<W>` instead of `Result<()>`.
+    - As a consequence, it is now possible to extract the delegate writer from an `EncoderWriter` via `finish()`, which returns `Result<W>` instead of `Result<()>`. If you were calling `finish()` explicitly, you will now need to use `let _ = foo.finish()` instead of just `foo.finish()` to avoid a warning about the unused value.
+- When decoding input that has both an invalid length and an invalid symbol as the last byte, `InvalidByte` will be emitted instead of `InvalidLength` to make the problem more obvious.
 
 # 0.12.2
 
diff --git a/benches/benchmarks.rs b/benches/benchmarks.rs
@@ -130,7 +130,7 @@ fn do_encode_bench_string_stream(b: &mut Bencher, &size: &usize) {
     b.iter(|| {
         let mut stream_enc = write::EncoderStringWriter::new(TEST_CONFIG);
         stream_enc.write_all(&v).unwrap();
-            stream_enc.flush().unwrap();
+        stream_enc.flush().unwrap();
         let _ = stream_enc.into_inner();
     });
 }
@@ -174,7 +174,10 @@ fn encode_benchmarks(byte_sizes: &[usize]) -> ParameterizedBenchmark<usize> {
         .with_function("encode_slice", do_encode_bench_slice)
         .with_function("encode_reuse_buf_stream", do_encode_bench_stream)
         .with_function("encode_string_stream", do_encode_bench_string_stream)
-        .with_function("encode_string_reuse_buf_stream", do_encode_bench_string_reuse_buf_stream)
+        .with_function(
+            "encode_string_reuse_buf_stream",
+            do_encode_bench_string_reuse_buf_stream,
+        )
 }
 
 fn decode_benchmarks(byte_sizes: &[usize]) -> ParameterizedBenchmark<usize> {
diff --git a/examples/make_tables.rs b/examples/make_tables.rs
@@ -173,5 +173,7 @@ fn check_alphabet(alphabet: &[u8]) {
     // must be ASCII to be valid as single UTF-8 bytes
     for &b in alphabet {
         assert!(b <= 0x7F_u8);
+        // = is assumed to be padding, so cannot be used as a symbol
+        assert_ne!(b'=', b);
     }
 }
diff --git a/src/decode.rs b/src/decode.rs
@@ -1,4 +1,4 @@
-use crate::{tables, Config};
+use crate::{tables, Config, PAD_BYTE};
 
 #[cfg(any(feature = "alloc", feature = "std", test))]
 use crate::STANDARD;
@@ -30,6 +30,9 @@ pub enum DecodeError {
     InvalidByte(usize, u8),
     /// The length of the input is invalid.
     /// A typical cause of this is stray trailing whitespace or other separator bytes.
+    /// In the case where excess trailing bytes have produced an invalid length *and* the last byte
+    /// is also an invalid base64 symbol (as would be the case for whitespace, etc), `InvalidByte`
+    /// will be emitted instead of `InvalidLength` to make the issue easier to debug.
     InvalidLength,
     /// The last non-padding input symbol's encoded 6 bits have nonzero bits that will be discarded.
     /// This is indicative of corrupted or truncated Base64.
@@ -44,7 +47,7 @@ impl fmt::Display for DecodeError {
             DecodeError::InvalidByte(index, byte) => {
                 write!(f, "Invalid byte {}, offset {}.", byte, index)
             }
-            DecodeError::InvalidLength => write!(f, "Encoded text cannot have a 6-bit remainder. Trailing whitespace or other bytes?"),
+            DecodeError::InvalidLength => write!(f, "Encoded text cannot have a 6-bit remainder."),
             DecodeError::InvalidLastSymbol(index, byte) => {
                 write!(f, "Invalid last symbol {}, offset {}.", byte, index)
             }
@@ -216,7 +219,7 @@ fn decode_helper(
             // trailing whitespace is so common that it's worth it to check the last byte to
             // possibly return a better error message
             if let Some(b) = input.last() {
-                if *b != b'=' && decode_table[*b as usize] == tables::INVALID_VALUE {
+                if *b != PAD_BYTE && decode_table[*b as usize] == tables::INVALID_VALUE {
                     return Err(DecodeError::InvalidByte(input.len() - 1, *b));
                 }
             }
@@ -340,7 +343,7 @@ fn decode_helper(
     let start_of_leftovers = input_index;
     for (i, b) in input[start_of_leftovers..].iter().enumerate() {
         // '=' padding
-        if *b == 0x3D {
+        if *b == PAD_BYTE {
             // There can be bad padding in a few ways:
             // 1 - Padding with non-padding characters after it
             // 2 - Padding after zero or one non-padding characters before it
@@ -381,7 +384,7 @@ fn decode_helper(
         if padding_bytes > 0 {
             return Err(DecodeError::InvalidByte(
                 start_of_leftovers + first_padding_index,
-                0x3D,
+                PAD_BYTE,
             ));
         }
         last_symbol = *b;
@@ -816,7 +819,7 @@ mod tests {
                 symbols[1] = s2;
                 for &s3 in STANDARD.char_set.encode_table().iter() {
                     symbols[2] = s3;
-                    symbols[3] = b'=';
+                    symbols[3] = PAD_BYTE;
 
                     match base64_to_bytes.get(&symbols[..]) {
                         Some(bytes) => {
@@ -852,8 +855,8 @@ mod tests {
             symbols[0] = s1;
             for &s2 in STANDARD.char_set.encode_table().iter() {
                 symbols[1] = s2;
-                symbols[2] = b'=';
-                symbols[3] = b'=';
+                symbols[2] = PAD_BYTE;
+                symbols[3] = PAD_BYTE;
 
                 match base64_to_bytes.get(&symbols[..]) {
                     Some(bytes) => {
@@ -867,22 +870,4 @@ mod tests {
             }
         }
     }
-
-    #[test]
-    fn decode_imap() {
-        assert_eq!(
-            decode_config(b"+,,+", crate::IMAP_MUTF7),
-            decode_config(b"+//+", crate::STANDARD_NO_PAD)
-        );
-    }
-
-    #[test]
-    fn decode_invalid_trailing_bytes() {
-        // The case of trailing newlines is common enough to warrant a test for a good error
-        // message.
-        assert_eq!(
-            decode(b"Zm9vCg==\n"),
-            Err(DecodeError::InvalidByte(8, b'\n'))
-        );
-    }
 }
diff --git a/src/encode.rs b/src/encode.rs
@@ -1,4 +1,4 @@
-use crate::Config;
+use crate::{Config, PAD_BYTE};
 #[cfg(any(feature = "alloc", feature = "std", test))]
 use crate::{chunked_encoder, STANDARD};
 #[cfg(any(feature = "alloc", feature = "std", test))]
@@ -312,7 +312,7 @@ pub fn add_padding(input_len: usize, output: &mut [u8]) -> usize {
     let rem = input_len % 3;
     let mut bytes_written = 0;
     for _ in 0..((3 - rem) % 3) {
-        output[bytes_written] = b'=';
+        output[bytes_written] = PAD_BYTE;
         bytes_written += 1;
     }
 
diff --git a/src/lib.rs b/src/lib.rs
@@ -241,3 +241,5 @@ pub const BINHEX: Config = Config {
     pad: false,
     decode_allow_trailing_bits: false,
 };
+
+const PAD_BYTE: u8 = b'=';
diff --git a/src/write/encoder_string_writer.rs b/src/write/encoder_string_writer.rs
@@ -1,7 +1,7 @@
+use super::encoder::EncoderWriter;
 use crate::Config;
 use std::io;
 use std::io::Write;
-use super::encoder::EncoderWriter;
 
 /// A `Write` implementation that base64-encodes data using the provided config and accumulates the
 /// resulting base64 in memory, which is then exposed as a String via `into_inner()`.
@@ -56,7 +56,9 @@ pub struct EncoderStringWriter<S: StrConsumer> {
 impl<S: StrConsumer> EncoderStringWriter<S> {
     /// Create a EncoderStringWriter that will append to the provided `StrConsumer`.
     pub fn from(str_consumer: S, config: Config) -> Self {
-        EncoderStringWriter { encoder: EncoderWriter::new(Utf8SingleCodeUnitWriter { str_consumer }, config) }
+        EncoderStringWriter {
+            encoder: EncoderWriter::new(Utf8SingleCodeUnitWriter { str_consumer }, config),
+        }
     }
 
     /// Encode all remaining buffered data, including any trailing incomplete input triples and
@@ -66,7 +68,8 @@ impl<S: StrConsumer> EncoderStringWriter<S> {
     ///
     /// Returns the base64-encoded form of the accumulated written data.
     pub fn into_inner(mut self) -> S {
-        self.encoder.finish()
+        self.encoder
+            .finish()
             .expect("Writing to a Vec<u8> should never fail")
             .str_consumer
     }
@@ -79,7 +82,7 @@ impl EncoderStringWriter<String> {
     }
 }
 
-impl <S: StrConsumer> Write for EncoderStringWriter<S> {
+impl<S: StrConsumer> Write for EncoderStringWriter<S> {
     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
         self.encoder.write(buf)
     }
@@ -113,15 +116,14 @@ impl StrConsumer for String {
 ///
 /// This is safe because we only use it when writing base64, which is always valid UTF-8.
 struct Utf8SingleCodeUnitWriter<S: StrConsumer> {
-    str_consumer: S
+    str_consumer: S,
 }
 
 impl<S: StrConsumer> io::Write for Utf8SingleCodeUnitWriter<S> {
     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
         // Because we expect all input to be valid utf-8 individual bytes, we can encode any buffer
         // length
-        let s = std::str::from_utf8(buf)
-            .expect("Input must be valid UTF-8");
+        let s = std::str::from_utf8(buf).expect("Input must be valid UTF-8");
 
         self.str_consumer.consume(s);
 
@@ -138,9 +140,9 @@ impl<S: StrConsumer> io::Write for Utf8SingleCodeUnitWriter<S> {
 mod tests {
     use crate::encode_config_buf;
     use crate::tests::random_config;
+    use crate::write::encoder_string_writer::EncoderStringWriter;
     use rand::Rng;
     use std::io::Write;
-    use crate::write::encoder_string_writer::EncoderStringWriter;
 
     #[test]
     fn every_possible_split_of_input() {
diff --git a/tests/decode.rs b/tests/decode.rs
@@ -305,6 +305,26 @@ fn decode_reject_invalid_bytes_with_correct_error() {
     }
 }
 
+#[test]
+fn decode_imap() {
+    assert_eq!(
+        decode_config(b"+,,+", crate::IMAP_MUTF7),
+        decode_config(b"+//+", crate::STANDARD_NO_PAD)
+    );
+}
+
+#[test]
+fn decode_invalid_trailing_bytes() {
+    // The case of trailing newlines is common enough to warrant a test for a good error
+    // message.
+    assert_eq!(
+        Err(DecodeError::InvalidByte(8, b'\n')),
+        decode(b"Zm9vCg==\n")
+    );
+    // extra padding, however, is still InvalidLength
+    assert_eq!(Err(DecodeError::InvalidLength), decode(b"Zm9vCg==="));
+}
+
 fn config_std_pad() -> Config {
     Config::new(CharacterSet::Standard, true)
 }

Original file line number	Diff line number	Diff line change
`@@ -173,5 +173,7 @@ fn check_alphabet(alphabet: &[u8]) {`
`173`	`173`	`// must be ASCII to be valid as single UTF-8 bytes`
`174`	`174`	`for &b in alphabet {`
`175`	`175`	`assert!(b <= 0x7F_u8);`
	`176`	`+ // = is assumed to be padding, so cannot be used as a symbol`
	`177`	`+ assert_ne!(b'=', b);`
`176`	`178`	`}`
`177`	`179`	`}`