From 1839b0d3eecccd8e9ae1280b93bb60218487bdbb Mon Sep 17 00:00:00 2001
From: Patrick Meredith <pmeredit@protonmail.com>
Date: Thu, 12 Dec 2024 14:05:43 -0500
Subject: [PATCH 01/10] Check in Devin's patch

---
 .evergreen/config.yml                 |  31 +++-
 .evergreen/run-fuzzer.sh              |  45 +++++-
 fuzz/Cargo.toml                       |  34 ++++-
 fuzz/fuzz_targets/malformed_length.rs |  20 +++
 fuzz/fuzz_targets/serialization.rs    | 209 ++++++++++++++++++++++++++
 fuzz/fuzz_targets/string_handling.rs  |  27 ++++
 fuzz/fuzz_targets/type_markers.rs     |  14 ++
 fuzz/generate_corpus.rs               | 134 +++++++++++++++++
 fuzz/run-fuzzer.sh                    |  33 ++++
 9 files changed, 536 insertions(+), 11 deletions(-)
 create mode 100644 fuzz/fuzz_targets/malformed_length.rs
 create mode 100644 fuzz/fuzz_targets/serialization.rs
 create mode 100644 fuzz/fuzz_targets/string_handling.rs
 create mode 100644 fuzz/fuzz_targets/type_markers.rs
 create mode 100644 fuzz/generate_corpus.rs
 create mode 100755 fuzz/run-fuzzer.sh

diff --git a/.evergreen/config.yml b/.evergreen/config.yml
index d27fca57..1270f8a5 100644
--- a/.evergreen/config.yml
+++ b/.evergreen/config.yml
@@ -13,15 +13,18 @@ stepback: true
 command_type: system
 
 # Protect ourself against rogue test case, or curl gone wild, that runs forever
-# 12 minutes is the longest we'll ever run
-exec_timeout_secs: 3600 # 12 minutes is the longest we'll ever run
+# 60 minutes is the longest we'll ever run
+exec_timeout_secs: 3600 # 1 hour total for security-focused fuzzing
 
 # What to do when evergreen hits the timeout (`post:` tasks are run automatically)
 timeout:
   - command: shell.exec
     params:
       script: |
-        ls -la
+        echo "Fuzzing timed out. Collecting any available artifacts..."
+        if [ -d "src/fuzz/artifacts" ]; then
+          tar czf "${PROJECT_DIRECTORY}/crash-artifacts.tar.gz" src/fuzz/artifacts/
+        fi
 
 functions:
   "fetch source":
@@ -154,7 +157,25 @@ functions:
     - command: shell.exec
       params:
         script: |
-            # Nothing needs to be done here
+          # Archive crash artifacts if they exist and contain crashes
+          if [ -d "src/fuzz/artifacts" ] && [ "$(ls -A src/fuzz/artifacts)" ]; then
+            echo "Crashes found in artifacts directory. Creating archive..."
+            tar czf "${PROJECT_DIRECTORY}/crash-artifacts.tar.gz" src/fuzz/artifacts/
+          else
+            echo "No crashes found in artifacts directory. Skipping archive creation."
+          fi
+    # Upload crash artifacts if they exist
+    - command: s3.put
+      params:
+        aws_key: ${aws_key}
+        aws_secret: ${aws_secret}
+        local_file: ${PROJECT_DIRECTORY}/crash-artifacts.tar.gz
+        remote_file: ${CURRENT_VERSION}/crash-artifacts.tar.gz
+        bucket: mciuploads
+        permissions: public-read
+        content_type: application/x-gzip
+        optional: true
+
 pre:
   - func: "fetch source"
   - func: "install dependencies"
@@ -259,4 +280,4 @@ buildvariants:
   run_on:
     - ubuntu1804-test
   tasks:
-    - name: "wasm-test"
\ No newline at end of file
+    - name: "wasm-test"
diff --git a/.evergreen/run-fuzzer.sh b/.evergreen/run-fuzzer.sh
index 511799cb..b7f5f9f7 100755
--- a/.evergreen/run-fuzzer.sh
+++ b/.evergreen/run-fuzzer.sh
@@ -6,7 +6,44 @@ set -o errexit
 
 cd fuzz
 
-# each runs for a minute
-cargo +nightly fuzz run deserialize -- -rss_limit_mb=4096 -max_total_time=60
-cargo +nightly fuzz run raw_deserialize -- -rss_limit_mb=4096 -max_total_time=60
-cargo +nightly fuzz run iterate -- -rss_limit_mb=4096 -max_total_time=60
+# Create directories for crashes and corpus
+mkdir -p artifacts
+mkdir -p corpus
+
+# Generate initial corpus if directory is empty
+if [ -z "$(ls -A corpus)" ]; then
+    echo "Generating initial corpus..."
+    cargo run --bin generate_corpus
+fi
+
+# run_fuzzer TARGET: run one cargo-fuzz target for 60 seconds, writing crash inputs under artifacts/
+run_fuzzer() {
+    target=$1
+    echo "Running fuzzer for $target"
+    # Quote the target so the name reaches cargo as a single word (no splitting or globbing)
+    RUST_BACKTRACE=1 cargo +nightly fuzz run "$target" -- \
+        -rss_limit_mb=4096 \
+        -max_total_time=60 \
+        -artifact_prefix=artifacts/ \
+        -print_final_stats=1 \
+        corpus/
+}
+
+# Run existing targets
+run_fuzzer "deserialize"
+run_fuzzer "raw_deserialize"
+run_fuzzer "iterate"
+
+# Run new security-focused targets
+run_fuzzer "malformed_length"
+run_fuzzer "type_markers"
+run_fuzzer "string_handling"
+run_fuzzer "serialization"
+
+# If any crashes were found, save them as test artifacts
+if [ "$(ls -A artifacts)" ]; then
+    echo "Crashes found! Check artifacts directory."
+    exit 1
+else
+    echo "No crashes found."
+fi
diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
index 25c60712..2e79dbb5 100644
--- a/fuzz/Cargo.toml
+++ b/fuzz/Cargo.toml
@@ -1,19 +1,29 @@
-
 [package]
 name = "bson-fuzz"
 version = "0.0.1"
 authors = ["Automatically generated"]
 publish = false
+edition = "2021"
 
 [package.metadata]
 cargo-fuzz = true
 
 [dependencies.bson]
 path = ".."
+
 [dependencies.libfuzzer-sys]
 version = "0.4.0"
 
-# Prevent this from interfering with workspaces
+[dependencies.arbitrary]
+version = "1.3.0"
+features = ["derive"]
+
+[dependencies.serde]
+version = "1.0"
+
+[dependencies.serde_json]
+version = "1.0"
+
 [workspace]
 members = ["."]
 
@@ -32,3 +42,23 @@ path = "fuzz_targets/raw_deserialize.rs"
 [[bin]]
 name = "raw_deserialize_utf8_lossy"
 path = "fuzz_targets/raw_deserialize_utf8_lossy.rs"
+
+[[bin]]
+name = "malformed_length"
+path = "fuzz_targets/malformed_length.rs"
+
+[[bin]]
+name = "type_markers"
+path = "fuzz_targets/type_markers.rs"
+
+[[bin]]
+name = "string_handling"
+path = "fuzz_targets/string_handling.rs"
+
+[[bin]]
+name = "serialization"
+path = "fuzz_targets/serialization.rs"
+
+[[bin]]
+name = "generate_corpus"
+path = "generate_corpus.rs"
diff --git a/fuzz/fuzz_targets/malformed_length.rs b/fuzz/fuzz_targets/malformed_length.rs
new file mode 100644
index 00000000..53ebf147
--- /dev/null
+++ b/fuzz/fuzz_targets/malformed_length.rs
@@ -0,0 +1,20 @@
+//! BSON Document Length Field Fuzzer
+//!
+//! This fuzz test focuses on finding security vulnerabilities related to BSON document length fields.
+//! It specifically targets:
+//! - Integer overflow/underflow in length calculations
+//! - Malformed length fields that could cause buffer overruns
+//! - Mismatches between declared and actual document sizes
+//! - Memory allocation issues with large or invalid lengths
+
+#![no_main]
+#[macro_use] extern crate libfuzzer_sys;
+extern crate bson;
+use bson::RawDocument;
+
+fuzz_target!(|buf: &[u8]| {
+    if buf.len() >= 4 {
+        // Focus on document length field manipulation
+        let _ = RawDocument::from_bytes(buf);
+    }
+});
diff --git a/fuzz/fuzz_targets/serialization.rs b/fuzz/fuzz_targets/serialization.rs
new file mode 100644
index 00000000..ccdf22a7
--- /dev/null
+++ b/fuzz/fuzz_targets/serialization.rs
@@ -0,0 +1,209 @@
+#![no_main]
+use arbitrary::Arbitrary;
+use bson::{
+    raw::{RawBson, RawBsonRef, RawDocument},
+    spec::BinarySubtype,
+    Decimal128,
+};
+use libfuzzer_sys::fuzz_target;
+use std::str::FromStr;
+
+fn convert_bson_ref(bson_ref: RawBsonRef) -> Option<RawBson> {
+    match bson_ref {
+        RawBsonRef::Double(d) => {
+            if d.is_nan() {
+                Some(RawBsonRef::Double(f64::NAN).to_raw_bson())
+            } else if d.is_infinite() {
+                Some(RawBsonRef::Double(d).to_raw_bson())
+            } else {
+                Some(RawBsonRef::Double(d).to_raw_bson())
+            }
+        }
+        RawBsonRef::String(s) => {
+            if !s.is_empty() && !s.contains('\0') && s.len() <= (i32::MAX as usize) {
+                Some(RawBsonRef::String(s).to_raw_bson())
+            } else {
+                None
+            }
+        }
+        RawBsonRef::Document(d) => {
+            let mut valid = true;
+            for result in d.iter() {
+                match result {
+                    Ok((key, _)) if key.is_empty() || key.contains('\0') => {
+                        valid = false;
+                        break;
+                    }
+                    Err(_) => {
+                        valid = false;
+                        break;
+                    }
+                    _ => {}
+                }
+            }
+            if valid {
+                Some(RawBsonRef::Document(d).to_raw_bson())
+            } else {
+                None
+            }
+        }
+        RawBsonRef::Array(a) => {
+            let mut valid = true;
+            for result in a.into_iter() {
+                if result.is_err() {
+                    valid = false;
+                    break;
+                }
+            }
+            if valid {
+                Some(RawBsonRef::Array(a).to_raw_bson())
+            } else {
+                None
+            }
+        }
+        RawBsonRef::Binary(b) => {
+            if b.bytes.len() <= i32::MAX as usize &&
+               match b.subtype {
+                   BinarySubtype::Generic |
+                   BinarySubtype::Function |
+                   BinarySubtype::BinaryOld |
+                   BinarySubtype::UuidOld |
+                   BinarySubtype::Uuid |
+                   BinarySubtype::Md5 |
+                   BinarySubtype::UserDefined(_) => true,
+                   _ => false
+               } {
+                Some(RawBsonRef::Binary(b).to_raw_bson())
+            } else {
+                None
+            }
+        }
+        RawBsonRef::ObjectId(id) => Some(RawBsonRef::ObjectId(id).to_raw_bson()),
+        RawBsonRef::Boolean(b) => Some(RawBsonRef::Boolean(b).to_raw_bson()),
+        RawBsonRef::Null => Some(RawBsonRef::Null.to_raw_bson()),
+        RawBsonRef::RegularExpression(regex) => {
+            let valid_options = "ilmsux";
+            let mut options_sorted = regex.options.chars().collect::<Vec<_>>();
+            options_sorted.sort_unstable();
+            options_sorted.dedup();
+            let sorted_str: String = options_sorted.into_iter().collect();
+
+            if sorted_str.chars().all(|c| valid_options.contains(c)) &&
+               !regex.pattern.contains('\0') &&
+               regex.pattern.len() <= (i32::MAX as usize) {
+                Some(RawBsonRef::RegularExpression(regex).to_raw_bson())
+            } else {
+                None
+            }
+        }
+        RawBsonRef::JavaScriptCode(code) => {
+            if !code.is_empty() && !code.contains('\0') && code.len() <= (i32::MAX as usize) {
+                Some(RawBsonRef::JavaScriptCode(code).to_raw_bson())
+            } else {
+                None
+            }
+        }
+        RawBsonRef::JavaScriptCodeWithScope(code_w_scope) => {
+            if !code_w_scope.code.is_empty() &&
+               !code_w_scope.code.contains('\0') &&
+               code_w_scope.code.len() <= (i32::MAX as usize) {
+                Some(RawBsonRef::JavaScriptCodeWithScope(code_w_scope).to_raw_bson())
+            } else {
+                None
+            }
+        }
+        RawBsonRef::DbPointer(ptr) => {
+            let raw_bson = RawBsonRef::DbPointer(ptr).to_raw_bson();
+            Some(raw_bson)
+        }
+        RawBsonRef::Symbol(s) => {
+            if !s.is_empty() && !s.contains('\0') && s.len() <= i32::MAX as usize {
+                Some(RawBsonRef::Symbol(s).to_raw_bson())
+            } else {
+                None
+            }
+        }
+        RawBsonRef::Int32(i) => Some(RawBsonRef::Int32(i).to_raw_bson()),
+        RawBsonRef::Int64(i) => Some(RawBsonRef::Int64(i).to_raw_bson()),
+        RawBsonRef::Timestamp(ts) => Some(RawBsonRef::Timestamp(ts).to_raw_bson()),
+        RawBsonRef::DateTime(dt) => Some(RawBsonRef::DateTime(dt).to_raw_bson()),
+        RawBsonRef::Decimal128(d) => {
+            let d_str = d.to_string();
+            if d_str.contains("NaN") {
+                if let Ok(nan) = Decimal128::from_str("NaN") {
+                    Some(RawBsonRef::Decimal128(nan).to_raw_bson())
+                } else {
+                    None
+                }
+            } else if d_str == "Infinity" || d_str == "-Infinity" {
+                if let Ok(val) = Decimal128::from_str(&d_str) {
+                    Some(RawBsonRef::Decimal128(val).to_raw_bson())
+                } else {
+                    None
+                }
+            } else {
+                Some(RawBsonRef::Decimal128(d).to_raw_bson())
+            }
+        }
+        RawBsonRef::MinKey => Some(RawBsonRef::MinKey.to_raw_bson()),
+        RawBsonRef::MaxKey => Some(RawBsonRef::MaxKey.to_raw_bson()),
+        RawBsonRef::Undefined => Some(RawBsonRef::Undefined.to_raw_bson()),
+    }
+}
+
+#[derive(Debug, Arbitrary)]
+struct Input {
+    bytes: Vec<u8>,
+}
+
+fuzz_target!(|input: Input| {
+    if let Ok(doc) = RawDocument::from_bytes(&input.bytes) {
+        for result in doc.iter() {
+            if let Ok((key, value)) = result {
+                if let Some(converted) = convert_bson_ref(value) {
+                    let original_bytes = value.to_raw_bson();
+                    match value {
+                        RawBsonRef::Double(d) if d.is_nan() => {
+                            if let Some(converted_ref) = converted.as_raw_bson_ref().as_f64() {
+                                assert!(converted_ref.is_nan(),
+                                       "NaN comparison failed for key: {}", key);
+                            }
+                        }
+                        RawBsonRef::Double(d) if d.is_infinite() => {
+                            if let Some(converted_ref) = converted.as_raw_bson_ref().as_f64() {
+                                assert_eq!(d.is_sign_positive(), converted_ref.is_sign_positive(),
+                                         "Infinity sign mismatch for key: {}", key);
+                                assert!(converted_ref.is_infinite(),
+                                       "Infinity comparison failed for key: {}", key);
+                            }
+                        }
+                        RawBsonRef::Decimal128(d) if d.to_string().contains("NaN") => {
+                            match converted.as_raw_bson_ref() {
+                                RawBsonRef::Decimal128(cd) => {
+                                    assert!(cd.to_string().contains("NaN"),
+                                           "Decimal128 NaN comparison failed for key: {}", key);
+                                }
+                                _ => panic!("Type mismatch: expected Decimal128, got different type for key: {}", key),
+                            }
+                        }
+                        RawBsonRef::Decimal128(d) if d.to_string().contains("Infinity") => {
+                            match converted.as_raw_bson_ref() {
+                                RawBsonRef::Decimal128(cd) => {
+                                    let d_str = d.to_string();
+                                    let cd_str = cd.to_string();
+                                    assert_eq!(d_str, cd_str,
+                                             "Decimal128 Infinity comparison failed for key: {}", key);
+                                }
+                                _ => panic!("Type mismatch: expected Decimal128, got different type for key: {}", key),
+                            }
+                        }
+                        _ => {
+                            assert_eq!(converted, original_bytes,
+                                     "Serialization mismatch for key: {}", key);
+                        }
+                    }
+                }
+            }
+        }
+    }
+});
diff --git a/fuzz/fuzz_targets/string_handling.rs b/fuzz/fuzz_targets/string_handling.rs
new file mode 100644
index 00000000..502a4135
--- /dev/null
+++ b/fuzz/fuzz_targets/string_handling.rs
@@ -0,0 +1,27 @@
+#![no_main]
+#[macro_use]
+extern crate libfuzzer_sys;
+extern crate bson;
+use bson::{spec::BinarySubtype, RawBsonRef, RawDocument};
+use std::convert::TryInto;
+
+fuzz_target!(|buf: &[u8]| {
+    if let Ok(doc) = RawDocument::from_bytes(buf) {
+        for elem in doc.iter_elements().flatten() {
+            // Convert to RawBsonRef and check string-related types
+            if let Ok(bson) = elem.try_into() {
+                match bson {
+                    RawBsonRef::String(s) => {
+                        let _ = s.len();
+                        let _ = s.chars().count();
+                    }
+                    RawBsonRef::Binary(b) if b.subtype == BinarySubtype::Generic => {
+                        // Test UTF-8 validation on binary data
+                        let _ = std::str::from_utf8(b.bytes);
+                    }
+                    _ => {}
+                }
+            }
+        }
+    }
+});
diff --git a/fuzz/fuzz_targets/type_markers.rs b/fuzz/fuzz_targets/type_markers.rs
new file mode 100644
index 00000000..3506b96e
--- /dev/null
+++ b/fuzz/fuzz_targets/type_markers.rs
@@ -0,0 +1,14 @@
+#![no_main]
+#[macro_use]
+extern crate libfuzzer_sys;
+extern crate bson;
+use bson::{RawBsonRef, RawDocument};
+use std::convert::TryInto;
+
+fuzz_target!(|buf: &[u8]| {
+    if let Ok(doc) = RawDocument::from_bytes(buf) {
+        for elem in doc.iter_elements().flatten() {
+            let _: Result<RawBsonRef, _> = elem.try_into();
+        }
+    }
+});
diff --git a/fuzz/generate_corpus.rs b/fuzz/generate_corpus.rs
new file mode 100644
index 00000000..88033a33
--- /dev/null
+++ b/fuzz/generate_corpus.rs
@@ -0,0 +1,134 @@
+use bson::{doc, Bson, Decimal128};
+use std::fs;
+use std::path::Path;
+use std::io::{Error, ErrorKind};
+use std::str::FromStr;
+
+fn main() -> std::io::Result<()> {
+    // Anchor to this crate's directory so seeds land in `fuzz/corpus` from any working directory.
+    let corpus_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("corpus");
+    fs::create_dir_all(&corpus_dir)?;
+    // Generate edge cases for each fuzz target
+    generate_length_edge_cases(&corpus_dir)?;
+    generate_type_marker_cases(&corpus_dir)?;
+    generate_string_edge_cases(&corpus_dir)?;
+    generate_serialization_cases(&corpus_dir)?;
+    Ok(())
+}
+
+/// Writes seed inputs for the `malformed_length` target into `dir/malformed_length`.
+fn generate_length_edge_cases(dir: &Path) -> std::io::Result<()> {
+    let target_dir = dir.join("malformed_length");
+    fs::create_dir_all(&target_dir)?;
+    // Minimal valid document
+    let min_doc = doc! {};
+    fs::write(
+        target_dir.join("min_doc"),
+        bson::to_vec(&min_doc).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?
+    )?;
+
+    // Large but bounded seed (512 KiB): a string of i32::MAX / 2 bytes needs gigabytes of RAM and
+    // disk. NOTE(review): libFuzzer's default input cap is believed to be far smaller — confirm.
+    let large_doc = doc! { "a": "b".repeat(512 * 1024) };
+    fs::write(
+        target_dir.join("large_doc"),
+        bson::to_vec(&large_doc).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?
+    )?;
+    Ok(())
+}
+
+fn generate_type_marker_cases(dir: &Path) -> std::io::Result<()> {
+    let target_dir = dir.join("type_markers");
+    fs::create_dir_all(&target_dir)?;
+
+    // Document with all BSON types
+    let all_types = doc! {
+        "double": 1.0f64,
+        "double_nan": f64::NAN,
+        "double_infinity": f64::INFINITY,
+        "double_neg_infinity": f64::NEG_INFINITY,
+        "string": "test",
+        "document": doc! {},
+        "array": vec![1, 2, 3],
+        "binary": Bson::Binary(bson::Binary { subtype: bson::spec::BinarySubtype::Generic, bytes: vec![1, 2, 3] }),
+        "object_id": bson::oid::ObjectId::new(),
+        "bool": true,
+        "date": bson::DateTime::now(),
+        "null": Bson::Null,
+        "regex": Bson::RegularExpression(bson::Regex { pattern: "pattern".into(), options: "i".into() }),
+        "int32": 123i32,
+        "timestamp": bson::Timestamp { time: 12345, increment: 1 },
+        "int64": 123i64,
+        "decimal128_nan": Decimal128::from_str("NaN").unwrap(),
+        "decimal128_infinity": Decimal128::from_str("Infinity").unwrap(),
+        "decimal128_neg_infinity": Decimal128::from_str("-Infinity").unwrap(),
+        "min_key": Bson::MinKey,
+        "max_key": Bson::MaxKey,
+        "undefined": Bson::Undefined
+    };
+    fs::write(
+        target_dir.join("all_types"),
+        bson::to_vec(&all_types).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?
+    )?;
+
+    Ok(())
+}
+
+fn generate_string_edge_cases(dir: &Path) -> std::io::Result<()> {
+    let target_dir = dir.join("string_handling");
+    fs::create_dir_all(&target_dir)?;
+
+    // UTF-8 edge cases
+    let utf8_cases = doc! {
+        "empty": "",
+        "null_bytes": "hello\0world",
+        "unicode": "🦀💻🔒",
+        "high_surrogate": "\u{10000}",
+        "invalid_continuation": Bson::Binary(bson::Binary {
+            subtype: bson::spec::BinarySubtype::Generic,
+            bytes: vec![0x80u8, 0x80u8, 0x80u8]
+        }),
+        "overlong": Bson::Binary(bson::Binary {
+            subtype: bson::spec::BinarySubtype::Generic,
+            bytes: vec![0xC0u8, 0x80u8]
+        })
+    };
+    fs::write(
+        target_dir.join("utf8_cases"),
+        bson::to_vec(&utf8_cases).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?
+    )?;
+
+    Ok(())
+}
+
+fn generate_serialization_cases(dir: &Path) -> std::io::Result<()> {
+    let target_dir = dir.join("serialization");
+    fs::create_dir_all(&target_dir)?;
+
+    // Deeply nested document
+    let mut nested_doc = doc! {};
+    let mut current = &mut nested_doc;
+    for i in 0..100 {
+        let next_doc = doc! {};
+        current.insert(i.to_string(), next_doc);
+        current = current.get_mut(&i.to_string()).unwrap().as_document_mut().unwrap();
+    }
+    fs::write(
+        target_dir.join("nested_doc"),
+        bson::to_vec(&nested_doc).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?
+    )?;
+
+    // Document with large binary data
+    let large_binary = doc! {
+        "binary": Bson::Binary(bson::Binary {
+            subtype: bson::spec::BinarySubtype::Generic,
+            bytes: vec![0xFF; 1024 * 1024] // 1MB of data
+        })
+    };
+    fs::write(
+        target_dir.join("large_binary"),
+        bson::to_vec(&large_binary).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?
+    )?;
+
+    Ok(())
+}
diff --git a/fuzz/run-fuzzer.sh b/fuzz/run-fuzzer.sh
new file mode 100755
index 00000000..0b005ed3
--- /dev/null
+++ b/fuzz/run-fuzzer.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+set -e
+
+# Directory setup
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+CORPUS_DIR="$SCRIPT_DIR/corpus"
+ARTIFACTS_DIR="$SCRIPT_DIR/artifacts"
+
+# Ensure directories exist
+mkdir -p "$CORPUS_DIR"
+mkdir -p "$ARTIFACTS_DIR"
+
+# Generate corpus if it doesn't exist or is empty
+if [ ! -d "$CORPUS_DIR" ] || [ -z "$(ls -A $CORPUS_DIR)" ]; then
+    echo "Generating initial corpus..."
+    cargo run --bin generate_corpus
+    # Move generated corpus files to the corpus directory
+    mv generated_corpus/* "$CORPUS_DIR/" 2>/dev/null || true
+fi
+
+# List of fuzz targets
+TARGETS=(
+    "malformed_length"
+    "type_markers"
+    "string_handling"
+    "serialization"
+)
+
+# Run each fuzz target with the corpus
+for target in "${TARGETS[@]}"; do
+    echo "Running fuzzer for target: $target"
+    RUST_BACKTRACE=1 cargo fuzz run "$target" "$CORPUS_DIR" -j 1 --release --max-total-time=3600
+done

From 0bb5da292ed5377d94807770070da71a70a82802 Mon Sep 17 00:00:00 2001
From: Patrick Meredith <pmeredit@protonmail.com>
Date: Thu, 12 Dec 2024 14:19:36 -0500
Subject: [PATCH 02/10] artifacts dir does not mean crashes occur, don't feel
 like telling Devin

---
 .evergreen/run-fuzzer.sh | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/.evergreen/run-fuzzer.sh b/.evergreen/run-fuzzer.sh
index b7f5f9f7..b66abf1b 100755
--- a/.evergreen/run-fuzzer.sh
+++ b/.evergreen/run-fuzzer.sh
@@ -39,11 +39,3 @@ run_fuzzer "malformed_length"
 run_fuzzer "type_markers"
 run_fuzzer "string_handling"
 run_fuzzer "serialization"
-
-# If any crashes were found, save them as test artifacts
-if [ "$(ls -A artifacts)" ]; then
-    echo "Crashes found! Check artifacts directory."
-    exit 1
-else
-    echo "No crashes found."
-fi

From 35fb75afa4dbd2b0c66aab37bdbf5bec39aa25cf Mon Sep 17 00:00:00 2001
From: Patrick Meredith <pmeredit@protonmail.com>
Date: Thu, 12 Dec 2024 14:20:19 -0500
Subject: [PATCH 03/10] Remove that extraneous shell script

---
 fuzz/run-fuzzer.sh | 33 ---------------------------------
 1 file changed, 33 deletions(-)
 delete mode 100755 fuzz/run-fuzzer.sh

diff --git a/fuzz/run-fuzzer.sh b/fuzz/run-fuzzer.sh
deleted file mode 100755
index 0b005ed3..00000000
--- a/fuzz/run-fuzzer.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/bin/bash
-set -e
-
-# Directory setup
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-CORPUS_DIR="$SCRIPT_DIR/corpus"
-ARTIFACTS_DIR="$SCRIPT_DIR/artifacts"
-
-# Ensure directories exist
-mkdir -p "$CORPUS_DIR"
-mkdir -p "$ARTIFACTS_DIR"
-
-# Generate corpus if it doesn't exist or is empty
-if [ ! -d "$CORPUS_DIR" ] || [ -z "$(ls -A $CORPUS_DIR)" ]; then
-    echo "Generating initial corpus..."
-    cargo run --bin generate_corpus
-    # Move generated corpus files to the corpus directory
-    mv generated_corpus/* "$CORPUS_DIR/" 2>/dev/null || true
-fi
-
-# List of fuzz targets
-TARGETS=(
-    "malformed_length"
-    "type_markers"
-    "string_handling"
-    "serialization"
-)
-
-# Run each fuzz target with the corpus
-for target in "${TARGETS[@]}"; do
-    echo "Running fuzzer for target: $target"
-    RUST_BACKTRACE=1 cargo fuzz run "$target" "$CORPUS_DIR" -j 1 --release --max-total-time=3600
-done

From b44f491fea6c1678d51e1bf1288abf212113b2b5 Mon Sep 17 00:00:00 2001
From: Patrick Meredith <pmeredit@protonmail.com>
Date: Fri, 13 Dec 2024 11:34:17 -0500
Subject: [PATCH 04/10] Devin thinks the presence of an artifacts directory
 implies crashes, but it is created regardless. I could update Devin on this,
 but I want to move on to other things with Devin.

---
 .evergreen/config.yml |   2 +-
 src/spec.rs           | 198 ------------------------------------------
 2 files changed, 1 insertion(+), 199 deletions(-)
 delete mode 100644 src/spec.rs

diff --git a/.evergreen/config.yml b/.evergreen/config.yml
index 1270f8a5..17a1b393 100644
--- a/.evergreen/config.yml
+++ b/.evergreen/config.yml
@@ -159,7 +159,7 @@ functions:
         script: |
           # Archive crash artifacts if they exist and contain crashes
           if [ -d "src/fuzz/artifacts" ] && [ "$(ls -A src/fuzz/artifacts)" ]; then
-            echo "Crashes found in artifacts directory. Creating archive..."
+            echo "Creating artifacts archive..."
             tar czf "${PROJECT_DIRECTORY}/crash-artifacts.tar.gz" src/fuzz/artifacts/
           else
             echo "No crashes found in artifacts directory. Skipping archive creation."
diff --git a/src/spec.rs b/src/spec.rs
deleted file mode 100644
index 04ec7708..00000000
--- a/src/spec.rs
+++ /dev/null
@@ -1,198 +0,0 @@
-// The MIT License (MIT)
-
-// Copyright (c) 2015 Y. T. Chung <zonyitoo@gmail.com>
-
-// Permission is hereby granted, free of charge, to any person obtaining a copy of
-// this software and associated documentation files (the "Software"), to deal in
-// the Software without restriction, including without limitation the rights to
-// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-// the Software, and to permit persons to whom the Software is furnished to do so,
-// subject to the following conditions:
-
-// The above copyright notice and this permission notice shall be included in all
-// copies or substantial portions of the Software.
-
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
-// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
-// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
-// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-//! Constants derived from the [BSON Specification Version 1.1](http://bsonspec.org/spec.html).
-
-use std::convert::From;
-
-const ELEMENT_TYPE_FLOATING_POINT: u8 = 0x01;
-const ELEMENT_TYPE_UTF8_STRING: u8 = 0x02;
-const ELEMENT_TYPE_EMBEDDED_DOCUMENT: u8 = 0x03;
-const ELEMENT_TYPE_ARRAY: u8 = 0x04;
-const ELEMENT_TYPE_BINARY: u8 = 0x05;
-const ELEMENT_TYPE_UNDEFINED: u8 = 0x06; // Deprecated
-const ELEMENT_TYPE_OBJECT_ID: u8 = 0x07;
-const ELEMENT_TYPE_BOOLEAN: u8 = 0x08;
-const ELEMENT_TYPE_UTC_DATETIME: u8 = 0x09;
-const ELEMENT_TYPE_NULL_VALUE: u8 = 0x0A;
-const ELEMENT_TYPE_REGULAR_EXPRESSION: u8 = 0x0B;
-const ELEMENT_TYPE_DBPOINTER: u8 = 0x0C; // Deprecated
-const ELEMENT_TYPE_JAVASCRIPT_CODE: u8 = 0x0D;
-const ELEMENT_TYPE_SYMBOL: u8 = 0x0E; // Deprecated
-const ELEMENT_TYPE_JAVASCRIPT_CODE_WITH_SCOPE: u8 = 0x0F;
-const ELEMENT_TYPE_32BIT_INTEGER: u8 = 0x10;
-const ELEMENT_TYPE_TIMESTAMP: u8 = 0x11;
-const ELEMENT_TYPE_64BIT_INTEGER: u8 = 0x12;
-#[allow(unused)]
-const ELEMENT_TYPE_128BIT_DECIMAL: u8 = 0x13;
-const ELEMENT_TYPE_MINKEY: u8 = 0xFF;
-const ELEMENT_TYPE_MAXKEY: u8 = 0x7F;
-
-const BINARY_SUBTYPE_GENERIC: u8 = 0x00;
-const BINARY_SUBTYPE_FUNCTION: u8 = 0x01;
-const BINARY_SUBTYPE_BINARY_OLD: u8 = 0x02;
-const BINARY_SUBTYPE_UUID_OLD: u8 = 0x03;
-const BINARY_SUBTYPE_UUID: u8 = 0x04;
-const BINARY_SUBTYPE_MD5: u8 = 0x05;
-const BINARY_SUBTYPE_ENCRYPTED: u8 = 0x06;
-const BINARY_SUBTYPE_COLUMN: u8 = 0x07;
-const BINARY_SUBTYPE_SENSITIVE: u8 = 0x08;
-const BINARY_SUBTYPE_USER_DEFINED: u8 = 0x80;
-
-/// All available BSON element types.
-///
-/// Not all element types are representable by the [`Bson`](crate::Bson) type.
-#[repr(u8)]
-#[derive(Debug, Eq, PartialEq, Clone, Copy)]
-pub enum ElementType {
-    /// 64-bit binary floating point
-    Double = ELEMENT_TYPE_FLOATING_POINT,
-    /// UTF-8 string
-    String = ELEMENT_TYPE_UTF8_STRING,
-    /// Embedded document
-    EmbeddedDocument = ELEMENT_TYPE_EMBEDDED_DOCUMENT,
-    /// Array
-    Array = ELEMENT_TYPE_ARRAY,
-    /// Binary data
-    Binary = ELEMENT_TYPE_BINARY,
-    /// Deprecated. Undefined (value)
-    Undefined = ELEMENT_TYPE_UNDEFINED,
-    /// [ObjectId](http://dochub.mongodb.org/core/objectids)
-    ObjectId = ELEMENT_TYPE_OBJECT_ID,
-    /// Bool value
-    Boolean = ELEMENT_TYPE_BOOLEAN,
-    /// UTC datetime
-    DateTime = ELEMENT_TYPE_UTC_DATETIME,
-    /// Null value
-    Null = ELEMENT_TYPE_NULL_VALUE,
-    /// Regular expression - The first cstring is the regex pattern, the second is the regex
-    /// options string. Options are identified by characters, which must be stored in
-    /// alphabetical order. Valid options are 'i' for case insensitive matching, 'm' for
-    /// multiline matching, 'x' for verbose mode, 'l' to make \w, \W, etc. locale dependent,
-    /// 's' for dotall mode ('.' matches everything), and 'u' to make \w, \W, etc. match
-    /// unicode.
-    RegularExpression = ELEMENT_TYPE_REGULAR_EXPRESSION,
-    /// Deprecated.
-    DbPointer = ELEMENT_TYPE_DBPOINTER,
-    /// JavaScript code
-    JavaScriptCode = ELEMENT_TYPE_JAVASCRIPT_CODE,
-    /// Deprecated.
-    Symbol = ELEMENT_TYPE_SYMBOL,
-    /// JavaScript code w/ scope
-    JavaScriptCodeWithScope = ELEMENT_TYPE_JAVASCRIPT_CODE_WITH_SCOPE,
-    /// 32-bit integer
-    Int32 = ELEMENT_TYPE_32BIT_INTEGER,
-    /// Timestamp
-    Timestamp = ELEMENT_TYPE_TIMESTAMP,
-    /// 64-bit integer
-    Int64 = ELEMENT_TYPE_64BIT_INTEGER,
-    /// [128-bit decimal floating point](https://github.com/mongodb/specifications/blob/master/source/bson-decimal128/decimal128.rst)
-    Decimal128 = ELEMENT_TYPE_128BIT_DECIMAL,
-    MaxKey = ELEMENT_TYPE_MAXKEY,
-    MinKey = ELEMENT_TYPE_MINKEY,
-}
-
-impl ElementType {
-    /// Attempt to convert from a `u8`.
-    #[inline]
-    pub fn from(tag: u8) -> Option<ElementType> {
-        use self::ElementType::*;
-        Some(match tag {
-            ELEMENT_TYPE_FLOATING_POINT => Self::Double,
-            ELEMENT_TYPE_UTF8_STRING => Self::String,
-            ELEMENT_TYPE_EMBEDDED_DOCUMENT => EmbeddedDocument,
-            ELEMENT_TYPE_ARRAY => Array,
-            ELEMENT_TYPE_BINARY => Binary,
-            ELEMENT_TYPE_UNDEFINED => Undefined,
-            ELEMENT_TYPE_OBJECT_ID => ObjectId,
-            ELEMENT_TYPE_BOOLEAN => Boolean,
-            ELEMENT_TYPE_UTC_DATETIME => Self::DateTime,
-            ELEMENT_TYPE_NULL_VALUE => Self::Null,
-            ELEMENT_TYPE_REGULAR_EXPRESSION => RegularExpression,
-            ELEMENT_TYPE_DBPOINTER => DbPointer,
-            ELEMENT_TYPE_JAVASCRIPT_CODE => JavaScriptCode,
-            ELEMENT_TYPE_SYMBOL => Symbol,
-            ELEMENT_TYPE_JAVASCRIPT_CODE_WITH_SCOPE => JavaScriptCodeWithScope,
-            ELEMENT_TYPE_32BIT_INTEGER => Int32,
-            ELEMENT_TYPE_TIMESTAMP => Timestamp,
-            ELEMENT_TYPE_64BIT_INTEGER => Int64,
-            ELEMENT_TYPE_128BIT_DECIMAL => Decimal128,
-            ELEMENT_TYPE_MAXKEY => MaxKey,
-            ELEMENT_TYPE_MINKEY => MinKey,
-            _ => return None,
-        })
-    }
-}
-
-/// The available binary subtypes, plus a user-defined slot.
-#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
-#[non_exhaustive]
-pub enum BinarySubtype {
-    Generic,
-    Function,
-    BinaryOld,
-    UuidOld,
-    Uuid,
-    Md5,
-    Encrypted,
-    Column,
-    Sensitive,
-    UserDefined(u8),
-    Reserved(u8),
-}
-
-impl From<BinarySubtype> for u8 {
-    #[inline]
-    fn from(t: BinarySubtype) -> u8 {
-        match t {
-            BinarySubtype::Generic => BINARY_SUBTYPE_GENERIC,
-            BinarySubtype::Function => BINARY_SUBTYPE_FUNCTION,
-            BinarySubtype::BinaryOld => BINARY_SUBTYPE_BINARY_OLD,
-            BinarySubtype::UuidOld => BINARY_SUBTYPE_UUID_OLD,
-            BinarySubtype::Uuid => BINARY_SUBTYPE_UUID,
-            BinarySubtype::Md5 => BINARY_SUBTYPE_MD5,
-            BinarySubtype::Encrypted => BINARY_SUBTYPE_ENCRYPTED,
-            BinarySubtype::Column => BINARY_SUBTYPE_COLUMN,
-            BinarySubtype::Sensitive => BINARY_SUBTYPE_SENSITIVE,
-            BinarySubtype::UserDefined(x) => x,
-            BinarySubtype::Reserved(x) => x,
-        }
-    }
-}
-
-impl From<u8> for BinarySubtype {
-    #[inline]
-    fn from(t: u8) -> BinarySubtype {
-        match t {
-            BINARY_SUBTYPE_GENERIC => BinarySubtype::Generic,
-            BINARY_SUBTYPE_FUNCTION => BinarySubtype::Function,
-            BINARY_SUBTYPE_BINARY_OLD => BinarySubtype::BinaryOld,
-            BINARY_SUBTYPE_UUID_OLD => BinarySubtype::UuidOld,
-            BINARY_SUBTYPE_UUID => BinarySubtype::Uuid,
-            BINARY_SUBTYPE_MD5 => BinarySubtype::Md5,
-            BINARY_SUBTYPE_ENCRYPTED => BinarySubtype::Encrypted,
-            BINARY_SUBTYPE_COLUMN => BinarySubtype::Column,
-            BINARY_SUBTYPE_SENSITIVE => BinarySubtype::Sensitive,
-            _ if t < BINARY_SUBTYPE_USER_DEFINED => BinarySubtype::Reserved(t),
-            _ => BinarySubtype::UserDefined(t),
-        }
-    }
-}

From 0d24c9fcaeac535fe42f74a11c53023c8726a5ee Mon Sep 17 00:00:00 2001
From: Patrick Meredith <pmeredit@protonmail.com>
Date: Fri, 13 Dec 2024 11:35:31 -0500
Subject: [PATCH 05/10] Devin moved the spec file

---
 src/spec/fmt.rs |   9 +++
 src/spec/mod.rs | 202 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 211 insertions(+)
 create mode 100644 src/spec/fmt.rs
 create mode 100644 src/spec/mod.rs

diff --git a/src/spec/fmt.rs b/src/spec/fmt.rs
new file mode 100644
index 00000000..1cbb6fb3
--- /dev/null
+++ b/src/spec/fmt.rs
@@ -0,0 +1,9 @@
+use std::fmt;
+use crate::spec::BinarySubtype;
+
+impl fmt::LowerHex for BinarySubtype {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let value: u8 = (*self).into();
+        fmt::LowerHex::fmt(&value, f)
+    }
+}
diff --git a/src/spec/mod.rs b/src/spec/mod.rs
new file mode 100644
index 00000000..bce083d3
--- /dev/null
+++ b/src/spec/mod.rs
@@ -0,0 +1,202 @@
+// The MIT License (MIT)
+
+// Copyright (c) 2015 Y. T. Chung <zonyitoo@gmail.com>
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy of
+// this software and associated documentation files (the "Software"), to deal in
+// the Software without restriction, including without limitation the rights to
+// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+// the Software, and to permit persons to whom the Software is furnished to do so,
+// subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+//! Constants derived from the [BSON Specification Version 1.1](http://bsonspec.org/spec.html).
+
+use std::convert::From;
+
+mod fmt;
+#[allow(unused_imports)]
+pub use self::fmt::*;
+
+const ELEMENT_TYPE_FLOATING_POINT: u8 = 0x01;
+const ELEMENT_TYPE_UTF8_STRING: u8 = 0x02;
+const ELEMENT_TYPE_EMBEDDED_DOCUMENT: u8 = 0x03;
+const ELEMENT_TYPE_ARRAY: u8 = 0x04;
+const ELEMENT_TYPE_BINARY: u8 = 0x05;
+const ELEMENT_TYPE_UNDEFINED: u8 = 0x06; // Deprecated
+const ELEMENT_TYPE_OBJECT_ID: u8 = 0x07;
+const ELEMENT_TYPE_BOOLEAN: u8 = 0x08;
+const ELEMENT_TYPE_UTC_DATETIME: u8 = 0x09;
+const ELEMENT_TYPE_NULL_VALUE: u8 = 0x0A;
+const ELEMENT_TYPE_REGULAR_EXPRESSION: u8 = 0x0B;
+const ELEMENT_TYPE_DBPOINTER: u8 = 0x0C; // Deprecated
+const ELEMENT_TYPE_JAVASCRIPT_CODE: u8 = 0x0D;
+const ELEMENT_TYPE_SYMBOL: u8 = 0x0E; // Deprecated
+const ELEMENT_TYPE_JAVASCRIPT_CODE_WITH_SCOPE: u8 = 0x0F;
+const ELEMENT_TYPE_32BIT_INTEGER: u8 = 0x10;
+const ELEMENT_TYPE_TIMESTAMP: u8 = 0x11;
+const ELEMENT_TYPE_64BIT_INTEGER: u8 = 0x12;
+#[allow(unused)]
+const ELEMENT_TYPE_128BIT_DECIMAL: u8 = 0x13;
+const ELEMENT_TYPE_MINKEY: u8 = 0xFF;
+const ELEMENT_TYPE_MAXKEY: u8 = 0x7F;
+
+const BINARY_SUBTYPE_GENERIC: u8 = 0x00;
+const BINARY_SUBTYPE_FUNCTION: u8 = 0x01;
+const BINARY_SUBTYPE_BINARY_OLD: u8 = 0x02;
+const BINARY_SUBTYPE_UUID_OLD: u8 = 0x03;
+const BINARY_SUBTYPE_UUID: u8 = 0x04;
+const BINARY_SUBTYPE_MD5: u8 = 0x05;
+const BINARY_SUBTYPE_ENCRYPTED: u8 = 0x06;
+const BINARY_SUBTYPE_COLUMN: u8 = 0x07;
+const BINARY_SUBTYPE_SENSITIVE: u8 = 0x08;
+const BINARY_SUBTYPE_USER_DEFINED: u8 = 0x80;
+
+/// All available BSON element types.
+///
+/// Not all element types are representable by the [`Bson`](crate::Bson) type.
+#[repr(u8)]
+#[derive(Debug, Eq, PartialEq, Clone, Copy)]
+pub enum ElementType {
+    /// 64-bit binary floating point
+    Double = ELEMENT_TYPE_FLOATING_POINT,
+    /// UTF-8 string
+    String = ELEMENT_TYPE_UTF8_STRING,
+    /// Embedded document
+    EmbeddedDocument = ELEMENT_TYPE_EMBEDDED_DOCUMENT,
+    /// Array
+    Array = ELEMENT_TYPE_ARRAY,
+    /// Binary data
+    Binary = ELEMENT_TYPE_BINARY,
+    /// Deprecated. Undefined (value)
+    Undefined = ELEMENT_TYPE_UNDEFINED,
+    /// [ObjectId](http://dochub.mongodb.org/core/objectids)
+    ObjectId = ELEMENT_TYPE_OBJECT_ID,
+    /// Bool value
+    Boolean = ELEMENT_TYPE_BOOLEAN,
+    /// UTC datetime
+    DateTime = ELEMENT_TYPE_UTC_DATETIME,
+    /// Null value
+    Null = ELEMENT_TYPE_NULL_VALUE,
+    /// Regular expression - The first cstring is the regex pattern, the second is the regex
+    /// options string. Options are identified by characters, which must be stored in
+    /// alphabetical order. Valid options are 'i' for case insensitive matching, 'm' for
+    /// multiline matching, 'x' for verbose mode, 'l' to make \w, \W, etc. locale dependent,
+    /// 's' for dotall mode ('.' matches everything), and 'u' to make \w, \W, etc. match
+    /// unicode.
+    RegularExpression = ELEMENT_TYPE_REGULAR_EXPRESSION,
+    /// Deprecated.
+    DbPointer = ELEMENT_TYPE_DBPOINTER,
+    /// JavaScript code
+    JavaScriptCode = ELEMENT_TYPE_JAVASCRIPT_CODE,
+    /// Deprecated.
+    Symbol = ELEMENT_TYPE_SYMBOL,
+    /// JavaScript code w/ scope
+    JavaScriptCodeWithScope = ELEMENT_TYPE_JAVASCRIPT_CODE_WITH_SCOPE,
+    /// 32-bit integer
+    Int32 = ELEMENT_TYPE_32BIT_INTEGER,
+    /// Timestamp
+    Timestamp = ELEMENT_TYPE_TIMESTAMP,
+    /// 64-bit integer
+    Int64 = ELEMENT_TYPE_64BIT_INTEGER,
+    /// [128-bit decimal floating point](https://github.com/mongodb/specifications/blob/master/source/bson-decimal128/decimal128.rst)
+    Decimal128 = ELEMENT_TYPE_128BIT_DECIMAL,
+    MaxKey = ELEMENT_TYPE_MAXKEY,
+    MinKey = ELEMENT_TYPE_MINKEY,
+}
+
+impl ElementType {
+    /// Attempt to convert from a `u8`.
+    #[inline]
+    pub fn from(tag: u8) -> Option<ElementType> {
+        use self::ElementType::*;
+        Some(match tag {
+            ELEMENT_TYPE_FLOATING_POINT => Self::Double,
+            ELEMENT_TYPE_UTF8_STRING => Self::String,
+            ELEMENT_TYPE_EMBEDDED_DOCUMENT => EmbeddedDocument,
+            ELEMENT_TYPE_ARRAY => Array,
+            ELEMENT_TYPE_BINARY => Binary,
+            ELEMENT_TYPE_UNDEFINED => Undefined,
+            ELEMENT_TYPE_OBJECT_ID => ObjectId,
+            ELEMENT_TYPE_BOOLEAN => Boolean,
+            ELEMENT_TYPE_UTC_DATETIME => Self::DateTime,
+            ELEMENT_TYPE_NULL_VALUE => Self::Null,
+            ELEMENT_TYPE_REGULAR_EXPRESSION => RegularExpression,
+            ELEMENT_TYPE_DBPOINTER => DbPointer,
+            ELEMENT_TYPE_JAVASCRIPT_CODE => JavaScriptCode,
+            ELEMENT_TYPE_SYMBOL => Symbol,
+            ELEMENT_TYPE_JAVASCRIPT_CODE_WITH_SCOPE => JavaScriptCodeWithScope,
+            ELEMENT_TYPE_32BIT_INTEGER => Int32,
+            ELEMENT_TYPE_TIMESTAMP => Timestamp,
+            ELEMENT_TYPE_64BIT_INTEGER => Int64,
+            ELEMENT_TYPE_128BIT_DECIMAL => Decimal128,
+            ELEMENT_TYPE_MAXKEY => MaxKey,
+            ELEMENT_TYPE_MINKEY => MinKey,
+            _ => return None,
+        })
+    }
+}
+
+/// The available binary subtypes, plus a user-defined slot.
+#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
+#[non_exhaustive]
+pub enum BinarySubtype {
+    Generic,
+    Function,
+    BinaryOld,
+    UuidOld,
+    Uuid,
+    Md5,
+    Encrypted,
+    Column,
+    Sensitive,
+    UserDefined(u8),
+    Reserved(u8),
+}
+
+impl From<BinarySubtype> for u8 {
+    #[inline]
+    fn from(t: BinarySubtype) -> u8 {
+        match t {
+            BinarySubtype::Generic => BINARY_SUBTYPE_GENERIC,
+            BinarySubtype::Function => BINARY_SUBTYPE_FUNCTION,
+            BinarySubtype::BinaryOld => BINARY_SUBTYPE_BINARY_OLD,
+            BinarySubtype::UuidOld => BINARY_SUBTYPE_UUID_OLD,
+            BinarySubtype::Uuid => BINARY_SUBTYPE_UUID,
+            BinarySubtype::Md5 => BINARY_SUBTYPE_MD5,
+            BinarySubtype::Encrypted => BINARY_SUBTYPE_ENCRYPTED,
+            BinarySubtype::Column => BINARY_SUBTYPE_COLUMN,
+            BinarySubtype::Sensitive => BINARY_SUBTYPE_SENSITIVE,
+            BinarySubtype::UserDefined(x) => x,
+            BinarySubtype::Reserved(x) => x,
+        }
+    }
+}
+
+impl From<u8> for BinarySubtype {
+    #[inline]
+    fn from(t: u8) -> BinarySubtype {
+        match t {
+            BINARY_SUBTYPE_GENERIC => BinarySubtype::Generic,
+            BINARY_SUBTYPE_FUNCTION => BinarySubtype::Function,
+            BINARY_SUBTYPE_BINARY_OLD => BinarySubtype::BinaryOld,
+            BINARY_SUBTYPE_UUID_OLD => BinarySubtype::UuidOld,
+            BINARY_SUBTYPE_UUID => BinarySubtype::Uuid,
+            BINARY_SUBTYPE_MD5 => BinarySubtype::Md5,
+            BINARY_SUBTYPE_ENCRYPTED => BinarySubtype::Encrypted,
+            BINARY_SUBTYPE_COLUMN => BinarySubtype::Column,
+            BINARY_SUBTYPE_SENSITIVE => BinarySubtype::Sensitive,
+            _ if t < BINARY_SUBTYPE_USER_DEFINED => BinarySubtype::Reserved(t),
+            _ => BinarySubtype::UserDefined(t),
+        }
+    }
+}

From aabac7c2d5976ddfaf96f9f63d63601011b00ef6 Mon Sep 17 00:00:00 2001
From: Patrick Meredith <pmeredit@protonmail.com>
Date: Mon, 16 Dec 2024 16:23:26 -0500
Subject: [PATCH 06/10] Clean up some of Devin's silliness

---
 fuzz/fuzz_targets/malformed_length.rs |  14 ++--
 fuzz/fuzz_targets/serialization.rs    | 105 +++++++++++++++-----------
 fuzz/fuzz_targets/string_handling.rs  |   6 +-
 fuzz/generate_corpus.rs               |  31 +++++---
 src/{spec/mod.rs => spec.rs}          |  11 ++-
 src/spec/fmt.rs                       |   9 ---
 6 files changed, 97 insertions(+), 79 deletions(-)
 rename src/{spec/mod.rs => spec.rs} (97%)
 delete mode 100644 src/spec/fmt.rs

diff --git a/fuzz/fuzz_targets/malformed_length.rs b/fuzz/fuzz_targets/malformed_length.rs
index 53ebf147..a007e146 100644
--- a/fuzz/fuzz_targets/malformed_length.rs
+++ b/fuzz/fuzz_targets/malformed_length.rs
@@ -1,20 +1,20 @@
 //! BSON Document Length Field Fuzzer
 //!
-//! This fuzz test focuses on finding security vulnerabilities related to BSON document length fields.
-//! It specifically targets:
+//! This fuzz test focuses on finding security vulnerabilities related to BSON document length
+//! fields. It specifically targets:
 //! - Integer overflow/underflow in length calculations
 //! - Malformed length fields that could cause buffer overruns
 //! - Mismatches between declared and actual document sizes
 //! - Memory allocation issues with large or invalid lengths
 
 #![no_main]
-#[macro_use] extern crate libfuzzer_sys;
+#[macro_use]
+extern crate libfuzzer_sys;
 extern crate bson;
 use bson::RawDocument;
 
 fuzz_target!(|buf: &[u8]| {
-    if buf.len() >= 4 {
-        // Focus on document length field manipulation
-        let _ = RawDocument::from_bytes(buf);
-    }
+    // Focus on document length field manipulation
+    // This should return an error rather than panic when buf.len() < 4.
+    let _ = RawDocument::from_bytes(buf);
 });
diff --git a/fuzz/fuzz_targets/serialization.rs b/fuzz/fuzz_targets/serialization.rs
index ccdf22a7..ac779142 100644
--- a/fuzz/fuzz_targets/serialization.rs
+++ b/fuzz/fuzz_targets/serialization.rs
@@ -13,8 +13,6 @@ fn convert_bson_ref(bson_ref: RawBsonRef) -> Option<RawBson> {
         RawBsonRef::Double(d) => {
             if d.is_nan() {
                 Some(RawBsonRef::Double(f64::NAN).to_raw_bson())
-            } else if d.is_infinite() {
-                Some(RawBsonRef::Double(d).to_raw_bson())
             } else {
                 Some(RawBsonRef::Double(d).to_raw_bson())
             }
@@ -62,25 +60,23 @@ fn convert_bson_ref(bson_ref: RawBsonRef) -> Option<RawBson> {
             }
         }
         RawBsonRef::Binary(b) => {
-            if b.bytes.len() <= i32::MAX as usize &&
-               match b.subtype {
-                   BinarySubtype::Generic |
-                   BinarySubtype::Function |
-                   BinarySubtype::BinaryOld |
-                   BinarySubtype::UuidOld |
-                   BinarySubtype::Uuid |
-                   BinarySubtype::Md5 |
-                   BinarySubtype::UserDefined(_) => true,
-                   _ => false
-               } {
+            if b.bytes.len() <= i32::MAX as usize
+                && match b.subtype {
+                    BinarySubtype::Generic
+                    | BinarySubtype::Function
+                    | BinarySubtype::BinaryOld
+                    | BinarySubtype::UuidOld
+                    | BinarySubtype::Uuid
+                    | BinarySubtype::Md5
+                    | BinarySubtype::UserDefined(_) => true,
+                    _ => false,
+                }
+            {
                 Some(RawBsonRef::Binary(b).to_raw_bson())
             } else {
                 None
             }
         }
-        RawBsonRef::ObjectId(id) => Some(RawBsonRef::ObjectId(id).to_raw_bson()),
-        RawBsonRef::Boolean(b) => Some(RawBsonRef::Boolean(b).to_raw_bson()),
-        RawBsonRef::Null => Some(RawBsonRef::Null.to_raw_bson()),
         RawBsonRef::RegularExpression(regex) => {
             let valid_options = "ilmsux";
             let mut options_sorted = regex.options.chars().collect::<Vec<_>>();
@@ -88,9 +84,10 @@ fn convert_bson_ref(bson_ref: RawBsonRef) -> Option<RawBson> {
             options_sorted.dedup();
             let sorted_str: String = options_sorted.into_iter().collect();
 
-            if sorted_str.chars().all(|c| valid_options.contains(c)) &&
-               !regex.pattern.contains('\0') &&
-               regex.pattern.len() <= (i32::MAX as usize) {
+            if sorted_str.chars().all(|c| valid_options.contains(c))
+                && !regex.pattern.contains('\0')
+                && regex.pattern.len() <= (i32::MAX as usize)
+            {
                 Some(RawBsonRef::RegularExpression(regex).to_raw_bson())
             } else {
                 None
@@ -104,9 +101,10 @@ fn convert_bson_ref(bson_ref: RawBsonRef) -> Option<RawBson> {
             }
         }
         RawBsonRef::JavaScriptCodeWithScope(code_w_scope) => {
-            if !code_w_scope.code.is_empty() &&
-               !code_w_scope.code.contains('\0') &&
-               code_w_scope.code.len() <= (i32::MAX as usize) {
+            if !code_w_scope.code.is_empty()
+                && !code_w_scope.code.contains('\0')
+                && code_w_scope.code.len() <= (i32::MAX as usize)
+            {
                 Some(RawBsonRef::JavaScriptCodeWithScope(code_w_scope).to_raw_bson())
             } else {
                 None
@@ -123,10 +121,6 @@ fn convert_bson_ref(bson_ref: RawBsonRef) -> Option<RawBson> {
                 None
             }
         }
-        RawBsonRef::Int32(i) => Some(RawBsonRef::Int32(i).to_raw_bson()),
-        RawBsonRef::Int64(i) => Some(RawBsonRef::Int64(i).to_raw_bson()),
-        RawBsonRef::Timestamp(ts) => Some(RawBsonRef::Timestamp(ts).to_raw_bson()),
-        RawBsonRef::DateTime(dt) => Some(RawBsonRef::DateTime(dt).to_raw_bson()),
         RawBsonRef::Decimal128(d) => {
             let d_str = d.to_string();
             if d_str.contains("NaN") {
@@ -145,9 +139,7 @@ fn convert_bson_ref(bson_ref: RawBsonRef) -> Option<RawBson> {
                 Some(RawBsonRef::Decimal128(d).to_raw_bson())
             }
         }
-        RawBsonRef::MinKey => Some(RawBsonRef::MinKey.to_raw_bson()),
-        RawBsonRef::MaxKey => Some(RawBsonRef::MaxKey.to_raw_bson()),
-        RawBsonRef::Undefined => Some(RawBsonRef::Undefined.to_raw_bson()),
+        other => Some(other.to_raw_bson()),
     }
 }
 
@@ -165,25 +157,42 @@ fuzz_target!(|input: Input| {
                     match value {
                         RawBsonRef::Double(d) if d.is_nan() => {
                             if let Some(converted_ref) = converted.as_raw_bson_ref().as_f64() {
-                                assert!(converted_ref.is_nan(),
-                                       "NaN comparison failed for key: {}", key);
+                                assert!(
+                                    converted_ref.is_nan(),
+                                    "NaN comparison failed for key: {}",
+                                    key
+                                );
                             }
                         }
                         RawBsonRef::Double(d) if d.is_infinite() => {
                             if let Some(converted_ref) = converted.as_raw_bson_ref().as_f64() {
-                                assert_eq!(d.is_sign_positive(), converted_ref.is_sign_positive(),
-                                         "Infinity sign mismatch for key: {}", key);
-                                assert!(converted_ref.is_infinite(),
-                                       "Infinity comparison failed for key: {}", key);
+                                assert_eq!(
+                                    d.is_sign_positive(),
+                                    converted_ref.is_sign_positive(),
+                                    "Infinity sign mismatch for key: {}",
+                                    key
+                                );
+                                assert!(
+                                    converted_ref.is_infinite(),
+                                    "Infinity comparison failed for key: {}",
+                                    key
+                                );
                             }
                         }
                         RawBsonRef::Decimal128(d) if d.to_string().contains("NaN") => {
                             match converted.as_raw_bson_ref() {
                                 RawBsonRef::Decimal128(cd) => {
-                                    assert!(cd.to_string().contains("NaN"),
-                                           "Decimal128 NaN comparison failed for key: {}", key);
+                                    assert!(
+                                        cd.to_string().contains("NaN"),
+                                        "Decimal128 NaN comparison failed for key: {}",
+                                        key
+                                    );
                                 }
-                                _ => panic!("Type mismatch: expected Decimal128, got different type for key: {}", key),
+                                _ => panic!(
+                                    "Type mismatch: expected Decimal128, got different type for \
+                                     key: {}",
+                                    key
+                                ),
                             }
                         }
                         RawBsonRef::Decimal128(d) if d.to_string().contains("Infinity") => {
@@ -191,15 +200,25 @@ fuzz_target!(|input: Input| {
                                 RawBsonRef::Decimal128(cd) => {
                                     let d_str = d.to_string();
                                     let cd_str = cd.to_string();
-                                    assert_eq!(d_str, cd_str,
-                                             "Decimal128 Infinity comparison failed for key: {}", key);
+                                    assert_eq!(
+                                        d_str, cd_str,
+                                        "Decimal128 Infinity comparison failed for key: {}",
+                                        key
+                                    );
                                 }
-                                _ => panic!("Type mismatch: expected Decimal128, got different type for key: {}", key),
+                                _ => panic!(
+                                    "Type mismatch: expected Decimal128, got different type for \
+                                     key: {}",
+                                    key
+                                ),
                             }
                         }
                         _ => {
-                            assert_eq!(converted, original_bytes,
-                                     "Serialization mismatch for key: {}", key);
+                            assert_eq!(
+                                converted, original_bytes,
+                                "Serialization mismatch for key: {}",
+                                key
+                            );
                         }
                     }
                 }
diff --git a/fuzz/fuzz_targets/string_handling.rs b/fuzz/fuzz_targets/string_handling.rs
index 502a4135..090d132f 100644
--- a/fuzz/fuzz_targets/string_handling.rs
+++ b/fuzz/fuzz_targets/string_handling.rs
@@ -2,7 +2,7 @@
 #[macro_use]
 extern crate libfuzzer_sys;
 extern crate bson;
-use bson::{spec::BinarySubtype, RawBsonRef, RawDocument};
+use bson::{RawBsonRef, RawDocument};
 use std::convert::TryInto;
 
 fuzz_target!(|buf: &[u8]| {
@@ -15,10 +15,6 @@ fuzz_target!(|buf: &[u8]| {
                         let _ = s.len();
                         let _ = s.chars().count();
                     }
-                    RawBsonRef::Binary(b) if b.subtype == BinarySubtype::Generic => {
-                        // Test UTF-8 validation on binary data
-                        let _ = std::str::from_utf8(b.bytes);
-                    }
                     _ => {}
                 }
             }
diff --git a/fuzz/generate_corpus.rs b/fuzz/generate_corpus.rs
index 88033a33..c67c4cfc 100644
--- a/fuzz/generate_corpus.rs
+++ b/fuzz/generate_corpus.rs
@@ -1,8 +1,10 @@
 use bson::{doc, Bson, Decimal128};
-use std::fs;
-use std::path::Path;
-use std::io::{Error, ErrorKind};
-use std::str::FromStr;
+use std::{
+    fs,
+    io::{Error, ErrorKind},
+    path::Path,
+    str::FromStr,
+};
 
 fn main() -> std::io::Result<()> {
     let corpus_dir = Path::new("fuzz/corpus");
@@ -20,18 +22,21 @@ fn generate_length_edge_cases(dir: &Path) -> std::io::Result<()> {
     let target_dir = dir.join("malformed_length");
     fs::create_dir_all(&target_dir)?;
 
+    // Invalid length
+    fs::write(target_dir.join("invalid_len"), vec![4, 5])?;
+
     // Minimal valid document
     let min_doc = doc! {};
     fs::write(
         target_dir.join("min_doc"),
-        bson::to_vec(&min_doc).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?
+        bson::to_vec(&min_doc).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?,
     )?;
 
     // Document with length near i32::MAX
     let large_doc = doc! { "a": "b".repeat(i32::MAX as usize / 2) };
     fs::write(
         target_dir.join("large_doc"),
-        bson::to_vec(&large_doc).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?
+        bson::to_vec(&large_doc).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?,
     )?;
 
     Ok(())
@@ -68,7 +73,7 @@ fn generate_type_marker_cases(dir: &Path) -> std::io::Result<()> {
     };
     fs::write(
         target_dir.join("all_types"),
-        bson::to_vec(&all_types).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?
+        bson::to_vec(&all_types).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?,
     )?;
 
     Ok(())
@@ -95,7 +100,7 @@ fn generate_string_edge_cases(dir: &Path) -> std::io::Result<()> {
     };
     fs::write(
         target_dir.join("utf8_cases"),
-        bson::to_vec(&utf8_cases).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?
+        bson::to_vec(&utf8_cases).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?,
     )?;
 
     Ok(())
@@ -111,11 +116,15 @@ fn generate_serialization_cases(dir: &Path) -> std::io::Result<()> {
     for i in 0..100 {
         let next_doc = doc! {};
         current.insert(i.to_string(), next_doc);
-        current = current.get_mut(&i.to_string()).unwrap().as_document_mut().unwrap();
+        current = current
+            .get_mut(&i.to_string())
+            .unwrap()
+            .as_document_mut()
+            .unwrap();
     }
     fs::write(
         target_dir.join("nested_doc"),
-        bson::to_vec(&nested_doc).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?
+        bson::to_vec(&nested_doc).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?,
     )?;
 
     // Document with large binary data
@@ -127,7 +136,7 @@ fn generate_serialization_cases(dir: &Path) -> std::io::Result<()> {
     };
     fs::write(
         target_dir.join("large_binary"),
-        bson::to_vec(&large_binary).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?
+        bson::to_vec(&large_binary).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?,
     )?;
 
     Ok(())
diff --git a/src/spec/mod.rs b/src/spec.rs
similarity index 97%
rename from src/spec/mod.rs
rename to src/spec.rs
index bce083d3..e853c150 100644
--- a/src/spec/mod.rs
+++ b/src/spec.rs
@@ -21,11 +21,14 @@
 
 //! Constants derived from the [BSON Specification Version 1.1](http://bsonspec.org/spec.html).
 
-use std::convert::From;
+use std::{convert::From, fmt};
 
-mod fmt;
-#[allow(unused_imports)]
-pub use self::fmt::*;
+impl fmt::LowerHex for BinarySubtype {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let value: u8 = (*self).into();
+        fmt::LowerHex::fmt(&value, f)
+    }
+}
 
 const ELEMENT_TYPE_FLOATING_POINT: u8 = 0x01;
 const ELEMENT_TYPE_UTF8_STRING: u8 = 0x02;
diff --git a/src/spec/fmt.rs b/src/spec/fmt.rs
deleted file mode 100644
index 1cbb6fb3..00000000
--- a/src/spec/fmt.rs
+++ /dev/null
@@ -1,9 +0,0 @@
-use std::fmt;
-use crate::spec::BinarySubtype;
-
-impl fmt::LowerHex for BinarySubtype {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        let value: u8 = (*self).into();
-        fmt::LowerHex::fmt(&value, f)
-    }
-}

From f490e93ba54b3c860252f3ac299ba3e8528bb7c7 Mon Sep 17 00:00:00 2001
From: Patrick Meredith <pmeredit@protonmail.com>
Date: Mon, 16 Dec 2024 17:34:17 -0500
Subject: [PATCH 07/10] Fix the strange serialization test

---
 fuzz/Cargo.toml                    |   4 -
 fuzz/fuzz_targets/serialization.rs | 232 ++---------------------------
 2 files changed, 14 insertions(+), 222 deletions(-)

diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
index 2e79dbb5..abdcffb5 100644
--- a/fuzz/Cargo.toml
+++ b/fuzz/Cargo.toml
@@ -14,10 +14,6 @@ path = ".."
 [dependencies.libfuzzer-sys]
 version = "0.4.0"
 
-[dependencies.arbitrary]
-version = "1.3.0"
-features = ["derive"]
-
 [dependencies.serde]
 version = "1.0"
 
diff --git a/fuzz/fuzz_targets/serialization.rs b/fuzz/fuzz_targets/serialization.rs
index ac779142..82568bc1 100644
--- a/fuzz/fuzz_targets/serialization.rs
+++ b/fuzz/fuzz_targets/serialization.rs
@@ -1,226 +1,22 @@
 #![no_main]
-use arbitrary::Arbitrary;
 use bson::{
-    raw::{RawBson, RawBsonRef, RawDocument},
-    spec::BinarySubtype,
-    Decimal128,
+    raw::{RawDocument, RawDocumentBuf},
+    Document,
 };
 use libfuzzer_sys::fuzz_target;
-use std::str::FromStr;
 
-fn convert_bson_ref(bson_ref: RawBsonRef) -> Option<RawBson> {
-    match bson_ref {
-        RawBsonRef::Double(d) => {
-            if d.is_nan() {
-                Some(RawBsonRef::Double(f64::NAN).to_raw_bson())
-            } else {
-                Some(RawBsonRef::Double(d).to_raw_bson())
-            }
-        }
-        RawBsonRef::String(s) => {
-            if !s.is_empty() && !s.contains('\0') && s.len() <= (i32::MAX as usize) {
-                Some(RawBsonRef::String(s).to_raw_bson())
-            } else {
-                None
-            }
-        }
-        RawBsonRef::Document(d) => {
-            let mut valid = true;
-            for result in d.iter() {
-                match result {
-                    Ok((key, _)) if key.is_empty() || key.contains('\0') => {
-                        valid = false;
-                        break;
-                    }
-                    Err(_) => {
-                        valid = false;
-                        break;
-                    }
-                    _ => {}
-                }
-            }
-            if valid {
-                Some(RawBsonRef::Document(d).to_raw_bson())
-            } else {
-                None
-            }
-        }
-        RawBsonRef::Array(a) => {
-            let mut valid = true;
-            for result in a.into_iter() {
-                if result.is_err() {
-                    valid = false;
-                    break;
-                }
-            }
-            if valid {
-                Some(RawBsonRef::Array(a).to_raw_bson())
-            } else {
-                None
-            }
-        }
-        RawBsonRef::Binary(b) => {
-            if b.bytes.len() <= i32::MAX as usize
-                && match b.subtype {
-                    BinarySubtype::Generic
-                    | BinarySubtype::Function
-                    | BinarySubtype::BinaryOld
-                    | BinarySubtype::UuidOld
-                    | BinarySubtype::Uuid
-                    | BinarySubtype::Md5
-                    | BinarySubtype::UserDefined(_) => true,
-                    _ => false,
-                }
-            {
-                Some(RawBsonRef::Binary(b).to_raw_bson())
-            } else {
-                None
-            }
-        }
-        RawBsonRef::RegularExpression(regex) => {
-            let valid_options = "ilmsux";
-            let mut options_sorted = regex.options.chars().collect::<Vec<_>>();
-            options_sorted.sort_unstable();
-            options_sorted.dedup();
-            let sorted_str: String = options_sorted.into_iter().collect();
-
-            if sorted_str.chars().all(|c| valid_options.contains(c))
-                && !regex.pattern.contains('\0')
-                && regex.pattern.len() <= (i32::MAX as usize)
-            {
-                Some(RawBsonRef::RegularExpression(regex).to_raw_bson())
-            } else {
-                None
-            }
-        }
-        RawBsonRef::JavaScriptCode(code) => {
-            if !code.is_empty() && !code.contains('\0') && code.len() <= (i32::MAX as usize) {
-                Some(RawBsonRef::JavaScriptCode(code).to_raw_bson())
-            } else {
-                None
-            }
-        }
-        RawBsonRef::JavaScriptCodeWithScope(code_w_scope) => {
-            if !code_w_scope.code.is_empty()
-                && !code_w_scope.code.contains('\0')
-                && code_w_scope.code.len() <= (i32::MAX as usize)
-            {
-                Some(RawBsonRef::JavaScriptCodeWithScope(code_w_scope).to_raw_bson())
-            } else {
-                None
-            }
-        }
-        RawBsonRef::DbPointer(ptr) => {
-            let raw_bson = RawBsonRef::DbPointer(ptr).to_raw_bson();
-            Some(raw_bson)
-        }
-        RawBsonRef::Symbol(s) => {
-            if !s.is_empty() && !s.contains('\0') && s.len() <= i32::MAX as usize {
-                Some(RawBsonRef::Symbol(s).to_raw_bson())
-            } else {
-                None
-            }
-        }
-        RawBsonRef::Decimal128(d) => {
-            let d_str = d.to_string();
-            if d_str.contains("NaN") {
-                if let Ok(nan) = Decimal128::from_str("NaN") {
-                    Some(RawBsonRef::Decimal128(nan).to_raw_bson())
-                } else {
-                    None
-                }
-            } else if d_str == "Infinity" || d_str == "-Infinity" {
-                if let Ok(val) = Decimal128::from_str(&d_str) {
-                    Some(RawBsonRef::Decimal128(val).to_raw_bson())
-                } else {
-                    None
-                }
-            } else {
-                Some(RawBsonRef::Decimal128(d).to_raw_bson())
-            }
-        }
-        other => Some(other.to_raw_bson()),
-    }
-}
-
-#[derive(Debug, Arbitrary)]
-struct Input {
-    bytes: Vec<u8>,
-}
-
-fuzz_target!(|input: Input| {
-    if let Ok(doc) = RawDocument::from_bytes(&input.bytes) {
-        for result in doc.iter() {
-            if let Ok((key, value)) = result {
-                if let Some(converted) = convert_bson_ref(value) {
-                    let original_bytes = value.to_raw_bson();
-                    match value {
-                        RawBsonRef::Double(d) if d.is_nan() => {
-                            if let Some(converted_ref) = converted.as_raw_bson_ref().as_f64() {
-                                assert!(
-                                    converted_ref.is_nan(),
-                                    "NaN comparison failed for key: {}",
-                                    key
-                                );
-                            }
-                        }
-                        RawBsonRef::Double(d) if d.is_infinite() => {
-                            if let Some(converted_ref) = converted.as_raw_bson_ref().as_f64() {
-                                assert_eq!(
-                                    d.is_sign_positive(),
-                                    converted_ref.is_sign_positive(),
-                                    "Infinity sign mismatch for key: {}",
-                                    key
-                                );
-                                assert!(
-                                    converted_ref.is_infinite(),
-                                    "Infinity comparison failed for key: {}",
-                                    key
-                                );
-                            }
-                        }
-                        RawBsonRef::Decimal128(d) if d.to_string().contains("NaN") => {
-                            match converted.as_raw_bson_ref() {
-                                RawBsonRef::Decimal128(cd) => {
-                                    assert!(
-                                        cd.to_string().contains("NaN"),
-                                        "Decimal128 NaN comparison failed for key: {}",
-                                        key
-                                    );
-                                }
-                                _ => panic!(
-                                    "Type mismatch: expected Decimal128, got different type for \
-                                     key: {}",
-                                    key
-                                ),
-                            }
-                        }
-                        RawBsonRef::Decimal128(d) if d.to_string().contains("Infinity") => {
-                            match converted.as_raw_bson_ref() {
-                                RawBsonRef::Decimal128(cd) => {
-                                    let d_str = d.to_string();
-                                    let cd_str = cd.to_string();
-                                    assert_eq!(
-                                        d_str, cd_str,
-                                        "Decimal128 Infinity comparison failed for key: {}",
-                                        key
-                                    );
-                                }
-                                _ => panic!(
-                                    "Type mismatch: expected Decimal128, got different type for \
-                                     key: {}",
-                                    key
-                                ),
-                            }
-                        }
-                        _ => {
-                            assert_eq!(
-                                converted, original_bytes,
-                                "Serialization mismatch for key: {}",
-                                key
-                            );
-                        }
-                    }
+fuzz_target!(|input: &[u8]| {
+    if let Ok(rawdoc) = RawDocument::from_bytes(&input) {
+        if let Ok(doc) = Document::try_from(rawdoc) {
+            let out = RawDocumentBuf::try_from(&doc).unwrap();
+            let out_bytes = out.as_bytes();
+            if input != out_bytes {
+                let reserialized = RawDocument::from_bytes(&out_bytes).unwrap();
+                let reserialized_doc = Document::try_from(reserialized).unwrap();
+                // Ensure that the reserialized document is the same as the original document, the
+                // bytes can differ while still resulting in the same Document.
+                if doc != reserialized_doc {
+                    panic!("reserialization failed");
                 }
             }
         }

From ecfec8af45fd88ee69383542fd6125b918fa0196 Mon Sep 17 00:00:00 2001
From: Patrick Meredith <pmeredit@protonmail.com>
Date: Mon, 16 Dec 2024 17:37:38 -0500
Subject: [PATCH 08/10] Use assert

---
 fuzz/fuzz_targets/serialization.rs | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/fuzz/fuzz_targets/serialization.rs b/fuzz/fuzz_targets/serialization.rs
index 82568bc1..b889ca03 100644
--- a/fuzz/fuzz_targets/serialization.rs
+++ b/fuzz/fuzz_targets/serialization.rs
@@ -15,9 +15,7 @@ fuzz_target!(|input: &[u8]| {
                 let reserialized_doc = Document::try_from(reserialized).unwrap();
                 // Ensure that the reserialized document is the same as the original document, the
                 // bytes can differ while still resulting in the same Document.
-                if doc != reserialized_doc {
-                    panic!("reserialization failed");
-                }
+                assert_eq!(doc, reserialized_doc, "reserialization failed");
             }
         }
     }

From 100f70e6ca2b3906f00ab1c434adf956e7c660c5 Mon Sep 17 00:00:00 2001
From: Patrick Meredith <pmeredit@protonmail.com>
Date: Mon, 16 Dec 2024 17:47:21 -0500
Subject: [PATCH 09/10] Still need to worry about Double NaN

---
 fuzz/fuzz_targets/serialization.rs | 35 +++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/fuzz/fuzz_targets/serialization.rs b/fuzz/fuzz_targets/serialization.rs
index b889ca03..e5ba621d 100644
--- a/fuzz/fuzz_targets/serialization.rs
+++ b/fuzz/fuzz_targets/serialization.rs
@@ -1,10 +1,37 @@
 #![no_main]
 use bson::{
     raw::{RawDocument, RawDocumentBuf},
+    Bson,
     Document,
 };
 use libfuzzer_sys::fuzz_target;
 
+/// Returns true when both documents hold the same keys and values, counting NaN doubles as
+/// equal. Nested documents, arrays and code-with-scope values are compared recursively, so a
+/// NaN below the top level cannot produce a false mismatch. Key order is not significant.
+fn compare_docs(doc1: &Document, doc2: &Document) -> bool {
+    // `f64` NaN never equals itself, so plain `==` on `Bson` rejects any value containing one.
+    fn same_value(a: &Bson, b: &Bson) -> bool {
+        match (a, b) {
+            (Bson::Double(x), Bson::Double(y)) => (x.is_nan() && y.is_nan()) || x == y,
+            (Bson::Document(x), Bson::Document(y)) => compare_docs(x, y),
+            (Bson::Array(x), Bson::Array(y)) => {
+                x.len() == y.len() && x.iter().zip(y).all(|(l, r)| same_value(l, r))
+            }
+            (Bson::JavaScriptCodeWithScope(x), Bson::JavaScriptCodeWithScope(y)) => {
+                x.code == y.code && compare_docs(&x.scope, &y.scope)
+            }
+            _ => a == b,
+        }
+    }
+
+    // Equal lengths plus every key of `doc1` matching in `doc2` implies the key sets are equal.
+    doc1.len() == doc2.len()
+        && doc1
+            .iter()
+            .all(|(key, value)| doc2.get(key).map_or(false, |other| same_value(value, other)))
+}
+
 fuzz_target!(|input: &[u8]| {
     if let Ok(rawdoc) = RawDocument::from_bytes(&input) {
         if let Ok(doc) = Document::try_from(rawdoc) {
@@ -15,7 +42,13 @@ fuzz_target!(|input: &[u8]| {
                 let reserialized_doc = Document::try_from(reserialized).unwrap();
                 // Ensure that the reserialized document is the same as the original document, the
                 // bytes can differ while still resulting in the same Document.
-                assert_eq!(doc, reserialized_doc, "reserialization failed");
+                if !compare_docs(&doc, &reserialized_doc) {
+                    panic!(
+                        "Reserialized document is not the same as the original document: {:?} != \
+                         {:?}",
+                        doc, reserialized_doc
+                    );
+                }
             }
         }
     }

From 81d4a093e8738748c86e85ffd781d026fc393580 Mon Sep 17 00:00:00 2001
From: Patrick Meredith <pmeredit@protonmail.com>
Date: Tue, 17 Dec 2024 17:27:26 -0500
Subject: [PATCH 10/10] Remove redundant test

---
 fuzz/fuzz_targets/malformed_length.rs | 20 --------------------
 1 file changed, 20 deletions(-)
 delete mode 100644 fuzz/fuzz_targets/malformed_length.rs

diff --git a/fuzz/fuzz_targets/malformed_length.rs b/fuzz/fuzz_targets/malformed_length.rs
deleted file mode 100644
index a007e146..00000000
--- a/fuzz/fuzz_targets/malformed_length.rs
+++ /dev/null
@@ -1,20 +0,0 @@
-//! BSON Document Length Field Fuzzer
-//!
-//! This fuzz test focuses on finding security vulnerabilities related to BSON document length
-//! fields. It specifically targets:
-//! - Integer overflow/underflow in length calculations
-//! - Malformed length fields that could cause buffer overruns
-//! - Mismatches between declared and actual document sizes
-//! - Memory allocation issues with large or invalid lengths
-
-#![no_main]
-#[macro_use]
-extern crate libfuzzer_sys;
-extern crate bson;
-use bson::RawDocument;
-
-fuzz_target!(|buf: &[u8]| {
-    // Focus on document length field manipulation
-    // This should return an error if the buf.len() < 4 rather than panic.
-    let _ = RawDocument::from_bytes(buf);
-});