From 1839b0d3eecccd8e9ae1280b93bb60218487bdbb Mon Sep 17 00:00:00 2001 From: Patrick Meredith Date: Thu, 12 Dec 2024 14:05:43 -0500 Subject: [PATCH 01/10] Check in Devin's patch --- .evergreen/config.yml | 31 +++- .evergreen/run-fuzzer.sh | 45 +++++- fuzz/Cargo.toml | 34 ++++- fuzz/fuzz_targets/malformed_length.rs | 20 +++ fuzz/fuzz_targets/serialization.rs | 209 ++++++++++++++++++++++++++ fuzz/fuzz_targets/string_handling.rs | 27 ++++ fuzz/fuzz_targets/type_markers.rs | 14 ++ fuzz/generate_corpus.rs | 134 +++++++++++++++++ fuzz/run-fuzzer.sh | 33 ++++ 9 files changed, 536 insertions(+), 11 deletions(-) create mode 100644 fuzz/fuzz_targets/malformed_length.rs create mode 100644 fuzz/fuzz_targets/serialization.rs create mode 100644 fuzz/fuzz_targets/string_handling.rs create mode 100644 fuzz/fuzz_targets/type_markers.rs create mode 100644 fuzz/generate_corpus.rs create mode 100755 fuzz/run-fuzzer.sh diff --git a/.evergreen/config.yml b/.evergreen/config.yml index d27fca57..1270f8a5 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -13,15 +13,18 @@ stepback: true command_type: system # Protect ourself against rogue test case, or curl gone wild, that runs forever -# 12 minutes is the longest we'll ever run -exec_timeout_secs: 3600 # 12 minutes is the longest we'll ever run +# 60 minutes is the longest we'll ever run +exec_timeout_secs: 3600 # 1 hour total for security-focused fuzzing # What to do when evergreen hits the timeout (`post:` tasks are run automatically) timeout: - command: shell.exec params: script: | - ls -la + echo "Fuzzing timed out. Collecting any available artifacts..." + if [ -d "src/fuzz/artifacts" ]; then + tar czf "${PROJECT_DIRECTORY}/crash-artifacts.tar.gz" src/fuzz/artifacts/ + fi functions: "fetch source": @@ -154,7 +157,25 @@ functions: - command: shell.exec params: script: | - # Nothing needs to be done here + # Archive crash artifacts if they exist and contain crashes + if [ -d "src/fuzz/artifacts" ] && [ "$(ls -A src/fuzz/artifacts)" ]; then + echo "Crashes found in artifacts directory. Creating archive..." + tar czf "${PROJECT_DIRECTORY}/crash-artifacts.tar.gz" src/fuzz/artifacts/ + else + echo "No crashes found in artifacts directory. Skipping archive creation." + fi + # Upload crash artifacts if they exist + - command: s3.put + params: + aws_key: ${aws_key} + aws_secret: ${aws_secret} + local_file: ${PROJECT_DIRECTORY}/crash-artifacts.tar.gz + remote_file: ${CURRENT_VERSION}/crash-artifacts.tar.gz + bucket: mciuploads + permissions: public-read + content_type: application/x-gzip + optional: true + pre: - func: "fetch source" - func: "install dependencies" @@ -259,4 +280,4 @@ buildvariants: run_on: - ubuntu1804-test tasks: - - name: "wasm-test" \ No newline at end of file + - name: "wasm-test" diff --git a/.evergreen/run-fuzzer.sh b/.evergreen/run-fuzzer.sh index 511799cb..b7f5f9f7 100755 --- a/.evergreen/run-fuzzer.sh +++ b/.evergreen/run-fuzzer.sh @@ -6,7 +6,44 @@ set -o errexit cd fuzz -# each runs for a minute -cargo +nightly fuzz run deserialize -- -rss_limit_mb=4096 -max_total_time=60 -cargo +nightly fuzz run raw_deserialize -- -rss_limit_mb=4096 -max_total_time=60 -cargo +nightly fuzz run iterate -- -rss_limit_mb=4096 -max_total_time=60 +# Create directories for crashes and corpus +mkdir -p artifacts +mkdir -p corpus + +# Generate initial corpus if directory is empty +if [ -z "$(ls -A corpus)" ]; then + echo "Generating initial corpus..." + cargo run --bin generate_corpus +fi + +# Function to run fuzzer and collect crashes +run_fuzzer() { + target=$1 + echo "Running fuzzer for $target" + # Run fuzzer and redirect crashes to artifacts directory + RUST_BACKTRACE=1 cargo +nightly fuzz run $target -- \ + -rss_limit_mb=4096 \ + -max_total_time=60 \ + -artifact_prefix=artifacts/ \ + -print_final_stats=1 \ + corpus/ +} + +# Run existing targets +run_fuzzer "deserialize" +run_fuzzer "raw_deserialize" +run_fuzzer "iterate" + +# Run new security-focused targets +run_fuzzer "malformed_length" +run_fuzzer "type_markers" +run_fuzzer "string_handling" +run_fuzzer "serialization" + +# If any crashes were found, save them as test artifacts +if [ "$(ls -A artifacts)" ]; then + echo "Crashes found! Check artifacts directory." + exit 1 +else + echo "No crashes found." +fi diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 25c60712..2e79dbb5 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -1,19 +1,29 @@ - [package] name = "bson-fuzz" version = "0.0.1" authors = ["Automatically generated"] publish = false +edition = "2021" [package.metadata] cargo-fuzz = true [dependencies.bson] path = ".." + [dependencies.libfuzzer-sys] version = "0.4.0" -# Prevent this from interfering with workspaces +[dependencies.arbitrary] +version = "1.3.0" +features = ["derive"] + +[dependencies.serde] +version = "1.0" + +[dependencies.serde_json] +version = "1.0" + [workspace] members = ["."] @@ -32,3 +42,23 @@ path = "fuzz_targets/raw_deserialize.rs" [[bin]] name = "raw_deserialize_utf8_lossy" path = "fuzz_targets/raw_deserialize_utf8_lossy.rs" + +[[bin]] +name = "malformed_length" +path = "fuzz_targets/malformed_length.rs" + +[[bin]] +name = "type_markers" +path = "fuzz_targets/type_markers.rs" + +[[bin]] +name = "string_handling" +path = "fuzz_targets/string_handling.rs" + +[[bin]] +name = "serialization" +path = "fuzz_targets/serialization.rs" + +[[bin]] +name = "generate_corpus" +path = "generate_corpus.rs" diff --git a/fuzz/fuzz_targets/malformed_length.rs b/fuzz/fuzz_targets/malformed_length.rs new file mode 100644 index 00000000..53ebf147 --- /dev/null +++ b/fuzz/fuzz_targets/malformed_length.rs @@ -0,0 +1,20 @@ +//! BSON Document Length Field Fuzzer +//! +//! This fuzz test focuses on finding security vulnerabilities related to BSON document length fields. +//! It specifically targets: +//! - Integer overflow/underflow in length calculations +//! - Malformed length fields that could cause buffer overruns +//! - Mismatches between declared and actual document sizes +//! - Memory allocation issues with large or invalid lengths + +#![no_main] +#[macro_use] extern crate libfuzzer_sys; +extern crate bson; +use bson::RawDocument; + +fuzz_target!(|buf: &[u8]| { + if buf.len() >= 4 { + // Focus on document length field manipulation + let _ = RawDocument::from_bytes(buf); + } +}); diff --git a/fuzz/fuzz_targets/serialization.rs b/fuzz/fuzz_targets/serialization.rs new file mode 100644 index 00000000..ccdf22a7 --- /dev/null +++ b/fuzz/fuzz_targets/serialization.rs @@ -0,0 +1,209 @@ +#![no_main] +use arbitrary::Arbitrary; +use bson::{ + raw::{RawBson, RawBsonRef, RawDocument}, + spec::BinarySubtype, + Decimal128, +}; +use libfuzzer_sys::fuzz_target; +use std::str::FromStr; + +fn convert_bson_ref(bson_ref: RawBsonRef) -> Option { + match bson_ref { + RawBsonRef::Double(d) => { + if d.is_nan() { + Some(RawBsonRef::Double(f64::NAN).to_raw_bson()) + } else if d.is_infinite() { + Some(RawBsonRef::Double(d).to_raw_bson()) + } else { + Some(RawBsonRef::Double(d).to_raw_bson()) + } + } + RawBsonRef::String(s) => { + if !s.is_empty() && !s.contains('\0') && s.len() <= (i32::MAX as usize) { + Some(RawBsonRef::String(s).to_raw_bson()) + } else { + None + } + } + RawBsonRef::Document(d) => { + let mut valid = true; + for result in d.iter() { + match result { + Ok((key, _)) if key.is_empty() || key.contains('\0') => { + valid = false; + break; + } + Err(_) => { + valid = false; + break; + } + _ => {} + } + } + if valid { + Some(RawBsonRef::Document(d).to_raw_bson()) + } else { + None + } + } + RawBsonRef::Array(a) => { + let mut valid = true; + for result in a.into_iter() { + if result.is_err() { + valid = false; + break; + } + } + if valid { + Some(RawBsonRef::Array(a).to_raw_bson()) + } else { + None + } + } + RawBsonRef::Binary(b) => { + if b.bytes.len() <= i32::MAX as usize && + match b.subtype { + BinarySubtype::Generic | + BinarySubtype::Function | + BinarySubtype::BinaryOld | + BinarySubtype::UuidOld | + BinarySubtype::Uuid | + BinarySubtype::Md5 | + BinarySubtype::UserDefined(_) => true, + _ => false + } { + Some(RawBsonRef::Binary(b).to_raw_bson()) + } else { + None + } + } + RawBsonRef::ObjectId(id) => Some(RawBsonRef::ObjectId(id).to_raw_bson()), + RawBsonRef::Boolean(b) => Some(RawBsonRef::Boolean(b).to_raw_bson()), + RawBsonRef::Null => Some(RawBsonRef::Null.to_raw_bson()), + RawBsonRef::RegularExpression(regex) => { + let valid_options = "ilmsux"; + let mut options_sorted = regex.options.chars().collect::>(); + options_sorted.sort_unstable(); + options_sorted.dedup(); + let sorted_str: String = options_sorted.into_iter().collect(); + + if sorted_str.chars().all(|c| valid_options.contains(c)) && + !regex.pattern.contains('\0') && + regex.pattern.len() <= (i32::MAX as usize) { + Some(RawBsonRef::RegularExpression(regex).to_raw_bson()) + } else { + None + } + } + RawBsonRef::JavaScriptCode(code) => { + if !code.is_empty() && !code.contains('\0') && code.len() <= (i32::MAX as usize) { + Some(RawBsonRef::JavaScriptCode(code).to_raw_bson()) + } else { + None + } + } + RawBsonRef::JavaScriptCodeWithScope(code_w_scope) => { + if !code_w_scope.code.is_empty() && + !code_w_scope.code.contains('\0') && + code_w_scope.code.len() <= (i32::MAX as usize) { + Some(RawBsonRef::JavaScriptCodeWithScope(code_w_scope).to_raw_bson()) + } else { + None + } + } + RawBsonRef::DbPointer(ptr) => { + let raw_bson = RawBsonRef::DbPointer(ptr).to_raw_bson(); + Some(raw_bson) + } + RawBsonRef::Symbol(s) => { + if !s.is_empty() && !s.contains('\0') && s.len() <= i32::MAX as usize { + Some(RawBsonRef::Symbol(s).to_raw_bson()) + } else { + None + } + } + RawBsonRef::Int32(i) => Some(RawBsonRef::Int32(i).to_raw_bson()), + RawBsonRef::Int64(i) => Some(RawBsonRef::Int64(i).to_raw_bson()), + RawBsonRef::Timestamp(ts) => Some(RawBsonRef::Timestamp(ts).to_raw_bson()), + RawBsonRef::DateTime(dt) => Some(RawBsonRef::DateTime(dt).to_raw_bson()), + RawBsonRef::Decimal128(d) => { + let d_str = d.to_string(); + if d_str.contains("NaN") { + if let Ok(nan) = Decimal128::from_str("NaN") { + Some(RawBsonRef::Decimal128(nan).to_raw_bson()) + } else { + None + } + } else if d_str == "Infinity" || d_str == "-Infinity" { + if let Ok(val) = Decimal128::from_str(&d_str) { + Some(RawBsonRef::Decimal128(val).to_raw_bson()) + } else { + None + } + } else { + Some(RawBsonRef::Decimal128(d).to_raw_bson()) + } + } + RawBsonRef::MinKey => Some(RawBsonRef::MinKey.to_raw_bson()), + RawBsonRef::MaxKey => Some(RawBsonRef::MaxKey.to_raw_bson()), + RawBsonRef::Undefined => Some(RawBsonRef::Undefined.to_raw_bson()), + } +} + +#[derive(Debug, Arbitrary)] +struct Input { + bytes: Vec, +} + +fuzz_target!(|input: Input| { + if let Ok(doc) = RawDocument::from_bytes(&input.bytes) { + for result in doc.iter() { + if let Ok((key, value)) = result { + if let Some(converted) = convert_bson_ref(value) { + let original_bytes = value.to_raw_bson(); + match value { + RawBsonRef::Double(d) if d.is_nan() => { + if let Some(converted_ref) = converted.as_raw_bson_ref().as_f64() { + assert!(converted_ref.is_nan(), + "NaN comparison failed for key: {}", key); + } + } + RawBsonRef::Double(d) if d.is_infinite() => { + if let Some(converted_ref) = converted.as_raw_bson_ref().as_f64() { + assert_eq!(d.is_sign_positive(), converted_ref.is_sign_positive(), + "Infinity sign mismatch for key: {}", key); + assert!(converted_ref.is_infinite(), + "Infinity comparison failed for key: {}", key); + } + } + RawBsonRef::Decimal128(d) if d.to_string().contains("NaN") => { + match converted.as_raw_bson_ref() { + RawBsonRef::Decimal128(cd) => { + assert!(cd.to_string().contains("NaN"), + "Decimal128 NaN comparison failed for key: {}", key); + } + _ => panic!("Type mismatch: expected Decimal128, got different type for key: {}", key), + } + } + RawBsonRef::Decimal128(d) if d.to_string().contains("Infinity") => { + match converted.as_raw_bson_ref() { + RawBsonRef::Decimal128(cd) => { + let d_str = d.to_string(); + let cd_str = cd.to_string(); + assert_eq!(d_str, cd_str, + "Decimal128 Infinity comparison failed for key: {}", key); + } + _ => panic!("Type mismatch: expected Decimal128, got different type for key: {}", key), + } + } + _ => { + assert_eq!(converted, original_bytes, + "Serialization mismatch for key: {}", key); + } + } + } + } + } + } +}); diff --git a/fuzz/fuzz_targets/string_handling.rs b/fuzz/fuzz_targets/string_handling.rs new file mode 100644 index 00000000..502a4135 --- /dev/null +++ b/fuzz/fuzz_targets/string_handling.rs @@ -0,0 +1,27 @@ +#![no_main] +#[macro_use] +extern crate libfuzzer_sys; +extern crate bson; +use bson::{spec::BinarySubtype, RawBsonRef, RawDocument}; +use std::convert::TryInto; + +fuzz_target!(|buf: &[u8]| { + if let Ok(doc) = RawDocument::from_bytes(buf) { + for elem in doc.iter_elements().flatten() { + // Convert to RawBsonRef and check string-related types + if let Ok(bson) = elem.try_into() { + match bson { + RawBsonRef::String(s) => { + let _ = s.len(); + let _ = s.chars().count(); + } + RawBsonRef::Binary(b) if b.subtype == BinarySubtype::Generic => { + // Test UTF-8 validation on binary data + let _ = std::str::from_utf8(b.bytes); + } + _ => {} + } + } + } + } +}); diff --git a/fuzz/fuzz_targets/type_markers.rs b/fuzz/fuzz_targets/type_markers.rs new file mode 100644 index 00000000..3506b96e --- /dev/null +++ b/fuzz/fuzz_targets/type_markers.rs @@ -0,0 +1,14 @@ +#![no_main] +#[macro_use] +extern crate libfuzzer_sys; +extern crate bson; +use bson::{RawBsonRef, RawDocument}; +use std::convert::TryInto; + +fuzz_target!(|buf: &[u8]| { + if let Ok(doc) = RawDocument::from_bytes(buf) { + for elem in doc.iter_elements().flatten() { + let _: Result = elem.try_into(); + } + } +}); diff --git a/fuzz/generate_corpus.rs b/fuzz/generate_corpus.rs new file mode 100644 index 00000000..88033a33 --- /dev/null +++ b/fuzz/generate_corpus.rs @@ -0,0 +1,134 @@ +use bson::{doc, Bson, Decimal128}; +use std::fs; +use std::path::Path; +use std::io::{Error, ErrorKind}; +use std::str::FromStr; + +fn main() -> std::io::Result<()> { + let corpus_dir = Path::new("fuzz/corpus"); + fs::create_dir_all(corpus_dir)?; + + // Generate edge cases for each fuzz target + generate_length_edge_cases(corpus_dir)?; + generate_type_marker_cases(corpus_dir)?; + generate_string_edge_cases(corpus_dir)?; + generate_serialization_cases(corpus_dir)?; + Ok(()) +} + +fn generate_length_edge_cases(dir: &Path) -> std::io::Result<()> { + let target_dir = dir.join("malformed_length"); + fs::create_dir_all(&target_dir)?; + + // Minimal valid document + let min_doc = doc! {}; + fs::write( + target_dir.join("min_doc"), + bson::to_vec(&min_doc).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))? + )?; + + // Document with length near i32::MAX + let large_doc = doc! { "a": "b".repeat(i32::MAX as usize / 2) }; + fs::write( + target_dir.join("large_doc"), + bson::to_vec(&large_doc).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))? + )?; + + Ok(()) +} + +fn generate_type_marker_cases(dir: &Path) -> std::io::Result<()> { + let target_dir = dir.join("type_markers"); + fs::create_dir_all(&target_dir)?; + + // Document with all BSON types + let all_types = doc! { + "double": 1.0f64, + "double_nan": f64::NAN, + "double_infinity": f64::INFINITY, + "double_neg_infinity": f64::NEG_INFINITY, + "string": "test", + "document": doc! {}, + "array": vec![1, 2, 3], + "binary": Bson::Binary(bson::Binary { subtype: bson::spec::BinarySubtype::Generic, bytes: vec![1, 2, 3] }), + "object_id": bson::oid::ObjectId::new(), + "bool": true, + "date": bson::DateTime::now(), + "null": Bson::Null, + "regex": Bson::RegularExpression(bson::Regex { pattern: "pattern".into(), options: "i".into() }), + "int32": 123i32, + "timestamp": bson::Timestamp { time: 12345, increment: 1 }, + "int64": 123i64, + "decimal128_nan": Decimal128::from_str("NaN").unwrap(), + "decimal128_infinity": Decimal128::from_str("Infinity").unwrap(), + "decimal128_neg_infinity": Decimal128::from_str("-Infinity").unwrap(), + "min_key": Bson::MinKey, + "max_key": Bson::MaxKey, + "undefined": Bson::Undefined + }; + fs::write( + target_dir.join("all_types"), + bson::to_vec(&all_types).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))? + )?; + + Ok(()) +} + +fn generate_string_edge_cases(dir: &Path) -> std::io::Result<()> { + let target_dir = dir.join("string_handling"); + fs::create_dir_all(&target_dir)?; + + // UTF-8 edge cases + let utf8_cases = doc! { + "empty": "", + "null_bytes": "hello\0world", + "unicode": "🦀💻🔒", + "high_surrogate": "\u{10000}", + "invalid_continuation": Bson::Binary(bson::Binary { + subtype: bson::spec::BinarySubtype::Generic, + bytes: vec![0x80u8, 0x80u8, 0x80u8] + }), + "overlong": Bson::Binary(bson::Binary { + subtype: bson::spec::BinarySubtype::Generic, + bytes: vec![0xC0u8, 0x80u8] + }) + }; + fs::write( + target_dir.join("utf8_cases"), + bson::to_vec(&utf8_cases).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))? + )?; + + Ok(()) +} + +fn generate_serialization_cases(dir: &Path) -> std::io::Result<()> { + let target_dir = dir.join("serialization"); + fs::create_dir_all(&target_dir)?; + + // Deeply nested document + let mut nested_doc = doc! {}; + let mut current = &mut nested_doc; + for i in 0..100 { + let next_doc = doc! {}; + current.insert(i.to_string(), next_doc); + current = current.get_mut(&i.to_string()).unwrap().as_document_mut().unwrap(); + } + fs::write( + target_dir.join("nested_doc"), + bson::to_vec(&nested_doc).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))? + )?; + + // Document with large binary data + let large_binary = doc! { + "binary": Bson::Binary(bson::Binary { + subtype: bson::spec::BinarySubtype::Generic, + bytes: vec![0xFF; 1024 * 1024] // 1MB of data + }) + }; + fs::write( + target_dir.join("large_binary"), + bson::to_vec(&large_binary).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))? + )?; + + Ok(()) +} diff --git a/fuzz/run-fuzzer.sh b/fuzz/run-fuzzer.sh new file mode 100755 index 00000000..0b005ed3 --- /dev/null +++ b/fuzz/run-fuzzer.sh @@ -0,0 +1,33 @@ +#!/bin/bash +set -e + +# Directory setup +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +CORPUS_DIR="$SCRIPT_DIR/corpus" +ARTIFACTS_DIR="$SCRIPT_DIR/artifacts" + +# Ensure directories exist +mkdir -p "$CORPUS_DIR" +mkdir -p "$ARTIFACTS_DIR" + +# Generate corpus if it doesn't exist or is empty +if [ ! -d "$CORPUS_DIR" ] || [ -z "$(ls -A $CORPUS_DIR)" ]; then + echo "Generating initial corpus..." + cargo run --bin generate_corpus + # Move generated corpus files to the corpus directory + mv generated_corpus/* "$CORPUS_DIR/" 2>/dev/null || true +fi + +# List of fuzz targets +TARGETS=( + "malformed_length" + "type_markers" + "string_handling" + "serialization" +) + +# Run each fuzz target with the corpus +for target in "${TARGETS[@]}"; do + echo "Running fuzzer for target: $target" + RUST_BACKTRACE=1 cargo fuzz run "$target" "$CORPUS_DIR" -j 1 --release --max-total-time=3600 +done From 0bb5da292ed5377d94807770070da71a70a82802 Mon Sep 17 00:00:00 2001 From: Patrick Meredith Date: Thu, 12 Dec 2024 14:19:36 -0500 Subject: [PATCH 02/10] artifacts dir does not mean crashes occur, don't feel like telling Devin --- .evergreen/run-fuzzer.sh | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.evergreen/run-fuzzer.sh b/.evergreen/run-fuzzer.sh index b7f5f9f7..b66abf1b 100755 --- a/.evergreen/run-fuzzer.sh +++ b/.evergreen/run-fuzzer.sh @@ -39,11 +39,3 @@ run_fuzzer "malformed_length" run_fuzzer "type_markers" run_fuzzer "string_handling" run_fuzzer "serialization" - -# If any crashes were found, save them as test artifacts -if [ "$(ls -A artifacts)" ]; then - echo "Crashes found! Check artifacts directory." - exit 1 -else - echo "No crashes found." -fi From 35fb75afa4dbd2b0c66aab37bdbf5bec39aa25cf Mon Sep 17 00:00:00 2001 From: Patrick Meredith Date: Thu, 12 Dec 2024 14:20:19 -0500 Subject: [PATCH 03/10] Remove that extraneous shell script --- fuzz/run-fuzzer.sh | 33 --------------------------------- 1 file changed, 33 deletions(-) delete mode 100755 fuzz/run-fuzzer.sh diff --git a/fuzz/run-fuzzer.sh b/fuzz/run-fuzzer.sh deleted file mode 100755 index 0b005ed3..00000000 --- a/fuzz/run-fuzzer.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -set -e - -# Directory setup -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -CORPUS_DIR="$SCRIPT_DIR/corpus" -ARTIFACTS_DIR="$SCRIPT_DIR/artifacts" - -# Ensure directories exist -mkdir -p "$CORPUS_DIR" -mkdir -p "$ARTIFACTS_DIR" - -# Generate corpus if it doesn't exist or is empty -if [ ! -d "$CORPUS_DIR" ] || [ -z "$(ls -A $CORPUS_DIR)" ]; then - echo "Generating initial corpus..." - cargo run --bin generate_corpus - # Move generated corpus files to the corpus directory - mv generated_corpus/* "$CORPUS_DIR/" 2>/dev/null || true -fi - -# List of fuzz targets -TARGETS=( - "malformed_length" - "type_markers" - "string_handling" - "serialization" -) - -# Run each fuzz target with the corpus -for target in "${TARGETS[@]}"; do - echo "Running fuzzer for target: $target" - RUST_BACKTRACE=1 cargo fuzz run "$target" "$CORPUS_DIR" -j 1 --release --max-total-time=3600 -done From b44f491fea6c1678d51e1bf1288abf212113b2b5 Mon Sep 17 00:00:00 2001 From: Patrick Meredith Date: Fri, 13 Dec 2024 11:34:17 -0500 Subject: [PATCH 04/10] Devin thinks the presence of an artifacts directory implies crashes, but it is created regardless. I could update Devin on this, but I want to move on to other things with Devin. --- .evergreen/config.yml | 2 +- src/spec.rs | 198 ------------------------------------------ 2 files changed, 1 insertion(+), 199 deletions(-) delete mode 100644 src/spec.rs diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 1270f8a5..17a1b393 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -159,7 +159,7 @@ functions: script: | # Archive crash artifacts if they exist and contain crashes if [ -d "src/fuzz/artifacts" ] && [ "$(ls -A src/fuzz/artifacts)" ]; then - echo "Crashes found in artifacts directory. Creating archive..." + echo "Creating artifacts archive..." tar czf "${PROJECT_DIRECTORY}/crash-artifacts.tar.gz" src/fuzz/artifacts/ else echo "No crashes found in artifacts directory. Skipping archive creation." diff --git a/src/spec.rs b/src/spec.rs deleted file mode 100644 index 04ec7708..00000000 --- a/src/spec.rs +++ /dev/null @@ -1,198 +0,0 @@ -// The MIT License (MIT) - -// Copyright (c) 2015 Y. T. Chung - -// Permission is hereby granted, free of charge, to any person obtaining a copy of -// this software and associated documentation files (the "Software"), to deal in -// the Software without restriction, including without limitation the rights to -// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -// the Software, and to permit persons to whom the Software is furnished to do so, -// subject to the following conditions: - -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. - -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -//! Constants derived from the [BSON Specification Version 1.1](http://bsonspec.org/spec.html). - -use std::convert::From; - -const ELEMENT_TYPE_FLOATING_POINT: u8 = 0x01; -const ELEMENT_TYPE_UTF8_STRING: u8 = 0x02; -const ELEMENT_TYPE_EMBEDDED_DOCUMENT: u8 = 0x03; -const ELEMENT_TYPE_ARRAY: u8 = 0x04; -const ELEMENT_TYPE_BINARY: u8 = 0x05; -const ELEMENT_TYPE_UNDEFINED: u8 = 0x06; // Deprecated -const ELEMENT_TYPE_OBJECT_ID: u8 = 0x07; -const ELEMENT_TYPE_BOOLEAN: u8 = 0x08; -const ELEMENT_TYPE_UTC_DATETIME: u8 = 0x09; -const ELEMENT_TYPE_NULL_VALUE: u8 = 0x0A; -const ELEMENT_TYPE_REGULAR_EXPRESSION: u8 = 0x0B; -const ELEMENT_TYPE_DBPOINTER: u8 = 0x0C; // Deprecated -const ELEMENT_TYPE_JAVASCRIPT_CODE: u8 = 0x0D; -const ELEMENT_TYPE_SYMBOL: u8 = 0x0E; // Deprecated -const ELEMENT_TYPE_JAVASCRIPT_CODE_WITH_SCOPE: u8 = 0x0F; -const ELEMENT_TYPE_32BIT_INTEGER: u8 = 0x10; -const ELEMENT_TYPE_TIMESTAMP: u8 = 0x11; -const ELEMENT_TYPE_64BIT_INTEGER: u8 = 0x12; -#[allow(unused)] -const ELEMENT_TYPE_128BIT_DECIMAL: u8 = 0x13; -const ELEMENT_TYPE_MINKEY: u8 = 0xFF; -const ELEMENT_TYPE_MAXKEY: u8 = 0x7F; - -const BINARY_SUBTYPE_GENERIC: u8 = 0x00; -const BINARY_SUBTYPE_FUNCTION: u8 = 0x01; -const BINARY_SUBTYPE_BINARY_OLD: u8 = 0x02; -const BINARY_SUBTYPE_UUID_OLD: u8 = 0x03; -const BINARY_SUBTYPE_UUID: u8 = 0x04; -const BINARY_SUBTYPE_MD5: u8 = 0x05; -const BINARY_SUBTYPE_ENCRYPTED: u8 = 0x06; -const BINARY_SUBTYPE_COLUMN: u8 = 0x07; -const BINARY_SUBTYPE_SENSITIVE: u8 = 0x08; -const BINARY_SUBTYPE_USER_DEFINED: u8 = 0x80; - -/// All available BSON element types. -/// -/// Not all element types are representable by the [`Bson`](crate::Bson) type. -#[repr(u8)] -#[derive(Debug, Eq, PartialEq, Clone, Copy)] -pub enum ElementType { - /// 64-bit binary floating point - Double = ELEMENT_TYPE_FLOATING_POINT, - /// UTF-8 string - String = ELEMENT_TYPE_UTF8_STRING, - /// Embedded document - EmbeddedDocument = ELEMENT_TYPE_EMBEDDED_DOCUMENT, - /// Array - Array = ELEMENT_TYPE_ARRAY, - /// Binary data - Binary = ELEMENT_TYPE_BINARY, - /// Deprecated. Undefined (value) - Undefined = ELEMENT_TYPE_UNDEFINED, - /// [ObjectId](http://dochub.mongodb.org/core/objectids) - ObjectId = ELEMENT_TYPE_OBJECT_ID, - /// Bool value - Boolean = ELEMENT_TYPE_BOOLEAN, - /// UTC datetime - DateTime = ELEMENT_TYPE_UTC_DATETIME, - /// Null value - Null = ELEMENT_TYPE_NULL_VALUE, - /// Regular expression - The first cstring is the regex pattern, the second is the regex - /// options string. Options are identified by characters, which must be stored in - /// alphabetical order. Valid options are 'i' for case insensitive matching, 'm' for - /// multiline matching, 'x' for verbose mode, 'l' to make \w, \W, etc. locale dependent, - /// 's' for dotall mode ('.' matches everything), and 'u' to make \w, \W, etc. match - /// unicode. - RegularExpression = ELEMENT_TYPE_REGULAR_EXPRESSION, - /// Deprecated. - DbPointer = ELEMENT_TYPE_DBPOINTER, - /// JavaScript code - JavaScriptCode = ELEMENT_TYPE_JAVASCRIPT_CODE, - /// Deprecated. - Symbol = ELEMENT_TYPE_SYMBOL, - /// JavaScript code w/ scope - JavaScriptCodeWithScope = ELEMENT_TYPE_JAVASCRIPT_CODE_WITH_SCOPE, - /// 32-bit integer - Int32 = ELEMENT_TYPE_32BIT_INTEGER, - /// Timestamp - Timestamp = ELEMENT_TYPE_TIMESTAMP, - /// 64-bit integer - Int64 = ELEMENT_TYPE_64BIT_INTEGER, - /// [128-bit decimal floating point](https://github.com/mongodb/specifications/blob/master/source/bson-decimal128/decimal128.rst) - Decimal128 = ELEMENT_TYPE_128BIT_DECIMAL, - MaxKey = ELEMENT_TYPE_MAXKEY, - MinKey = ELEMENT_TYPE_MINKEY, -} - -impl ElementType { - /// Attempt to convert from a `u8`. - #[inline] - pub fn from(tag: u8) -> Option { - use self::ElementType::*; - Some(match tag { - ELEMENT_TYPE_FLOATING_POINT => Self::Double, - ELEMENT_TYPE_UTF8_STRING => Self::String, - ELEMENT_TYPE_EMBEDDED_DOCUMENT => EmbeddedDocument, - ELEMENT_TYPE_ARRAY => Array, - ELEMENT_TYPE_BINARY => Binary, - ELEMENT_TYPE_UNDEFINED => Undefined, - ELEMENT_TYPE_OBJECT_ID => ObjectId, - ELEMENT_TYPE_BOOLEAN => Boolean, - ELEMENT_TYPE_UTC_DATETIME => Self::DateTime, - ELEMENT_TYPE_NULL_VALUE => Self::Null, - ELEMENT_TYPE_REGULAR_EXPRESSION => RegularExpression, - ELEMENT_TYPE_DBPOINTER => DbPointer, - ELEMENT_TYPE_JAVASCRIPT_CODE => JavaScriptCode, - ELEMENT_TYPE_SYMBOL => Symbol, - ELEMENT_TYPE_JAVASCRIPT_CODE_WITH_SCOPE => JavaScriptCodeWithScope, - ELEMENT_TYPE_32BIT_INTEGER => Int32, - ELEMENT_TYPE_TIMESTAMP => Timestamp, - ELEMENT_TYPE_64BIT_INTEGER => Int64, - ELEMENT_TYPE_128BIT_DECIMAL => Decimal128, - ELEMENT_TYPE_MAXKEY => MaxKey, - ELEMENT_TYPE_MINKEY => MinKey, - _ => return None, - }) - } -} - -/// The available binary subtypes, plus a user-defined slot. -#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] -#[non_exhaustive] -pub enum BinarySubtype { - Generic, - Function, - BinaryOld, - UuidOld, - Uuid, - Md5, - Encrypted, - Column, - Sensitive, - UserDefined(u8), - Reserved(u8), -} - -impl From for u8 { - #[inline] - fn from(t: BinarySubtype) -> u8 { - match t { - BinarySubtype::Generic => BINARY_SUBTYPE_GENERIC, - BinarySubtype::Function => BINARY_SUBTYPE_FUNCTION, - BinarySubtype::BinaryOld => BINARY_SUBTYPE_BINARY_OLD, - BinarySubtype::UuidOld => BINARY_SUBTYPE_UUID_OLD, - BinarySubtype::Uuid => BINARY_SUBTYPE_UUID, - BinarySubtype::Md5 => BINARY_SUBTYPE_MD5, - BinarySubtype::Encrypted => BINARY_SUBTYPE_ENCRYPTED, - BinarySubtype::Column => BINARY_SUBTYPE_COLUMN, - BinarySubtype::Sensitive => BINARY_SUBTYPE_SENSITIVE, - BinarySubtype::UserDefined(x) => x, - BinarySubtype::Reserved(x) => x, - } - } -} - -impl From for BinarySubtype { - #[inline] - fn from(t: u8) -> BinarySubtype { - match t { - BINARY_SUBTYPE_GENERIC => BinarySubtype::Generic, - BINARY_SUBTYPE_FUNCTION => BinarySubtype::Function, - BINARY_SUBTYPE_BINARY_OLD => BinarySubtype::BinaryOld, - BINARY_SUBTYPE_UUID_OLD => BinarySubtype::UuidOld, - BINARY_SUBTYPE_UUID => BinarySubtype::Uuid, - BINARY_SUBTYPE_MD5 => BinarySubtype::Md5, - BINARY_SUBTYPE_ENCRYPTED => BinarySubtype::Encrypted, - BINARY_SUBTYPE_COLUMN => BinarySubtype::Column, - BINARY_SUBTYPE_SENSITIVE => BinarySubtype::Sensitive, - _ if t < BINARY_SUBTYPE_USER_DEFINED => BinarySubtype::Reserved(t), - _ => BinarySubtype::UserDefined(t), - } - } -} From 0d24c9fcaeac535fe42f74a11c53023c8726a5ee Mon Sep 17 00:00:00 2001 From: Patrick Meredith Date: Fri, 13 Dec 2024 11:35:31 -0500 Subject: [PATCH 05/10] Devin moved the spec file --- src/spec/fmt.rs | 9 +++ src/spec/mod.rs | 202 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 211 insertions(+) create mode 100644 src/spec/fmt.rs create mode 100644 src/spec/mod.rs diff --git a/src/spec/fmt.rs b/src/spec/fmt.rs new file mode 100644 index 00000000..1cbb6fb3 --- /dev/null +++ b/src/spec/fmt.rs @@ -0,0 +1,9 @@ +use std::fmt; +use crate::spec::BinarySubtype; + +impl fmt::LowerHex for BinarySubtype { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let value: u8 = (*self).into(); + fmt::LowerHex::fmt(&value, f) + } +} diff --git a/src/spec/mod.rs b/src/spec/mod.rs new file mode 100644 index 00000000..bce083d3 --- /dev/null +++ b/src/spec/mod.rs @@ -0,0 +1,202 @@ +// The MIT License (MIT) + +// Copyright (c) 2015 Y. T. Chung + +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software is furnished to do so, +// subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +//! Constants derived from the [BSON Specification Version 1.1](http://bsonspec.org/spec.html). + +use std::convert::From; + +mod fmt; +#[allow(unused_imports)] +pub use self::fmt::*; + +const ELEMENT_TYPE_FLOATING_POINT: u8 = 0x01; +const ELEMENT_TYPE_UTF8_STRING: u8 = 0x02; +const ELEMENT_TYPE_EMBEDDED_DOCUMENT: u8 = 0x03; +const ELEMENT_TYPE_ARRAY: u8 = 0x04; +const ELEMENT_TYPE_BINARY: u8 = 0x05; +const ELEMENT_TYPE_UNDEFINED: u8 = 0x06; // Deprecated +const ELEMENT_TYPE_OBJECT_ID: u8 = 0x07; +const ELEMENT_TYPE_BOOLEAN: u8 = 0x08; +const ELEMENT_TYPE_UTC_DATETIME: u8 = 0x09; +const ELEMENT_TYPE_NULL_VALUE: u8 = 0x0A; +const ELEMENT_TYPE_REGULAR_EXPRESSION: u8 = 0x0B; +const ELEMENT_TYPE_DBPOINTER: u8 = 0x0C; // Deprecated +const ELEMENT_TYPE_JAVASCRIPT_CODE: u8 = 0x0D; +const ELEMENT_TYPE_SYMBOL: u8 = 0x0E; // Deprecated +const ELEMENT_TYPE_JAVASCRIPT_CODE_WITH_SCOPE: u8 = 0x0F; +const ELEMENT_TYPE_32BIT_INTEGER: u8 = 0x10; +const ELEMENT_TYPE_TIMESTAMP: u8 = 0x11; +const ELEMENT_TYPE_64BIT_INTEGER: u8 = 0x12; +#[allow(unused)] +const ELEMENT_TYPE_128BIT_DECIMAL: u8 = 0x13; +const ELEMENT_TYPE_MINKEY: u8 = 0xFF; +const ELEMENT_TYPE_MAXKEY: u8 = 0x7F; + +const BINARY_SUBTYPE_GENERIC: u8 = 0x00; +const BINARY_SUBTYPE_FUNCTION: u8 = 0x01; +const BINARY_SUBTYPE_BINARY_OLD: u8 = 0x02; +const BINARY_SUBTYPE_UUID_OLD: u8 = 0x03; +const BINARY_SUBTYPE_UUID: u8 = 0x04; +const BINARY_SUBTYPE_MD5: u8 = 0x05; +const BINARY_SUBTYPE_ENCRYPTED: u8 = 0x06; +const BINARY_SUBTYPE_COLUMN: u8 = 0x07; +const BINARY_SUBTYPE_SENSITIVE: u8 = 0x08; +const BINARY_SUBTYPE_USER_DEFINED: u8 = 0x80; + +/// All available BSON element types. +/// +/// Not all element types are representable by the [`Bson`](crate::Bson) type. +#[repr(u8)] +#[derive(Debug, Eq, PartialEq, Clone, Copy)] +pub enum ElementType { + /// 64-bit binary floating point + Double = ELEMENT_TYPE_FLOATING_POINT, + /// UTF-8 string + String = ELEMENT_TYPE_UTF8_STRING, + /// Embedded document + EmbeddedDocument = ELEMENT_TYPE_EMBEDDED_DOCUMENT, + /// Array + Array = ELEMENT_TYPE_ARRAY, + /// Binary data + Binary = ELEMENT_TYPE_BINARY, + /// Deprecated. Undefined (value) + Undefined = ELEMENT_TYPE_UNDEFINED, + /// [ObjectId](http://dochub.mongodb.org/core/objectids) + ObjectId = ELEMENT_TYPE_OBJECT_ID, + /// Bool value + Boolean = ELEMENT_TYPE_BOOLEAN, + /// UTC datetime + DateTime = ELEMENT_TYPE_UTC_DATETIME, + /// Null value + Null = ELEMENT_TYPE_NULL_VALUE, + /// Regular expression - The first cstring is the regex pattern, the second is the regex + /// options string. Options are identified by characters, which must be stored in + /// alphabetical order. Valid options are 'i' for case insensitive matching, 'm' for + /// multiline matching, 'x' for verbose mode, 'l' to make \w, \W, etc. locale dependent, + /// 's' for dotall mode ('.' matches everything), and 'u' to make \w, \W, etc. match + /// unicode. + RegularExpression = ELEMENT_TYPE_REGULAR_EXPRESSION, + /// Deprecated. + DbPointer = ELEMENT_TYPE_DBPOINTER, + /// JavaScript code + JavaScriptCode = ELEMENT_TYPE_JAVASCRIPT_CODE, + /// Deprecated. + Symbol = ELEMENT_TYPE_SYMBOL, + /// JavaScript code w/ scope + JavaScriptCodeWithScope = ELEMENT_TYPE_JAVASCRIPT_CODE_WITH_SCOPE, + /// 32-bit integer + Int32 = ELEMENT_TYPE_32BIT_INTEGER, + /// Timestamp + Timestamp = ELEMENT_TYPE_TIMESTAMP, + /// 64-bit integer + Int64 = ELEMENT_TYPE_64BIT_INTEGER, + /// [128-bit decimal floating point](https://github.com/mongodb/specifications/blob/master/source/bson-decimal128/decimal128.rst) + Decimal128 = ELEMENT_TYPE_128BIT_DECIMAL, + MaxKey = ELEMENT_TYPE_MAXKEY, + MinKey = ELEMENT_TYPE_MINKEY, +} + +impl ElementType { + /// Attempt to convert from a `u8`. + #[inline] + pub fn from(tag: u8) -> Option { + use self::ElementType::*; + Some(match tag { + ELEMENT_TYPE_FLOATING_POINT => Self::Double, + ELEMENT_TYPE_UTF8_STRING => Self::String, + ELEMENT_TYPE_EMBEDDED_DOCUMENT => EmbeddedDocument, + ELEMENT_TYPE_ARRAY => Array, + ELEMENT_TYPE_BINARY => Binary, + ELEMENT_TYPE_UNDEFINED => Undefined, + ELEMENT_TYPE_OBJECT_ID => ObjectId, + ELEMENT_TYPE_BOOLEAN => Boolean, + ELEMENT_TYPE_UTC_DATETIME => Self::DateTime, + ELEMENT_TYPE_NULL_VALUE => Self::Null, + ELEMENT_TYPE_REGULAR_EXPRESSION => RegularExpression, + ELEMENT_TYPE_DBPOINTER => DbPointer, + ELEMENT_TYPE_JAVASCRIPT_CODE => JavaScriptCode, + ELEMENT_TYPE_SYMBOL => Symbol, + ELEMENT_TYPE_JAVASCRIPT_CODE_WITH_SCOPE => JavaScriptCodeWithScope, + ELEMENT_TYPE_32BIT_INTEGER => Int32, + ELEMENT_TYPE_TIMESTAMP => Timestamp, + ELEMENT_TYPE_64BIT_INTEGER => Int64, + ELEMENT_TYPE_128BIT_DECIMAL => Decimal128, + ELEMENT_TYPE_MAXKEY => MaxKey, + ELEMENT_TYPE_MINKEY => MinKey, + _ => return None, + }) + } +} + +/// The available binary subtypes, plus a user-defined slot. +#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] +#[non_exhaustive] +pub enum BinarySubtype { + Generic, + Function, + BinaryOld, + UuidOld, + Uuid, + Md5, + Encrypted, + Column, + Sensitive, + UserDefined(u8), + Reserved(u8), +} + +impl From for u8 { + #[inline] + fn from(t: BinarySubtype) -> u8 { + match t { + BinarySubtype::Generic => BINARY_SUBTYPE_GENERIC, + BinarySubtype::Function => BINARY_SUBTYPE_FUNCTION, + BinarySubtype::BinaryOld => BINARY_SUBTYPE_BINARY_OLD, + BinarySubtype::UuidOld => BINARY_SUBTYPE_UUID_OLD, + BinarySubtype::Uuid => BINARY_SUBTYPE_UUID, + BinarySubtype::Md5 => BINARY_SUBTYPE_MD5, + BinarySubtype::Encrypted => BINARY_SUBTYPE_ENCRYPTED, + BinarySubtype::Column => BINARY_SUBTYPE_COLUMN, + BinarySubtype::Sensitive => BINARY_SUBTYPE_SENSITIVE, + BinarySubtype::UserDefined(x) => x, + BinarySubtype::Reserved(x) => x, + } + } +} + +impl From for BinarySubtype { + #[inline] + fn from(t: u8) -> BinarySubtype { + match t { + BINARY_SUBTYPE_GENERIC => BinarySubtype::Generic, + BINARY_SUBTYPE_FUNCTION => BinarySubtype::Function, + BINARY_SUBTYPE_BINARY_OLD => BinarySubtype::BinaryOld, + BINARY_SUBTYPE_UUID_OLD => BinarySubtype::UuidOld, + BINARY_SUBTYPE_UUID => BinarySubtype::Uuid, + BINARY_SUBTYPE_MD5 => BinarySubtype::Md5, + BINARY_SUBTYPE_ENCRYPTED => BinarySubtype::Encrypted, + BINARY_SUBTYPE_COLUMN => BinarySubtype::Column, + BINARY_SUBTYPE_SENSITIVE => BinarySubtype::Sensitive, + _ if t < BINARY_SUBTYPE_USER_DEFINED => BinarySubtype::Reserved(t), + _ => BinarySubtype::UserDefined(t), + } + } +} From aabac7c2d5976ddfaf96f9f63d63601011b00ef6 Mon Sep 17 00:00:00 2001 From: Patrick Meredith Date: Mon, 16 Dec 2024 16:23:26 -0500 Subject: [PATCH 06/10] Cleanup some of Devin's silliness --- fuzz/fuzz_targets/malformed_length.rs | 14 ++-- fuzz/fuzz_targets/serialization.rs | 105 +++++++++++++++----------- fuzz/fuzz_targets/string_handling.rs | 6 +- fuzz/generate_corpus.rs | 31 +++++--- src/{spec/mod.rs => spec.rs} | 11 ++- src/spec/fmt.rs | 9 --- 6 files changed, 97 insertions(+), 79 deletions(-) rename src/{spec/mod.rs => spec.rs} (97%) delete mode 100644 src/spec/fmt.rs diff --git a/fuzz/fuzz_targets/malformed_length.rs b/fuzz/fuzz_targets/malformed_length.rs index 53ebf147..a007e146 100644 --- a/fuzz/fuzz_targets/malformed_length.rs +++ b/fuzz/fuzz_targets/malformed_length.rs @@ -1,20 +1,20 @@ //! BSON Document Length Field Fuzzer //! -//! This fuzz test focuses on finding security vulnerabilities related to BSON document length fields. -//! It specifically targets: +//! This fuzz test focuses on finding security vulnerabilities related to BSON document length +//! fields. It specifically targets: //! - Integer overflow/underflow in length calculations //! - Malformed length fields that could cause buffer overruns //! - Mismatches between declared and actual document sizes //! - Memory allocation issues with large or invalid lengths #![no_main] -#[macro_use] extern crate libfuzzer_sys; +#[macro_use] +extern crate libfuzzer_sys; extern crate bson; use bson::RawDocument; fuzz_target!(|buf: &[u8]| { - if buf.len() >= 4 { - // Focus on document length field manipulation - let _ = RawDocument::from_bytes(buf); - } + // Focus on document length field manipulation + // This should return an error if the buf.len() < 4 rather than panic. + let _ = RawDocument::from_bytes(buf); }); diff --git a/fuzz/fuzz_targets/serialization.rs b/fuzz/fuzz_targets/serialization.rs index ccdf22a7..ac779142 100644 --- a/fuzz/fuzz_targets/serialization.rs +++ b/fuzz/fuzz_targets/serialization.rs @@ -13,8 +13,6 @@ fn convert_bson_ref(bson_ref: RawBsonRef) -> Option { RawBsonRef::Double(d) => { if d.is_nan() { Some(RawBsonRef::Double(f64::NAN).to_raw_bson()) - } else if d.is_infinite() { - Some(RawBsonRef::Double(d).to_raw_bson()) } else { Some(RawBsonRef::Double(d).to_raw_bson()) } @@ -62,25 +60,23 @@ fn convert_bson_ref(bson_ref: RawBsonRef) -> Option { } } RawBsonRef::Binary(b) => { - if b.bytes.len() <= i32::MAX as usize && - match b.subtype { - BinarySubtype::Generic | - BinarySubtype::Function | - BinarySubtype::BinaryOld | - BinarySubtype::UuidOld | - BinarySubtype::Uuid | - BinarySubtype::Md5 | - BinarySubtype::UserDefined(_) => true, - _ => false - } { + if b.bytes.len() <= i32::MAX as usize + && match b.subtype { + BinarySubtype::Generic + | BinarySubtype::Function + | BinarySubtype::BinaryOld + | BinarySubtype::UuidOld + | BinarySubtype::Uuid + | BinarySubtype::Md5 + | BinarySubtype::UserDefined(_) => true, + _ => false, + } + { Some(RawBsonRef::Binary(b).to_raw_bson()) } else { None } } - RawBsonRef::ObjectId(id) => Some(RawBsonRef::ObjectId(id).to_raw_bson()), - RawBsonRef::Boolean(b) => Some(RawBsonRef::Boolean(b).to_raw_bson()), - RawBsonRef::Null => Some(RawBsonRef::Null.to_raw_bson()), RawBsonRef::RegularExpression(regex) => { let valid_options = "ilmsux"; let mut options_sorted = regex.options.chars().collect::>(); @@ -88,9 +84,10 @@ fn convert_bson_ref(bson_ref: RawBsonRef) -> Option { options_sorted.dedup(); let sorted_str: String = options_sorted.into_iter().collect(); - if sorted_str.chars().all(|c| valid_options.contains(c)) && - !regex.pattern.contains('\0') && - regex.pattern.len() <= (i32::MAX as usize) { + if sorted_str.chars().all(|c| valid_options.contains(c)) + && !regex.pattern.contains('\0') + && regex.pattern.len() <= (i32::MAX as usize) + { Some(RawBsonRef::RegularExpression(regex).to_raw_bson()) } else { None @@ -104,9 +101,10 @@ fn convert_bson_ref(bson_ref: RawBsonRef) -> Option { } } RawBsonRef::JavaScriptCodeWithScope(code_w_scope) => { - if !code_w_scope.code.is_empty() && - !code_w_scope.code.contains('\0') && - code_w_scope.code.len() <= (i32::MAX as usize) { + if !code_w_scope.code.is_empty() + && !code_w_scope.code.contains('\0') + && code_w_scope.code.len() <= (i32::MAX as usize) + { Some(RawBsonRef::JavaScriptCodeWithScope(code_w_scope).to_raw_bson()) } else { None @@ -123,10 +121,6 @@ fn convert_bson_ref(bson_ref: RawBsonRef) -> Option { None } } - RawBsonRef::Int32(i) => Some(RawBsonRef::Int32(i).to_raw_bson()), - RawBsonRef::Int64(i) => Some(RawBsonRef::Int64(i).to_raw_bson()), - RawBsonRef::Timestamp(ts) => Some(RawBsonRef::Timestamp(ts).to_raw_bson()), - RawBsonRef::DateTime(dt) => Some(RawBsonRef::DateTime(dt).to_raw_bson()), RawBsonRef::Decimal128(d) => { let d_str = d.to_string(); if d_str.contains("NaN") { @@ -145,9 +139,7 @@ fn convert_bson_ref(bson_ref: RawBsonRef) -> Option { Some(RawBsonRef::Decimal128(d).to_raw_bson()) } } - RawBsonRef::MinKey => Some(RawBsonRef::MinKey.to_raw_bson()), - RawBsonRef::MaxKey => Some(RawBsonRef::MaxKey.to_raw_bson()), - RawBsonRef::Undefined => Some(RawBsonRef::Undefined.to_raw_bson()), + other => Some(other.to_raw_bson()), } } @@ -165,25 +157,42 @@ fuzz_target!(|input: Input| { match value { RawBsonRef::Double(d) if d.is_nan() => { if let Some(converted_ref) = converted.as_raw_bson_ref().as_f64() { - assert!(converted_ref.is_nan(), - "NaN comparison failed for key: {}", key); + assert!( + converted_ref.is_nan(), + "NaN comparison failed for key: {}", + key + ); } } RawBsonRef::Double(d) if d.is_infinite() => { if let Some(converted_ref) = converted.as_raw_bson_ref().as_f64() { - assert_eq!(d.is_sign_positive(), converted_ref.is_sign_positive(), - "Infinity sign mismatch for key: {}", key); - assert!(converted_ref.is_infinite(), - "Infinity comparison failed for key: {}", key); + assert_eq!( + d.is_sign_positive(), + converted_ref.is_sign_positive(), + "Infinity sign mismatch for key: {}", + key + ); + assert!( + converted_ref.is_infinite(), + "Infinity comparison failed for key: {}", + key + ); } } RawBsonRef::Decimal128(d) if d.to_string().contains("NaN") => { match converted.as_raw_bson_ref() { RawBsonRef::Decimal128(cd) => { - assert!(cd.to_string().contains("NaN"), - "Decimal128 NaN comparison failed for key: {}", key); + assert!( + cd.to_string().contains("NaN"), + "Decimal128 NaN comparison failed for key: {}", + key + ); } - _ => panic!("Type mismatch: expected Decimal128, got different type for key: {}", key), + _ => panic!( + "Type mismatch: expected Decimal128, got different type for \ + key: {}", + key + ), } } RawBsonRef::Decimal128(d) if d.to_string().contains("Infinity") => { @@ -191,15 +200,25 @@ fuzz_target!(|input: Input| { RawBsonRef::Decimal128(cd) => { let d_str = d.to_string(); let cd_str = cd.to_string(); - assert_eq!(d_str, cd_str, - "Decimal128 Infinity comparison failed for key: {}", key); + assert_eq!( + d_str, cd_str, + "Decimal128 Infinity comparison failed for key: {}", + key + ); } - _ => panic!("Type mismatch: expected Decimal128, got different type for key: {}", key), + _ => panic!( + "Type mismatch: expected Decimal128, got different type for \ + key: {}", + key + ), } } _ => { - assert_eq!(converted, original_bytes, - "Serialization mismatch for key: {}", key); + assert_eq!( + converted, original_bytes, + "Serialization mismatch for key: {}", + key + ); } } } diff --git a/fuzz/fuzz_targets/string_handling.rs b/fuzz/fuzz_targets/string_handling.rs index 502a4135..090d132f 100644 --- a/fuzz/fuzz_targets/string_handling.rs +++ b/fuzz/fuzz_targets/string_handling.rs @@ -2,7 +2,7 @@ #[macro_use] extern crate libfuzzer_sys; extern crate bson; -use bson::{spec::BinarySubtype, RawBsonRef, RawDocument}; +use bson::{RawBsonRef, RawDocument}; use std::convert::TryInto; fuzz_target!(|buf: &[u8]| { @@ -15,10 +15,6 @@ fuzz_target!(|buf: &[u8]| { let _ = s.len(); let _ = s.chars().count(); } - RawBsonRef::Binary(b) if b.subtype == BinarySubtype::Generic => { - // Test UTF-8 validation on binary data - let _ = std::str::from_utf8(b.bytes); - } _ => {} } } diff --git a/fuzz/generate_corpus.rs b/fuzz/generate_corpus.rs index 88033a33..c67c4cfc 100644 --- a/fuzz/generate_corpus.rs +++ b/fuzz/generate_corpus.rs @@ -1,8 +1,10 @@ use bson::{doc, Bson, Decimal128}; -use std::fs; -use std::path::Path; -use std::io::{Error, ErrorKind}; -use std::str::FromStr; +use std::{ + fs, + io::{Error, ErrorKind}, + path::Path, + str::FromStr, +}; fn main() -> std::io::Result<()> { let corpus_dir = Path::new("fuzz/corpus"); @@ -20,18 +22,21 @@ fn generate_length_edge_cases(dir: &Path) -> std::io::Result<()> { let target_dir = dir.join("malformed_length"); fs::create_dir_all(&target_dir)?; + // Invalid length + fs::write(target_dir.join("invalid_len"), vec![4, 5])?; + // Minimal valid document let min_doc = doc! {}; fs::write( target_dir.join("min_doc"), - bson::to_vec(&min_doc).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))? + bson::to_vec(&min_doc).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?, )?; // Document with length near i32::MAX let large_doc = doc! { "a": "b".repeat(i32::MAX as usize / 2) }; fs::write( target_dir.join("large_doc"), - bson::to_vec(&large_doc).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))? + bson::to_vec(&large_doc).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?, )?; Ok(()) @@ -68,7 +73,7 @@ fn generate_type_marker_cases(dir: &Path) -> std::io::Result<()> { }; fs::write( target_dir.join("all_types"), - bson::to_vec(&all_types).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))? + bson::to_vec(&all_types).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?, )?; Ok(()) @@ -95,7 +100,7 @@ fn generate_string_edge_cases(dir: &Path) -> std::io::Result<()> { }; fs::write( target_dir.join("utf8_cases"), - bson::to_vec(&utf8_cases).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))? + bson::to_vec(&utf8_cases).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?, )?; Ok(()) @@ -111,11 +116,15 @@ fn generate_serialization_cases(dir: &Path) -> std::io::Result<()> { for i in 0..100 { let next_doc = doc! {}; current.insert(i.to_string(), next_doc); - current = current.get_mut(&i.to_string()).unwrap().as_document_mut().unwrap(); + current = current + .get_mut(&i.to_string()) + .unwrap() + .as_document_mut() + .unwrap(); } fs::write( target_dir.join("nested_doc"), - bson::to_vec(&nested_doc).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))? + bson::to_vec(&nested_doc).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?, )?; // Document with large binary data @@ -127,7 +136,7 @@ fn generate_serialization_cases(dir: &Path) -> std::io::Result<()> { }; fs::write( target_dir.join("large_binary"), - bson::to_vec(&large_binary).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))? + bson::to_vec(&large_binary).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?, )?; Ok(()) diff --git a/src/spec/mod.rs b/src/spec.rs similarity index 97% rename from src/spec/mod.rs rename to src/spec.rs index bce083d3..e853c150 100644 --- a/src/spec/mod.rs +++ b/src/spec.rs @@ -21,11 +21,14 @@ //! Constants derived from the [BSON Specification Version 1.1](http://bsonspec.org/spec.html). -use std::convert::From; +use std::{convert::From, fmt}; -mod fmt; -#[allow(unused_imports)] -pub use self::fmt::*; +impl fmt::LowerHex for BinarySubtype { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let value: u8 = (*self).into(); + fmt::LowerHex::fmt(&value, f) + } +} const ELEMENT_TYPE_FLOATING_POINT: u8 = 0x01; const ELEMENT_TYPE_UTF8_STRING: u8 = 0x02; diff --git a/src/spec/fmt.rs b/src/spec/fmt.rs deleted file mode 100644 index 1cbb6fb3..00000000 --- a/src/spec/fmt.rs +++ /dev/null @@ -1,9 +0,0 @@ -use std::fmt; -use crate::spec::BinarySubtype; - -impl fmt::LowerHex for BinarySubtype { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let value: u8 = (*self).into(); - fmt::LowerHex::fmt(&value, f) - } -} From f490e93ba54b3c860252f3ac299ba3e8528bb7c7 Mon Sep 17 00:00:00 2001 From: Patrick Meredith Date: Mon, 16 Dec 2024 17:34:17 -0500 Subject: [PATCH 07/10] Fix the strange serialization test --- fuzz/Cargo.toml | 4 - fuzz/fuzz_targets/serialization.rs | 232 ++--------------------------- 2 files changed, 14 insertions(+), 222 deletions(-) diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 2e79dbb5..abdcffb5 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -14,10 +14,6 @@ path = ".." [dependencies.libfuzzer-sys] version = "0.4.0" -[dependencies.arbitrary] -version = "1.3.0" -features = ["derive"] - [dependencies.serde] version = "1.0" diff --git a/fuzz/fuzz_targets/serialization.rs b/fuzz/fuzz_targets/serialization.rs index ac779142..82568bc1 100644 --- a/fuzz/fuzz_targets/serialization.rs +++ b/fuzz/fuzz_targets/serialization.rs @@ -1,226 +1,22 @@ #![no_main] -use arbitrary::Arbitrary; use bson::{ - raw::{RawBson, RawBsonRef, RawDocument}, - spec::BinarySubtype, - Decimal128, + raw::{RawDocument, RawDocumentBuf}, + Document, }; use libfuzzer_sys::fuzz_target; -use std::str::FromStr; -fn convert_bson_ref(bson_ref: RawBsonRef) -> Option { - match bson_ref { - RawBsonRef::Double(d) => { - if d.is_nan() { - Some(RawBsonRef::Double(f64::NAN).to_raw_bson()) - } else { - Some(RawBsonRef::Double(d).to_raw_bson()) - } - } - RawBsonRef::String(s) => { - if !s.is_empty() && !s.contains('\0') && s.len() <= (i32::MAX as usize) { - Some(RawBsonRef::String(s).to_raw_bson()) - } else { - None - } - } - RawBsonRef::Document(d) => { - let mut valid = true; - for result in d.iter() { - match result { - Ok((key, _)) if key.is_empty() || key.contains('\0') => { - valid = false; - break; - } - Err(_) => { - valid = false; - break; - } - _ => {} - } - } - if valid { - Some(RawBsonRef::Document(d).to_raw_bson()) - } else { - None - } - } - RawBsonRef::Array(a) => { - let mut valid = true; - for result in a.into_iter() { - if result.is_err() { - valid = false; - break; - } - } - if valid { - Some(RawBsonRef::Array(a).to_raw_bson()) - } else { - None - } - } - RawBsonRef::Binary(b) => { - if b.bytes.len() <= i32::MAX as usize - && match b.subtype { - BinarySubtype::Generic - | BinarySubtype::Function - | BinarySubtype::BinaryOld - | BinarySubtype::UuidOld - | BinarySubtype::Uuid - | BinarySubtype::Md5 - | BinarySubtype::UserDefined(_) => true, - _ => false, - } - { - Some(RawBsonRef::Binary(b).to_raw_bson()) - } else { - None - } - } - RawBsonRef::RegularExpression(regex) => { - let valid_options = "ilmsux"; - let mut options_sorted = regex.options.chars().collect::>(); - options_sorted.sort_unstable(); - options_sorted.dedup(); - let sorted_str: String = options_sorted.into_iter().collect(); - - if sorted_str.chars().all(|c| valid_options.contains(c)) - && !regex.pattern.contains('\0') - && regex.pattern.len() <= (i32::MAX as usize) - { - Some(RawBsonRef::RegularExpression(regex).to_raw_bson()) - } else { - None - } - } - RawBsonRef::JavaScriptCode(code) => { - if !code.is_empty() && !code.contains('\0') && code.len() <= (i32::MAX as usize) { - Some(RawBsonRef::JavaScriptCode(code).to_raw_bson()) - } else { - None - } - } - RawBsonRef::JavaScriptCodeWithScope(code_w_scope) => { - if !code_w_scope.code.is_empty() - && !code_w_scope.code.contains('\0') - && code_w_scope.code.len() <= (i32::MAX as usize) - { - Some(RawBsonRef::JavaScriptCodeWithScope(code_w_scope).to_raw_bson()) - } else { - None - } - } - RawBsonRef::DbPointer(ptr) => { - let raw_bson = RawBsonRef::DbPointer(ptr).to_raw_bson(); - Some(raw_bson) - } - RawBsonRef::Symbol(s) => { - if !s.is_empty() && !s.contains('\0') && s.len() <= i32::MAX as usize { - Some(RawBsonRef::Symbol(s).to_raw_bson()) - } else { - None - } - } - RawBsonRef::Decimal128(d) => { - let d_str = d.to_string(); - if d_str.contains("NaN") { - if let Ok(nan) = Decimal128::from_str("NaN") { - Some(RawBsonRef::Decimal128(nan).to_raw_bson()) - } else { - None - } - } else if d_str == "Infinity" || d_str == "-Infinity" { - if let Ok(val) = Decimal128::from_str(&d_str) { - Some(RawBsonRef::Decimal128(val).to_raw_bson()) - } else { - None - } - } else { - Some(RawBsonRef::Decimal128(d).to_raw_bson()) - } - } - other => Some(other.to_raw_bson()), - } -} - -#[derive(Debug, Arbitrary)] -struct Input { - bytes: Vec, -} - -fuzz_target!(|input: Input| { - if let Ok(doc) = RawDocument::from_bytes(&input.bytes) { - for result in doc.iter() { - if let Ok((key, value)) = result { - if let Some(converted) = convert_bson_ref(value) { - let original_bytes = value.to_raw_bson(); - match value { - RawBsonRef::Double(d) if d.is_nan() => { - if let Some(converted_ref) = converted.as_raw_bson_ref().as_f64() { - assert!( - converted_ref.is_nan(), - "NaN comparison failed for key: {}", - key - ); - } - } - RawBsonRef::Double(d) if d.is_infinite() => { - if let Some(converted_ref) = converted.as_raw_bson_ref().as_f64() { - assert_eq!( - d.is_sign_positive(), - converted_ref.is_sign_positive(), - "Infinity sign mismatch for key: {}", - key - ); - assert!( - converted_ref.is_infinite(), - "Infinity comparison failed for key: {}", - key - ); - } - } - RawBsonRef::Decimal128(d) if d.to_string().contains("NaN") => { - match converted.as_raw_bson_ref() { - RawBsonRef::Decimal128(cd) => { - assert!( - cd.to_string().contains("NaN"), - "Decimal128 NaN comparison failed for key: {}", - key - ); - } - _ => panic!( - "Type mismatch: expected Decimal128, got different type for \ - key: {}", - key - ), - } - } - RawBsonRef::Decimal128(d) if d.to_string().contains("Infinity") => { - match converted.as_raw_bson_ref() { - RawBsonRef::Decimal128(cd) => { - let d_str = d.to_string(); - let cd_str = cd.to_string(); - assert_eq!( - d_str, cd_str, - "Decimal128 Infinity comparison failed for key: {}", - key - ); - } - _ => panic!( - "Type mismatch: expected Decimal128, got different type for \ - key: {}", - key - ), - } - } - _ => { - assert_eq!( - converted, original_bytes, - "Serialization mismatch for key: {}", - key - ); - } - } +fuzz_target!(|input: &[u8]| { + if let Ok(rawdoc) = RawDocument::from_bytes(&input) { + if let Ok(doc) = Document::try_from(rawdoc) { + let out = RawDocumentBuf::try_from(&doc).unwrap(); + let out_bytes = out.as_bytes(); + if input != out_bytes { + let reserialized = RawDocument::from_bytes(&out_bytes).unwrap(); + let reserialized_doc = Document::try_from(reserialized).unwrap(); + // Ensure that the reserialized document is the same as the original document, the + // bytes can differ while still resulting in the same Document. + if doc != reserialized_doc { + panic!("reserialization failed"); } } } From ecfec8af45fd88ee69383542fd6125b918fa0196 Mon Sep 17 00:00:00 2001 From: Patrick Meredith Date: Mon, 16 Dec 2024 17:37:38 -0500 Subject: [PATCH 08/10] Use assert --- fuzz/fuzz_targets/serialization.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fuzz/fuzz_targets/serialization.rs b/fuzz/fuzz_targets/serialization.rs index 82568bc1..b889ca03 100644 --- a/fuzz/fuzz_targets/serialization.rs +++ b/fuzz/fuzz_targets/serialization.rs @@ -15,9 +15,7 @@ fuzz_target!(|input: &[u8]| { let reserialized_doc = Document::try_from(reserialized).unwrap(); // Ensure that the reserialized document is the same as the original document, the // bytes can differ while still resulting in the same Document. - if doc != reserialized_doc { - panic!("reserialization failed"); - } + assert_eq!(doc, reserialized_doc, "reserialization failed"); } } } From 100f70e6ca2b3906f00ab1c434adf956e7c660c5 Mon Sep 17 00:00:00 2001 From: Patrick Meredith Date: Mon, 16 Dec 2024 17:47:21 -0500 Subject: [PATCH 09/10] Still need to worry about double nan --- fuzz/fuzz_targets/serialization.rs | 35 +++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/fuzz/fuzz_targets/serialization.rs b/fuzz/fuzz_targets/serialization.rs index b889ca03..e5ba621d 100644 --- a/fuzz/fuzz_targets/serialization.rs +++ b/fuzz/fuzz_targets/serialization.rs @@ -1,10 +1,37 @@ #![no_main] use bson::{ raw::{RawDocument, RawDocumentBuf}, + Bson, Document, }; use libfuzzer_sys::fuzz_target; +fn compare_docs(doc1: &Document, doc2: &Document) -> bool { + if doc1.len() != doc2.len() { + return false; + } + for (key, value) in doc1 { + if !doc2.contains_key(key) { + return false; + } + if let Some(val2) = doc2.get(key) { + match (value, val2) { + (Bson::Double(d1), Bson::Double(d2)) => { + if (!d1.is_nan() || !d2.is_nan()) && d1 != d2 { + return false; + } + } + (v1, v2) => { + if v1 != v2 { + return false; + } + } + } + } + } + true +} + fuzz_target!(|input: &[u8]| { if let Ok(rawdoc) = RawDocument::from_bytes(&input) { if let Ok(doc) = Document::try_from(rawdoc) { @@ -15,7 +42,13 @@ fuzz_target!(|input: &[u8]| { let reserialized_doc = Document::try_from(reserialized).unwrap(); // Ensure that the reserialized document is the same as the original document, the // bytes can differ while still resulting in the same Document. - assert_eq!(doc, reserialized_doc, "reserialization failed"); + if !compare_docs(&doc, &reserialized_doc) { + panic!( + "Reserialized document is not the same as the original document: {:?} != \ + {:?}", + doc, reserialized_doc + ); + } } } } From 81d4a093e8738748c86e85ffd781d026fc393580 Mon Sep 17 00:00:00 2001 From: Patrick Meredith Date: Tue, 17 Dec 2024 17:27:26 -0500 Subject: [PATCH 10/10] Remove redundant test --- fuzz/fuzz_targets/malformed_length.rs | 20 -------------------- 1 file changed, 20 deletions(-) delete mode 100644 fuzz/fuzz_targets/malformed_length.rs diff --git a/fuzz/fuzz_targets/malformed_length.rs b/fuzz/fuzz_targets/malformed_length.rs deleted file mode 100644 index a007e146..00000000 --- a/fuzz/fuzz_targets/malformed_length.rs +++ /dev/null @@ -1,20 +0,0 @@ -//! BSON Document Length Field Fuzzer -//! -//! This fuzz test focuses on finding security vulnerabilities related to BSON document length -//! fields. It specifically targets: -//! - Integer overflow/underflow in length calculations -//! - Malformed length fields that could cause buffer overruns -//! - Mismatches between declared and actual document sizes -//! - Memory allocation issues with large or invalid lengths - -#![no_main] -#[macro_use] -extern crate libfuzzer_sys; -extern crate bson; -use bson::RawDocument; - -fuzz_target!(|buf: &[u8]| { - // Focus on document length field manipulation - // This should return an error if the buf.len() < 4 rather than panic. - let _ = RawDocument::from_bytes(buf); -});