-
Notifications
You must be signed in to change notification settings - Fork 145
New PR with Devin's complete changes #507
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
1839b0d
0bb5da2
35fb75a
b44f491
0d24c9f
ec09601
aabac7c
f490e93
ecfec8a
100f70e
81d4a09
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,15 +13,18 @@ stepback: true | |
command_type: system | ||
|
||
# Protect ourself against rogue test case, or curl gone wild, that runs forever | ||
# 12 minutes is the longest we'll ever run | ||
exec_timeout_secs: 3600 # 12 minutes is the longest we'll ever run | ||
# 60 minutes is the longest we'll ever run | ||
exec_timeout_secs: 3600 # 1 hour total for security-focused fuzzing | ||
|
||
# What to do when evergreen hits the timeout (`post:` tasks are run automatically) | ||
timeout: | ||
- command: shell.exec | ||
params: | ||
script: | | ||
ls -la | ||
echo "Fuzzing timed out. Collecting any available artifacts..." | ||
if [ -d "src/fuzz/artifacts" ]; then | ||
tar czf "${PROJECT_DIRECTORY}/crash-artifacts.tar.gz" src/fuzz/artifacts/ | ||
fi | ||
|
||
functions: | ||
"fetch source": | ||
|
@@ -154,7 +157,25 @@ functions: | |
- command: shell.exec | ||
params: | ||
script: | | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These evergreen changes are completely AI written except for the echo statement. Pretty impressive |
||
# Nothing needs to be done here | ||
# Archive crash artifacts if they exist and contain crashes | ||
if [ -d "src/fuzz/artifacts" ] && [ "$(ls -A src/fuzz/artifacts)" ]; then | ||
echo "Crashes found in artifacts directory. Creating archive..." | ||
tar czf "${PROJECT_DIRECTORY}/crash-artifacts.tar.gz" src/fuzz/artifacts/ | ||
else | ||
echo "No crashes found in artifacts directory. Skipping archive creation." | ||
fi | ||
# Upload crash artifacts if they exist | ||
- command: s3.put | ||
params: | ||
aws_key: ${aws_key} | ||
aws_secret: ${aws_secret} | ||
local_file: ${PROJECT_DIRECTORY}/crash-artifacts.tar.gz | ||
remote_file: ${CURRENT_VERSION}/crash-artifacts.tar.gz | ||
bucket: mciuploads | ||
permissions: public-read | ||
content_type: application/x-gzip | ||
optional: true | ||
|
||
pre: | ||
- func: "fetch source" | ||
- func: "install dependencies" | ||
|
@@ -259,4 +280,4 @@ buildvariants: | |
run_on: | ||
- ubuntu1804-test | ||
tasks: | ||
- name: "wasm-test" | ||
- name: "wasm-test" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,19 +1,29 @@ | ||
|
||
[package] | ||
name = "bson-fuzz" | ||
version = "0.0.1" | ||
authors = ["Automatically generated"] | ||
publish = false | ||
edition = "2021" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd generally prefer to do edition bumps in their own PRs, but it doesn't look like it had any impact here otherwise. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Devin was not pleased that we didn't have an edition 😂 |
||
|
||
[package.metadata] | ||
cargo-fuzz = true | ||
|
||
[dependencies.bson] | ||
path = ".." | ||
|
||
[dependencies.libfuzzer-sys] | ||
version = "0.4.0" | ||
|
||
# Prevent this from interfering with workspaces | ||
[dependencies.arbitrary] | ||
version = "1.3.0" | ||
features = ["derive"] | ||
|
||
[dependencies.serde] | ||
version = "1.0" | ||
|
||
[dependencies.serde_json] | ||
version = "1.0" | ||
|
||
[workspace] | ||
members = ["."] | ||
|
||
|
@@ -32,3 +42,23 @@ path = "fuzz_targets/raw_deserialize.rs" | |
[[bin]] | ||
name = "raw_deserialize_utf8_lossy" | ||
path = "fuzz_targets/raw_deserialize_utf8_lossy.rs" | ||
|
||
[[bin]] | ||
name = "malformed_length" | ||
path = "fuzz_targets/malformed_length.rs" | ||
|
||
[[bin]] | ||
name = "type_markers" | ||
path = "fuzz_targets/type_markers.rs" | ||
|
||
[[bin]] | ||
name = "string_handling" | ||
path = "fuzz_targets/string_handling.rs" | ||
|
||
[[bin]] | ||
name = "serialization" | ||
path = "fuzz_targets/serialization.rs" | ||
|
||
[[bin]] | ||
name = "generate_corpus" | ||
path = "generate_corpus.rs" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
//! BSON Document Length Field Fuzzer | ||
//! | ||
//! This fuzz test focuses on finding security vulnerabilities related to BSON document length fields. | ||
//! It specifically targets: | ||
//! - Integer overflow/underflow in length calculations | ||
//! - Malformed length fields that could cause buffer overruns | ||
//! - Mismatches between declared and actual document sizes | ||
//! - Memory allocation issues with large or invalid lengths | ||
|
||
#![no_main] | ||
#[macro_use] extern crate libfuzzer_sys; | ||
extern crate bson; | ||
use bson::RawDocument; | ||
|
||
fuzz_target!(|buf: &[u8]| { | ||
if buf.len() >= 4 { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why the length guard? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Devin got this from the docs and/or source, I agree it would be better to remove this guard. |
||
// Focus on document length field manipulation | ||
let _ = RawDocument::from_bytes(buf); | ||
} | ||
}); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,209 @@ | ||
#![no_main] | ||
use arbitrary::Arbitrary; | ||
use bson::{ | ||
raw::{RawBson, RawBsonRef, RawDocument}, | ||
spec::BinarySubtype, | ||
Decimal128, | ||
}; | ||
use libfuzzer_sys::fuzz_target; | ||
use std::str::FromStr; | ||
|
||
fn convert_bson_ref(bson_ref: RawBsonRef) -> Option<RawBson> { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure I understand the intent of this function (or of the fuzz test in this file, for that matter). This function converts from a borrowed to an owned value (e.g. what The handling of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, this is what it did when told to compare the input to the output |
||
match bson_ref { | ||
RawBsonRef::Double(d) => { | ||
if d.is_nan() { | ||
Some(RawBsonRef::Double(f64::NAN).to_raw_bson()) | ||
} else if d.is_infinite() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: this case can be collapsed into the final There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Presumably Devin would fix this kind of issue if asked. You're definitely right. In this case I think I'll fix it myself. |
||
Some(RawBsonRef::Double(d).to_raw_bson()) | ||
} else { | ||
Some(RawBsonRef::Double(d).to_raw_bson()) | ||
} | ||
} | ||
RawBsonRef::String(s) => { | ||
if !s.is_empty() && !s.contains('\0') && s.len() <= (i32::MAX as usize) { | ||
Some(RawBsonRef::String(s).to_raw_bson()) | ||
} else { | ||
None | ||
} | ||
} | ||
RawBsonRef::Document(d) => { | ||
let mut valid = true; | ||
for result in d.iter() { | ||
match result { | ||
Ok((key, _)) if key.is_empty() || key.contains('\0') => { | ||
valid = false; | ||
break; | ||
} | ||
Err(_) => { | ||
valid = false; | ||
break; | ||
} | ||
_ => {} | ||
} | ||
} | ||
if valid { | ||
Some(RawBsonRef::Document(d).to_raw_bson()) | ||
} else { | ||
None | ||
} | ||
} | ||
RawBsonRef::Array(a) => { | ||
let mut valid = true; | ||
for result in a.into_iter() { | ||
if result.is_err() { | ||
valid = false; | ||
break; | ||
} | ||
} | ||
if valid { | ||
Some(RawBsonRef::Array(a).to_raw_bson()) | ||
} else { | ||
None | ||
} | ||
} | ||
RawBsonRef::Binary(b) => { | ||
if b.bytes.len() <= i32::MAX as usize && | ||
match b.subtype { | ||
BinarySubtype::Generic | | ||
BinarySubtype::Function | | ||
BinarySubtype::BinaryOld | | ||
BinarySubtype::UuidOld | | ||
BinarySubtype::Uuid | | ||
BinarySubtype::Md5 | | ||
BinarySubtype::UserDefined(_) => true, | ||
_ => false | ||
} { | ||
Some(RawBsonRef::Binary(b).to_raw_bson()) | ||
} else { | ||
None | ||
} | ||
} | ||
RawBsonRef::ObjectId(id) => Some(RawBsonRef::ObjectId(id).to_raw_bson()), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This (and all the other cases with no extra logic) can just be collapsed down to a single
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is interesting. I often don't like catch-alls because they can cause errors when new types are added, but I don't think the bson spec will be adding types any time soon. |
||
RawBsonRef::Boolean(b) => Some(RawBsonRef::Boolean(b).to_raw_bson()), | ||
RawBsonRef::Null => Some(RawBsonRef::Null.to_raw_bson()), | ||
RawBsonRef::RegularExpression(regex) => { | ||
let valid_options = "ilmsux"; | ||
let mut options_sorted = regex.options.chars().collect::<Vec<_>>(); | ||
options_sorted.sort_unstable(); | ||
options_sorted.dedup(); | ||
let sorted_str: String = options_sorted.into_iter().collect(); | ||
|
||
if sorted_str.chars().all(|c| valid_options.contains(c)) && | ||
!regex.pattern.contains('\0') && | ||
regex.pattern.len() <= (i32::MAX as usize) { | ||
Some(RawBsonRef::RegularExpression(regex).to_raw_bson()) | ||
} else { | ||
None | ||
} | ||
} | ||
RawBsonRef::JavaScriptCode(code) => { | ||
if !code.is_empty() && !code.contains('\0') && code.len() <= (i32::MAX as usize) { | ||
Some(RawBsonRef::JavaScriptCode(code).to_raw_bson()) | ||
} else { | ||
None | ||
} | ||
} | ||
RawBsonRef::JavaScriptCodeWithScope(code_w_scope) => { | ||
if !code_w_scope.code.is_empty() && | ||
!code_w_scope.code.contains('\0') && | ||
code_w_scope.code.len() <= (i32::MAX as usize) { | ||
Some(RawBsonRef::JavaScriptCodeWithScope(code_w_scope).to_raw_bson()) | ||
} else { | ||
None | ||
} | ||
} | ||
RawBsonRef::DbPointer(ptr) => { | ||
let raw_bson = RawBsonRef::DbPointer(ptr).to_raw_bson(); | ||
Some(raw_bson) | ||
} | ||
RawBsonRef::Symbol(s) => { | ||
if !s.is_empty() && !s.contains('\0') && s.len() <= i32::MAX as usize { | ||
Some(RawBsonRef::Symbol(s).to_raw_bson()) | ||
} else { | ||
None | ||
} | ||
} | ||
RawBsonRef::Int32(i) => Some(RawBsonRef::Int32(i).to_raw_bson()), | ||
RawBsonRef::Int64(i) => Some(RawBsonRef::Int64(i).to_raw_bson()), | ||
RawBsonRef::Timestamp(ts) => Some(RawBsonRef::Timestamp(ts).to_raw_bson()), | ||
RawBsonRef::DateTime(dt) => Some(RawBsonRef::DateTime(dt).to_raw_bson()), | ||
RawBsonRef::Decimal128(d) => { | ||
let d_str = d.to_string(); | ||
if d_str.contains("NaN") { | ||
if let Ok(nan) = Decimal128::from_str("NaN") { | ||
Some(RawBsonRef::Decimal128(nan).to_raw_bson()) | ||
} else { | ||
None | ||
} | ||
} else if d_str == "Infinity" || d_str == "-Infinity" { | ||
if let Ok(val) = Decimal128::from_str(&d_str) { | ||
Some(RawBsonRef::Decimal128(val).to_raw_bson()) | ||
} else { | ||
None | ||
} | ||
} else { | ||
Some(RawBsonRef::Decimal128(d).to_raw_bson()) | ||
} | ||
} | ||
RawBsonRef::MinKey => Some(RawBsonRef::MinKey.to_raw_bson()), | ||
RawBsonRef::MaxKey => Some(RawBsonRef::MaxKey.to_raw_bson()), | ||
RawBsonRef::Undefined => Some(RawBsonRef::Undefined.to_raw_bson()), | ||
} | ||
} | ||
|
||
#[derive(Debug, Arbitrary)] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is this an There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I rewrote this test entirely. Going to have to say Devin failed on this one. |
||
struct Input { | ||
bytes: Vec<u8>, | ||
} | ||
|
||
fuzz_target!(|input: Input| { | ||
if let Ok(doc) = RawDocument::from_bytes(&input.bytes) { | ||
for result in doc.iter() { | ||
if let Ok((key, value)) = result { | ||
if let Some(converted) = convert_bson_ref(value) { | ||
let original_bytes = value.to_raw_bson(); | ||
match value { | ||
RawBsonRef::Double(d) if d.is_nan() => { | ||
if let Some(converted_ref) = converted.as_raw_bson_ref().as_f64() { | ||
assert!(converted_ref.is_nan(), | ||
"NaN comparison failed for key: {}", key); | ||
} | ||
} | ||
RawBsonRef::Double(d) if d.is_infinite() => { | ||
if let Some(converted_ref) = converted.as_raw_bson_ref().as_f64() { | ||
assert_eq!(d.is_sign_positive(), converted_ref.is_sign_positive(), | ||
"Infinity sign mismatch for key: {}", key); | ||
assert!(converted_ref.is_infinite(), | ||
"Infinity comparison failed for key: {}", key); | ||
} | ||
} | ||
RawBsonRef::Decimal128(d) if d.to_string().contains("NaN") => { | ||
match converted.as_raw_bson_ref() { | ||
RawBsonRef::Decimal128(cd) => { | ||
assert!(cd.to_string().contains("NaN"), | ||
"Decimal128 NaN comparison failed for key: {}", key); | ||
} | ||
_ => panic!("Type mismatch: expected Decimal128, got different type for key: {}", key), | ||
} | ||
} | ||
RawBsonRef::Decimal128(d) if d.to_string().contains("Infinity") => { | ||
match converted.as_raw_bson_ref() { | ||
RawBsonRef::Decimal128(cd) => { | ||
let d_str = d.to_string(); | ||
let cd_str = cd.to_string(); | ||
assert_eq!(d_str, cd_str, | ||
"Decimal128 Infinity comparison failed for key: {}", key); | ||
} | ||
_ => panic!("Type mismatch: expected Decimal128, got different type for key: {}", key), | ||
} | ||
} | ||
_ => { | ||
assert_eq!(converted, original_bytes, | ||
"Serialization mismatch for key: {}", key); | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
}); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, devin actually updated this comment