Skip to content

RUST-1111 Support deserializing RawBson from Bson #331

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 27 additions & 18 deletions serde-tests/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ use bson::{
RawDbPointerRef,
RawDocument,
RawDocumentBuf,
RawJavaScriptCodeWithScope,
RawJavaScriptCodeWithScopeRef,
RawRegexRef,
Regex,
Expand All @@ -52,9 +53,9 @@ use bson::{
/// - serializing the `expected_value` to a `Document` matches the `expected_doc`
/// - deserializing from the serialized document produces `expected_value`
/// - round trip through raw BSON:
/// - deserializing a `T` from the raw BSON version of `expected_doc` produces `expected_value`
/// - deserializing a `Document` from the raw BSON version of `expected_doc` produces
/// `expected_doc`
/// - serializing `expected_value` to BSON bytes matches the raw BSON bytes of `expected_doc`
/// - deserializing a `T` from the serialized bytes produces `expected_value`
/// - deserializing a `Document` from the serialized bytes produces `expected_doc`
/// - `bson::to_writer` and `Document::to_writer` produce the same result given the same input
fn run_test<T>(expected_value: &T, expected_doc: &Document, description: &str)
where
Expand Down Expand Up @@ -1244,17 +1245,34 @@ fn owned_raw_types() {

let oid = ObjectId::new();
let dt = DateTime::now();
let d128 = Decimal128::from_bytes([1; 16]);

let raw_code_w_scope = RawJavaScriptCodeWithScope {
code: "code".to_string(),
scope: RawDocumentBuf::new(),
};
let code_w_scope = JavaScriptCodeWithScope {
code: "code".to_string(),
scope: doc! {},
};

let f = Foo {
subdoc: RawDocumentBuf::from_iter([
("a key", RawBson::String("a value".to_string())),
("an objectid", RawBson::ObjectId(oid)),
("a date", RawBson::DateTime(dt)),
(
"code_w_scope",
RawBson::JavaScriptCodeWithScope(raw_code_w_scope.clone()),
),
("decimal128", RawBson::Decimal128(d128)),
]),
array: RawArrayBuf::from_iter([
RawBson::String("a string".to_string()),
RawBson::ObjectId(oid),
RawBson::DateTime(dt),
RawBson::JavaScriptCodeWithScope(raw_code_w_scope),
RawBson::Decimal128(d128),
]),
};

Expand All @@ -1263,28 +1281,19 @@ fn owned_raw_types() {
"a key": "a value",
"an objectid": oid,
"a date": dt,
"code_w_scope": code_w_scope.clone(),
"decimal128": d128,
},
"array": [
"a string",
oid,
dt
dt,
code_w_scope,
d128,
]
};

// TODO: RUST-1111
// can't use run_test here because deserializing RawDocumentBuf and RawArrayBuf
// from Bson or Document currently don't work.

let bytes = bson::to_vec(&expected).unwrap();

let deserialized: Foo = bson::from_slice(bytes.as_slice()).unwrap();
assert_eq!(deserialized, f);

let serialized = bson::to_document(&deserialized).unwrap();
assert_eq!(serialized, expected);

let serialized_bytes = bson::to_vec(&deserialized).unwrap();
assert_eq!(serialized_bytes, bytes);
run_test(&f, &expected, "owned_raw_types");
}

#[test]
Expand Down
24 changes: 18 additions & 6 deletions src/bson.rs
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,7 @@ impl Bson {
/// This function mainly used for [extended JSON format](https://docs.mongodb.com/manual/reference/mongodb-extended-json/).
// TODO RUST-426: Investigate either removing this from the serde implementation or unifying
// with the extended JSON implementation.
pub(crate) fn into_extended_document(self) -> Document {
pub(crate) fn into_extended_document(self, rawbson: bool) -> Document {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than implementing various `MapAccess` structs, the `Bson` deserializer converts values into their "extended document" form and then uses a single `MapAccess` implementation to walk over the resulting document. The changes here modify this method to create the documents in the form that the raw BSON types expect where necessary.

match self {
Bson::RegularExpression(Regex {
ref pattern,
Expand Down Expand Up @@ -566,12 +566,21 @@ impl Bson {
}
}
}
Bson::Binary(Binary { subtype, ref bytes }) => {
Bson::Binary(Binary { subtype, bytes }) => {
let tval: u8 = From::from(subtype);
doc! {
"$binary": {
"base64": base64::encode(bytes),
"subType": hex::encode([tval]),
if rawbson {
doc! {
"$binary": {
"bytes": Binary { subtype: BinarySubtype::Generic, bytes },
"subType": Bson::Int32(tval.into())
}
}
} else {
doc! {
"$binary": {
"base64": base64::encode(bytes),
"subType": hex::encode([tval]),
}
}
}
}
Expand All @@ -580,6 +589,9 @@ impl Bson {
"$oid": v.to_string(),
}
}
Bson::DateTime(v) if rawbson => doc! {
"$date": v.timestamp_millis(),
},
Bson::DateTime(v) if v.timestamp_millis() >= 0 && v.to_chrono().year() <= 9999 => {
doc! {
"$date": v.to_rfc3339_string(),
Expand Down
16 changes: 16 additions & 0 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,22 @@ pub(crate) const MIN_BSON_DOCUMENT_SIZE: i32 = 4 + 1; // 4 bytes for length, one
pub(crate) const MIN_BSON_STRING_SIZE: i32 = 4 + 1; // 4 bytes for length, one byte for null terminator
pub(crate) const MIN_CODE_WITH_SCOPE_SIZE: i32 = 4 + MIN_BSON_STRING_SIZE + MIN_BSON_DOCUMENT_SIZE;

/// Hint provided to the deserializer via `deserialize_newtype_struct` as to the type of thing
/// being deserialized.
#[derive(Debug, Clone, Copy)]
enum DeserializerHint {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was originally used only in the raw deserializer, but now that the non-raw deserializer also needs to interpret these hints, I've moved it to the top level of the `de` module.

/// No hint provided, deserialize normally.
None,

/// The type being deserialized expects the BSON to contain a binary value with the provided
/// subtype. This is currently used to deserialize `bson::Uuid` values.
BinarySubtype(BinarySubtype),

/// The type being deserialized is raw BSON, meaning no allocations should occur as part of
/// deserializing and everything should be visited via borrowing or `Copy` if possible.
RawBson,
}

/// Run the provided closure, ensuring that over the course of its execution, exactly `length` bytes
/// were read from the reader.
pub(crate) fn ensure_read_exactly<F, R>(
Expand Down
32 changes: 12 additions & 20 deletions src/de/raw.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use crate::{
Bson,
DateTime,
Decimal128,
DeserializerOptions,
RawDocument,
Timestamp,
};
Expand All @@ -31,29 +32,14 @@ use super::{
read_i64,
read_string,
read_u8,
DeserializerHint,
Error,
Result,
MAX_BSON_SIZE,
MIN_CODE_WITH_SCOPE_SIZE,
};
use crate::de::serde::MapDeserializer;

/// Hint provided to the deserializer via `deserialize_newtype_struct` as to the type of thing
/// being deserialized.
#[derive(Debug, Clone, Copy)]
enum DeserializerHint {
/// No hint provided, deserialize normally.
None,

/// The type being deserialized expects the BSON to contain a binary value with the provided
/// subtype. This is currently used to deserialize `bson::Uuid` values.
BinarySubtype(BinarySubtype),

/// The type being deserialized is raw BSON, meaning no allocations should occur as part of
/// deserializing and everything should be visited via borrowing or `Copy`.
RawBson,
}

/// Deserializer used to parse and deserialize raw BSON bytes.
pub(crate) struct Deserializer<'de> {
bytes: BsonBuf<'de>,
Expand Down Expand Up @@ -307,8 +293,11 @@ impl<'de> Deserializer<'de> {
)),
_ => {
let code = read_string(&mut self.bytes, utf8_lossy)?;
let doc = Bson::JavaScriptCode(code).into_extended_document();
visitor.visit_map(MapDeserializer::new(doc))
let doc = Bson::JavaScriptCode(code).into_extended_document(false);
visitor.visit_map(MapDeserializer::new(
doc,
DeserializerOptions::builder().human_readable(false).build(),
))
}
}
}
Expand Down Expand Up @@ -361,8 +350,11 @@ impl<'de> Deserializer<'de> {
)),
_ => {
let symbol = read_string(&mut self.bytes, utf8_lossy)?;
let doc = Bson::Symbol(symbol).into_extended_document();
visitor.visit_map(MapDeserializer::new(doc))
let doc = Bson::Symbol(symbol).into_extended_document(false);
visitor.visit_map(MapDeserializer::new(
doc,
DeserializerOptions::builder().human_readable(false).build(),
))
}
}
}
Expand Down
Loading