diff --git a/serde-tests/test.rs b/serde-tests/test.rs index eeae4500..f860ef7f 100644 --- a/serde-tests/test.rs +++ b/serde-tests/test.rs @@ -39,6 +39,7 @@ use bson::{ RawDbPointerRef, RawDocument, RawDocumentBuf, + RawJavaScriptCodeWithScope, RawJavaScriptCodeWithScopeRef, RawRegexRef, Regex, @@ -52,9 +53,9 @@ use bson::{ /// - serializing the `expected_value` to a `Document` matches the `expected_doc` /// - deserializing from the serialized document produces `expected_value` /// - round trip through raw BSON: -/// - deserializing a `T` from the raw BSON version of `expected_doc` produces `expected_value` -/// - deserializing a `Document` from the raw BSON version of `expected_doc` produces -/// `expected_doc` +/// - serializing `expected_value` to BSON bytes matches the raw BSON bytes of `expected_doc` +/// - deserializing a `T` from the serialized bytes produces `expected_value` +/// - deserializing a `Document` from the serialized bytes produces `expected_doc` /// - `bson::to_writer` and `Document::to_writer` produce the same result given the same input fn run_test(expected_value: &T, expected_doc: &Document, description: &str) where @@ -1244,17 +1245,34 @@ fn owned_raw_types() { let oid = ObjectId::new(); let dt = DateTime::now(); + let d128 = Decimal128::from_bytes([1; 16]); + + let raw_code_w_scope = RawJavaScriptCodeWithScope { + code: "code".to_string(), + scope: RawDocumentBuf::new(), + }; + let code_w_scope = JavaScriptCodeWithScope { + code: "code".to_string(), + scope: doc! {}, + }; let f = Foo { subdoc: RawDocumentBuf::from_iter([ ("a key", RawBson::String("a value".to_string())), ("an objectid", RawBson::ObjectId(oid)), ("a date", RawBson::DateTime(dt)), + ( + "code_w_scope", + RawBson::JavaScriptCodeWithScope(raw_code_w_scope.clone()), + ), + ("decimal128", RawBson::Decimal128(d128)), ]), array: RawArrayBuf::from_iter([ RawBson::String("a string".to_string()), RawBson::ObjectId(oid), RawBson::DateTime(dt), + RawBson::JavaScriptCodeWithScope(raw_code_w_scope), + RawBson::Decimal128(d128), ]), }; @@ -1263,28 +1281,19 @@ fn owned_raw_types() { "a key": "a value", "an objectid": oid, "a date": dt, + "code_w_scope": code_w_scope.clone(), + "decimal128": d128, }, "array": [ "a string", oid, - dt + dt, + code_w_scope, + d128, ] }; - // TODO: RUST-1111 - // can't use run_test here because deserializing RawDocumentBuf and RawArrayBuf - // from Bson or Document currently don't work. - - let bytes = bson::to_vec(&expected).unwrap(); - - let deserialized: Foo = bson::from_slice(bytes.as_slice()).unwrap(); - assert_eq!(deserialized, f); - - let serialized = bson::to_document(&deserialized).unwrap(); - assert_eq!(serialized, expected); - - let serialized_bytes = bson::to_vec(&deserialized).unwrap(); - assert_eq!(serialized_bytes, bytes); + run_test(&f, &expected, "owned_raw_types"); } #[test] diff --git a/src/bson.rs b/src/bson.rs index d8d7398f..28799103 100644 --- a/src/bson.rs +++ b/src/bson.rs @@ -529,7 +529,7 @@ impl Bson { /// This function mainly used for [extended JSON format](https://docs.mongodb.com/manual/reference/mongodb-extended-json/). // TODO RUST-426: Investigate either removing this from the serde implementation or unifying // with the extended JSON implementation. - pub(crate) fn into_extended_document(self) -> Document { + pub(crate) fn into_extended_document(self, rawbson: bool) -> Document { match self { Bson::RegularExpression(Regex { ref pattern, @@ -566,12 +566,21 @@ impl Bson { } } } - Bson::Binary(Binary { subtype, ref bytes }) => { + Bson::Binary(Binary { subtype, bytes }) => { let tval: u8 = From::from(subtype); - doc! { - "$binary": { - "base64": base64::encode(bytes), - "subType": hex::encode([tval]), + if rawbson { + doc! { + "$binary": { + "bytes": Binary { subtype: BinarySubtype::Generic, bytes }, + "subType": Bson::Int32(tval.into()) + } + } + } else { + doc! { + "$binary": { + "base64": base64::encode(bytes), + "subType": hex::encode([tval]), + } } } } @@ -580,6 +589,9 @@ impl Bson { "$oid": v.to_string(), } } + Bson::DateTime(v) if rawbson => doc! { + "$date": v.timestamp_millis(), + }, Bson::DateTime(v) if v.timestamp_millis() >= 0 && v.to_chrono().year() <= 9999 => { doc! { "$date": v.to_rfc3339_string(), diff --git a/src/de/mod.rs b/src/de/mod.rs index dcb56d18..5078263e 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -56,6 +56,22 @@ pub(crate) const MIN_BSON_DOCUMENT_SIZE: i32 = 4 + 1; // 4 bytes for length, one pub(crate) const MIN_BSON_STRING_SIZE: i32 = 4 + 1; // 4 bytes for length, one byte for null terminator pub(crate) const MIN_CODE_WITH_SCOPE_SIZE: i32 = 4 + MIN_BSON_STRING_SIZE + MIN_BSON_DOCUMENT_SIZE; +/// Hint provided to the deserializer via `deserialize_newtype_struct` as to the type of thing +/// being deserialized. +#[derive(Debug, Clone, Copy)] +enum DeserializerHint { + /// No hint provided, deserialize normally. + None, + + /// The type being deserialized expects the BSON to contain a binary value with the provided + /// subtype. This is currently used to deserialize `bson::Uuid` values. + BinarySubtype(BinarySubtype), + + /// The type being deserialized is raw BSON, meaning no allocations should occur as part of + /// deserializing and everything should be visited via borrowing or `Copy` if possible. + RawBson, +} + /// Run the provided closure, ensuring that over the course of its execution, exactly `length` bytes /// were read from the reader. pub(crate) fn ensure_read_exactly( diff --git a/src/de/raw.rs b/src/de/raw.rs index 1e6901b1..17f26eaf 100644 --- a/src/de/raw.rs +++ b/src/de/raw.rs @@ -19,6 +19,7 @@ use crate::{ Bson, DateTime, Decimal128, + DeserializerOptions, RawDocument, Timestamp, }; @@ -31,6 +32,7 @@ use super::{ read_i64, read_string, read_u8, + DeserializerHint, Error, Result, MAX_BSON_SIZE, @@ -38,22 +40,6 @@ use super::{ }; use crate::de::serde::MapDeserializer; -/// Hint provided to the deserializer via `deserialize_newtype_struct` as to the type of thing -/// being deserialized. -#[derive(Debug, Clone, Copy)] -enum DeserializerHint { - /// No hint provided, deserialize normally. - None, - - /// The type being deserialized expects the BSON to contain a binary value with the provided - /// subtype. This is currently used to deserialize `bson::Uuid` values. - BinarySubtype(BinarySubtype), - - /// The type being deserialized is raw BSON, meaning no allocations should occur as part of - /// deserializing and everything should be visited via borrowing or `Copy`. - RawBson, -} - /// Deserializer used to parse and deserialize raw BSON bytes. pub(crate) struct Deserializer<'de> { bytes: BsonBuf<'de>, @@ -307,8 +293,11 @@ impl<'de> Deserializer<'de> { )), _ => { let code = read_string(&mut self.bytes, utf8_lossy)?; - let doc = Bson::JavaScriptCode(code).into_extended_document(); - visitor.visit_map(MapDeserializer::new(doc)) + let doc = Bson::JavaScriptCode(code).into_extended_document(false); + visitor.visit_map(MapDeserializer::new( + doc, + DeserializerOptions::builder().human_readable(false).build(), + )) } } } @@ -361,8 +350,11 @@ impl<'de> Deserializer<'de> { )), _ => { let symbol = read_string(&mut self.bytes, utf8_lossy)?; - let doc = Bson::Symbol(symbol).into_extended_document(); - visitor.visit_map(MapDeserializer::new(doc)) + let doc = Bson::Symbol(symbol).into_extended_document(false); + visitor.visit_map(MapDeserializer::new( + doc, + DeserializerOptions::builder().human_readable(false).build(), + )) } } } diff --git a/src/de/serde.rs b/src/de/serde.rs index fe5042f5..4598a935 100644 --- a/src/de/serde.rs +++ b/src/de/serde.rs @@ -25,13 +25,13 @@ use crate::{ datetime::DateTime, document::{Document, IntoIter}, oid::ObjectId, - raw::RawBsonRef, + raw::{RawBsonRef, RAW_ARRAY_NEWTYPE, RAW_BSON_NEWTYPE, RAW_DOCUMENT_NEWTYPE}, spec::BinarySubtype, uuid::UUID_NEWTYPE_NAME, Decimal128, }; -use super::raw::Decimal128Access; +use super::{raw::Decimal128Access, DeserializerHint}; pub(crate) struct BsonVisitor; @@ -615,6 +615,60 @@ impl Deserializer { options, } } + + fn deserialize_next<'de, V>( + mut self, + visitor: V, + hint: DeserializerHint, + ) -> Result + where + V: serde::de::Visitor<'de>, + { + let value = match self.value.take() { + Some(value) => value, + None => return Err(crate::de::Error::EndOfStream), + }; + + let is_rawbson = matches!(hint, DeserializerHint::RawBson); + + if let DeserializerHint::BinarySubtype(expected_st) = hint { + match value { + Bson::Binary(ref b) if b.subtype == expected_st => {} + ref b => { + return Err(Error::custom(format!( + "expected Binary with subtype {:?}, instead got {:?}", + expected_st, b + ))); + } + } + }; + + match value { + Bson::Double(v) => visitor.visit_f64(v), + Bson::String(v) => visitor.visit_string(v), + Bson::Array(v) => { + let len = v.len(); + visitor.visit_seq(SeqDeserializer { + iter: v.into_iter(), + options: self.options, + len, + }) + } + Bson::Document(v) => visitor.visit_map(MapDeserializer::new(v, self.options)), + Bson::Boolean(v) => visitor.visit_bool(v), + Bson::Null => visitor.visit_unit(), + Bson::Int32(v) => visitor.visit_i32(v), + Bson::Int64(v) => visitor.visit_i64(v), + Bson::Binary(b) if b.subtype == BinarySubtype::Generic => { + visitor.visit_byte_buf(b.bytes) + } + Bson::Decimal128(d) => visitor.visit_map(Decimal128Access::new(d)), + _ => { + let doc = value.into_extended_document(is_rawbson); + visitor.visit_map(MapDeserializer::new(doc, self.options)) + } + } + } } macro_rules! forward_to_deserialize { @@ -662,61 +716,11 @@ impl<'de> de::Deserializer<'de> for Deserializer { } #[inline] - fn deserialize_any(mut self, visitor: V) -> crate::de::Result + fn deserialize_any(self, visitor: V) -> crate::de::Result where V: Visitor<'de>, { - let value = match self.value.take() { - Some(value) => value, - None => return Err(crate::de::Error::EndOfStream), - }; - - match value { - Bson::Double(v) => visitor.visit_f64(v), - Bson::String(v) => visitor.visit_string(v), - Bson::Array(v) => { - let len = v.len(); - visitor.visit_seq(SeqDeserializer { - iter: v.into_iter(), - options: self.options, - len, - }) - } - Bson::Document(v) => { - let len = v.len(); - visitor.visit_map(MapDeserializer { - iter: v.into_iter(), - value: None, - len, - options: self.options, - }) - } - Bson::Boolean(v) => visitor.visit_bool(v), - Bson::Null => visitor.visit_unit(), - Bson::Int32(v) => visitor.visit_i32(v), - Bson::Int64(v) => visitor.visit_i64(v), - Bson::Binary(Binary { - subtype: BinarySubtype::Generic, - bytes, - }) => visitor.visit_byte_buf(bytes), - binary @ Bson::Binary(..) => visitor.visit_map(MapDeserializer { - iter: binary.into_extended_document().into_iter(), - value: None, - len: 2, - options: self.options, - }), - Bson::Decimal128(d) => visitor.visit_map(Decimal128Access::new(d)), - _ => { - let doc = value.into_extended_document(); - let len = doc.len(); - visitor.visit_map(MapDeserializer { - iter: doc.into_iter(), - value: None, - len, - options: self.options, - }) - } - } + self.deserialize_next(visitor, DeserializerHint::None) } #[inline] @@ -813,19 +817,33 @@ impl<'de> de::Deserializer<'de> for Deserializer { where V: Visitor<'de>, { - // if this is a UUID, ensure that value is a subtype 4 binary - if name == UUID_NEWTYPE_NAME { - match self.value { - Some(Bson::Binary(ref b)) if b.subtype == BinarySubtype::Uuid => { - self.deserialize_any(visitor) + match name { + UUID_NEWTYPE_NAME => self.deserialize_next( + visitor, + DeserializerHint::BinarySubtype(BinarySubtype::Uuid), + ), + RAW_BSON_NEWTYPE => self.deserialize_next(visitor, DeserializerHint::RawBson), + RAW_DOCUMENT_NEWTYPE => { + if !matches!(self.value, Some(Bson::Document(_))) { + return Err(serde::de::Error::custom(format!( + "expected raw document, instead got {:?}", + self.value + ))); } - b => Err(Error::custom(format!( - "expected Binary with subtype 4, instead got {:?}", - b - ))), + + self.deserialize_next(visitor, DeserializerHint::RawBson) } - } else { - visitor.visit_newtype_struct(self) + RAW_ARRAY_NEWTYPE => { + if !matches!(self.value, Some(Bson::Array(_))) { + return Err(serde::de::Error::custom(format!( + "expected raw array, instead got {:?}", + self.value + ))); + } + + self.deserialize_next(visitor, DeserializerHint::RawBson) + } + _ => visitor.visit_newtype_struct(self), } } @@ -1035,13 +1053,13 @@ pub(crate) struct MapDeserializer { } impl MapDeserializer { - pub(crate) fn new(doc: Document) -> Self { + pub(crate) fn new(doc: Document, options: impl Into>) -> Self { let len = doc.len(); MapDeserializer { iter: doc.into_iter(), len, value: None, - options: Default::default(), + options: options.into().unwrap_or_default(), } } } diff --git a/src/lib.rs b/src/lib.rs index 8ac4ee37..21a298cf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -279,7 +279,7 @@ pub use self::{ decimal128::Decimal128, raw::{ RawArray, RawArrayBuf, RawBinaryRef, RawBson, RawBsonRef, RawDbPointerRef, RawDocument, - RawDocumentBuf, RawJavaScriptCodeWithScopeRef, RawRegexRef, + RawDocumentBuf, RawJavaScriptCodeWithScope, RawJavaScriptCodeWithScopeRef, RawRegexRef, }, ser::{ to_bson, to_bson_with_options, to_document, to_document_with_options, to_raw_document_buf, diff --git a/src/tests/spec/corpus.rs b/src/tests/spec/corpus.rs index 41159cd6..ceb1976c 100644 --- a/src/tests/spec/corpus.rs +++ b/src/tests/spec/corpus.rs @@ -217,16 +217,28 @@ fn run_test(test: TestFile) { test_key: bson_field, }; + // deserialize the field from a Bson into a RawBson + let deserializer_value_raw = + crate::Deserializer::new(Bson::Document(documentfromreader_cb.clone())); + let raw_bson_field = deserializer_value_raw + .deserialize_any(FieldVisitor(test_key.as_str(), PhantomData::)) + .expect(&description); + let from_value_raw_doc = doc! { + test_key: Bson::try_from(raw_bson_field).expect(&description), + }; + // convert back into raw BSON for comparison with canonical BSON let from_raw_vec = crate::to_vec(&from_raw_doc).expect(&description); let from_slice_value_vec = crate::to_vec(&from_slice_value_doc).expect(&description); let from_bson_value_vec = crate::to_vec(&from_value_value_doc).expect(&description); + let from_value_raw_vec = crate::to_vec(&from_value_raw_doc).expect(&description); assert_eq!(from_raw_vec, canonical_bson, "{}", description); assert_eq!(from_slice_value_vec, canonical_bson, "{}", description); assert_eq!(from_bson_value_vec, canonical_bson, "{}", description); assert_eq!(from_slice_owned_vec, canonical_bson, "{}", description); + assert_eq!(from_value_raw_vec, canonical_bson, "{}", description); } }