From 8a4d9f6d065917509d099d66252d2234d0f0e41f Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Mon, 12 Jul 2021 18:59:23 -0400 Subject: [PATCH 01/21] initial impl working --- src/ser/mod.rs | 26 ++ src/ser/raw.rs | 733 +++++++++++++++++++++++++++++++++++++++++++++++++ src/spec.rs | 8 + 3 files changed, 767 insertions(+) create mode 100644 src/ser/raw.rs diff --git a/src/ser/mod.rs b/src/ser/mod.rs index a2669007..898ffca9 100644 --- a/src/ser/mod.rs +++ b/src/ser/mod.rs @@ -22,6 +22,7 @@ //! Serializer mod error; +mod raw; mod serde; pub use self::{ @@ -52,6 +53,11 @@ fn write_cstring(writer: &mut W, s: &str) -> Result<()> { Ok(()) } +#[inline] +pub(crate) fn write_u8(writer: &mut W, val: u8) -> Result<()> { + writer.write_all(&[val]).map(|_| ()).map_err(From::from) +} + #[inline] pub(crate) fn write_i32(writer: &mut W, val: i32) -> Result<()> { writer @@ -204,3 +210,23 @@ where }), } } + +pub fn to_writer(value: &T, mut writer: W) -> Result<()> +where + T: Serialize, + W: Write, +{ + let mut serializer = raw::Serializer::new(); + value.serialize(&mut serializer)?; + writer.write_all(&mut serializer.into_vec())?; + Ok(()) +} + +pub fn to_vec(value: &T) -> Result> +where + T: Serialize, +{ + let mut serializer = raw::Serializer::new(); + value.serialize(&mut serializer)?; + Ok(serializer.into_vec()) +} diff --git a/src/ser/raw.rs b/src/ser/raw.rs new file mode 100644 index 00000000..9752b553 --- /dev/null +++ b/src/ser/raw.rs @@ -0,0 +1,733 @@ +use std::{borrow::Borrow, io::Write, ops::Index}; + +use serde::{ + ser::{Error as SerdeError, Impossible, SerializeMap}, + Serialize, + Serializer as SerdeSerializer, +}; + +use super::{write_cstring, write_f64, write_i32, write_i64, write_string, write_u8}; +use crate::{ + ser::{Error, Result}, + spec::{BinarySubtype, ElementType}, +}; + +pub(crate) struct Serializer { + bytes: Vec, + type_index: usize, +} + +impl Serializer { + pub(crate) fn new() -> Self { + Self { + bytes: Vec::new(), + type_index: 0, + } + } + + pub(crate) fn into_vec(self) -> Vec { + self.bytes + } + + fn update_element_type(&mut self, t: ElementType) -> Result<()> { + if self.type_index == 0 { + if matches!(t, ElementType::EmbeddedDocument) { + // don't need to set the element type for the top level document + return Ok(()); + } else { + return Err(Error::custom(format!( + "attempted to encode a non-document type at the top level: {:?}", + t + ))); + } + } + + self.bytes[self.type_index] = t as u8; + Ok(()) + } +} + +impl<'a> serde::Serializer for &'a mut Serializer { + type Ok = (); + type Error = Error; + + type SerializeSeq = DocumentSerializer<'a>; + type SerializeTuple = DocumentSerializer<'a>; + type SerializeTupleStruct = DocumentSerializer<'a>; + type SerializeTupleVariant = VariantSerializer<'a>; + type SerializeMap = DocumentSerializer<'a>; + type SerializeStruct = DocumentSerializer<'a>; + type SerializeStructVariant = VariantSerializer<'a>; + + fn serialize_bool(self, v: bool) -> Result { + self.update_element_type(ElementType::Boolean)?; + self.bytes.push(if v { 1 } else { 0 }); + Ok(()) + } + + fn serialize_i8(self, v: i8) -> Result { + self.update_element_type(ElementType::Int32)?; + write_i32(&mut self.bytes, v.into())?; + Ok(()) + } + + fn serialize_i16(self, v: i16) -> Result { + self.update_element_type(ElementType::Int32)?; + write_i32(&mut self.bytes, v.into())?; + Ok(()) + } + + fn serialize_i32(self, v: i32) -> Result { + self.update_element_type(ElementType::Int32)?; + write_i32(&mut self.bytes, v)?; + Ok(()) + } + + fn serialize_i64(self, v: i64) -> Result { + self.update_element_type(ElementType::Int64)?; + write_i64(&mut self.bytes, v)?; + Ok(()) + } + + fn serialize_u8(self, v: u8) -> Result { + todo!() + } + + fn serialize_u16(self, v: u16) -> Result { + todo!() + } + + fn serialize_u32(self, v: u32) -> Result { + todo!() + } + + fn serialize_u64(self, v: u64) -> Result { + todo!() + } + + fn serialize_f32(self, v: f32) -> Result { + self.update_element_type(ElementType::Double)?; + write_f64(&mut self.bytes, v.into()) + } + + fn serialize_f64(self, v: f64) -> Result { + self.update_element_type(ElementType::Double)?; + write_f64(&mut self.bytes, v.into()) + } + + fn serialize_char(self, v: char) -> Result { + todo!() + } + + fn serialize_str(self, v: &str) -> Result { + self.update_element_type(ElementType::String)?; + write_string(&mut self.bytes, v) + } + + fn serialize_bytes(self, v: &[u8]) -> Result { + self.update_element_type(ElementType::Binary)?; + let len = v.len() as i32; + write_i32(&mut self.bytes, len)?; + write_u8(&mut self.bytes, BinarySubtype::Generic.into())?; + self.bytes.write_all(v)?; + Ok(()) + } + + fn serialize_none(self) -> Result { + self.update_element_type(ElementType::Null)?; + Ok(()) + } + + fn serialize_some(self, value: &T) -> Result + where + T: serde::Serialize, + { + value.serialize(self) + } + + #[inline] + fn serialize_unit(self) -> Result { + self.serialize_none() + } + + #[inline] + fn serialize_unit_struct(self, name: &'static str) -> Result { + self.serialize_unit() + } + + #[inline] + fn serialize_unit_variant( + self, + _name: &'static str, + _variant_index: u32, + variant: &'static str, + ) -> Result { + self.serialize_str(variant) + } + + fn serialize_newtype_struct(self, _name: &'static str, value: &T) -> Result + where + T: serde::Serialize, + { + value.serialize(self) + } + + fn serialize_newtype_variant( + self, + _name: &'static str, + _variant_index: u32, + variant: &'static str, + value: &T, + ) -> Result + where + T: serde::Serialize, + { + self.update_element_type(ElementType::EmbeddedDocument)?; + let mut d = DocumentSerializer::start(&mut *self)?; + d.serialize_entry(variant, value)?; + d.end_doc() + } + + #[inline] + fn serialize_seq(self, _len: Option) -> Result { + self.update_element_type(ElementType::Array)?; + DocumentSerializer::start(&mut *self) + } + + #[inline] + fn serialize_tuple(self, len: usize) -> Result { + self.serialize_seq(Some(len)) + } + + #[inline] + fn serialize_tuple_struct( + self, + _name: &'static str, + len: usize, + ) -> Result { + self.serialize_seq(Some(len)) + } + + #[inline] + fn serialize_tuple_variant( + self, + name: &'static str, + variant_index: u32, + variant: &'static str, + len: usize, + ) -> Result { + self.update_element_type(ElementType::EmbeddedDocument)?; + VariantSerializer::start(&mut *self, variant, VariantInnerType::Tuple) + } + + fn serialize_map(self, len: Option) -> Result { + self.update_element_type(ElementType::EmbeddedDocument)?; + DocumentSerializer::start(&mut *self) + } + + fn serialize_struct(self, _name: &'static str, _len: usize) -> Result { + self.update_element_type(ElementType::EmbeddedDocument)?; + DocumentSerializer::start(&mut *self) + } + + fn serialize_struct_variant( + self, + _name: &'static str, + _variant_index: u32, + variant: &'static str, + _len: usize, + ) -> Result { + VariantSerializer::start(&mut *self, variant, VariantInnerType::Struct) + } +} + +pub(crate) struct DocumentSerializer<'a> { + root_serializer: &'a mut Serializer, + num_keys_serialized: usize, + start: usize, +} + +impl<'a> DocumentSerializer<'a> { + fn start(rs: &'a mut Serializer) -> crate::ser::Result { + let start = rs.bytes.len(); + write_i32(&mut rs.bytes, 0)?; + Ok(Self { + root_serializer: rs, + num_keys_serialized: 0, + start, + }) + } + + fn serialize_doc_key(&mut self, key: &T) -> Result<()> + where + T: serde::Serialize + ?Sized, + { + // push a dummy element type for now, will update this once we serialize the value + self.root_serializer.type_index = self.root_serializer.bytes.len(); + self.root_serializer.bytes.push(0); + key.serialize(KeySerializer { + root_serializer: &mut *self.root_serializer, + })?; + + self.num_keys_serialized += 1; + Ok(()) + } + + fn end_doc(self) -> crate::ser::Result<()> { + self.root_serializer.bytes.push(0); + let length = (self.root_serializer.bytes.len() - self.start) as i32; + self.root_serializer.bytes.splice( + self.start..self.start + 4, + length.to_le_bytes().iter().cloned(), + ); + Ok(()) + } +} + +impl<'a> serde::ser::SerializeSeq for DocumentSerializer<'a> { + type Ok = (); + type Error = Error; + + fn serialize_element(&mut self, value: &T) -> Result<()> + where + T: serde::Serialize, + { + self.serialize_doc_key(&self.num_keys_serialized.to_string())?; + value.serialize(&mut *self.root_serializer) + } + + fn end(self) -> Result { + self.end_doc() + } +} + +impl<'a> serde::ser::SerializeMap for DocumentSerializer<'a> { + type Ok = (); + + type Error = Error; + + fn serialize_key(&mut self, key: &T) -> Result<()> + where + T: serde::Serialize, + { + self.serialize_doc_key(key) + } + + fn serialize_value(&mut self, value: &T) -> Result<()> + where + T: serde::Serialize, + { + value.serialize(&mut *self.root_serializer) + } + + fn end(self) -> Result { + self.end_doc() + } +} + +impl<'a> serde::ser::SerializeStruct for DocumentSerializer<'a> { + type Ok = (); + + type Error = Error; + + fn serialize_field(&mut self, key: &'static str, value: &T) -> Result<()> + where + T: serde::Serialize, + { + self.serialize_doc_key(key)?; + value.serialize(&mut *self.root_serializer) + } + + fn end(self) -> Result { + self.end_doc() + } +} + +impl<'a> serde::ser::SerializeTuple for DocumentSerializer<'a> { + type Ok = (); + + type Error = Error; + + fn serialize_element(&mut self, value: &T) -> Result<()> + where + T: serde::Serialize, + { + self.serialize_doc_key(&self.num_keys_serialized.to_string())?; + value.serialize(&mut *self.root_serializer) + } + + fn end(self) -> Result { + self.end_doc() + } +} + +impl<'a> serde::ser::SerializeTupleStruct for DocumentSerializer<'a> { + type Ok = (); + + type Error = Error; + + fn serialize_field(&mut self, value: &T) -> Result<()> + where + T: serde::Serialize, + { + self.serialize_doc_key(&self.num_keys_serialized.to_string())?; + value.serialize(&mut *self.root_serializer) + } + + fn end(self) -> Result { + self.end_doc() + } +} + +enum VariantInnerType { + Tuple, + Struct, +} + +pub(crate) struct VariantSerializer<'a> { + root_serializer: &'a mut Serializer, + doc_start: usize, + inner_start: usize, + num_elements_serialized: usize, +} + +impl<'a> VariantSerializer<'a> { + fn start( + rs: &'a mut Serializer, + variant: &'static str, + inner_type: VariantInnerType, + ) -> Result { + rs.update_element_type(ElementType::EmbeddedDocument)?; + let doc_start = rs.bytes.len(); + write_i32(&mut rs.bytes, 0)?; + + let inner = match inner_type { + VariantInnerType::Struct => ElementType::EmbeddedDocument, + VariantInnerType::Tuple => ElementType::Array, + }; + rs.bytes.push(inner as u8); + write_cstring(&mut rs.bytes, variant)?; + let inner_start = rs.bytes.len(); + + Ok(Self { + root_serializer: rs, + num_elements_serialized: 0, + doc_start, + inner_start, + }) + } + + fn serialize_element(&mut self, k: &str, v: &T) -> Result<()> + where + T: Serialize + ?Sized, + { + self.root_serializer.bytes.push(0); + write_cstring(&mut self.root_serializer.bytes, k)?; + v.serialize(&mut *self.root_serializer)?; + + self.num_elements_serialized += 1; + Ok(()) + } + + fn end_both(self) -> Result<()> { + // null byte for the inner + self.root_serializer.bytes.push(0); + let arr_length = (self.root_serializer.bytes.len() - self.inner_start) as i32; + self.root_serializer.bytes.splice( + self.inner_start..self.inner_start + 4, + arr_length.to_le_bytes().iter().cloned(), + ); + + // null byte for document + self.root_serializer.bytes.push(0); + let doc_length = (self.root_serializer.bytes.len() - self.doc_start) as i32; + self.root_serializer.bytes.splice( + self.doc_start..self.doc_start + 4, + doc_length.to_le_bytes().iter().cloned(), + ); + Ok(()) + } +} + +impl<'a> serde::ser::SerializeTupleVariant for VariantSerializer<'a> { + type Ok = (); + + type Error = Error; + + fn serialize_field(&mut self, value: &T) -> Result<()> + where + T: Serialize, + { + self.serialize_element(format!("{}", self.num_elements_serialized).as_str(), value) + } + + fn end(self) -> Result { + self.end_both() + } +} + +impl<'a> serde::ser::SerializeStructVariant for VariantSerializer<'a> { + type Ok = (); + + type Error = Error; + + fn serialize_field(&mut self, key: &'static str, value: &T) -> Result<()> + where + T: Serialize, + { + self.serialize_element(key, value) + } + + fn end(self) -> Result { + self.end_both() + } +} + +struct KeySerializer<'a> { + root_serializer: &'a mut Serializer, +} + +impl<'a> serde::Serializer for KeySerializer<'a> { + type Ok = (); + + type Error = Error; + + type SerializeSeq = Impossible<(), Error>; + type SerializeTuple = Impossible<(), Error>; + type SerializeTupleStruct = Impossible<(), Error>; + type SerializeTupleVariant = Impossible<(), Error>; + type SerializeMap = Impossible<(), Error>; + type SerializeStruct = Impossible<(), Error>; + type SerializeStructVariant = Impossible<(), Error>; + + fn serialize_bool(self, v: bool) -> Result { + todo!() + } + + fn serialize_i8(self, v: i8) -> Result { + todo!() + } + + fn serialize_i16(self, v: i16) -> Result { + todo!() + } + + fn serialize_i32(self, v: i32) -> Result { + todo!() + } + + fn serialize_i64(self, v: i64) -> Result { + todo!() + } + + fn serialize_u8(self, v: u8) -> Result { + todo!() + } + + fn serialize_u16(self, v: u16) -> Result { + todo!() + } + + fn serialize_u32(self, v: u32) -> Result { + todo!() + } + + fn serialize_u64(self, v: u64) -> Result { + todo!() + } + + fn serialize_f32(self, v: f32) -> Result { + todo!() + } + + fn serialize_f64(self, v: f64) -> Result { + todo!() + } + + fn serialize_char(self, v: char) -> Result { + todo!() + } + + fn serialize_str(self, v: &str) -> Result { + write_cstring(&mut self.root_serializer.bytes, v) + } + + fn serialize_bytes(self, v: &[u8]) -> Result { + todo!() + } + + fn serialize_none(self) -> Result { + todo!() + } + + fn serialize_some(self, value: &T) -> Result + where + T: Serialize, + { + todo!() + } + + fn serialize_unit(self) -> Result { + todo!() + } + + fn serialize_unit_struct(self, name: &'static str) -> Result { + todo!() + } + + fn serialize_unit_variant( + self, + name: &'static str, + variant_index: u32, + variant: &'static str, + ) -> Result { + todo!() + } + + fn serialize_newtype_struct(self, name: &'static str, value: &T) -> Result + where + T: Serialize, + { + todo!() + } + + fn serialize_newtype_variant( + self, + name: &'static str, + variant_index: u32, + variant: &'static str, + value: &T, + ) -> Result + where + T: Serialize, + { + todo!() + } + + fn serialize_seq(self, len: Option) -> Result { + todo!() + } + + fn serialize_tuple(self, len: usize) -> Result { + todo!() + } + + fn serialize_tuple_struct( + self, + name: &'static str, + len: usize, + ) -> Result { + todo!() + } + + fn serialize_tuple_variant( + self, + name: &'static str, + variant_index: u32, + variant: &'static str, + len: usize, + ) -> Result { + todo!() + } + + fn serialize_map(self, len: Option) -> Result { + todo!() + } + + fn serialize_struct(self, name: &'static str, len: usize) -> Result { + todo!() + } + + fn serialize_struct_variant( + self, + name: &'static str, + variant_index: u32, + variant: &'static str, + len: usize, + ) -> Result { + todo!() + } +} + +#[cfg(test)] +mod test { + use crate::doc; + + #[test] + fn raw_serialize() { + let doc = doc! { + "x": { "y": "ok" }, + "a": true, + "b": 1i32, + "c": 2i64, + "d": 5.5, + "e": [ true, "aaa", { "ok": 1.0 } ] + }; + println!("{}", doc); + let mut v = Vec::new(); + doc.to_writer(&mut v).unwrap(); + + let raw_v = crate::ser::to_vec(&doc).unwrap(); + assert_eq!(raw_v, v); + } + use std::time::Instant; + + use serde::Deserialize; + + use crate::{oid::ObjectId, Document}; + + #[derive(Debug, Deserialize)] + struct D { + x: i32, + y: i32, + i: I, + // oid: ObjectId, + null: Option, + b: bool, + d: f32, + } + + #[derive(Debug, Deserialize)] + struct I { + a: i32, + b: i32, + } + + #[test] + fn raw_bench() { + let doc = doc! { + "ok": 1, + "x": 1, + "y": 2, + "i": { "a": 300, "b": 12345 }, + // "oid": ObjectId::new(), + "null": crate::Bson::Null, + "b": true, + "d": 12.5, + }; + + let raw_start = Instant::now(); + for _ in 0..10_000 { + let _b = crate::ser::to_vec(&doc).unwrap(); + } + let raw_time = raw_start.elapsed(); + println!("raw time: {}", raw_time.as_secs_f32()); + + let normal_start = Instant::now(); + for _ in 0..10_000 { + let d: Document = crate::to_document(&doc).unwrap(); + let mut v = Vec::new(); + d.to_writer(&mut v).unwrap(); + } + let normal_time = normal_start.elapsed(); + println!("normal time: {}", normal_time.as_secs_f32()); + + let normal_start = Instant::now(); + for _ in 0..10_000 { + let mut v = Vec::new(); + doc.to_writer(&mut v).unwrap(); + } + let normal_time = normal_start.elapsed(); + println!("decode time: {}", normal_time.as_secs_f32()); + } +} diff --git a/src/spec.rs b/src/spec.rs index 7fed83bb..fb05f81b 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -140,6 +140,14 @@ impl ElementType { } } +// impl From for u8 { +// fn from(et: ElementType) -> Self { +// match et { +// ElementType::Array => +// } +// } +// } + /// The available binary subtypes, plus a user-defined slot. #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] #[non_exhaustive] From 82fb93653ff03ca928f44b1317004f5a6e842560 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Mon, 12 Jul 2021 19:06:42 -0400 Subject: [PATCH 02/21] corpus, code w scope not working --- src/lib.rs | 2 +- src/ser/raw.rs | 18 +++++++++++------- src/tests/spec/corpus.rs | 10 ++++++++++ 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 5b9f64eb..66cedd9e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -198,7 +198,7 @@ pub use self::{ Deserializer, }, decimal128::Decimal128, - ser::{to_bson, to_document, Serializer}, + ser::{to_bson, to_document, to_writer, to_vec, Serializer}, }; #[macro_use] diff --git a/src/ser/raw.rs b/src/ser/raw.rs index 9752b553..7e882377 100644 --- a/src/ser/raw.rs +++ b/src/ser/raw.rs @@ -651,17 +651,21 @@ impl<'a> serde::Serializer for KeySerializer<'a> { #[cfg(test)] mod test { - use crate::doc; + use crate::{JavaScriptCodeWithScope, doc}; #[test] fn raw_serialize() { let doc = doc! { - "x": { "y": "ok" }, - "a": true, - "b": 1i32, - "c": 2i64, - "d": 5.5, - "e": [ true, "aaa", { "ok": 1.0 } ] + "a": JavaScriptCodeWithScope { + code: "".to_string(), + scope: doc! {} + } + // "x": { "y": "ok" }, + // "a": true, + // "b": 1i32, + // "c": 2i64, + // "d": 5.5, + // "e": [ true, "aaa", { "ok": 1.0 } ] }; println!("{}", doc); let mut v = Vec::new(); diff --git a/src/tests/spec/corpus.rs b/src/tests/spec/corpus.rs index a1a574b8..75f2342c 100644 --- a/src/tests/spec/corpus.rs +++ b/src/tests/spec/corpus.rs @@ -85,6 +85,9 @@ fn run_test(test: TestFile) { .to_writer(&mut native_to_bson_native_to_native_cb_serde) .expect(&description); + let mut native_to_bson_serde_bson_to_native_cb = Vec::new(); + crate::to_writer(&bson_to_native_cb, &mut native_to_bson_serde_bson_to_native_cb).expect(&description); + // native_to_bson( bson_to_native(cB) ) = cB assert_eq!( @@ -101,6 +104,13 @@ fn run_test(test: TestFile) { description, ); + assert_eq!( + hex::encode(native_to_bson_serde_bson_to_native_cb).to_lowercase(), + valid.canonical_bson.to_lowercase(), + "{}", + description, + ); + assert_eq!( hex::encode(native_to_bson_native_to_native_cb_serde).to_lowercase(), valid.canonical_bson.to_lowercase(), From 123a102e76b9257b1a7d472f06c65938808bbc6c Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Wed, 14 Jul 2021 16:06:24 -0400 Subject: [PATCH 03/21] wip custom stuff --- src/ser/raw.rs | 42 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/src/ser/raw.rs b/src/ser/raw.rs index 7e882377..8033b221 100644 --- a/src/ser/raw.rs +++ b/src/ser/raw.rs @@ -225,9 +225,17 @@ impl<'a> serde::Serializer for &'a mut Serializer { DocumentSerializer::start(&mut *self) } - fn serialize_struct(self, _name: &'static str, _len: usize) -> Result { - self.update_element_type(ElementType::EmbeddedDocument)?; - DocumentSerializer::start(&mut *self) + fn serialize_struct(self, name: &'static str, _len: usize) -> Result { + match name { + "$oid" => { + self.update_element_type(ElementType::ObjectId)?; + todo!() + }, + _ => { + self.update_element_type(ElementType::EmbeddedDocument)?; + DocumentSerializer::start(&mut *self) + } + } } fn serialize_struct_variant( @@ -652,6 +660,7 @@ impl<'a> serde::Serializer for KeySerializer<'a> { #[cfg(test)] mod test { use crate::{JavaScriptCodeWithScope, doc}; + use serde::Serialize; #[test] fn raw_serialize() { @@ -672,7 +681,9 @@ mod test { doc.to_writer(&mut v).unwrap(); let raw_v = crate::ser::to_vec(&doc).unwrap(); - assert_eq!(raw_v, v); + // assert_eq!(raw_v, v); + let d = Document::from_reader(raw_v.as_slice()).unwrap(); + println!("{:#?}", d); } use std::time::Instant; @@ -697,6 +708,29 @@ mod test { b: i32, } + #[derive(Debug, Serialize)] + struct Code { + c: JavaScriptCodeWithScope + } + + // #[test] + // fn raw_serialize() { + // let c = Code { + // c: JavaScriptCodeWithScope { + // code: "".to_string(), + // scope: doc! {}, + // } + // }; + + // let v = crate::ser::to_vec(&c).unwrap(); + + // let doc = crate::to_document(&c).unwrap(); + // let mut v2 = Vec::new(); + // doc.to_writer(&mut v2).unwrap(); + + // assert_eq!(v, v2); + // } + #[test] fn raw_bench() { let doc = doc! { From 67b195d73cb664c0d5e70126c008dde52eb6a50e Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Wed, 14 Jul 2021 19:06:45 -0400 Subject: [PATCH 04/21] datetime binary oid symbol done --- src/extjson/models.rs | 15 +- src/ser/mod.rs | 33 ++-- src/ser/raw.rs | 413 +++++++++++++++++++++++++++++++++++++++--- src/ser/serde.rs | 55 +++--- 4 files changed, 451 insertions(+), 65 deletions(-) diff --git a/src/extjson/models.rs b/src/extjson/models.rs index 551d5813..b08733f6 100644 --- a/src/extjson/models.rs +++ b/src/extjson/models.rs @@ -4,6 +4,7 @@ use chrono::Utc; use serde::{ de::{Error, Unexpected}, Deserialize, + Serialize, }; use crate::{extjson, oid, spec::BinarySubtype, Bson}; @@ -27,7 +28,7 @@ impl Int32 { } } -#[derive(Deserialize)] +#[derive(Deserialize, Serialize)] #[serde(deny_unknown_fields)] pub(crate) struct Int64 { #[serde(rename = "$numberLong")] @@ -127,7 +128,7 @@ pub(crate) struct Binary { pub(crate) body: BinaryBody, } -#[derive(Deserialize)] +#[derive(Deserialize, Serialize)] #[serde(deny_unknown_fields)] pub(crate) struct BinaryBody { pub(crate) base64: String, @@ -223,20 +224,26 @@ impl Timestamp { } } -#[derive(Deserialize)] +#[derive(Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub(crate) struct DateTime { #[serde(rename = "$date")] pub(crate) body: DateTimeBody, } -#[derive(Deserialize)] +#[derive(Deserialize, Serialize)] #[serde(untagged)] pub(crate) enum DateTimeBody { Canonical(Int64), Relaxed(String), } +impl DateTimeBody { + pub(crate) fn from_millis(m: i64) -> Self { + DateTimeBody::Canonical(Int64 { value: m.to_string() }) + } +} + impl DateTime { pub(crate) fn parse(self) -> extjson::de::Result { match self.body { diff --git a/src/ser/mod.rs b/src/ser/mod.rs index 898ffca9..1ef84307 100644 --- a/src/ser/mod.rs +++ b/src/ser/mod.rs @@ -89,6 +89,24 @@ fn write_f128(writer: &mut W, val: Decimal128) -> Result<()> writer.write_all(&raw).map_err(From::from) } +#[inline] +fn write_binary(mut writer: W, bytes: &[u8], subtype: BinarySubtype) -> Result<()> { + let len = if let BinarySubtype::BinaryOld = subtype { + bytes.len() + 4 + } else { + bytes.len() + }; + + write_i32(&mut writer, len as i32)?; + writer.write_all(&[subtype.into()])?; + + if let BinarySubtype::BinaryOld = subtype { + write_i32(&mut writer, len as i32 - 4)?; + }; + + writer.write_all(bytes).map_err(From::from) +} + fn serialize_array(writer: &mut W, arr: &[Bson]) -> Result<()> { let mut buf = Vec::new(); for (key, val) in arr.iter().enumerate() { @@ -148,20 +166,7 @@ pub(crate) fn serialize_bson( Bson::Int64(v) => write_i64(writer, v), Bson::Timestamp(ts) => write_i64(writer, ts.to_le_i64()), Bson::Binary(Binary { subtype, ref bytes }) => { - let len = if let BinarySubtype::BinaryOld = subtype { - bytes.len() + 4 - } else { - bytes.len() - }; - - write_i32(writer, len as i32)?; - writer.write_all(&[subtype.into()])?; - - if let BinarySubtype::BinaryOld = subtype { - write_i32(writer, len as i32 - 4)?; - }; - - writer.write_all(bytes).map_err(From::from) + write_binary(writer, bytes, subtype) } Bson::DateTime(ref v) => write_i64(writer, v.timestamp_millis()), Bson::Null => Ok(()), diff --git a/src/ser/raw.rs b/src/ser/raw.rs index 8033b221..dce84ebf 100644 --- a/src/ser/raw.rs +++ b/src/ser/raw.rs @@ -1,15 +1,17 @@ use std::{borrow::Borrow, io::Write, ops::Index}; use serde::{ - ser::{Error as SerdeError, Impossible, SerializeMap}, - Serialize, - Serializer as SerdeSerializer, + de::IntoDeserializer, + ser::{Error as SerdeError, Impossible, SerializeMap, SerializeStruct}, + Serialize, Serializer as SerdeSerializer, }; use super::{write_cstring, write_f64, write_i32, write_i64, write_string, write_u8}; use crate::{ - ser::{Error, Result}, + oid::ObjectId, + ser::{write_binary, Error, Result}, spec::{BinarySubtype, ElementType}, + Bson, }; pub(crate) struct Serializer { @@ -56,7 +58,7 @@ impl<'a> serde::Serializer for &'a mut Serializer { type SerializeTupleStruct = DocumentSerializer<'a>; type SerializeTupleVariant = VariantSerializer<'a>; type SerializeMap = DocumentSerializer<'a>; - type SerializeStruct = DocumentSerializer<'a>; + type SerializeStruct = StructSerializer<'a>; type SerializeStructVariant = VariantSerializer<'a>; fn serialize_bool(self, v: bool) -> Result { @@ -226,16 +228,15 @@ impl<'a> serde::Serializer for &'a mut Serializer { } fn serialize_struct(self, name: &'static str, _len: usize) -> Result { - match name { - "$oid" => { - self.update_element_type(ElementType::ObjectId)?; - todo!() - }, - _ => { - self.update_element_type(ElementType::EmbeddedDocument)?; - DocumentSerializer::start(&mut *self) - } - } + let element_type = match name { + "$oid" => ElementType::ObjectId, + "$date" => ElementType::DateTime, + "$binary" => ElementType::Binary, + _ => ElementType::EmbeddedDocument, + }; + + self.update_element_type(element_type)?; + StructSerializer::new(&mut *self, element_type) } fn serialize_struct_variant( @@ -387,6 +388,359 @@ impl<'a> serde::ser::SerializeTupleStruct for DocumentSerializer<'a> { } } +pub(crate) struct BsonTypeSerializer<'a> { + root_serializer: &'a mut Serializer, + state: SerializationStep, +} + +impl<'a> BsonTypeSerializer<'a> { + fn new(rs: &'a mut Serializer, element_type: ElementType) -> Self { + let state = match element_type { + ElementType::DateTime => SerializationStep::DateTime, + ElementType::Binary => SerializationStep::Binary, + ElementType::ObjectId => SerializationStep::Oid, + _ => todo!(), + }; + Self { + root_serializer: rs, + state, + } + } +} + +impl<'a, 'b> serde::Serializer for &'b mut BsonTypeSerializer<'a> { + type Ok = (); + type Error = Error; + + type SerializeSeq = Impossible<(), Error>; + type SerializeTuple = Impossible<(), Error>; + type SerializeTupleStruct = Impossible<(), Error>; + type SerializeTupleVariant = Impossible<(), Error>; + type SerializeMap = Impossible<(), Error>; + type SerializeStruct = Self; + type SerializeStructVariant = Impossible<(), Error>; + + fn serialize_bool(self, v: bool) -> Result { + todo!() + } + + fn serialize_i8(self, v: i8) -> Result { + todo!() + } + + fn serialize_i16(self, v: i16) -> Result { + todo!() + } + + fn serialize_i32(self, v: i32) -> Result { + todo!() + } + + fn serialize_i64(self, v: i64) -> Result { + todo!() + } + + fn serialize_u8(self, v: u8) -> Result { + todo!() + } + + fn serialize_u16(self, v: u16) -> Result { + todo!() + } + + fn serialize_u32(self, v: u32) -> Result { + todo!() + } + + fn serialize_u64(self, v: u64) -> Result { + todo!() + } + + fn serialize_f32(self, v: f32) -> Result { + todo!() + } + + fn serialize_f64(self, v: f64) -> Result { + todo!() + } + + fn serialize_char(self, v: char) -> Result { + todo!() + } + + fn serialize_str(self, v: &str) -> Result { + // match self.bson_type { + // ElementType::ObjectId => { + // let oid = ObjectId::parse_str(v).map_err(Error::custom)?; + // self.root_serializer.bytes.write_all(&oid.bytes())?; + // } + // _ => todo!(), + // } + + match &self.state { + SerializationStep::DateTimeNumberLong => { + let millis: i64 = v.parse().map_err(Error::custom)?; + write_i64(&mut self.root_serializer.bytes, millis)?; + } + SerializationStep::Oid => { + let oid = ObjectId::parse_str(v).map_err(Error::custom)?; + self.root_serializer.bytes.write_all(&oid.bytes())?; + } + SerializationStep::BinaryBase64 => { + self.state = SerializationStep::BinarySubType { + base64: v.to_string(), + }; + } + SerializationStep::BinarySubType { base64 } => { + let subtype_byte = hex::decode(v).map_err(Error::custom)?; + let subtype: BinarySubtype = subtype_byte[0].into(); + + let bytes = base64::decode(base64.as_str()).map_err(Error::custom)?; + + write_binary(&mut self.root_serializer.bytes, bytes.as_slice(), subtype)?; + }, + SerializationStep::Symbol => { + write_string(&mut self.root_serializer.bytes, v)?; + } + _ => todo!(), + } + Ok(()) + } + + fn serialize_bytes(self, v: &[u8]) -> Result { + todo!() + } + + fn serialize_none(self) -> Result { + todo!() + } + + fn serialize_some(self, value: &T) -> Result + where + T: Serialize, + { + todo!() + } + + fn serialize_unit(self) -> Result { + todo!() + } + + fn serialize_unit_struct(self, name: &'static str) -> Result { + todo!() + } + + fn serialize_unit_variant( + self, + name: &'static str, + variant_index: u32, + variant: &'static str, + ) -> Result { + todo!() + } + + fn serialize_newtype_struct(self, name: &'static str, value: &T) -> Result + where + T: Serialize, + { + todo!() + } + + fn serialize_newtype_variant( + self, + name: &'static str, + variant_index: u32, + variant: &'static str, + value: &T, + ) -> Result + where + T: Serialize, + { + todo!() + } + + fn serialize_seq(self, len: Option) -> Result { + todo!() + } + + fn serialize_tuple(self, len: usize) -> Result { + todo!() + } + + fn serialize_tuple_struct( + self, + name: &'static str, + len: usize, + ) -> Result { + todo!() + } + + fn serialize_tuple_variant( + self, + name: &'static str, + variant_index: u32, + variant: &'static str, + len: usize, + ) -> Result { + todo!() + } + + fn serialize_map(self, len: Option) -> Result { + todo!() + } + + fn serialize_struct(self, name: &'static str, len: usize) -> Result { + Ok(self) + } + + fn serialize_struct_variant( + self, + name: &'static str, + variant_index: u32, + variant: &'static str, + len: usize, + ) -> Result { + todo!() + } +} + +impl<'a, 'b> SerializeStruct for &'a mut BsonTypeSerializer<'b> { + type Ok = (); + type Error = Error; + + fn serialize_field(&mut self, key: &'static str, value: &T) -> Result<()> + where + T: Serialize, + { + // println!("{:?} + {}", self.state, key); + // match self.bson_type { + // ElementType::DateTime => match key { + // "$numberLong" => match crate::to_bson(value)? { + // Bson::String(s) => write_i64( + // &mut self.root_serializer.bytes, + // s.parse().map_err(Error::custom)?, + // ), + // _ => todo!(), + // }, + // // ElementType::Binary => match key { + // // "base64" + // // } + // _ => todo!(), + // }, + // _ => todo!(), + // } + + match (&self.state, key) { + (SerializationStep::DateTime, "$date") => { + self.state = SerializationStep::DateTimeNumberLong; + value.serialize(&mut **self)?; + } + (SerializationStep::DateTimeNumberLong, "$numberLong") => { + value.serialize(&mut **self)?; + self.state = SerializationStep::Done; + } + (SerializationStep::Oid, "$oid") => { + value.serialize(&mut **self)?; + self.state = SerializationStep::Done; + } + (SerializationStep::Binary, "$binary") => { + self.state = SerializationStep::BinaryBase64; + value.serialize(&mut **self)?; + } + (SerializationStep::BinaryBase64, "base64") => { + value.serialize(&mut **self)?; + } + (SerializationStep::BinarySubType { .. }, "subType") => { + value.serialize(&mut **self)?; + self.state = SerializationStep::Done; + } + (SerializationStep::Symbol, "$symbol") => { + value.serialize(&mut **self)?; + self.state = SerializationStep::Done; + } + (state, k) => panic!("bad combo: {:?} + {:?}", state, k), + } + + Ok(()) + } + + fn end(self) -> Result { + Ok(()) + } +} + +#[derive(Debug)] +enum SerializationStep { + Oid, + DateTime, + DateTimeNumberLong, + Binary, + BinaryBase64, + BinarySubType { base64: String }, + Symbol, + Done, +} + +// pub(crate) struct StructSerializer<'a> { +// root_serializer: &'a mut Serializer, +// bson_type: ElementType +// } + +pub(crate) enum StructSerializer<'a> { + Value(BsonTypeSerializer<'a>), + Document(DocumentSerializer<'a>), +} + +impl<'a> StructSerializer<'a> { + fn new(rs: &'a mut Serializer, element_type: ElementType) -> Result { + if let ElementType::EmbeddedDocument = element_type { + Ok(Self::Document(DocumentSerializer::start(rs)?)) + } else { + Ok(Self::Value(BsonTypeSerializer::new(rs, element_type))) + } + } +} + +impl<'a> SerializeStruct for StructSerializer<'a> { + type Ok = (); + type Error = Error; + + fn serialize_field(&mut self, key: &'static str, value: &T) -> Result<()> + where + T: Serialize, + { + // println!("got field: {}", key); + match self { + // StructSerializer::Value { + // element_type, + // root_serializer, + // } => { + // match element_type { + // ElementType::ObjectId => { + // assert_eq!(key, "$oid"); + // } + // ElementType::DateTime => { + // assert_eq!(key, "$date"); + // } + // _ => todo!(), + // } + // let mut s = BsonTypeSerializer::new(&mut *root_serializer, *element_type); + // value.serialize(&mut s) + // } + StructSerializer::Value(ref mut v) => (&mut *v).serialize_field(key, value), + StructSerializer::Document(d) => d.serialize_field(key, value), + } + // Ok(()) + } + + fn end(self) -> Result { + match self { + StructSerializer::Document(d) => SerializeStruct::end(d), + _ => Ok(()), + } + } +} + enum VariantInnerType { Tuple, Struct, @@ -659,16 +1013,23 @@ impl<'a> serde::Serializer for KeySerializer<'a> { #[cfg(test)] mod test { - use crate::{JavaScriptCodeWithScope, doc}; + use crate::{Binary, DateTime, JavaScriptCodeWithScope, doc}; use serde::Serialize; #[test] fn raw_serialize() { + let binary = Binary { + subtype: crate::spec::BinarySubtype::BinaryOld, + bytes: Vec::new(), + }; let doc = doc! { - "a": JavaScriptCodeWithScope { - code: "".to_string(), - scope: doc! {} - } + // "a": JavaScriptCodeWithScope { + // code: "".to_string(), + // scope: doc! {} + // } + "o": ObjectId::new(), + "d": DateTime::now(), + "b": binary, // "x": { "y": "ok" }, // "a": true, // "b": 1i32, @@ -677,8 +1038,8 @@ mod test { // "e": [ true, "aaa", { "ok": 1.0 } ] }; println!("{}", doc); - let mut v = Vec::new(); - doc.to_writer(&mut v).unwrap(); + // let mut v = Vec::new(); + // doc.to_writer(&mut v).unwrap(); let raw_v = crate::ser::to_vec(&doc).unwrap(); // assert_eq!(raw_v, v); @@ -710,7 +1071,7 @@ mod test { #[derive(Debug, Serialize)] struct Code { - c: JavaScriptCodeWithScope + c: JavaScriptCodeWithScope, } // #[test] @@ -733,6 +1094,10 @@ mod test { #[test] fn raw_bench() { + let binary = Binary { + subtype: crate::spec::BinarySubtype::Generic, + bytes: vec![1, 2, 3, 4, 5], + }; let doc = doc! { "ok": 1, "x": 1, @@ -741,7 +1106,9 @@ mod test { // "oid": ObjectId::new(), "null": crate::Bson::Null, "b": true, + "dt": DateTime::now(), "d": 12.5, + "b": binary, }; let raw_start = Instant::now(); diff --git a/src/ser/serde.rs b/src/ser/serde.rs index e903b1e2..8e4ae554 100644 --- a/src/ser/serde.rs +++ b/src/ser/serde.rs @@ -12,12 +12,7 @@ use serde::ser::{ #[cfg(feature = "decimal128")] use crate::decimal128::Decimal128; -use crate::{ - bson::{Array, Binary, Bson, DbPointer, Document, JavaScriptCodeWithScope, Regex, Timestamp}, - datetime::DateTime, - oid::ObjectId, - spec::BinarySubtype, -}; +use crate::{bson::{Array, Binary, Bson, DbPointer, Document, JavaScriptCodeWithScope, Regex, Timestamp}, datetime::DateTime, extjson::{self, models::DateTimeBody}, oid::ObjectId, spec::BinarySubtype}; use super::{to_bson, Error}; @@ -27,8 +22,8 @@ impl Serialize for ObjectId { where S: serde::ser::Serializer, { - let mut ser = serializer.serialize_map(Some(1))?; - ser.serialize_entry("$oid", &self.to_string())?; + let mut ser = serializer.serialize_struct("$oid", 1)?; + ser.serialize_field("$oid", &self.to_string())?; ser.end() } } @@ -53,19 +48,18 @@ impl Serialize for Bson { where S: ser::Serializer, { - match *self { - Bson::Double(v) => serializer.serialize_f64(v), - Bson::String(ref v) => serializer.serialize_str(v), - Bson::Array(ref v) => v.serialize(serializer), - Bson::Document(ref v) => v.serialize(serializer), - Bson::Boolean(v) => serializer.serialize_bool(v), + match self { + Bson::Double(v) => serializer.serialize_f64(*v), + Bson::String(v) => serializer.serialize_str(v), + Bson::Array(v) => v.serialize(serializer), + Bson::Document(v) => v.serialize(serializer), + Bson::Boolean(v) => serializer.serialize_bool(*v), Bson::Null => serializer.serialize_unit(), - Bson::Int32(v) => serializer.serialize_i32(v), - Bson::Int64(v) => serializer.serialize_i64(v), - Bson::Binary(Binary { - subtype: BinarySubtype::Generic, - ref bytes, - }) => serializer.serialize_bytes(bytes), + Bson::Int32(v) => serializer.serialize_i32(*v), + Bson::Int64(v) => serializer.serialize_i64(*v), + Bson::ObjectId(oid) => oid.serialize(serializer), + Bson::DateTime(dt) => dt.serialize(serializer), + Bson::Binary(b) => b.serialize(serializer), _ => { let doc = self.clone().into_extended_document(); doc.serialize(serializer) @@ -539,8 +533,17 @@ impl Serialize for Binary { where S: ser::Serializer, { - let value = Bson::Binary(self.clone()); - value.serialize(serializer) + if let BinarySubtype::Generic = self.subtype { + serializer.serialize_bytes(self.bytes.as_slice()) + } else { + let mut state = serializer.serialize_struct("$binary", 1)?; + let body = extjson::models::BinaryBody { + base64: base64::encode(self.bytes.as_slice()), + subtype: hex::encode([self.subtype.into()]), + }; + state.serialize_field("$binary", &body)?; + state.end() + } } } @@ -563,8 +566,12 @@ impl Serialize for DateTime { S: ser::Serializer, { // Cloning a `DateTime` is extremely cheap - let value = Bson::DateTime(*self); - value.serialize(serializer) + // let value = Bson::DateTime(*self); + // value.serialize(serializer) + let mut state = serializer.serialize_struct("$date", 1)?; + let body = extjson::models::DateTimeBody::from_millis(self.timestamp_millis()); + state.serialize_field("$date", &body)?; + state.end() } } From c15cb01f75d5e62ca484c18c1755cf96877aa855 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Thu, 15 Jul 2021 18:13:34 -0400 Subject: [PATCH 05/21] checkpoint --- src/ser/raw.rs | 234 ++++++++++++++++++++++++++++++++++++----------- src/ser/serde.rs | 19 ++-- 2 files changed, 189 insertions(+), 64 deletions(-) diff --git a/src/ser/raw.rs b/src/ser/raw.rs index dce84ebf..88c7bca8 100644 --- a/src/ser/raw.rs +++ b/src/ser/raw.rs @@ -1,18 +1,12 @@ -use std::{borrow::Borrow, io::Write, ops::Index}; +use std::io::Write; use serde::{ - de::IntoDeserializer, ser::{Error as SerdeError, Impossible, SerializeMap, SerializeStruct}, Serialize, Serializer as SerdeSerializer, }; use super::{write_cstring, write_f64, write_i32, write_i64, write_string, write_u8}; -use crate::{ - oid::ObjectId, - ser::{write_binary, Error, Result}, - spec::{BinarySubtype, ElementType}, - Bson, -}; +use crate::{Document, oid::ObjectId, ser::{write_binary, Error, Result}, spec::{BinarySubtype, ElementType}}; pub(crate) struct Serializer { bytes: Vec, @@ -47,6 +41,13 @@ impl Serializer { self.bytes[self.type_index] = t as u8; Ok(()) } + + fn replace_i32(&mut self, at: usize, with: i32) { + self.bytes.splice( + at..at + 4, + with.to_le_bytes().iter().cloned(), + ); + } } impl<'a> serde::Serializer for &'a mut Serializer { @@ -187,7 +188,8 @@ impl<'a> serde::Serializer for &'a mut Serializer { self.update_element_type(ElementType::EmbeddedDocument)?; let mut d = DocumentSerializer::start(&mut *self)?; d.serialize_entry(variant, value)?; - d.end_doc() + d.end_doc()?; + Ok(()) } #[inline] @@ -250,6 +252,11 @@ impl<'a> serde::Serializer for &'a mut Serializer { } } +struct DocumentSerializationResult<'a> { + length: i32, + root_serializer: &'a mut Serializer, +} + pub(crate) struct DocumentSerializer<'a> { root_serializer: &'a mut Serializer, num_keys_serialized: usize, @@ -282,14 +289,14 @@ impl<'a> DocumentSerializer<'a> { Ok(()) } - fn end_doc(self) -> crate::ser::Result<()> { + fn end_doc(self) -> crate::ser::Result> { self.root_serializer.bytes.push(0); let length = (self.root_serializer.bytes.len() - self.start) as i32; - self.root_serializer.bytes.splice( - self.start..self.start + 4, - length.to_le_bytes().iter().cloned(), - ); - Ok(()) + self.root_serializer.replace_i32(self.start, length); + Ok(DocumentSerializationResult { + length, + root_serializer: self.root_serializer + }) } } @@ -306,7 +313,7 @@ impl<'a> serde::ser::SerializeSeq for DocumentSerializer<'a> { } fn end(self) -> Result { - self.end_doc() + self.end_doc().map(|_| ()) } } @@ -330,7 +337,7 @@ impl<'a> serde::ser::SerializeMap for DocumentSerializer<'a> { } fn end(self) -> Result { - self.end_doc() + self.end_doc().map(|_| ()) } } @@ -348,7 +355,7 @@ impl<'a> serde::ser::SerializeStruct for DocumentSerializer<'a> { } fn end(self) -> Result { - self.end_doc() + self.end_doc().map(|_| ()) } } @@ -366,7 +373,7 @@ impl<'a> serde::ser::SerializeTuple for DocumentSerializer<'a> { } fn end(self) -> Result { - self.end_doc() + self.end_doc().map(|_| ()) } } @@ -384,7 +391,7 @@ impl<'a> serde::ser::SerializeTupleStruct for DocumentSerializer<'a> { } fn end(self) -> Result { - self.end_doc() + self.end_doc().map(|_| ()) } } @@ -399,6 +406,10 @@ impl<'a> BsonTypeSerializer<'a> { ElementType::DateTime => SerializationStep::DateTime, ElementType::Binary => SerializationStep::Binary, ElementType::ObjectId => SerializationStep::Oid, + ElementType::Symbol => SerializationStep::Symbol, + ElementType::RegularExpression => SerializationStep::RegEx, + ElementType::Timestamp => SerializationStep::Timestamp, + ElementType::DbPointer => SerializationStep::DbPointer, _ => todo!(), }; Self { @@ -408,7 +419,7 @@ impl<'a> BsonTypeSerializer<'a> { } } -impl<'a, 'b> serde::Serializer for &'b mut BsonTypeSerializer<'a> { +impl<'a, 'b, 'c: 'a + 'b> serde::Serializer for &'b mut BsonTypeSerializer<'a> { type Ok = (); type Error = Error; @@ -416,7 +427,7 @@ impl<'a, 'b> serde::Serializer for &'b mut BsonTypeSerializer<'a> { type SerializeTuple = Impossible<(), Error>; type SerializeTupleStruct = Impossible<(), Error>; type SerializeTupleVariant = Impossible<(), Error>; - type SerializeMap = Impossible<(), Error>; + type SerializeMap = CodeWithScopeSerializer<'a>; type SerializeStruct = Self; type SerializeStructVariant = Impossible<(), Error>; @@ -433,7 +444,13 @@ impl<'a, 'b> serde::Serializer for &'b mut BsonTypeSerializer<'a> { } fn serialize_i32(self, v: i32) -> Result { - todo!() + match self.state { + SerializationStep::TimestampTime | SerializationStep::TimestampIncrement => { + write_i32(&mut self.root_serializer.bytes, v)?; + } + _ => todo!(), + } + Ok(()) } fn serialize_i64(self, v: i64) -> Result { @@ -498,10 +515,21 @@ impl<'a, 'b> serde::Serializer for &'b mut BsonTypeSerializer<'a> { let bytes = base64::decode(base64.as_str()).map_err(Error::custom)?; write_binary(&mut self.root_serializer.bytes, bytes.as_slice(), subtype)?; - }, - SerializationStep::Symbol => { + } + SerializationStep::Symbol | SerializationStep::DbPointerRef => { write_string(&mut self.root_serializer.bytes, v)?; } + SerializationStep::RegExPattern | SerializationStep::RegExOptions => { + write_cstring(&mut self.root_serializer.bytes, v)?; + } + SerializationStep::Code => { + write_string(&mut self.root_serializer.bytes, v)?; + } + SerializationStep::CodeWithScopeCode => { + self.state = SerializationStep::CodeWithScopeScope { + code: v.to_string(), + }; + } _ => todo!(), } Ok(()) @@ -586,7 +614,12 @@ impl<'a, 'b> serde::Serializer for &'b mut BsonTypeSerializer<'a> { } fn serialize_map(self, len: Option) -> Result { - todo!() + match self.state { + SerializationStep::CodeWithScopeScope { ref code } => { + CodeWithScopeSerializer::start(code.as_str(), self.root_serializer) + } + _ => todo!(), + } } fn serialize_struct(self, name: &'static str, len: usize) -> Result { @@ -604,7 +637,7 @@ impl<'a, 'b> serde::Serializer for &'b mut BsonTypeSerializer<'a> { } } -impl<'a, 'b> SerializeStruct for &'a mut BsonTypeSerializer<'b> { +impl<'a> SerializeStruct for BsonTypeSerializer<'a> { type Ok = (); type Error = Error; @@ -612,24 +645,6 @@ impl<'a, 'b> SerializeStruct for &'a mut BsonTypeSerializer<'b> { where T: Serialize, { - // println!("{:?} + {}", self.state, key); - // match self.bson_type { - // ElementType::DateTime => match key { - // "$numberLong" => match crate::to_bson(value)? { - // Bson::String(s) => write_i64( - // &mut self.root_serializer.bytes, - // s.parse().map_err(Error::custom)?, - // ), - // _ => todo!(), - // }, - // // ElementType::Binary => match key { - // // "base64" - // // } - // _ => todo!(), - // }, - // _ => todo!(), - // } - match (&self.state, key) { (SerializationStep::DateTime, "$date") => { self.state = SerializationStep::DateTimeNumberLong; @@ -648,6 +663,7 @@ impl<'a, 'b> SerializeStruct for &'a mut BsonTypeSerializer<'b> { value.serialize(&mut **self)?; } (SerializationStep::BinaryBase64, "base64") => { + // state is updated in serialize value.serialize(&mut **self)?; } (SerializationStep::BinarySubType { .. }, "subType") => { @@ -658,6 +674,53 @@ impl<'a, 'b> SerializeStruct for &'a mut BsonTypeSerializer<'b> { value.serialize(&mut **self)?; self.state = SerializationStep::Done; } + (SerializationStep::RegEx, "$regularExpression") => { + self.state = SerializationStep::RegExPattern; + value.serialize(&mut **self)?; + } + (SerializationStep::RegExPattern, "pattern") => { + value.serialize(&mut **self)?; + self.state = SerializationStep::RegExOptions; + } + (SerializationStep::RegExOptions, "options") => { + value.serialize(&mut **self)?; + self.state = SerializationStep::Done; + } + (SerializationStep::Timestamp, "$timestamp") => { + self.state = SerializationStep::TimestampTime; + value.serialize(&mut **self)?; + } + (SerializationStep::TimestampTime, "t") => { + value.serialize(&mut **self)?; + self.state = SerializationStep::TimestampIncrement; + } + (SerializationStep::TimestampIncrement, "i") => { + value.serialize(&mut **self)?; + self.state = SerializationStep::Done; + } + (SerializationStep::DbPointer, "$dbPointer") => { + self.state = SerializationStep::DbPointerRef; + value.serialize(&mut **self)?; + } + (SerializationStep::DbPointerRef, "$ref") => { + value.serialize(&mut **self)?; + self.state = SerializationStep::DbPointerId; + } + (SerializationStep::DbPointerId, "$id") => { + self.state = SerializationStep::Oid; + value.serialize(&mut **self)?; + } + (SerializationStep::Code, "$code") => { + value.serialize(&mut **self)?; + self.state = SerializationStep::Done; + } + (SerializationStep::CodeWithScopeCode, "$code") => { + value.serialize(&mut **self)?; + } + (SerializationStep::CodeWithScopeScope { .. }, "$scope") => { + value.serialize(&mut **self)?; + self.state = SerializationStep::Done; + } (state, k) => panic!("bad combo: {:?} + {:?}", state, k), } @@ -672,12 +735,33 @@ impl<'a, 'b> SerializeStruct for &'a mut BsonTypeSerializer<'b> { #[derive(Debug)] enum SerializationStep { Oid, + DateTime, DateTimeNumberLong, + Binary, BinaryBase64, BinarySubType { base64: String }, + Symbol, + + RegEx, + RegExPattern, + RegExOptions, + + Timestamp, + TimestampTime, + TimestampIncrement, + + DbPointer, + DbPointerRef, + DbPointerId, + + Code, + + CodeWithScopeCode, + CodeWithScopeScope { code: String }, + Done, } @@ -795,18 +879,12 @@ impl<'a> VariantSerializer<'a> { // null byte for the inner self.root_serializer.bytes.push(0); let arr_length = (self.root_serializer.bytes.len() - self.inner_start) as i32; - self.root_serializer.bytes.splice( - self.inner_start..self.inner_start + 4, - arr_length.to_le_bytes().iter().cloned(), - ); + self.root_serializer.replace_i32(self.inner_start, arr_length); // null byte for document self.root_serializer.bytes.push(0); let doc_length = (self.root_serializer.bytes.len() - self.doc_start) as i32; - self.root_serializer.bytes.splice( - self.doc_start..self.doc_start + 4, - doc_length.to_le_bytes().iter().cloned(), - ); + self.root_serializer.replace_i32(self.doc_start, doc_length); Ok(()) } } @@ -1011,9 +1089,57 @@ impl<'a> serde::Serializer for KeySerializer<'a> { } } +struct CodeWithScopeSerializer<'a> { + code_length: usize, + start: usize, + doc: DocumentSerializer<'a>, +} + +impl<'a> CodeWithScopeSerializer<'a> { + fn start(code: &str, rs: &'a mut Serializer) -> Result { + let start = rs.bytes.len(); + write_i32(&mut rs.bytes, 0)?; // placeholder length + write_string(&mut rs.bytes, code)?; + + let doc = DocumentSerializer::start(rs)?; + Ok(Self { + code_length: code.len(), + start, + doc, + }) + } +} + +impl<'a> SerializeMap for CodeWithScopeSerializer<'a> { + type Ok = (); + type Error = Error; + + fn serialize_key(&mut self, key: &T) -> Result<()> + where + T: Serialize, + { + self.doc.serialize_key(key) + } + + fn serialize_value(&mut self, value: &T) -> Result<()> + where + T: Serialize, + { + self.doc.serialize_value(value) + } + + fn end(self) -> Result { + let result = self.doc.end_doc()?; + + let total_len = result.length + self.code_length as i32; + result.root_serializer.replace_i32(self.start, total_len); + Ok(()) + } +} + #[cfg(test)] mod test { - use crate::{Binary, DateTime, JavaScriptCodeWithScope, doc}; + use crate::{doc, Binary, DateTime, JavaScriptCodeWithScope}; use serde::Serialize; #[test] diff --git a/src/ser/serde.rs b/src/ser/serde.rs index 8e4ae554..5a2e7265 100644 --- a/src/ser/serde.rs +++ b/src/ser/serde.rs @@ -1,18 +1,17 @@ use serde::ser::{ - self, - Serialize, - SerializeMap, - SerializeSeq, - SerializeStruct, - SerializeStructVariant, - SerializeTuple, - SerializeTupleStruct, - SerializeTupleVariant, + self, Serialize, SerializeMap, SerializeSeq, SerializeStruct, SerializeStructVariant, + SerializeTuple, SerializeTupleStruct, SerializeTupleVariant, }; #[cfg(feature = "decimal128")] use crate::decimal128::Decimal128; -use crate::{bson::{Array, Binary, Bson, DbPointer, Document, JavaScriptCodeWithScope, Regex, Timestamp}, datetime::DateTime, extjson::{self, models::DateTimeBody}, oid::ObjectId, spec::BinarySubtype}; +use crate::{ + bson::{Array, Binary, Bson, DbPointer, Document, JavaScriptCodeWithScope, Regex, Timestamp}, + datetime::DateTime, + extjson::{self, models::DateTimeBody}, + oid::ObjectId, + spec::BinarySubtype, +}; use super::{to_bson, Error}; From 586038dfc96b8266428dea5b855d62ef5d57aee8 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Thu, 15 Jul 2021 21:04:06 -0400 Subject: [PATCH 06/21] wip --- src/ser/raw.rs | 21 ++++++++++++++++++--- src/ser/serde.rs | 32 +++++++++++++++++++++++++++++--- 2 files changed, 47 insertions(+), 6 deletions(-) diff --git a/src/ser/raw.rs b/src/ser/raw.rs index 88c7bca8..1097f59b 100644 --- a/src/ser/raw.rs +++ b/src/ser/raw.rs @@ -410,6 +410,11 @@ impl<'a> BsonTypeSerializer<'a> { ElementType::RegularExpression => SerializationStep::RegEx, ElementType::Timestamp => SerializationStep::Timestamp, ElementType::DbPointer => SerializationStep::DbPointer, + ElementType::JavaScriptCode => SerializationStep::Code, + ElementType::JavaScriptCodeWithScope => SerializationStep::CodeWithScopeCode, + ElementType::MinKey => SerializationStep::MinKey, + ElementType::MaxKey => SerializationStep::MaxKey, + _ => todo!(), }; Self { @@ -427,7 +432,7 @@ impl<'a, 'b, 'c: 'a + 'b> serde::Serializer for &'b mut BsonTypeSerializer<'a> { type SerializeTuple = Impossible<(), Error>; type SerializeTupleStruct = Impossible<(), Error>; type SerializeTupleVariant = Impossible<(), Error>; - type SerializeMap = CodeWithScopeSerializer<'a>; + type SerializeMap = CodeWithScopeSerializer<'b>; type SerializeStruct = Self; type SerializeStructVariant = Impossible<(), Error>; @@ -637,7 +642,7 @@ impl<'a, 'b, 'c: 'a + 'b> serde::Serializer for &'b mut BsonTypeSerializer<'a> { } } -impl<'a> SerializeStruct for BsonTypeSerializer<'a> { +impl<'a, 'b> SerializeStruct for &'b mut BsonTypeSerializer<'a> { type Ok = (); type Error = Error; @@ -721,6 +726,12 @@ impl<'a> SerializeStruct for BsonTypeSerializer<'a> { value.serialize(&mut **self)?; self.state = SerializationStep::Done; } + (SerializationStep::MinKey { .. }, "$minKey") => { + self.state = SerializationStep::Done; + } + (SerializationStep::MaxKey { .. }, "$maxKey") => { + self.state = SerializationStep::Done; + } (state, k) => panic!("bad combo: {:?} + {:?}", state, k), } @@ -762,6 +773,10 @@ enum SerializationStep { CodeWithScopeCode, CodeWithScopeScope { code: String }, + MinKey, + + MaxKey, + Done, } @@ -1089,7 +1104,7 @@ impl<'a> serde::Serializer for KeySerializer<'a> { } } -struct CodeWithScopeSerializer<'a> { +pub(crate) struct CodeWithScopeSerializer<'a> { code_length: usize, start: usize, doc: DocumentSerializer<'a>, diff --git a/src/ser/serde.rs b/src/ser/serde.rs index 5a2e7265..197fb90e 100644 --- a/src/ser/serde.rs +++ b/src/ser/serde.rs @@ -59,10 +59,36 @@ impl Serialize for Bson { Bson::ObjectId(oid) => oid.serialize(serializer), Bson::DateTime(dt) => dt.serialize(serializer), Bson::Binary(b) => b.serialize(serializer), - _ => { - let doc = self.clone().into_extended_document(); - doc.serialize(serializer) + Bson::JavaScriptCode(c) => { + let mut state = serializer.serialize_struct("$code", 1)?; + state.serialize_field("$code", c)?; + state.end() + }, + Bson::JavaScriptCodeWithScope(code_w_scope) => code_w_scope.serialize(serializer), + Bson::DbPointer(dbp) => dbp.serialize(serializer), + Bson::Symbol(s) => { + let mut state = serializer.serialize_struct("$symbol", 1)?; + state.serialize_field("$symbol", s)?; + state.end() } + Bson::RegularExpression(re) => re.serialize(serializer), + Bson::Timestamp(t) => t.serialize(serializer), + Bson::Decimal128(_) => todo!(), + Bson::Undefined => serializer.serialize_unit(), + Bson::MaxKey => { + let mut state = serializer.serialize_struct("$maxKey", 1)?; + state.serialize_field("$maxKey", &1)?; + state.end() + }, + Bson::MinKey => { + let mut state = serializer.serialize_struct("$minKey", 1)?; + state.serialize_field("$minKey", &1)?; + state.end() + }, + // Bson::Document(_) => { + // let doc = self.clone().into_extended_document(); + // doc.serialize(serializer) + // } } } } From 9e5894a50ac6b022769dc5e6ff915231897a8d79 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Mon, 19 Jul 2021 18:45:08 -0400 Subject: [PATCH 07/21] corpus working --- src/extjson/models.rs | 34 +++++++++-- src/lib.rs | 2 +- src/ser/mod.rs | 4 +- src/ser/raw.rs | 124 ++++++++++++++++++++++++--------------- src/ser/serde.rs | 83 ++++++++++++++++++-------- src/tests/spec/corpus.rs | 6 +- 6 files changed, 172 insertions(+), 81 deletions(-) diff --git a/src/extjson/models.rs b/src/extjson/models.rs index b08733f6..1d179b6c 100644 --- a/src/extjson/models.rs +++ b/src/extjson/models.rs @@ -3,6 +3,7 @@ use chrono::Utc; use serde::{ de::{Error, Unexpected}, + ser::SerializeStruct, Deserialize, Serialize, }; @@ -73,7 +74,7 @@ impl Double { } } -#[derive(Deserialize)] +#[derive(Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub(crate) struct ObjectId { #[serde(rename = "$oid")] @@ -87,6 +88,12 @@ impl ObjectId { } } +impl From for ObjectId { + fn from(id: crate::oid::ObjectId) -> Self { + Self { oid: id.to_hex() } + } +} + #[derive(Deserialize)] #[serde(deny_unknown_fields)] pub(crate) struct Symbol { @@ -101,7 +108,7 @@ pub(crate) struct Regex { body: RegexBody, } -#[derive(Deserialize)] +#[derive(Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub(crate) struct RegexBody { pub(crate) pattern: String, @@ -208,10 +215,13 @@ pub(crate) struct Timestamp { body: TimestampBody, } -#[derive(Deserialize)] +#[derive(Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub(crate) struct TimestampBody { + #[serde(serialize_with = "crate::serde_helpers::serialize_u32_as_i64")] pub(crate) t: u32, + + #[serde(serialize_with = "crate::serde_helpers::serialize_u32_as_i64")] pub(crate) i: u32, } @@ -224,6 +234,18 @@ impl Timestamp { } } +// impl Serialize for TimestampBody { +// fn serialize(&self, serializer: S) -> Result +// where +// S: serde::Serializer +// { +// let mut state = serializer.serialize_struct("TimestampBody", 2)?; +// state.serialize_field("t", Bson::from(self.t))?; +// state.serialize_field("i", Bson::from(self.i))?; +// state.end() +// } +// } + #[derive(Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub(crate) struct DateTime { @@ -240,7 +262,9 @@ pub(crate) enum DateTimeBody { impl DateTimeBody { pub(crate) fn from_millis(m: i64) -> Self { - DateTimeBody::Canonical(Int64 { value: m.to_string() }) + DateTimeBody::Canonical(Int64 { + value: m.to_string(), + }) } } @@ -314,7 +338,7 @@ pub(crate) struct DbPointer { body: DbPointerBody, } -#[derive(Deserialize)] +#[derive(Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub(crate) struct DbPointerBody { #[serde(rename = "$ref")] diff --git a/src/lib.rs b/src/lib.rs index 66cedd9e..86466c37 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -198,7 +198,7 @@ pub use self::{ Deserializer, }, decimal128::Decimal128, - ser::{to_bson, to_document, to_writer, to_vec, Serializer}, + ser::{to_bson, to_document, to_vec, to_writer, Serializer}, }; #[macro_use] diff --git a/src/ser/mod.rs b/src/ser/mod.rs index 1ef84307..645b2a70 100644 --- a/src/ser/mod.rs +++ b/src/ser/mod.rs @@ -165,9 +165,7 @@ pub(crate) fn serialize_bson( Bson::Int32(v) => write_i32(writer, v), Bson::Int64(v) => write_i64(writer, v), Bson::Timestamp(ts) => write_i64(writer, ts.to_le_i64()), - Bson::Binary(Binary { subtype, ref bytes }) => { - write_binary(writer, bytes, subtype) - } + Bson::Binary(Binary { subtype, ref bytes }) => write_binary(writer, bytes, subtype), Bson::DateTime(ref v) => write_i64(writer, v.timestamp_millis()), Bson::Null => Ok(()), Bson::Symbol(ref v) => write_string(writer, v), diff --git a/src/ser/raw.rs b/src/ser/raw.rs index 1097f59b..f0f58626 100644 --- a/src/ser/raw.rs +++ b/src/ser/raw.rs @@ -1,12 +1,19 @@ -use std::io::Write; +use std::{borrow::Borrow, convert::TryFrom, io::Write}; use serde::{ ser::{Error as SerdeError, Impossible, SerializeMap, SerializeStruct}, - Serialize, Serializer as SerdeSerializer, + Serialize, + Serializer as SerdeSerializer, }; use super::{write_cstring, write_f64, write_i32, write_i64, write_string, write_u8}; -use crate::{Document, oid::ObjectId, ser::{write_binary, Error, Result}, spec::{BinarySubtype, ElementType}}; +use crate::{ + oid::ObjectId, + ser::{write_binary, Error, Result}, + spec::{BinarySubtype, ElementType}, + Decimal128, + Document, +}; pub(crate) struct Serializer { bytes: Vec, @@ -43,10 +50,8 @@ impl Serializer { } fn replace_i32(&mut self, at: usize, with: i32) { - self.bytes.splice( - at..at + 4, - with.to_le_bytes().iter().cloned(), - ); + self.bytes + .splice(at..at + 4, with.to_le_bytes().iter().cloned()); } } @@ -234,6 +239,16 @@ impl<'a> serde::Serializer for &'a mut Serializer { "$oid" => ElementType::ObjectId, "$date" => ElementType::DateTime, "$binary" => ElementType::Binary, + "$timestamp" => ElementType::Timestamp, + "$minKey" => ElementType::MinKey, + "$maxKey" => ElementType::MaxKey, + "$code" => ElementType::JavaScriptCode, + "$codeWithScope" => ElementType::JavaScriptCodeWithScope, + "$symbol" => ElementType::Symbol, + "$undefined" => ElementType::Undefined, + "$regularExpression" => ElementType::RegularExpression, + "$dbPointer" => ElementType::DbPointer, + "$numberDecimal" => ElementType::Decimal128, _ => ElementType::EmbeddedDocument, }; @@ -295,7 +310,7 @@ impl<'a> DocumentSerializer<'a> { self.root_serializer.replace_i32(self.start, length); Ok(DocumentSerializationResult { length, - root_serializer: self.root_serializer + root_serializer: self.root_serializer, }) } } @@ -414,6 +429,8 @@ impl<'a> BsonTypeSerializer<'a> { ElementType::JavaScriptCodeWithScope => SerializationStep::CodeWithScopeCode, ElementType::MinKey => SerializationStep::MinKey, ElementType::MaxKey => SerializationStep::MaxKey, + ElementType::Decimal128 => SerializationStep::Decimal128, + ElementType::Undefined => SerializationStep::Undefined, _ => todo!(), }; @@ -449,19 +466,26 @@ impl<'a, 'b, 'c: 'a + 'b> serde::Serializer for &'b mut BsonTypeSerializer<'a> { } fn serialize_i32(self, v: i32) -> Result { + todo!() + } + + fn serialize_i64(self, v: i64) -> Result { match self.state { - SerializationStep::TimestampTime | SerializationStep::TimestampIncrement => { - write_i32(&mut self.root_serializer.bytes, v)?; + SerializationStep::TimestampTime => { + self.state = SerializationStep::TimestampIncrement { time: v }; + } + SerializationStep::TimestampIncrement { time } => { + let t = u32::try_from(time).map_err(Error::custom)?; + let i = u32::try_from(v).map_err(Error::custom)?; + + write_i32(&mut self.root_serializer.bytes, i as i32)?; + write_i32(&mut self.root_serializer.bytes, t as i32)?; } _ => todo!(), } Ok(()) } - fn serialize_i64(self, v: i64) -> Result { - todo!() - } - fn serialize_u8(self, v: u8) -> Result { todo!() } @@ -535,13 +559,24 @@ impl<'a, 'b, 'c: 'a + 'b> serde::Serializer for &'b mut BsonTypeSerializer<'a> { code: v.to_string(), }; } + #[cfg(feature = "decimal128")] + SerializationStep::Decimal128Value => { + let d = Decimal128::from_str(v); + self.root_serializer.write_all(d.to_raw_bytes_le())?; + } _ => todo!(), } Ok(()) } fn serialize_bytes(self, v: &[u8]) -> Result { - todo!() + match self.state { + SerializationStep::Decimal128Value => { + self.root_serializer.bytes.write_all(v)?; + Ok(()) + } + _ => todo!(), + } } fn serialize_none(self) -> Result { @@ -696,10 +731,10 @@ impl<'a, 'b> SerializeStruct for &'b mut BsonTypeSerializer<'a> { value.serialize(&mut **self)?; } (SerializationStep::TimestampTime, "t") => { + // state is updated in serialize value.serialize(&mut **self)?; - self.state = SerializationStep::TimestampIncrement; } - (SerializationStep::TimestampIncrement, "i") => { + (SerializationStep::TimestampIncrement { .. }, "i") => { value.serialize(&mut **self)?; self.state = SerializationStep::Done; } @@ -726,10 +761,22 @@ impl<'a, 'b> SerializeStruct for &'b mut BsonTypeSerializer<'a> { value.serialize(&mut **self)?; self.state = SerializationStep::Done; } - (SerializationStep::MinKey { .. }, "$minKey") => { + (SerializationStep::MinKey, "$minKey") => { + self.state = SerializationStep::Done; + } + (SerializationStep::MaxKey, "$maxKey") => { + self.state = SerializationStep::Done; + } + (SerializationStep::Undefined, "$undefined") => { self.state = SerializationStep::Done; } - (SerializationStep::MaxKey { .. }, "$maxKey") => { + (SerializationStep::Decimal128, "$numberDecimal") + | (SerializationStep::Decimal128, "$numberDecimalBytes") => { + self.state = SerializationStep::Decimal128Value; + value.serialize(&mut **self)?; + } + (SerializationStep::Decimal128Value, "$numberDecimal") => { + value.serialize(&mut **self)?; self.state = SerializationStep::Done; } (state, k) => panic!("bad combo: {:?} + {:?}", state, k), @@ -762,7 +809,7 @@ enum SerializationStep { Timestamp, TimestampTime, - TimestampIncrement, + TimestampIncrement { time: i64 }, DbPointer, DbPointerRef, @@ -777,6 +824,11 @@ enum SerializationStep { MaxKey, + Undefined, + + Decimal128, + Decimal128Value, + Done, } @@ -808,34 +860,16 @@ impl<'a> SerializeStruct for StructSerializer<'a> { where T: Serialize, { - // println!("got field: {}", key); match self { - // StructSerializer::Value { - // element_type, - // root_serializer, - // } => { - // match element_type { - // ElementType::ObjectId => { - // assert_eq!(key, "$oid"); - // } - // ElementType::DateTime => { - // assert_eq!(key, "$date"); - // } - // _ => todo!(), - // } - // let mut s = BsonTypeSerializer::new(&mut *root_serializer, *element_type); - // value.serialize(&mut s) - // } StructSerializer::Value(ref mut v) => (&mut *v).serialize_field(key, value), StructSerializer::Document(d) => d.serialize_field(key, value), } - // Ok(()) } fn end(self) -> Result { match self { StructSerializer::Document(d) => SerializeStruct::end(d), - _ => Ok(()), + StructSerializer::Value(mut v) => v.end(), } } } @@ -894,7 +928,8 @@ impl<'a> VariantSerializer<'a> { // null byte for the inner self.root_serializer.bytes.push(0); let arr_length = (self.root_serializer.bytes.len() - self.inner_start) as i32; - self.root_serializer.replace_i32(self.inner_start, arr_length); + self.root_serializer + .replace_i32(self.inner_start, arr_length); // null byte for document self.root_serializer.bytes.push(0); @@ -1105,7 +1140,6 @@ impl<'a> serde::Serializer for KeySerializer<'a> { } pub(crate) struct CodeWithScopeSerializer<'a> { - code_length: usize, start: usize, doc: DocumentSerializer<'a>, } @@ -1117,11 +1151,7 @@ impl<'a> CodeWithScopeSerializer<'a> { write_string(&mut rs.bytes, code)?; let doc = DocumentSerializer::start(rs)?; - Ok(Self { - code_length: code.len(), - start, - doc, - }) + Ok(Self { start, doc }) } } @@ -1146,7 +1176,7 @@ impl<'a> SerializeMap for CodeWithScopeSerializer<'a> { fn end(self) -> Result { let result = self.doc.end_doc()?; - let total_len = result.length + self.code_length as i32; + let total_len = (result.root_serializer.bytes.len() - self.start) as i32; result.root_serializer.replace_i32(self.start, total_len); Ok(()) } diff --git a/src/ser/serde.rs b/src/ser/serde.rs index 197fb90e..3d200561 100644 --- a/src/ser/serde.rs +++ b/src/ser/serde.rs @@ -1,7 +1,15 @@ use serde::ser::{ - self, Serialize, SerializeMap, SerializeSeq, SerializeStruct, SerializeStructVariant, - SerializeTuple, SerializeTupleStruct, SerializeTupleVariant, + self, + Serialize, + SerializeMap, + SerializeSeq, + SerializeStruct, + SerializeStructVariant, + SerializeTuple, + SerializeTupleStruct, + SerializeTupleVariant, }; +use serde_bytes::Bytes; #[cfg(feature = "decimal128")] use crate::decimal128::Decimal128; @@ -63,7 +71,7 @@ impl Serialize for Bson { let mut state = serializer.serialize_struct("$code", 1)?; state.serialize_field("$code", c)?; state.end() - }, + } Bson::JavaScriptCodeWithScope(code_w_scope) => code_w_scope.serialize(serializer), Bson::DbPointer(dbp) => dbp.serialize(serializer), Bson::Symbol(s) => { @@ -73,22 +81,29 @@ impl Serialize for Bson { } Bson::RegularExpression(re) => re.serialize(serializer), Bson::Timestamp(t) => t.serialize(serializer), - Bson::Decimal128(_) => todo!(), - Bson::Undefined => serializer.serialize_unit(), + #[cfg(not(feature = "decimal128"))] + Bson::Decimal128(d) => { + let mut state = serializer.serialize_struct("$numberDecimal", 1)?; + state.serialize_field("$numberDecimalBytes", Bytes::new(&d.bytes))?; + state.end() + } + #[cfg(feature = "decimal128")] + Bson::Decimal128(d) => d.serialize(serializer), + Bson::Undefined => { + let mut state = serializer.serialize_struct("$undefined", 1)?; + state.serialize_field("$undefined", &1)?; + state.end() + } Bson::MaxKey => { let mut state = serializer.serialize_struct("$maxKey", 1)?; state.serialize_field("$maxKey", &1)?; state.end() - }, + } Bson::MinKey => { let mut state = serializer.serialize_struct("$minKey", 1)?; state.serialize_field("$minKey", &1)?; state.end() - }, - // Bson::Document(_) => { - // let doc = self.clone().into_extended_document(); - // doc.serialize(serializer) - // } + } } } } @@ -525,8 +540,13 @@ impl Serialize for Timestamp { where S: ser::Serializer, { - let value = Bson::Timestamp(*self); - value.serialize(serializer) + let mut state = serializer.serialize_struct("$timestamp", 1)?; + let body = extjson::models::TimestampBody { + t: self.time, + i: self.increment, + }; + state.serialize_field("$timestamp", &body)?; + state.end() } } @@ -536,8 +556,15 @@ impl Serialize for Regex { where S: ser::Serializer, { - let value = Bson::RegularExpression(self.clone()); - value.serialize(serializer) + // let value = Bson::RegularExpression(self.clone()); + // value.serialize(serializer) + let mut state = serializer.serialize_struct("$regularExpression", 1)?; + let body = extjson::models::RegexBody { + pattern: self.pattern.clone(), + options: self.options.clone(), + }; + state.serialize_field("$regularExpression", &body)?; + state.end() } } @@ -547,8 +574,10 @@ impl Serialize for JavaScriptCodeWithScope { where S: ser::Serializer, { - let value = Bson::JavaScriptCodeWithScope(self.clone()); - value.serialize(serializer) + let mut state = serializer.serialize_struct("$codeWithScope", 2)?; + state.serialize_field("$code", &self.code)?; + state.serialize_field("$scope", &self.scope)?; + state.end() } } @@ -579,8 +608,12 @@ impl Serialize for Decimal128 { where S: ser::Serializer, { - let value = Bson::Decimal128(self.clone()); - value.serialize(serializer) + let mut state = serializer.serialize_struct()?; + let body = extjson::models::Decimal128 { + value: self.to_string(), + }; + state.serialize_field("$numberDecimal", body)?; + state.end() } } @@ -590,9 +623,6 @@ impl Serialize for DateTime { where S: ser::Serializer, { - // Cloning a `DateTime` is extremely cheap - // let value = Bson::DateTime(*self); - // value.serialize(serializer) let mut state = serializer.serialize_struct("$date", 1)?; let body = extjson::models::DateTimeBody::from_millis(self.timestamp_millis()); state.serialize_field("$date", &body)?; @@ -606,7 +636,12 @@ impl Serialize for DbPointer { where S: ser::Serializer, { - let value = Bson::DbPointer(self.clone()); - value.serialize(serializer) + let mut state = serializer.serialize_struct("$dbPointer", 1)?; + let body = extjson::models::DbPointerBody { + ref_ns: self.namespace.clone(), + id: self.id.into(), + }; + state.serialize_field("$dbPointer", &body)?; + state.end() } } diff --git a/src/tests/spec/corpus.rs b/src/tests/spec/corpus.rs index 75f2342c..b23494d0 100644 --- a/src/tests/spec/corpus.rs +++ b/src/tests/spec/corpus.rs @@ -86,7 +86,11 @@ fn run_test(test: TestFile) { .expect(&description); let mut native_to_bson_serde_bson_to_native_cb = Vec::new(); - crate::to_writer(&bson_to_native_cb, &mut native_to_bson_serde_bson_to_native_cb).expect(&description); + crate::to_writer( + &bson_to_native_cb, + &mut native_to_bson_serde_bson_to_native_cb, + ) + .expect(&description); // native_to_bson( bson_to_native(cB) ) = cB From db62358bb5c12e374455088786f3ae0c6ae0eb7f Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Mon, 19 Jul 2021 18:49:56 -0400 Subject: [PATCH 08/21] move doc serializer to own file --- src/ser/raw/document_serializer.rs | 148 +++++++++++++++++++++++++++++ src/ser/{raw.rs => raw/mod.rs} | 145 +--------------------------- 2 files changed, 151 insertions(+), 142 deletions(-) create mode 100644 src/ser/raw/document_serializer.rs rename src/ser/{raw.rs => raw/mod.rs} (89%) diff --git a/src/ser/raw/document_serializer.rs b/src/ser/raw/document_serializer.rs new file mode 100644 index 00000000..a353fb94 --- /dev/null +++ b/src/ser/raw/document_serializer.rs @@ -0,0 +1,148 @@ +use crate::ser::write_i32; +use crate::ser::Result; +use crate::ser::Error; + +use super::Serializer; + +pub(super) struct DocumentSerializationResult<'a> { + pub(super) length: i32, + pub(super) root_serializer: &'a mut Serializer, +} + +pub(super) struct DocumentSerializer<'a> { + root_serializer: &'a mut Serializer, + num_keys_serialized: usize, + start: usize, +} + +impl<'a> DocumentSerializer<'a> { + pub(super) fn start(rs: &'a mut Serializer) -> crate::ser::Result { + let start = rs.bytes.len(); + write_i32(&mut rs.bytes, 0)?; + Ok(Self { + root_serializer: rs, + num_keys_serialized: 0, + start, + }) + } + + fn serialize_doc_key(&mut self, key: &T) -> Result<()> + where + T: serde::Serialize + ?Sized, + { + // push a dummy element type for now, will update this once we serialize the value + self.root_serializer.type_index = self.root_serializer.bytes.len(); + self.root_serializer.bytes.push(0); + key.serialize(KeySerializer { + root_serializer: &mut *self.root_serializer, + })?; + + self.num_keys_serialized += 1; + Ok(()) + } + + pub(super) fn end_doc(self) -> crate::ser::Result> { + self.root_serializer.bytes.push(0); + let length = (self.root_serializer.bytes.len() - self.start) as i32; + self.root_serializer.replace_i32(self.start, length); + Ok(DocumentSerializationResult { + length, + root_serializer: self.root_serializer, + }) + } +} + +impl<'a> serde::ser::SerializeSeq for DocumentSerializer<'a> { + type Ok = (); + type Error = Error; + + fn serialize_element(&mut self, value: &T) -> Result<()> + where + T: serde::Serialize, + { + self.serialize_doc_key(&self.num_keys_serialized.to_string())?; + value.serialize(&mut *self.root_serializer) + } + + fn end(self) -> Result { + self.end_doc().map(|_| ()) + } +} + +impl<'a> serde::ser::SerializeMap for DocumentSerializer<'a> { + type Ok = (); + + type Error = Error; + + fn serialize_key(&mut self, key: &T) -> Result<()> + where + T: serde::Serialize, + { + self.serialize_doc_key(key) + } + + fn serialize_value(&mut self, value: &T) -> Result<()> + where + T: serde::Serialize, + { + value.serialize(&mut *self.root_serializer) + } + + fn end(self) -> Result { + self.end_doc().map(|_| ()) + } +} + +impl<'a> serde::ser::SerializeStruct for DocumentSerializer<'a> { + type Ok = (); + + type Error = Error; + + fn serialize_field(&mut self, key: &'static str, value: &T) -> Result<()> + where + T: serde::Serialize, + { + self.serialize_doc_key(key)?; + value.serialize(&mut *self.root_serializer) + } + + fn end(self) -> Result { + self.end_doc().map(|_| ()) + } +} + +impl<'a> serde::ser::SerializeTuple for DocumentSerializer<'a> { + type Ok = (); + + type Error = Error; + + fn serialize_element(&mut self, value: &T) -> Result<()> + where + T: serde::Serialize, + { + self.serialize_doc_key(&self.num_keys_serialized.to_string())?; + value.serialize(&mut *self.root_serializer) + } + + fn end(self) -> Result { + self.end_doc().map(|_| ()) + } +} + +impl<'a> serde::ser::SerializeTupleStruct for DocumentSerializer<'a> { + type Ok = (); + + type Error = Error; + + fn serialize_field(&mut self, value: &T) -> Result<()> + where + T: serde::Serialize, + { + self.serialize_doc_key(&self.num_keys_serialized.to_string())?; + value.serialize(&mut *self.root_serializer) + } + + fn end(self) -> Result { + self.end_doc().map(|_| ()) + } +} diff --git a/src/ser/raw.rs b/src/ser/raw/mod.rs similarity index 89% rename from src/ser/raw.rs rename to src/ser/raw/mod.rs index f0f58626..da84e098 100644 --- a/src/ser/raw.rs +++ b/src/ser/raw/mod.rs @@ -1,3 +1,5 @@ +mod document; + use std::{borrow::Borrow, convert::TryFrom, io::Write}; use serde::{ @@ -14,6 +16,7 @@ use crate::{ Decimal128, Document, }; +use document::DocumentSerializer; pub(crate) struct Serializer { bytes: Vec, @@ -267,148 +270,6 @@ impl<'a> serde::Serializer for &'a mut Serializer { } } -struct DocumentSerializationResult<'a> { - length: i32, - root_serializer: &'a mut Serializer, -} - -pub(crate) struct DocumentSerializer<'a> { - root_serializer: &'a mut Serializer, - num_keys_serialized: usize, - start: usize, -} - -impl<'a> DocumentSerializer<'a> { - fn start(rs: &'a mut Serializer) -> crate::ser::Result { - let start = rs.bytes.len(); - write_i32(&mut rs.bytes, 0)?; - Ok(Self { - root_serializer: rs, - num_keys_serialized: 0, - start, - }) - } - - fn serialize_doc_key(&mut self, key: &T) -> Result<()> - where - T: serde::Serialize + ?Sized, - { - // push a dummy element type for now, will update this once we serialize the value - self.root_serializer.type_index = self.root_serializer.bytes.len(); - self.root_serializer.bytes.push(0); - key.serialize(KeySerializer { - root_serializer: &mut *self.root_serializer, - })?; - - self.num_keys_serialized += 1; - Ok(()) - } - - fn end_doc(self) -> crate::ser::Result> { - self.root_serializer.bytes.push(0); - let length = (self.root_serializer.bytes.len() - self.start) as i32; - self.root_serializer.replace_i32(self.start, length); - Ok(DocumentSerializationResult { - length, - root_serializer: self.root_serializer, - }) - } -} - -impl<'a> serde::ser::SerializeSeq for DocumentSerializer<'a> { - type Ok = (); - type Error = Error; - - fn serialize_element(&mut self, value: &T) -> Result<()> - where - T: serde::Serialize, - { - self.serialize_doc_key(&self.num_keys_serialized.to_string())?; - value.serialize(&mut *self.root_serializer) - } - - fn end(self) -> Result { - self.end_doc().map(|_| ()) - } -} - -impl<'a> serde::ser::SerializeMap for DocumentSerializer<'a> { - type Ok = (); - - type Error = Error; - - fn serialize_key(&mut self, key: &T) -> Result<()> - where - T: serde::Serialize, - { - self.serialize_doc_key(key) - } - - fn serialize_value(&mut self, value: &T) -> Result<()> - where - T: serde::Serialize, - { - value.serialize(&mut *self.root_serializer) - } - - fn end(self) -> Result { - self.end_doc().map(|_| ()) - } -} - -impl<'a> serde::ser::SerializeStruct for DocumentSerializer<'a> { - type Ok = (); - - type Error = Error; - - fn serialize_field(&mut self, key: &'static str, value: &T) -> Result<()> - where - T: serde::Serialize, - { - self.serialize_doc_key(key)?; - value.serialize(&mut *self.root_serializer) - } - - fn end(self) -> Result { - self.end_doc().map(|_| ()) - } -} - -impl<'a> serde::ser::SerializeTuple for DocumentSerializer<'a> { - type Ok = (); - - type Error = Error; - - fn serialize_element(&mut self, value: &T) -> Result<()> - where - T: serde::Serialize, - { - self.serialize_doc_key(&self.num_keys_serialized.to_string())?; - value.serialize(&mut *self.root_serializer) - } - - fn end(self) -> Result { - self.end_doc().map(|_| ()) - } -} - -impl<'a> serde::ser::SerializeTupleStruct for DocumentSerializer<'a> { - type Ok = (); - - type Error = Error; - - fn serialize_field(&mut self, value: &T) -> Result<()> - where - T: serde::Serialize, - { - self.serialize_doc_key(&self.num_keys_serialized.to_string())?; - value.serialize(&mut *self.root_serializer) - } - - fn end(self) -> Result { - self.end_doc().map(|_| ()) - } -} pub(crate) struct BsonTypeSerializer<'a> { root_serializer: &'a mut Serializer, From 48143e8dd5e11ad363bb30b77796ec008ed5cdeb Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Mon, 19 Jul 2021 19:26:56 -0400 Subject: [PATCH 09/21] move value serializer to its own file --- src/ser/raw/document_serializer.rs | 182 +++++++- src/ser/raw/mod.rs | 647 +---------------------------- src/ser/raw/value_serializer.rs | 491 ++++++++++++++++++++++ 3 files changed, 674 insertions(+), 646 deletions(-) create mode 100644 src/ser/raw/value_serializer.rs diff --git a/src/ser/raw/document_serializer.rs b/src/ser/raw/document_serializer.rs index a353fb94..6f297ff6 100644 --- a/src/ser/raw/document_serializer.rs +++ b/src/ser/raw/document_serializer.rs @@ -1,22 +1,26 @@ +use serde::Serialize; +use serde::ser::Impossible; + +use crate::ser::write_cstring; use crate::ser::write_i32; use crate::ser::Result; use crate::ser::Error; use super::Serializer; -pub(super) struct DocumentSerializationResult<'a> { - pub(super) length: i32, - pub(super) root_serializer: &'a mut Serializer, +pub(crate) struct DocumentSerializationResult<'a> { + pub(crate) length: i32, + pub(crate) root_serializer: &'a mut Serializer, } -pub(super) struct DocumentSerializer<'a> { +pub(crate) struct DocumentSerializer<'a> { root_serializer: &'a mut Serializer, num_keys_serialized: usize, start: usize, } impl<'a> DocumentSerializer<'a> { - pub(super) fn start(rs: &'a mut Serializer) -> crate::ser::Result { + pub(crate) fn start(rs: &'a mut Serializer) -> crate::ser::Result { let start = rs.bytes.len(); write_i32(&mut rs.bytes, 0)?; Ok(Self { @@ -41,7 +45,7 @@ impl<'a> DocumentSerializer<'a> { Ok(()) } - pub(super) fn end_doc(self) -> crate::ser::Result> { + pub(crate) fn end_doc(self) -> crate::ser::Result> { self.root_serializer.bytes.push(0); let length = (self.root_serializer.bytes.len() - self.start) as i32; self.root_serializer.replace_i32(self.start, length); @@ -146,3 +150,169 @@ impl<'a> serde::ser::SerializeTupleStruct for DocumentSerializer<'a> { self.end_doc().map(|_| ()) } } + +struct KeySerializer<'a> { + root_serializer: &'a mut Serializer, +} + +impl<'a> serde::Serializer for KeySerializer<'a> { + type Ok = (); + + type Error = Error; + + type SerializeSeq = Impossible<(), Error>; + type SerializeTuple = Impossible<(), Error>; + type SerializeTupleStruct = Impossible<(), Error>; + type SerializeTupleVariant = Impossible<(), Error>; + type SerializeMap = Impossible<(), Error>; + type SerializeStruct = Impossible<(), Error>; + type SerializeStructVariant = Impossible<(), Error>; + + fn serialize_bool(self, v: bool) -> Result { + todo!() + } + + fn serialize_i8(self, v: i8) -> Result { + todo!() + } + + fn serialize_i16(self, v: i16) -> Result { + todo!() + } + + fn serialize_i32(self, v: i32) -> Result { + todo!() + } + + fn serialize_i64(self, v: i64) -> Result { + todo!() + } + + fn serialize_u8(self, v: u8) -> Result { + todo!() + } + + fn serialize_u16(self, v: u16) -> Result { + todo!() + } + + fn serialize_u32(self, v: u32) -> Result { + todo!() + } + + fn serialize_u64(self, v: u64) -> Result { + todo!() + } + + fn serialize_f32(self, v: f32) -> Result { + todo!() + } + + fn serialize_f64(self, v: f64) -> Result { + todo!() + } + + fn serialize_char(self, v: char) -> Result { + todo!() + } + + fn serialize_str(self, v: &str) -> Result { + write_cstring(&mut self.root_serializer.bytes, v) + } + + fn serialize_bytes(self, v: &[u8]) -> Result { + todo!() + } + + fn serialize_none(self) -> Result { + todo!() + } + + fn serialize_some(self, value: &T) -> Result + where + T: Serialize, + { + todo!() + } + + fn serialize_unit(self) -> Result { + todo!() + } + + fn serialize_unit_struct(self, name: &'static str) -> Result { + todo!() + } + + fn serialize_unit_variant( + self, + name: &'static str, + variant_index: u32, + variant: &'static str, + ) -> Result { + todo!() + } + + fn serialize_newtype_struct(self, name: &'static str, value: &T) -> Result + where + T: Serialize, + { + todo!() + } + + fn serialize_newtype_variant( + self, + name: &'static str, + variant_index: u32, + variant: &'static str, + value: &T, + ) -> Result + where + T: Serialize, + { + todo!() + } + + fn serialize_seq(self, len: Option) -> Result { + todo!() + } + + fn serialize_tuple(self, len: usize) -> Result { + todo!() + } + + fn serialize_tuple_struct( + self, + name: &'static str, + len: usize, + ) -> Result { + todo!() + } + + fn serialize_tuple_variant( + self, + name: &'static str, + variant_index: u32, + variant: &'static str, + len: usize, + ) -> Result { + todo!() + } + + fn serialize_map(self, len: Option) -> Result { + todo!() + } + + fn serialize_struct(self, name: &'static str, len: usize) -> Result { + todo!() + } + + fn serialize_struct_variant( + self, + name: &'static str, + variant_index: u32, + variant: &'static str, + len: usize, + ) -> Result { + todo!() + } +} diff --git a/src/ser/raw/mod.rs b/src/ser/raw/mod.rs index da84e098..51878343 100644 --- a/src/ser/raw/mod.rs +++ b/src/ser/raw/mod.rs @@ -1,4 +1,5 @@ -mod document; +mod document_serializer; +mod value_serializer; use std::{borrow::Borrow, convert::TryFrom, io::Write}; @@ -8,6 +9,8 @@ use serde::{ Serializer as SerdeSerializer, }; +use self::value_serializer::ValueSerializer; + use super::{write_cstring, write_f64, write_i32, write_i64, write_string, write_u8}; use crate::{ oid::ObjectId, @@ -16,7 +19,7 @@ use crate::{ Decimal128, Document, }; -use document::DocumentSerializer; +use document_serializer::DocumentSerializer; pub(crate) struct Serializer { bytes: Vec, @@ -270,436 +273,8 @@ impl<'a> serde::Serializer for &'a mut Serializer { } } - -pub(crate) struct BsonTypeSerializer<'a> { - root_serializer: &'a mut Serializer, - state: SerializationStep, -} - -impl<'a> BsonTypeSerializer<'a> { - fn new(rs: &'a mut Serializer, element_type: ElementType) -> Self { - let state = match element_type { - ElementType::DateTime => SerializationStep::DateTime, - ElementType::Binary => SerializationStep::Binary, - ElementType::ObjectId => SerializationStep::Oid, - ElementType::Symbol => SerializationStep::Symbol, - ElementType::RegularExpression => SerializationStep::RegEx, - ElementType::Timestamp => SerializationStep::Timestamp, - ElementType::DbPointer => SerializationStep::DbPointer, - ElementType::JavaScriptCode => SerializationStep::Code, - ElementType::JavaScriptCodeWithScope => SerializationStep::CodeWithScopeCode, - ElementType::MinKey => SerializationStep::MinKey, - ElementType::MaxKey => SerializationStep::MaxKey, - ElementType::Decimal128 => SerializationStep::Decimal128, - ElementType::Undefined => SerializationStep::Undefined, - - _ => todo!(), - }; - Self { - root_serializer: rs, - state, - } - } -} - -impl<'a, 'b, 'c: 'a + 'b> serde::Serializer for &'b mut BsonTypeSerializer<'a> { - type Ok = (); - type Error = Error; - - type SerializeSeq = Impossible<(), Error>; - type SerializeTuple = Impossible<(), Error>; - type SerializeTupleStruct = Impossible<(), Error>; - type SerializeTupleVariant = Impossible<(), Error>; - type SerializeMap = CodeWithScopeSerializer<'b>; - type SerializeStruct = Self; - type SerializeStructVariant = Impossible<(), Error>; - - fn serialize_bool(self, v: bool) -> Result { - todo!() - } - - fn serialize_i8(self, v: i8) -> Result { - todo!() - } - - fn serialize_i16(self, v: i16) -> Result { - todo!() - } - - fn serialize_i32(self, v: i32) -> Result { - todo!() - } - - fn serialize_i64(self, v: i64) -> Result { - match self.state { - SerializationStep::TimestampTime => { - self.state = SerializationStep::TimestampIncrement { time: v }; - } - SerializationStep::TimestampIncrement { time } => { - let t = u32::try_from(time).map_err(Error::custom)?; - let i = u32::try_from(v).map_err(Error::custom)?; - - write_i32(&mut self.root_serializer.bytes, i as i32)?; - write_i32(&mut self.root_serializer.bytes, t as i32)?; - } - _ => todo!(), - } - Ok(()) - } - - fn serialize_u8(self, v: u8) -> Result { - todo!() - } - - fn serialize_u16(self, v: u16) -> Result { - todo!() - } - - fn serialize_u32(self, v: u32) -> Result { - todo!() - } - - fn serialize_u64(self, v: u64) -> Result { - todo!() - } - - fn serialize_f32(self, v: f32) -> Result { - todo!() - } - - fn serialize_f64(self, v: f64) -> Result { - todo!() - } - - fn serialize_char(self, v: char) -> Result { - todo!() - } - - fn serialize_str(self, v: &str) -> Result { - // match self.bson_type { - // ElementType::ObjectId => { - // let oid = ObjectId::parse_str(v).map_err(Error::custom)?; - // self.root_serializer.bytes.write_all(&oid.bytes())?; - // } - // _ => todo!(), - // } - - match &self.state { - SerializationStep::DateTimeNumberLong => { - let millis: i64 = v.parse().map_err(Error::custom)?; - write_i64(&mut self.root_serializer.bytes, millis)?; - } - SerializationStep::Oid => { - let oid = ObjectId::parse_str(v).map_err(Error::custom)?; - self.root_serializer.bytes.write_all(&oid.bytes())?; - } - SerializationStep::BinaryBase64 => { - self.state = SerializationStep::BinarySubType { - base64: v.to_string(), - }; - } - SerializationStep::BinarySubType { base64 } => { - let subtype_byte = hex::decode(v).map_err(Error::custom)?; - let subtype: BinarySubtype = subtype_byte[0].into(); - - let bytes = base64::decode(base64.as_str()).map_err(Error::custom)?; - - write_binary(&mut self.root_serializer.bytes, bytes.as_slice(), subtype)?; - } - SerializationStep::Symbol | SerializationStep::DbPointerRef => { - write_string(&mut self.root_serializer.bytes, v)?; - } - SerializationStep::RegExPattern | SerializationStep::RegExOptions => { - write_cstring(&mut self.root_serializer.bytes, v)?; - } - SerializationStep::Code => { - write_string(&mut self.root_serializer.bytes, v)?; - } - SerializationStep::CodeWithScopeCode => { - self.state = SerializationStep::CodeWithScopeScope { - code: v.to_string(), - }; - } - #[cfg(feature = "decimal128")] - SerializationStep::Decimal128Value => { - let d = Decimal128::from_str(v); - self.root_serializer.write_all(d.to_raw_bytes_le())?; - } - _ => todo!(), - } - Ok(()) - } - - fn serialize_bytes(self, v: &[u8]) -> Result { - match self.state { - SerializationStep::Decimal128Value => { - self.root_serializer.bytes.write_all(v)?; - Ok(()) - } - _ => todo!(), - } - } - - fn serialize_none(self) -> Result { - todo!() - } - - fn serialize_some(self, value: &T) -> Result - where - T: Serialize, - { - todo!() - } - - fn serialize_unit(self) -> Result { - todo!() - } - - fn serialize_unit_struct(self, name: &'static str) -> Result { - todo!() - } - - fn serialize_unit_variant( - self, - name: &'static str, - variant_index: u32, - variant: &'static str, - ) -> Result { - todo!() - } - - fn serialize_newtype_struct(self, name: &'static str, value: &T) -> Result - where - T: Serialize, - { - todo!() - } - - fn serialize_newtype_variant( - self, - name: &'static str, - variant_index: u32, - variant: &'static str, - value: &T, - ) -> Result - where - T: Serialize, - { - todo!() - } - - fn serialize_seq(self, len: Option) -> Result { - todo!() - } - - fn serialize_tuple(self, len: usize) -> Result { - todo!() - } - - fn serialize_tuple_struct( - self, - name: &'static str, - len: usize, - ) -> Result { - todo!() - } - - fn serialize_tuple_variant( - self, - name: &'static str, - variant_index: u32, - variant: &'static str, - len: usize, - ) -> Result { - todo!() - } - - fn serialize_map(self, len: Option) -> Result { - match self.state { - SerializationStep::CodeWithScopeScope { ref code } => { - CodeWithScopeSerializer::start(code.as_str(), self.root_serializer) - } - _ => todo!(), - } - } - - fn serialize_struct(self, name: &'static str, len: usize) -> Result { - Ok(self) - } - - fn serialize_struct_variant( - self, - name: &'static str, - variant_index: u32, - variant: &'static str, - len: usize, - ) -> Result { - todo!() - } -} - -impl<'a, 'b> SerializeStruct for &'b mut BsonTypeSerializer<'a> { - type Ok = (); - type Error = Error; - - fn serialize_field(&mut self, key: &'static str, value: &T) -> Result<()> - where - T: Serialize, - { - match (&self.state, key) { - (SerializationStep::DateTime, "$date") => { - self.state = SerializationStep::DateTimeNumberLong; - value.serialize(&mut **self)?; - } - (SerializationStep::DateTimeNumberLong, "$numberLong") => { - value.serialize(&mut **self)?; - self.state = SerializationStep::Done; - } - (SerializationStep::Oid, "$oid") => { - value.serialize(&mut **self)?; - self.state = SerializationStep::Done; - } - (SerializationStep::Binary, "$binary") => { - self.state = SerializationStep::BinaryBase64; - value.serialize(&mut **self)?; - } - (SerializationStep::BinaryBase64, "base64") => { - // state is updated in serialize - value.serialize(&mut **self)?; - } - (SerializationStep::BinarySubType { .. }, "subType") => { - value.serialize(&mut **self)?; - self.state = SerializationStep::Done; - } - (SerializationStep::Symbol, "$symbol") => { - value.serialize(&mut **self)?; - self.state = SerializationStep::Done; - } - (SerializationStep::RegEx, "$regularExpression") => { - self.state = SerializationStep::RegExPattern; - value.serialize(&mut **self)?; - } - (SerializationStep::RegExPattern, "pattern") => { - value.serialize(&mut **self)?; - self.state = SerializationStep::RegExOptions; - } - (SerializationStep::RegExOptions, "options") => { - value.serialize(&mut **self)?; - self.state = SerializationStep::Done; - } - (SerializationStep::Timestamp, "$timestamp") => { - self.state = SerializationStep::TimestampTime; - value.serialize(&mut **self)?; - } - (SerializationStep::TimestampTime, "t") => { - // state is updated in serialize - value.serialize(&mut **self)?; - } - (SerializationStep::TimestampIncrement { .. }, "i") => { - value.serialize(&mut **self)?; - self.state = SerializationStep::Done; - } - (SerializationStep::DbPointer, "$dbPointer") => { - self.state = SerializationStep::DbPointerRef; - value.serialize(&mut **self)?; - } - (SerializationStep::DbPointerRef, "$ref") => { - value.serialize(&mut **self)?; - self.state = SerializationStep::DbPointerId; - } - (SerializationStep::DbPointerId, "$id") => { - self.state = SerializationStep::Oid; - value.serialize(&mut **self)?; - } - (SerializationStep::Code, "$code") => { - value.serialize(&mut **self)?; - self.state = SerializationStep::Done; - } - (SerializationStep::CodeWithScopeCode, "$code") => { - value.serialize(&mut **self)?; - } - (SerializationStep::CodeWithScopeScope { .. }, "$scope") => { - value.serialize(&mut **self)?; - self.state = SerializationStep::Done; - } - (SerializationStep::MinKey, "$minKey") => { - self.state = SerializationStep::Done; - } - (SerializationStep::MaxKey, "$maxKey") => { - self.state = SerializationStep::Done; - } - (SerializationStep::Undefined, "$undefined") => { - self.state = SerializationStep::Done; - } - (SerializationStep::Decimal128, "$numberDecimal") - | (SerializationStep::Decimal128, "$numberDecimalBytes") => { - self.state = SerializationStep::Decimal128Value; - value.serialize(&mut **self)?; - } - (SerializationStep::Decimal128Value, "$numberDecimal") => { - value.serialize(&mut **self)?; - self.state = SerializationStep::Done; - } - (state, k) => panic!("bad combo: {:?} + {:?}", state, k), - } - - Ok(()) - } - - fn end(self) -> Result { - Ok(()) - } -} - -#[derive(Debug)] -enum SerializationStep { - Oid, - - DateTime, - DateTimeNumberLong, - - Binary, - BinaryBase64, - BinarySubType { base64: String }, - - Symbol, - - RegEx, - RegExPattern, - RegExOptions, - - Timestamp, - TimestampTime, - TimestampIncrement { time: i64 }, - - DbPointer, - DbPointerRef, - DbPointerId, - - Code, - - CodeWithScopeCode, - CodeWithScopeScope { code: String }, - - MinKey, - - MaxKey, - - Undefined, - - Decimal128, - Decimal128Value, - - Done, -} - -// pub(crate) struct StructSerializer<'a> { -// root_serializer: &'a mut Serializer, -// bson_type: ElementType -// } - pub(crate) enum StructSerializer<'a> { - Value(BsonTypeSerializer<'a>), + Value(ValueSerializer<'a>), Document(DocumentSerializer<'a>), } @@ -708,7 +283,7 @@ impl<'a> StructSerializer<'a> { if let ElementType::EmbeddedDocument = element_type { Ok(Self::Document(DocumentSerializer::start(rs)?)) } else { - Ok(Self::Value(BsonTypeSerializer::new(rs, element_type))) + Ok(Self::Value(ValueSerializer::new(rs, element_type))) } } } @@ -834,214 +409,6 @@ impl<'a> serde::ser::SerializeStructVariant for VariantSerializer<'a> { } } -struct KeySerializer<'a> { - root_serializer: &'a mut Serializer, -} - -impl<'a> serde::Serializer for KeySerializer<'a> { - type Ok = (); - - type Error = Error; - - type SerializeSeq = Impossible<(), Error>; - type SerializeTuple = Impossible<(), Error>; - type SerializeTupleStruct = Impossible<(), Error>; - type SerializeTupleVariant = Impossible<(), Error>; - type SerializeMap = Impossible<(), Error>; - type SerializeStruct = Impossible<(), Error>; - type SerializeStructVariant = Impossible<(), Error>; - - fn serialize_bool(self, v: bool) -> Result { - todo!() - } - - fn serialize_i8(self, v: i8) -> Result { - todo!() - } - - fn serialize_i16(self, v: i16) -> Result { - todo!() - } - - fn serialize_i32(self, v: i32) -> Result { - todo!() - } - - fn serialize_i64(self, v: i64) -> Result { - todo!() - } - - fn serialize_u8(self, v: u8) -> Result { - todo!() - } - - fn serialize_u16(self, v: u16) -> Result { - todo!() - } - - fn serialize_u32(self, v: u32) -> Result { - todo!() - } - - fn serialize_u64(self, v: u64) -> Result { - todo!() - } - - fn serialize_f32(self, v: f32) -> Result { - todo!() - } - - fn serialize_f64(self, v: f64) -> Result { - todo!() - } - - fn serialize_char(self, v: char) -> Result { - todo!() - } - - fn serialize_str(self, v: &str) -> Result { - write_cstring(&mut self.root_serializer.bytes, v) - } - - fn serialize_bytes(self, v: &[u8]) -> Result { - todo!() - } - - fn serialize_none(self) -> Result { - todo!() - } - - fn serialize_some(self, value: &T) -> Result - where - T: Serialize, - { - todo!() - } - - fn serialize_unit(self) -> Result { - todo!() - } - - fn serialize_unit_struct(self, name: &'static str) -> Result { - todo!() - } - - fn serialize_unit_variant( - self, - name: &'static str, - variant_index: u32, - variant: &'static str, - ) -> Result { - todo!() - } - - fn serialize_newtype_struct(self, name: &'static str, value: &T) -> Result - where - T: Serialize, - { - todo!() - } - - fn serialize_newtype_variant( - self, - name: &'static str, - variant_index: u32, - variant: &'static str, - value: &T, - ) -> Result - where - T: Serialize, - { - todo!() - } - - fn serialize_seq(self, len: Option) -> Result { - todo!() - } - - fn serialize_tuple(self, len: usize) -> Result { - todo!() - } - - fn serialize_tuple_struct( - self, - name: &'static str, - len: usize, - ) -> Result { - todo!() - } - - fn serialize_tuple_variant( - self, - name: &'static str, - variant_index: u32, - variant: &'static str, - len: usize, - ) -> Result { - todo!() - } - - fn serialize_map(self, len: Option) -> Result { - todo!() - } - - fn serialize_struct(self, name: &'static str, len: usize) -> Result { - todo!() - } - - fn serialize_struct_variant( - self, - name: &'static str, - variant_index: u32, - variant: &'static str, - len: usize, - ) -> Result { - todo!() - } -} - -pub(crate) struct CodeWithScopeSerializer<'a> { - start: usize, - doc: DocumentSerializer<'a>, -} - -impl<'a> CodeWithScopeSerializer<'a> { - fn start(code: &str, rs: &'a mut Serializer) -> Result { - let start = rs.bytes.len(); - write_i32(&mut rs.bytes, 0)?; // placeholder length - write_string(&mut rs.bytes, code)?; - - let doc = DocumentSerializer::start(rs)?; - Ok(Self { start, doc }) - } -} - -impl<'a> SerializeMap for CodeWithScopeSerializer<'a> { - type Ok = (); - type Error = Error; - - fn serialize_key(&mut self, key: &T) -> Result<()> - where - T: Serialize, - { - self.doc.serialize_key(key) - } - - fn serialize_value(&mut self, value: &T) -> Result<()> - where - T: Serialize, - { - self.doc.serialize_value(value) - } - - fn end(self) -> Result { - let result = self.doc.end_doc()?; - - let total_len = (result.root_serializer.bytes.len() - self.start) as i32; - result.root_serializer.replace_i32(self.start, total_len); - Ok(()) - } -} #[cfg(test)] mod test { diff --git a/src/ser/raw/value_serializer.rs b/src/ser/raw/value_serializer.rs new file mode 100644 index 00000000..f0285b74 --- /dev/null +++ b/src/ser/raw/value_serializer.rs @@ -0,0 +1,491 @@ +use std::convert::TryFrom; +use std::io::Write; + +use serde::ser::{Error as SerdeError, Impossible, SerializeMap, SerializeStruct}; +use serde::Serialize; + +use crate::oid::ObjectId; +use crate::ser::Result; +use crate::ser::{write_binary, write_cstring, write_i32, write_i64, write_string, Error}; +use crate::spec::{BinarySubtype, ElementType}; + +use super::document_serializer::DocumentSerializer; +use super::Serializer; + +/// A serializer used specifically for serializing the serde-data-model form of a BSON type (e.g. `Binary`) to +/// raw bytes. +pub(crate) struct ValueSerializer<'a> { + root_serializer: &'a mut Serializer, + state: SerializationStep, +} + +/// State machine used to track which step in the serialization of a given type the serializer is currently on. +#[derive(Debug)] +enum SerializationStep { + Oid, + + DateTime, + DateTimeNumberLong, + + Binary, + BinaryBase64, + BinarySubType { base64: String }, + + Symbol, + + RegEx, + RegExPattern, + RegExOptions, + + Timestamp, + TimestampTime, + TimestampIncrement { time: i64 }, + + DbPointer, + DbPointerRef, + DbPointerId, + + Code, + + CodeWithScopeCode, + CodeWithScopeScope { code: String }, + + MinKey, + + MaxKey, + + Undefined, + + Decimal128, + Decimal128Value, + + Done, +} + +impl<'a> ValueSerializer<'a> { + pub(super) fn new(rs: &'a mut Serializer, element_type: ElementType) -> Self { + let state = match element_type { + ElementType::DateTime => SerializationStep::DateTime, + ElementType::Binary => SerializationStep::Binary, + ElementType::ObjectId => SerializationStep::Oid, + ElementType::Symbol => SerializationStep::Symbol, + ElementType::RegularExpression => SerializationStep::RegEx, + ElementType::Timestamp => SerializationStep::Timestamp, + ElementType::DbPointer => SerializationStep::DbPointer, + ElementType::JavaScriptCode => SerializationStep::Code, + ElementType::JavaScriptCodeWithScope => SerializationStep::CodeWithScopeCode, + ElementType::MinKey => SerializationStep::MinKey, + ElementType::MaxKey => SerializationStep::MaxKey, + ElementType::Decimal128 => SerializationStep::Decimal128, + ElementType::Undefined => SerializationStep::Undefined, + + _ => todo!(), + }; + Self { + root_serializer: rs, + state, + } + } + + fn invalid_step(&self, primitive_type: &'static str) -> Error { + Error::custom(format!("cannot serialize {} at step {:?}", primitive_type, self.state)) + } +} + +impl<'a, 'b> serde::Serializer for &'b mut ValueSerializer<'a> { + type Ok = (); + type Error = Error; + + type SerializeSeq = Impossible<(), Error>; + type SerializeTuple = Impossible<(), Error>; + type SerializeTupleStruct = Impossible<(), Error>; + type SerializeTupleVariant = Impossible<(), Error>; + type SerializeMap = CodeWithScopeSerializer<'b>; + type SerializeStruct = Self; + type SerializeStructVariant = Impossible<(), Error>; + + fn serialize_bool(self, _v: bool) -> Result { + Err(self.invalid_step("bool")) + } + + fn serialize_i8(self, _v: i8) -> Result { + Err(self.invalid_step("i8")) + } + + fn serialize_i16(self, _v: i16) -> Result { + Err(self.invalid_step("i16")) + } + + fn serialize_i32(self, _v: i32) -> Result { + Err(self.invalid_step("i32")) + } + + fn serialize_i64(self, v: i64) -> Result { + match self.state { + SerializationStep::TimestampTime => { + self.state = SerializationStep::TimestampIncrement { time: v }; + Ok(()) + } + SerializationStep::TimestampIncrement { time } => { + let t = u32::try_from(time).map_err(Error::custom)?; + let i = u32::try_from(v).map_err(Error::custom)?; + + write_i32(&mut self.root_serializer.bytes, i as i32)?; + write_i32(&mut self.root_serializer.bytes, t as i32)?; + Ok(()) + } + _ => Err(self.invalid_step("i64")), + } + } + + fn serialize_u8(self, v: u8) -> Result { + todo!() + } + + fn serialize_u16(self, v: u16) -> Result { + todo!() + } + + fn serialize_u32(self, v: u32) -> Result { + todo!() + } + + fn serialize_u64(self, v: u64) -> Result { + todo!() + } + + fn serialize_f32(self, v: f32) -> Result { + todo!() + } + + fn serialize_f64(self, v: f64) -> Result { + todo!() + } + + fn serialize_char(self, v: char) -> Result { + todo!() + } + + fn serialize_str(self, v: &str) -> Result { + match &self.state { + SerializationStep::DateTimeNumberLong => { + let millis: i64 = v.parse().map_err(Error::custom)?; + write_i64(&mut self.root_serializer.bytes, millis)?; + } + SerializationStep::Oid => { + let oid = ObjectId::parse_str(v).map_err(Error::custom)?; + self.root_serializer.bytes.write_all(&oid.bytes())?; + } + SerializationStep::BinaryBase64 => { + self.state = SerializationStep::BinarySubType { + base64: v.to_string(), + }; + } + SerializationStep::BinarySubType { base64 } => { + let subtype_byte = hex::decode(v).map_err(Error::custom)?; + let subtype: BinarySubtype = subtype_byte[0].into(); + + let bytes = base64::decode(base64.as_str()).map_err(Error::custom)?; + + write_binary(&mut self.root_serializer.bytes, bytes.as_slice(), subtype)?; + } + SerializationStep::Symbol | SerializationStep::DbPointerRef => { + write_string(&mut self.root_serializer.bytes, v)?; + } + SerializationStep::RegExPattern | SerializationStep::RegExOptions => { + write_cstring(&mut self.root_serializer.bytes, v)?; + } + SerializationStep::Code => { + write_string(&mut self.root_serializer.bytes, v)?; + } + SerializationStep::CodeWithScopeCode => { + self.state = SerializationStep::CodeWithScopeScope { + code: v.to_string(), + }; + } + #[cfg(feature = "decimal128")] + SerializationStep::Decimal128Value => { + let d = Decimal128::from_str(v); + self.root_serializer.write_all(d.to_raw_bytes_le())?; + } + s => return Err(Error::custom(format!("can't serialize string for step {:?}", s))), + } + Ok(()) + } + + fn serialize_bytes(self, v: &[u8]) -> Result { + match self.state { + SerializationStep::Decimal128Value => { + self.root_serializer.bytes.write_all(v)?; + Ok(()) + } + _ => todo!(), + } + } + + fn serialize_none(self) -> Result { + todo!() + } + + fn serialize_some(self, value: &T) -> Result + where + T: Serialize, + { + todo!() + } + + fn serialize_unit(self) -> Result { + todo!() + } + + fn serialize_unit_struct(self, name: &'static str) -> Result { + todo!() + } + + fn serialize_unit_variant( + self, + name: &'static str, + variant_index: u32, + variant: &'static str, + ) -> Result { + todo!() + } + + fn serialize_newtype_struct(self, name: &'static str, value: &T) -> Result + where + T: Serialize, + { + todo!() + } + + fn serialize_newtype_variant( + self, + name: &'static str, + variant_index: u32, + variant: &'static str, + value: &T, + ) -> Result + where + T: Serialize, + { + todo!() + } + + fn serialize_seq(self, len: Option) -> Result { + todo!() + } + + fn serialize_tuple(self, len: usize) -> Result { + todo!() + } + + fn serialize_tuple_struct( + self, + name: &'static str, + len: usize, + ) -> Result { + todo!() + } + + fn serialize_tuple_variant( + self, + name: &'static str, + variant_index: u32, + variant: &'static str, + len: usize, + ) -> Result { + todo!() + } + + fn serialize_map(self, len: Option) -> Result { + match self.state { + SerializationStep::CodeWithScopeScope { ref code } => { + CodeWithScopeSerializer::start(code.as_str(), self.root_serializer) + } + _ => todo!(), + } + } + + fn serialize_struct(self, name: &'static str, len: usize) -> Result { + Ok(self) + } + + fn serialize_struct_variant( + self, + name: &'static str, + variant_index: u32, + variant: &'static str, + len: usize, + ) -> Result { + todo!() + } +} + +impl<'a, 'b> SerializeStruct for &'b mut ValueSerializer<'a> { + type Ok = (); + type Error = Error; + + fn serialize_field(&mut self, key: &'static str, value: &T) -> Result<()> + where + T: Serialize, + { + match (&self.state, key) { + (SerializationStep::DateTime, "$date") => { + self.state = SerializationStep::DateTimeNumberLong; + value.serialize(&mut **self)?; + } + (SerializationStep::DateTimeNumberLong, "$numberLong") => { + value.serialize(&mut **self)?; + self.state = SerializationStep::Done; + } + (SerializationStep::Oid, "$oid") => { + value.serialize(&mut **self)?; + self.state = SerializationStep::Done; + } + (SerializationStep::Binary, "$binary") => { + self.state = SerializationStep::BinaryBase64; + value.serialize(&mut **self)?; + } + (SerializationStep::BinaryBase64, "base64") => { + // state is updated in serialize + value.serialize(&mut **self)?; + } + (SerializationStep::BinarySubType { .. }, "subType") => { + value.serialize(&mut **self)?; + self.state = SerializationStep::Done; + } + (SerializationStep::Symbol, "$symbol") => { + value.serialize(&mut **self)?; + self.state = SerializationStep::Done; + } + (SerializationStep::RegEx, "$regularExpression") => { + self.state = SerializationStep::RegExPattern; + value.serialize(&mut **self)?; + } + (SerializationStep::RegExPattern, "pattern") => { + value.serialize(&mut **self)?; + self.state = SerializationStep::RegExOptions; + } + (SerializationStep::RegExOptions, "options") => { + value.serialize(&mut **self)?; + self.state = SerializationStep::Done; + } + (SerializationStep::Timestamp, "$timestamp") => { + self.state = SerializationStep::TimestampTime; + value.serialize(&mut **self)?; + } + (SerializationStep::TimestampTime, "t") => { + // state is updated in serialize + value.serialize(&mut **self)?; + } + (SerializationStep::TimestampIncrement { .. }, "i") => { + value.serialize(&mut **self)?; + self.state = SerializationStep::Done; + } + (SerializationStep::DbPointer, "$dbPointer") => { + self.state = SerializationStep::DbPointerRef; + value.serialize(&mut **self)?; + } + (SerializationStep::DbPointerRef, "$ref") => { + value.serialize(&mut **self)?; + self.state = SerializationStep::DbPointerId; + } + (SerializationStep::DbPointerId, "$id") => { + self.state = SerializationStep::Oid; + value.serialize(&mut **self)?; + } + (SerializationStep::Code, "$code") => { + value.serialize(&mut **self)?; + self.state = SerializationStep::Done; + } + (SerializationStep::CodeWithScopeCode, "$code") => { + // state is updated in serialize + value.serialize(&mut **self)?; + } + (SerializationStep::CodeWithScopeScope { .. }, "$scope") => { + value.serialize(&mut **self)?; + self.state = SerializationStep::Done; + } + (SerializationStep::MinKey, "$minKey") => { + self.state = SerializationStep::Done; + } + (SerializationStep::MaxKey, "$maxKey") => { + self.state = SerializationStep::Done; + } + (SerializationStep::Undefined, "$undefined") => { + self.state = SerializationStep::Done; + } + (SerializationStep::Decimal128, "$numberDecimal") + | (SerializationStep::Decimal128, "$numberDecimalBytes") => { + self.state = SerializationStep::Decimal128Value; + value.serialize(&mut **self)?; + } + (SerializationStep::Decimal128Value, "$numberDecimal") => { + value.serialize(&mut **self)?; + self.state = SerializationStep::Done; + } + (SerializationStep::Done, k) => { + return Err(Error::custom(format!( + "expected to end serialization of type, got extra key \"{}\"", + k + ))); + } + (state, k) => { + return Err(Error::custom(format!( + "mismatched serialization step and next key: {:?} + \"{}\"", + state, + k + ))); + }, + } + + Ok(()) + } + + fn end(self) -> Result { + Ok(()) + } +} + +pub(crate) struct CodeWithScopeSerializer<'a> { + start: usize, + doc: DocumentSerializer<'a>, +} + +impl<'a> CodeWithScopeSerializer<'a> { + fn start(code: &str, rs: &'a mut Serializer) -> Result { + let start = rs.bytes.len(); + write_i32(&mut rs.bytes, 0)?; // placeholder length + write_string(&mut rs.bytes, code)?; + + let doc = DocumentSerializer::start(rs)?; + Ok(Self { start, doc }) + } +} + +impl<'a> SerializeMap for CodeWithScopeSerializer<'a> { + type Ok = (); + type Error = Error; + + fn serialize_key(&mut self, key: &T) -> Result<()> + where + T: Serialize, + { + self.doc.serialize_key(key) + } + + fn serialize_value(&mut self, value: &T) -> Result<()> + where + T: Serialize, + { + self.doc.serialize_value(value) + } + + fn end(self) -> Result { + let result = self.doc.end_doc()?; + + let total_len = (result.root_serializer.bytes.len() - self.start) as i32; + result.root_serializer.replace_i32(self.start, total_len); + Ok(()) + } +} From a8595b14079d4b1bc44a2e83ae5c48df5081bfd6 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Tue, 20 Jul 2021 13:12:20 -0400 Subject: [PATCH 10/21] various cleanup --- src/extjson/models.rs | 1 - src/ser/mod.rs | 2 +- src/ser/raw/document_serializer.rs | 118 ++++----- src/ser/raw/mod.rs | 387 ++++++++++++++++------------- src/ser/raw/value_serializer.rs | 227 ++++++++++------- src/ser/serde.rs | 10 +- 6 files changed, 421 insertions(+), 324 deletions(-) diff --git a/src/extjson/models.rs b/src/extjson/models.rs index 1d179b6c..75fd71f2 100644 --- a/src/extjson/models.rs +++ b/src/extjson/models.rs @@ -3,7 +3,6 @@ use chrono::Utc; use serde::{ de::{Error, Unexpected}, - ser::SerializeStruct, Deserialize, Serialize, }; diff --git a/src/ser/mod.rs b/src/ser/mod.rs index 645b2a70..60dead67 100644 --- a/src/ser/mod.rs +++ b/src/ser/mod.rs @@ -221,7 +221,7 @@ where { let mut serializer = raw::Serializer::new(); value.serialize(&mut serializer)?; - writer.write_all(&mut serializer.into_vec())?; + writer.write_all(serializer.into_vec().as_slice())?; Ok(()) } diff --git a/src/ser/raw/document_serializer.rs b/src/ser/raw/document_serializer.rs index 6f297ff6..31bf8890 100644 --- a/src/ser/raw/document_serializer.rs +++ b/src/ser/raw/document_serializer.rs @@ -1,18 +1,18 @@ -use serde::Serialize; -use serde::ser::Impossible; +use serde::{ser::Impossible, Serialize}; -use crate::ser::write_cstring; -use crate::ser::write_i32; -use crate::ser::Result; -use crate::ser::Error; +use crate::{ + ser::{write_cstring, write_i32, Error, Result}, + to_bson, + Bson, +}; use super::Serializer; pub(crate) struct DocumentSerializationResult<'a> { - pub(crate) length: i32, pub(crate) root_serializer: &'a mut Serializer, } +/// Serializer used to serialize document or array bodies. pub(crate) struct DocumentSerializer<'a> { root_serializer: &'a mut Serializer, num_keys_serialized: usize, @@ -50,7 +50,6 @@ impl<'a> DocumentSerializer<'a> { let length = (self.root_serializer.bytes.len() - self.start) as i32; self.root_serializer.replace_i32(self.start, length); Ok(DocumentSerializationResult { - length, root_serializer: self.root_serializer, }) } @@ -151,10 +150,17 @@ impl<'a> serde::ser::SerializeTupleStruct for DocumentSerializer<'a> { } } +/// Serializer used specifically for serializing document keys. struct KeySerializer<'a> { root_serializer: &'a mut Serializer, } +impl<'a> KeySerializer<'a> { + fn invalid_key(v: T) -> Error { + Error::InvalidDocumentKey(to_bson(&v).unwrap_or(Bson::Null)) + } +} + impl<'a> serde::Serializer for KeySerializer<'a> { type Ok = (); @@ -169,51 +175,51 @@ impl<'a> serde::Serializer for KeySerializer<'a> { type SerializeStructVariant = Impossible<(), Error>; fn serialize_bool(self, v: bool) -> Result { - todo!() + Err(Self::invalid_key(v)) } fn serialize_i8(self, v: i8) -> Result { - todo!() + Err(Self::invalid_key(v)) } fn serialize_i16(self, v: i16) -> Result { - todo!() + Err(Self::invalid_key(v)) } fn serialize_i32(self, v: i32) -> Result { - todo!() + Err(Self::invalid_key(v)) } fn serialize_i64(self, v: i64) -> Result { - todo!() + Err(Self::invalid_key(v)) } fn serialize_u8(self, v: u8) -> Result { - todo!() + Err(Self::invalid_key(v)) } fn serialize_u16(self, v: u16) -> Result { - todo!() + Err(Self::invalid_key(v)) } fn serialize_u32(self, v: u32) -> Result { - todo!() + Err(Self::invalid_key(v)) } fn serialize_u64(self, v: u64) -> Result { - todo!() + Err(Self::invalid_key(v)) } fn serialize_f32(self, v: f32) -> Result { - todo!() + Err(Self::invalid_key(v)) } fn serialize_f64(self, v: f64) -> Result { - todo!() + Err(Self::invalid_key(v)) } fn serialize_char(self, v: char) -> Result { - todo!() + Err(Self::invalid_key(v)) } fn serialize_str(self, v: &str) -> Result { @@ -221,98 +227,98 @@ impl<'a> serde::Serializer for KeySerializer<'a> { } fn serialize_bytes(self, v: &[u8]) -> Result { - todo!() + Err(Self::invalid_key(v)) } fn serialize_none(self) -> Result { - todo!() + Err(Self::invalid_key(Bson::Null)) } fn serialize_some(self, value: &T) -> Result where T: Serialize, { - todo!() + value.serialize(self) } fn serialize_unit(self) -> Result { - todo!() + Err(Self::invalid_key(Bson::Null)) } - fn serialize_unit_struct(self, name: &'static str) -> Result { - todo!() + fn serialize_unit_struct(self, _name: &'static str) -> Result { + Err(Self::invalid_key(Bson::Null)) } fn serialize_unit_variant( self, - name: &'static str, - variant_index: u32, + _name: &'static str, + _variant_index: u32, variant: &'static str, ) -> Result { - todo!() + self.serialize_str(variant) } - fn serialize_newtype_struct(self, name: &'static str, value: &T) -> Result + fn serialize_newtype_struct(self, _name: &'static str, value: &T) -> Result where T: Serialize, { - todo!() + value.serialize(self) } fn serialize_newtype_variant( self, - name: &'static str, - variant_index: u32, - variant: &'static str, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, value: &T, ) -> Result where T: Serialize, { - todo!() + Err(Self::invalid_key(value)) } - fn serialize_seq(self, len: Option) -> Result { - todo!() + fn serialize_seq(self, _len: Option) -> Result { + Err(Self::invalid_key(Bson::Array(vec![]))) } - fn serialize_tuple(self, len: usize) -> Result { - todo!() + fn serialize_tuple(self, _len: usize) -> Result { + Err(Self::invalid_key(Bson::Array(vec![]))) } fn serialize_tuple_struct( self, - name: &'static str, - len: usize, + _name: &'static str, + _len: usize, ) -> Result { - todo!() + Err(Self::invalid_key(Bson::Document(doc! {}))) } fn serialize_tuple_variant( self, - name: &'static str, - variant_index: u32, - variant: &'static str, - len: usize, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, ) -> Result { - todo!() + Err(Self::invalid_key(Bson::Array(vec![]))) } - fn serialize_map(self, len: Option) -> Result { - todo!() + fn serialize_map(self, _len: Option) -> Result { + Err(Self::invalid_key(Bson::Document(doc! {}))) } - fn serialize_struct(self, name: &'static str, len: usize) -> Result { - todo!() + fn serialize_struct(self, _name: &'static str, _len: usize) -> Result { + Err(Self::invalid_key(Bson::Document(doc! {}))) } fn serialize_struct_variant( self, - name: &'static str, - variant_index: u32, - variant: &'static str, - len: usize, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, ) -> Result { - todo!() + Err(Self::invalid_key(Bson::Document(doc! {}))) } } diff --git a/src/ser/raw/mod.rs b/src/ser/raw/mod.rs index 51878343..9ddab463 100644 --- a/src/ser/raw/mod.rs +++ b/src/ser/raw/mod.rs @@ -1,28 +1,29 @@ mod document_serializer; mod value_serializer; -use std::{borrow::Borrow, convert::TryFrom, io::Write}; +use std::io::Write; use serde::{ - ser::{Error as SerdeError, Impossible, SerializeMap, SerializeStruct}, + ser::{Error as SerdeError, SerializeMap, SerializeStruct}, Serialize, - Serializer as SerdeSerializer, }; -use self::value_serializer::ValueSerializer; +use self::value_serializer::{ValueSerializer, ValueType}; use super::{write_cstring, write_f64, write_i32, write_i64, write_string, write_u8}; use crate::{ - oid::ObjectId, - ser::{write_binary, Error, Result}, + ser::{Error, Result}, spec::{BinarySubtype, ElementType}, - Decimal128, - Document, }; use document_serializer::DocumentSerializer; +/// Serializer used to convert a type `T` into raw BSON bytes. pub(crate) struct Serializer { bytes: Vec, + + /// The index into `bytes` where the current element type will need to be stored. + /// This needs to be set retroactively because in BSON, the element type comes before the key, + /// but in serde, the serializer learns of the type after serializing the key. type_index: usize, } @@ -34,10 +35,12 @@ impl Serializer { } } + /// Convert this serializer into the vec of the serialized bytes. pub(crate) fn into_vec(self) -> Vec { self.bytes } + /// Retroactively set the element type of the most recently serialized element. fn update_element_type(&mut self, t: ElementType) -> Result<()> { if self.type_index == 0 { if matches!(t, ElementType::EmbeddedDocument) { @@ -55,6 +58,7 @@ impl Serializer { Ok(()) } + /// Replace an i32 value at the given index with the given value. fn replace_i32(&mut self, at: usize, with: i32) { self.bytes .splice(at..at + 4, with.to_le_bytes().iter().cloned()); @@ -104,19 +108,48 @@ impl<'a> serde::Serializer for &'a mut Serializer { } fn serialize_u8(self, v: u8) -> Result { - todo!() + #[cfg(feature = "u2i")] + { + self.serialize_i32(v.into()) + } + + #[cfg(not(feature = "u2i"))] + Err(Error::UnsupportedUnsignedInteger(v.into())) } fn serialize_u16(self, v: u16) -> Result { - todo!() + #[cfg(feature = "u2i")] + { + self.serialize_i32(v.into()) + } + + #[cfg(not(feature = "u2i"))] + Err(Error::UnsupportedUnsignedInteger(v.into())) } fn serialize_u32(self, v: u32) -> Result { - todo!() + #[cfg(feature = "u2i")] + { + self.serialize_i64(v.into()) + } + + #[cfg(not(feature = "u2i"))] + Err(Error::UnsupportedUnsignedInteger(v.into())) } fn serialize_u64(self, v: u64) -> Result { - todo!() + #[cfg(feature = "u2i")] + { + use std::convert::TryFrom; + + match i64::try_from(v) { + Ok(ivalue) => self.serialize_i64(ivalue), + Err(_) => Err(Error::UnsignedIntegerExceededRange(v)), + } + } + + #[cfg(not(feature = "u2i"))] + Err(Error::UnsupportedUnsignedInteger(v)) } fn serialize_f32(self, v: f32) -> Result { @@ -126,11 +159,13 @@ impl<'a> serde::Serializer for &'a mut Serializer { fn serialize_f64(self, v: f64) -> Result { self.update_element_type(ElementType::Double)?; - write_f64(&mut self.bytes, v.into()) + write_f64(&mut self.bytes, v) } fn serialize_char(self, v: char) -> Result { - todo!() + let mut s = String::new(); + s.push(v); + self.serialize_str(&s) } fn serialize_str(self, v: &str) -> Result { @@ -165,7 +200,7 @@ impl<'a> serde::Serializer for &'a mut Serializer { } #[inline] - fn serialize_unit_struct(self, name: &'static str) -> Result { + fn serialize_unit_struct(self, _name: &'static str) -> Result { self.serialize_unit() } @@ -226,40 +261,47 @@ impl<'a> serde::Serializer for &'a mut Serializer { #[inline] fn serialize_tuple_variant( self, - name: &'static str, - variant_index: u32, + _name: &'static str, + _variant_index: u32, variant: &'static str, - len: usize, + _len: usize, ) -> Result { self.update_element_type(ElementType::EmbeddedDocument)?; VariantSerializer::start(&mut *self, variant, VariantInnerType::Tuple) } - fn serialize_map(self, len: Option) -> Result { + fn serialize_map(self, _len: Option) -> Result { self.update_element_type(ElementType::EmbeddedDocument)?; DocumentSerializer::start(&mut *self) } fn serialize_struct(self, name: &'static str, _len: usize) -> Result { - let element_type = match name { - "$oid" => ElementType::ObjectId, - "$date" => ElementType::DateTime, - "$binary" => ElementType::Binary, - "$timestamp" => ElementType::Timestamp, - "$minKey" => ElementType::MinKey, - "$maxKey" => ElementType::MaxKey, - "$code" => ElementType::JavaScriptCode, - "$codeWithScope" => ElementType::JavaScriptCodeWithScope, - "$symbol" => ElementType::Symbol, - "$undefined" => ElementType::Undefined, - "$regularExpression" => ElementType::RegularExpression, - "$dbPointer" => ElementType::DbPointer, - "$numberDecimal" => ElementType::Decimal128, - _ => ElementType::EmbeddedDocument, + let value_type = match name { + "$oid" => Some(ValueType::ObjectId), + "$date" => Some(ValueType::DateTime), + "$binary" => Some(ValueType::Binary), + "$timestamp" => Some(ValueType::Timestamp), + "$minKey" => Some(ValueType::MinKey), + "$maxKey" => Some(ValueType::MaxKey), + "$code" => Some(ValueType::JavaScriptCode), + "$codeWithScope" => Some(ValueType::JavaScriptCodeWithScope), + "$symbol" => Some(ValueType::Symbol), + "$undefined" => Some(ValueType::Undefined), + "$regularExpression" => Some(ValueType::RegularExpression), + "$dbPointer" => Some(ValueType::DbPointer), + "$numberDecimal" => Some(ValueType::Decimal128), + _ => None, }; - self.update_element_type(element_type)?; - StructSerializer::new(&mut *self, element_type) + self.update_element_type( + value_type + .map(Into::into) + .unwrap_or(ElementType::EmbeddedDocument), + )?; + match value_type { + Some(vt) => Ok(StructSerializer::Value(ValueSerializer::new(self, vt))), + None => Ok(StructSerializer::Document(DocumentSerializer::start(self)?)), + } } fn serialize_struct_variant( @@ -274,18 +316,11 @@ impl<'a> serde::Serializer for &'a mut Serializer { } pub(crate) enum StructSerializer<'a> { + /// Serialize a BSON value currently represented in serde as a struct (e.g. ObjectId) Value(ValueSerializer<'a>), - Document(DocumentSerializer<'a>), -} -impl<'a> StructSerializer<'a> { - fn new(rs: &'a mut Serializer, element_type: ElementType) -> Result { - if let ElementType::EmbeddedDocument = element_type { - Ok(Self::Document(DocumentSerializer::start(rs)?)) - } else { - Ok(Self::Value(ValueSerializer::new(rs, element_type))) - } - } + /// Serialize the struct as a document. + Document(DocumentSerializer<'a>), } impl<'a> SerializeStruct for StructSerializer<'a> { @@ -315,10 +350,19 @@ enum VariantInnerType { Struct, } +/// Serializer used for enum variants, including both tuple (e.g. Foo::Bar(1, 2, 3)) and +/// struct (e.g. Foo::Bar { a: 1 }). pub(crate) struct VariantSerializer<'a> { root_serializer: &'a mut Serializer, + + /// Variants are serialized as documents of the form `{ : }`, + /// and `doc_start` indicates the index at which the outer document begins. doc_start: usize, + + /// `inner_start` indicates the index at which the inner document or array begins. inner_start: usize, + + /// How many elements have been serialized in the inner document / array so far. num_elements_serialized: usize, } @@ -409,129 +453,128 @@ impl<'a> serde::ser::SerializeStructVariant for VariantSerializer<'a> { } } - -#[cfg(test)] -mod test { - use crate::{doc, Binary, DateTime, JavaScriptCodeWithScope}; - use serde::Serialize; - - #[test] - fn raw_serialize() { - let binary = Binary { - subtype: crate::spec::BinarySubtype::BinaryOld, - bytes: Vec::new(), - }; - let doc = doc! { - // "a": JavaScriptCodeWithScope { - // code: "".to_string(), - // scope: doc! {} - // } - "o": ObjectId::new(), - "d": DateTime::now(), - "b": binary, - // "x": { "y": "ok" }, - // "a": true, - // "b": 1i32, - // "c": 2i64, - // "d": 5.5, - // "e": [ true, "aaa", { "ok": 1.0 } ] - }; - println!("{}", doc); - // let mut v = Vec::new(); - // doc.to_writer(&mut v).unwrap(); - - let raw_v = crate::ser::to_vec(&doc).unwrap(); - // assert_eq!(raw_v, v); - let d = Document::from_reader(raw_v.as_slice()).unwrap(); - println!("{:#?}", d); - } - use std::time::Instant; - - use serde::Deserialize; - - use crate::{oid::ObjectId, Document}; - - #[derive(Debug, Deserialize)] - struct D { - x: i32, - y: i32, - i: I, - // oid: ObjectId, - null: Option, - b: bool, - d: f32, - } - - #[derive(Debug, Deserialize)] - struct I { - a: i32, - b: i32, - } - - #[derive(Debug, Serialize)] - struct Code { - c: JavaScriptCodeWithScope, - } - - // #[test] - // fn raw_serialize() { - // let c = Code { - // c: JavaScriptCodeWithScope { - // code: "".to_string(), - // scope: doc! {}, - // } - // }; - - // let v = crate::ser::to_vec(&c).unwrap(); - - // let doc = crate::to_document(&c).unwrap(); - // let mut v2 = Vec::new(); - // doc.to_writer(&mut v2).unwrap(); - - // assert_eq!(v, v2); - // } - - #[test] - fn raw_bench() { - let binary = Binary { - subtype: crate::spec::BinarySubtype::Generic, - bytes: vec![1, 2, 3, 4, 5], - }; - let doc = doc! { - "ok": 1, - "x": 1, - "y": 2, - "i": { "a": 300, "b": 12345 }, - // "oid": ObjectId::new(), - "null": crate::Bson::Null, - "b": true, - "dt": DateTime::now(), - "d": 12.5, - "b": binary, - }; - - let raw_start = Instant::now(); - for _ in 0..10_000 { - let _b = crate::ser::to_vec(&doc).unwrap(); - } - let raw_time = raw_start.elapsed(); - println!("raw time: {}", raw_time.as_secs_f32()); - - let normal_start = Instant::now(); - for _ in 0..10_000 { - let d: Document = crate::to_document(&doc).unwrap(); - let mut v = Vec::new(); - d.to_writer(&mut v).unwrap(); - } - let normal_time = normal_start.elapsed(); - println!("normal time: {}", normal_time.as_secs_f32()); - - let normal_start = Instant::now(); - for _ in 0..10_000 { - let mut v = Vec::new(); - doc.to_writer(&mut v).unwrap(); - } - let normal_time = normal_start.elapsed(); - println!("decode time: {}", normal_time.as_secs_f32()); - } -} +// #[cfg(test)] +// mod test { +// use crate::{doc, Binary, DateTime, JavaScriptCodeWithScope}; +// use serde::Serialize; + +// #[test] +// fn raw_serialize() { +// let binary = Binary { +// subtype: crate::spec::BinarySubtype::BinaryOld, +// bytes: Vec::new(), +// }; +// let doc = doc! { +// // "a": JavaScriptCodeWithScope { +// // code: "".to_string(), +// // scope: doc! {} +// // } +// "o": ObjectId::new(), +// "d": DateTime::now(), +// "b": binary, +// // "x": { "y": "ok" }, +// // "a": true, +// // "b": 1i32, +// // "c": 2i64, +// // "d": 5.5, +// // "e": [ true, "aaa", { "ok": 1.0 } ] +// }; +// println!("{}", doc); +// // let mut v = Vec::new(); +// // doc.to_writer(&mut v).unwrap(); + +// let raw_v = crate::ser::to_vec(&doc).unwrap(); +// // assert_eq!(raw_v, v); +// let d = Document::from_reader(raw_v.as_slice()).unwrap(); +// println!("{:#?}", d); +// } +// use std::time::Instant; + +// use serde::Deserialize; + +// use crate::{oid::ObjectId, Document}; + +// #[derive(Debug, Deserialize)] +// struct D { +// x: i32, +// y: i32, +// i: I, +// // oid: ObjectId, +// null: Option, +// b: bool, +// d: f32, +// } + +// #[derive(Debug, Deserialize)] +// struct I { +// a: i32, +// b: i32, +// } + +// #[derive(Debug, Serialize)] +// struct Code { +// c: JavaScriptCodeWithScope, +// } + +// // #[test] +// // fn raw_serialize() { +// // let c = Code { +// // c: JavaScriptCodeWithScope { +// // code: "".to_string(), +// // scope: doc! {}, +// // } +// // }; + +// // let v = crate::ser::to_vec(&c).unwrap(); + +// // let doc = crate::to_document(&c).unwrap(); +// // let mut v2 = Vec::new(); +// // doc.to_writer(&mut v2).unwrap(); + +// // assert_eq!(v, v2); +// // } + +// #[test] +// fn raw_bench() { +// let binary = Binary { +// subtype: crate::spec::BinarySubtype::Generic, +// bytes: vec![1, 2, 3, 4, 5], +// }; +// let doc = doc! { +// "ok": 1, +// "x": 1, +// "y": 2, +// "i": { "a": 300, "b": 12345 }, +// // "oid": ObjectId::new(), +// "null": crate::Bson::Null, +// "b": true, +// "dt": DateTime::now(), +// "d": 12.5, +// "b": binary, +// }; + +// let raw_start = Instant::now(); +// for _ in 0..10_000 { +// let _b = crate::ser::to_vec(&doc).unwrap(); +// } +// let raw_time = raw_start.elapsed(); +// println!("raw time: {}", raw_time.as_secs_f32()); + +// let normal_start = Instant::now(); +// for _ in 0..10_000 { +// let d: Document = crate::to_document(&doc).unwrap(); +// let mut v = Vec::new(); +// d.to_writer(&mut v).unwrap(); +// } +// let normal_time = normal_start.elapsed(); +// println!("normal time: {}", normal_time.as_secs_f32()); + +// let normal_start = Instant::now(); +// for _ in 0..10_000 { +// let mut v = Vec::new(); +// doc.to_writer(&mut v).unwrap(); +// } +// let normal_time = normal_start.elapsed(); +// println!("decode time: {}", normal_time.as_secs_f32()); +// } +// } diff --git a/src/ser/raw/value_serializer.rs b/src/ser/raw/value_serializer.rs index f0285b74..d410bb44 100644 --- a/src/ser/raw/value_serializer.rs +++ b/src/ser/raw/value_serializer.rs @@ -1,25 +1,29 @@ -use std::convert::TryFrom; -use std::io::Write; +use std::{convert::TryFrom, io::Write}; -use serde::ser::{Error as SerdeError, Impossible, SerializeMap, SerializeStruct}; -use serde::Serialize; +use serde::{ + ser::{Error as SerdeError, Impossible, SerializeMap, SerializeStruct}, + Serialize, +}; -use crate::oid::ObjectId; -use crate::ser::Result; -use crate::ser::{write_binary, write_cstring, write_i32, write_i64, write_string, Error}; -use crate::spec::{BinarySubtype, ElementType}; +#[cfg(feature = "decimal128")] +use crate::Decimal128; +use crate::{ + oid::ObjectId, + ser::{write_binary, write_cstring, write_i32, write_i64, write_string, Error, Result}, + spec::{BinarySubtype, ElementType}, +}; -use super::document_serializer::DocumentSerializer; -use super::Serializer; +use super::{document_serializer::DocumentSerializer, Serializer}; -/// A serializer used specifically for serializing the serde-data-model form of a BSON type (e.g. `Binary`) to -/// raw bytes. +/// A serializer used specifically for serializing the serde-data-model form of a BSON type (e.g. +/// `Binary`) to raw bytes. pub(crate) struct ValueSerializer<'a> { root_serializer: &'a mut Serializer, state: SerializationStep, } -/// State machine used to track which step in the serialization of a given type the serializer is currently on. +/// State machine used to track which step in the serialization of a given type the serializer is +/// currently on. #[derive(Debug)] enum SerializationStep { Oid, @@ -62,24 +66,60 @@ enum SerializationStep { Done, } +/// Enum of BSON "value" types that this serializer can serialize. +#[derive(Debug, Clone, Copy)] +pub(super) enum ValueType { + DateTime, + Binary, + ObjectId, + Symbol, + RegularExpression, + Timestamp, + DbPointer, + JavaScriptCode, + JavaScriptCodeWithScope, + MinKey, + MaxKey, + Decimal128, + Undefined, +} + +impl From for ElementType { + fn from(vt: ValueType) -> Self { + match vt { + ValueType::Binary => ElementType::Binary, + ValueType::DateTime => ElementType::DateTime, + ValueType::DbPointer => ElementType::DbPointer, + ValueType::Decimal128 => ElementType::Decimal128, + ValueType::Symbol => ElementType::Symbol, + ValueType::RegularExpression => ElementType::RegularExpression, + ValueType::Timestamp => ElementType::Timestamp, + ValueType::JavaScriptCode => ElementType::JavaScriptCode, + ValueType::JavaScriptCodeWithScope => ElementType::JavaScriptCodeWithScope, + ValueType::MaxKey => ElementType::MaxKey, + ValueType::MinKey => ElementType::MinKey, + ValueType::Undefined => ElementType::Undefined, + ValueType::ObjectId => ElementType::ObjectId, + } + } +} + impl<'a> ValueSerializer<'a> { - pub(super) fn new(rs: &'a mut Serializer, element_type: ElementType) -> Self { - let state = match element_type { - ElementType::DateTime => SerializationStep::DateTime, - ElementType::Binary => SerializationStep::Binary, - ElementType::ObjectId => SerializationStep::Oid, - ElementType::Symbol => SerializationStep::Symbol, - ElementType::RegularExpression => SerializationStep::RegEx, - ElementType::Timestamp => SerializationStep::Timestamp, - ElementType::DbPointer => SerializationStep::DbPointer, - ElementType::JavaScriptCode => SerializationStep::Code, - ElementType::JavaScriptCodeWithScope => SerializationStep::CodeWithScopeCode, - ElementType::MinKey => SerializationStep::MinKey, - ElementType::MaxKey => SerializationStep::MaxKey, - ElementType::Decimal128 => SerializationStep::Decimal128, - ElementType::Undefined => SerializationStep::Undefined, - - _ => todo!(), + pub(super) fn new(rs: &'a mut Serializer, value_type: ValueType) -> Self { + let state = match value_type { + ValueType::DateTime => SerializationStep::DateTime, + ValueType::Binary => SerializationStep::Binary, + ValueType::ObjectId => SerializationStep::Oid, + ValueType::Symbol => SerializationStep::Symbol, + ValueType::RegularExpression => SerializationStep::RegEx, + ValueType::Timestamp => SerializationStep::Timestamp, + ValueType::DbPointer => SerializationStep::DbPointer, + ValueType::JavaScriptCode => SerializationStep::Code, + ValueType::JavaScriptCodeWithScope => SerializationStep::CodeWithScopeCode, + ValueType::MinKey => SerializationStep::MinKey, + ValueType::MaxKey => SerializationStep::MaxKey, + ValueType::Decimal128 => SerializationStep::Decimal128, + ValueType::Undefined => SerializationStep::Undefined, }; Self { root_serializer: rs, @@ -88,7 +128,10 @@ impl<'a> ValueSerializer<'a> { } fn invalid_step(&self, primitive_type: &'static str) -> Error { - Error::custom(format!("cannot serialize {} at step {:?}", primitive_type, self.state)) + Error::custom(format!( + "cannot serialize {} at step {:?}", + primitive_type, self.state + )) } } @@ -138,32 +181,32 @@ impl<'a, 'b> serde::Serializer for &'b mut ValueSerializer<'a> { } } - fn serialize_u8(self, v: u8) -> Result { - todo!() + fn serialize_u8(self, _v: u8) -> Result { + Err(self.invalid_step("u8")) } - fn serialize_u16(self, v: u16) -> Result { - todo!() + fn serialize_u16(self, _v: u16) -> Result { + Err(self.invalid_step("u16")) } - fn serialize_u32(self, v: u32) -> Result { - todo!() + fn serialize_u32(self, _v: u32) -> Result { + Err(self.invalid_step("u32")) } - fn serialize_u64(self, v: u64) -> Result { - todo!() + fn serialize_u64(self, _v: u64) -> Result { + Err(self.invalid_step("u64")) } - fn serialize_f32(self, v: f32) -> Result { - todo!() + fn serialize_f32(self, _v: f32) -> Result { + Err(self.invalid_step("f32")) } - fn serialize_f64(self, v: f64) -> Result { - todo!() + fn serialize_f64(self, _v: f64) -> Result { + Err(self.invalid_step("f64")) } - fn serialize_char(self, v: char) -> Result { - todo!() + fn serialize_char(self, _v: char) -> Result { + Err(self.invalid_step("char")) } fn serialize_str(self, v: &str) -> Result { @@ -206,9 +249,14 @@ impl<'a, 'b> serde::Serializer for &'b mut ValueSerializer<'a> { #[cfg(feature = "decimal128")] SerializationStep::Decimal128Value => { let d = Decimal128::from_str(v); - self.root_serializer.write_all(d.to_raw_bytes_le())?; + self.root_serializer.bytes.write_all(&d.to_raw_bytes_le())?; + } + s => { + return Err(Error::custom(format!( + "can't serialize string for step {:?}", + s + ))) } - s => return Err(Error::custom(format!("can't serialize string for step {:?}", s))), } Ok(()) } @@ -219,105 +267,109 @@ impl<'a, 'b> serde::Serializer for &'b mut ValueSerializer<'a> { self.root_serializer.bytes.write_all(v)?; Ok(()) } - _ => todo!(), + _ => Err(self.invalid_step("&[u8]")), } } fn serialize_none(self) -> Result { - todo!() + Err(self.invalid_step("none")) } - fn serialize_some(self, value: &T) -> Result + fn serialize_some(self, _value: &T) -> Result where T: Serialize, { - todo!() + Err(self.invalid_step("some")) } fn serialize_unit(self) -> Result { - todo!() + Err(self.invalid_step("unit")) } - fn serialize_unit_struct(self, name: &'static str) -> Result { - todo!() + fn serialize_unit_struct(self, _name: &'static str) -> Result { + Err(self.invalid_step("unit_struct")) } fn serialize_unit_variant( self, - name: &'static str, - variant_index: u32, - variant: &'static str, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, ) -> Result { - todo!() + Err(self.invalid_step("unit_variant")) } - fn serialize_newtype_struct(self, name: &'static str, value: &T) -> Result + fn serialize_newtype_struct( + self, + _name: &'static str, + _value: &T, + ) -> Result where T: Serialize, { - todo!() + Err(self.invalid_step("newtype_struct")) } fn serialize_newtype_variant( self, - name: &'static str, - variant_index: u32, - variant: &'static str, - value: &T, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _value: &T, ) -> Result where T: Serialize, { - todo!() + Err(self.invalid_step("newtype_variant")) } - fn serialize_seq(self, len: Option) -> Result { - todo!() + fn serialize_seq(self, _len: Option) -> Result { + Err(self.invalid_step("newtype_seq")) } - fn serialize_tuple(self, len: usize) -> Result { - todo!() + fn serialize_tuple(self, _len: usize) -> Result { + Err(self.invalid_step("newtype_tuple")) } fn serialize_tuple_struct( self, - name: &'static str, - len: usize, + _name: &'static str, + _len: usize, ) -> Result { - todo!() + Err(self.invalid_step("tuple_struct")) } fn serialize_tuple_variant( self, - name: &'static str, - variant_index: u32, - variant: &'static str, - len: usize, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, ) -> Result { - todo!() + Err(self.invalid_step("tuple_variant")) } - fn serialize_map(self, len: Option) -> Result { + fn serialize_map(self, _len: Option) -> Result { match self.state { SerializationStep::CodeWithScopeScope { ref code } => { CodeWithScopeSerializer::start(code.as_str(), self.root_serializer) } - _ => todo!(), + _ => Err(self.invalid_step("tuple_map")), } } - fn serialize_struct(self, name: &'static str, len: usize) -> Result { + fn serialize_struct(self, _name: &'static str, _len: usize) -> Result { Ok(self) } fn serialize_struct_variant( self, - name: &'static str, - variant_index: u32, - variant: &'static str, - len: usize, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, ) -> Result { - todo!() + Err(self.invalid_step("struct_variant")) } } @@ -433,10 +485,9 @@ impl<'a, 'b> SerializeStruct for &'b mut ValueSerializer<'a> { (state, k) => { return Err(Error::custom(format!( "mismatched serialization step and next key: {:?} + \"{}\"", - state, - k + state, k ))); - }, + } } Ok(()) diff --git a/src/ser/serde.rs b/src/ser/serde.rs index 3d200561..234d935d 100644 --- a/src/ser/serde.rs +++ b/src/ser/serde.rs @@ -9,6 +9,7 @@ use serde::ser::{ SerializeTupleStruct, SerializeTupleVariant, }; +#[cfg(not(feature = "decimal128"))] use serde_bytes::Bytes; #[cfg(feature = "decimal128")] @@ -16,7 +17,7 @@ use crate::decimal128::Decimal128; use crate::{ bson::{Array, Binary, Bson, DbPointer, Document, JavaScriptCodeWithScope, Regex, Timestamp}, datetime::DateTime, - extjson::{self, models::DateTimeBody}, + extjson, oid::ObjectId, spec::BinarySubtype, }; @@ -608,11 +609,8 @@ impl Serialize for Decimal128 { where S: ser::Serializer, { - let mut state = serializer.serialize_struct()?; - let body = extjson::models::Decimal128 { - value: self.to_string(), - }; - state.serialize_field("$numberDecimal", body)?; + let mut state = serializer.serialize_struct("$numberDecimal", 1)?; + state.serialize_field("$numberDecimal", self.to_string().as_str())?; state.end() } } From b04122816afdb9fddfabe312987be4d9681a1e4e Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Tue, 20 Jul 2021 13:29:42 -0400 Subject: [PATCH 11/21] wip serde tests --- serde-tests/Cargo.toml | 1 + serde-tests/test.rs | 10 ++++++++++ src/ser/raw/mod.rs | 1 - src/ser/serde.rs | 2 +- 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/serde-tests/Cargo.toml b/serde-tests/Cargo.toml index 62be8bd2..2e3298ae 100644 --- a/serde-tests/Cargo.toml +++ b/serde-tests/Cargo.toml @@ -8,6 +8,7 @@ edition = "2018" bson = { path = "..", features = ["decimal128"] } serde = { version = "1.0", features = ["derive"] } pretty_assertions = "0.6.1" +hex = "0.4.2" [lib] name = "serde_tests" diff --git a/serde-tests/test.rs b/serde-tests/test.rs index d8e2df01..f24208ee 100644 --- a/serde-tests/test.rs +++ b/serde-tests/test.rs @@ -41,10 +41,20 @@ fn run_test(expected_value: &T, expected_doc: &Document, description: &str) where T: Serialize + DeserializeOwned + PartialEq + std::fmt::Debug, { + println!("{}", description); + println!("{:#?}", expected_value); + println!("{:#?}", expected_doc); let mut expected_bytes = Vec::new(); expected_doc .to_writer(&mut expected_bytes) .expect(description); + println!("expected bytes: {}", hex::encode(expected_bytes.as_slice())); + + let mut expected_bytes_serde = Vec::new(); + bson::to_writer(&expected_value, &mut expected_bytes_serde).expect(description); + println!("expected bytes serde: {}", hex::encode(expected_bytes_serde.as_slice())); + + assert_eq!(expected_bytes_serde, expected_bytes, "{}", description); let serialized_doc = bson::to_document(&expected_value).expect(description); assert_eq!(&serialized_doc, expected_doc, "{}", description); diff --git a/src/ser/raw/mod.rs b/src/ser/raw/mod.rs index 9ddab463..d59b5221 100644 --- a/src/ser/raw/mod.rs +++ b/src/ser/raw/mod.rs @@ -372,7 +372,6 @@ impl<'a> VariantSerializer<'a> { variant: &'static str, inner_type: VariantInnerType, ) -> Result { - rs.update_element_type(ElementType::EmbeddedDocument)?; let doc_start = rs.bytes.len(); write_i32(&mut rs.bytes, 0)?; diff --git a/src/ser/serde.rs b/src/ser/serde.rs index 234d935d..4d9e4287 100644 --- a/src/ser/serde.rs +++ b/src/ser/serde.rs @@ -92,7 +92,7 @@ impl Serialize for Bson { Bson::Decimal128(d) => d.serialize(serializer), Bson::Undefined => { let mut state = serializer.serialize_struct("$undefined", 1)?; - state.serialize_field("$undefined", &1)?; + state.serialize_field("$undefined", &true)?; state.end() } Bson::MaxKey => { From 88aeaebae34dec57edb1341320dc86772881c1a5 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Tue, 20 Jul 2021 18:02:19 -0400 Subject: [PATCH 12/21] fix serde tests --- .evergreen/run-tests-decimal128.sh | 3 + .evergreen/run-tests-u2i.sh | 3 + .evergreen/run-tests.sh | 3 + serde-tests/Cargo.toml | 6 +- serde-tests/test.rs | 28 ++++-- src/ser/raw/document_serializer.rs | 3 +- src/ser/raw/mod.rs | 138 +++-------------------------- 7 files changed, 45 insertions(+), 139 deletions(-) diff --git a/.evergreen/run-tests-decimal128.sh b/.evergreen/run-tests-decimal128.sh index 10a30b01..51f62cbf 100755 --- a/.evergreen/run-tests-decimal128.sh +++ b/.evergreen/run-tests-decimal128.sh @@ -4,3 +4,6 @@ set -o errexit . ~/.cargo/env RUST_BACKTRACE=1 cargo test --features decimal128 + +cd serde-tests +RUST_BACKTRACE=1 cargo test --features decimal128 diff --git a/.evergreen/run-tests-u2i.sh b/.evergreen/run-tests-u2i.sh index 373adb93..a3aba15c 100755 --- a/.evergreen/run-tests-u2i.sh +++ b/.evergreen/run-tests-u2i.sh @@ -4,3 +4,6 @@ set -o errexit . ~/.cargo/env RUST_BACKTRACE=1 cargo test --features u2i + +cd serde-tests +RUST_BACKTRACE=1 cargo test --features u2i diff --git a/.evergreen/run-tests.sh b/.evergreen/run-tests.sh index 71e4230f..6d56e8f6 100755 --- a/.evergreen/run-tests.sh +++ b/.evergreen/run-tests.sh @@ -5,3 +5,6 @@ set -o errexit . ~/.cargo/env RUST_BACKTRACE=1 cargo test RUST_BACKTRACE=1 cargo test --features chrono-0_4,uuid-0_8 + +cd serde-tests +RUST_BACKTRACE=1 cargo test diff --git a/serde-tests/Cargo.toml b/serde-tests/Cargo.toml index 2e3298ae..45bdfce3 100644 --- a/serde-tests/Cargo.toml +++ b/serde-tests/Cargo.toml @@ -4,8 +4,12 @@ version = "0.1.0" authors = ["Kevin Yeh "] edition = "2018" +[features] +u2i = ["bson/u2i"] +decimal128 = ["bson/decimal128"] + [dependencies] -bson = { path = "..", features = ["decimal128"] } +bson = { path = ".." } serde = { version = "1.0", features = ["derive"] } pretty_assertions = "0.6.1" hex = "0.4.2" diff --git a/serde-tests/test.rs b/serde-tests/test.rs index f24208ee..e7adab18 100644 --- a/serde-tests/test.rs +++ b/serde-tests/test.rs @@ -12,6 +12,7 @@ use serde::{ use std::{ borrow::Cow, collections::{BTreeMap, HashSet}, + convert::TryInto, }; use bson::{ @@ -37,25 +38,24 @@ use bson::{ /// - deserializing a `T` from the raw BSON version of `expected_doc` produces `expected_value` /// - deserializing a `Document` from the raw BSON version of `expected_doc` produces /// `expected_doc` +/// - `bson::to_writer` and `Document::to_writer` produce the same result given the same input fn run_test(expected_value: &T, expected_doc: &Document, description: &str) where T: Serialize + DeserializeOwned + PartialEq + std::fmt::Debug, { - println!("{}", description); - println!("{:#?}", expected_value); - println!("{:#?}", expected_doc); let mut expected_bytes = Vec::new(); expected_doc .to_writer(&mut expected_bytes) .expect(description); - println!("expected bytes: {}", hex::encode(expected_bytes.as_slice())); let mut expected_bytes_serde = Vec::new(); bson::to_writer(&expected_value, &mut expected_bytes_serde).expect(description); - println!("expected bytes serde: {}", hex::encode(expected_bytes_serde.as_slice())); - assert_eq!(expected_bytes_serde, expected_bytes, "{}", description); + let mut expected_bytes_from_doc_serde = Vec::new(); + bson::to_writer(&expected_doc, &mut expected_bytes_from_doc_serde).expect(description); + assert_eq!(expected_bytes_from_doc_serde, expected_bytes, "{}", description); + let serialized_doc = bson::to_document(&expected_value).expect(description); assert_eq!(&serialized_doc, expected_doc, "{}", description); assert_eq!( @@ -712,7 +712,7 @@ fn all_types() { undefined: Bson, code: Bson, code_w_scope: JavaScriptCodeWithScope, - decimal: Decimal128, + decimal: Bson, symbol: Bson, min_key: Bson, max_key: Bson, @@ -747,6 +747,16 @@ fn all_types() { let oid = ObjectId::new(); let subdoc = doc! { "k": true, "b": { "hello": "world" } }; + #[cfg(not(feature = "decimal128"))] + let decimal = { + let bytes = hex::decode("18000000136400D0070000000000000000000000003A3000").unwrap(); + let d = Document::from_reader(bytes.as_slice()).unwrap(); + d.get("d").unwrap().clone() + }; + + #[cfg(feature = "decimal128")] + let decimal = Bson::Decimal128(Decimal128::from_str("2.000")); + let doc = doc! { "x": 1, "y": 2_i64, @@ -768,7 +778,7 @@ fn all_types() { "undefined": Bson::Undefined, "code": code.clone(), "code_w_scope": code_w_scope.clone(), - "decimal": Bson::Decimal128(Decimal128::from_i32(5)), + "decimal": decimal.clone(), "symbol": Bson::Symbol("ok".to_string()), "min_key": Bson::MinKey, "max_key": Bson::MaxKey, @@ -799,7 +809,7 @@ fn all_types() { undefined: Bson::Undefined, code, code_w_scope, - decimal: Decimal128::from_i32(5), + decimal, symbol: Bson::Symbol("ok".to_string()), min_key: Bson::MinKey, max_key: Bson::MaxKey, diff --git a/src/ser/raw/document_serializer.rs b/src/ser/raw/document_serializer.rs index 31bf8890..7350ba27 100644 --- a/src/ser/raw/document_serializer.rs +++ b/src/ser/raw/document_serializer.rs @@ -35,8 +35,7 @@ impl<'a> DocumentSerializer<'a> { T: serde::Serialize + ?Sized, { // push a dummy element type for now, will update this once we serialize the value - self.root_serializer.type_index = self.root_serializer.bytes.len(); - self.root_serializer.bytes.push(0); + self.root_serializer.reserve_element_type(); key.serialize(KeySerializer { root_serializer: &mut *self.root_serializer, })?; diff --git a/src/ser/raw/mod.rs b/src/ser/raw/mod.rs index d59b5221..11170617 100644 --- a/src/ser/raw/mod.rs +++ b/src/ser/raw/mod.rs @@ -40,6 +40,12 @@ impl Serializer { self.bytes } + /// Reserve a spot for the element type to be set retroactively via `update_element_type`. + fn reserve_element_type(&mut self) { + self.type_index = self.bytes.len(); // record index + self.bytes.push(0); // push temporary placeholder + } + /// Retroactively set the element type of the most recently serialized element. fn update_element_type(&mut self, t: ElementType) -> Result<()> { if self.type_index == 0 { @@ -311,6 +317,7 @@ impl<'a> serde::Serializer for &'a mut Serializer { variant: &'static str, _len: usize, ) -> Result { + self.update_element_type(ElementType::EmbeddedDocument)?; VariantSerializer::start(&mut *self, variant, VariantInnerType::Struct) } } @@ -373,6 +380,7 @@ impl<'a> VariantSerializer<'a> { inner_type: VariantInnerType, ) -> Result { let doc_start = rs.bytes.len(); + // write placeholder length for document, will be updated at end write_i32(&mut rs.bytes, 0)?; let inner = match inner_type { @@ -382,6 +390,8 @@ impl<'a> VariantSerializer<'a> { rs.bytes.push(inner as u8); write_cstring(&mut rs.bytes, variant)?; let inner_start = rs.bytes.len(); + // write placeholder length for inner, will be updated at end + write_i32(&mut rs.bytes, 0)?; Ok(Self { root_serializer: rs, @@ -395,7 +405,7 @@ impl<'a> VariantSerializer<'a> { where T: Serialize + ?Sized, { - self.root_serializer.bytes.push(0); + self.root_serializer.reserve_element_type(); write_cstring(&mut self.root_serializer.bytes, k)?; v.serialize(&mut *self.root_serializer)?; @@ -451,129 +461,3 @@ impl<'a> serde::ser::SerializeStructVariant for VariantSerializer<'a> { self.end_both() } } - -// #[cfg(test)] -// mod test { -// use crate::{doc, Binary, DateTime, JavaScriptCodeWithScope}; -// use serde::Serialize; - -// #[test] -// fn raw_serialize() { -// let binary = Binary { -// subtype: crate::spec::BinarySubtype::BinaryOld, -// bytes: Vec::new(), -// }; -// let doc = doc! { -// // "a": JavaScriptCodeWithScope { -// // code: "".to_string(), -// // scope: doc! {} -// // } -// "o": ObjectId::new(), -// "d": DateTime::now(), -// "b": binary, -// // "x": { "y": "ok" }, -// // "a": true, -// // "b": 1i32, -// // "c": 2i64, -// // "d": 5.5, -// // "e": [ true, "aaa", { "ok": 1.0 } ] -// }; -// println!("{}", doc); -// // let mut v = Vec::new(); -// // doc.to_writer(&mut v).unwrap(); - -// let raw_v = crate::ser::to_vec(&doc).unwrap(); -// // assert_eq!(raw_v, v); -// let d = Document::from_reader(raw_v.as_slice()).unwrap(); -// println!("{:#?}", d); -// } -// use std::time::Instant; - -// use serde::Deserialize; - -// use crate::{oid::ObjectId, Document}; - -// #[derive(Debug, Deserialize)] -// struct D { -// x: i32, -// y: i32, -// i: I, -// // oid: ObjectId, -// null: Option, -// b: bool, -// d: f32, -// } - -// #[derive(Debug, Deserialize)] -// struct I { -// a: i32, -// b: i32, -// } - -// #[derive(Debug, Serialize)] -// struct Code { -// c: JavaScriptCodeWithScope, -// } - -// // #[test] -// // fn raw_serialize() { -// // let c = Code { -// // c: JavaScriptCodeWithScope { -// // code: "".to_string(), -// // scope: doc! {}, -// // } -// // }; - -// // let v = crate::ser::to_vec(&c).unwrap(); - -// // let doc = crate::to_document(&c).unwrap(); -// // let mut v2 = Vec::new(); -// // doc.to_writer(&mut v2).unwrap(); - -// // assert_eq!(v, v2); -// // } - -// #[test] -// fn raw_bench() { -// let binary = Binary { -// subtype: crate::spec::BinarySubtype::Generic, -// bytes: vec![1, 2, 3, 4, 5], -// }; -// let doc = doc! { -// "ok": 1, -// "x": 1, -// "y": 2, -// "i": { "a": 300, "b": 12345 }, -// // "oid": ObjectId::new(), -// "null": crate::Bson::Null, -// "b": true, -// "dt": DateTime::now(), -// "d": 12.5, -// "b": binary, -// }; - -// let raw_start = Instant::now(); -// for _ in 0..10_000 { -// let _b = crate::ser::to_vec(&doc).unwrap(); -// } -// let raw_time = raw_start.elapsed(); -// println!("raw time: {}", raw_time.as_secs_f32()); - -// let normal_start = Instant::now(); -// for _ in 0..10_000 { -// let d: Document = crate::to_document(&doc).unwrap(); -// let mut v = Vec::new(); -// d.to_writer(&mut v).unwrap(); -// } -// let normal_time = normal_start.elapsed(); -// println!("normal time: {}", normal_time.as_secs_f32()); - -// let normal_start = Instant::now(); -// for _ in 0..10_000 { -// let mut v = Vec::new(); -// doc.to_writer(&mut v).unwrap(); -// } -// let normal_time = normal_start.elapsed(); -// println!("decode time: {}", normal_time.as_secs_f32()); -// } -// } From efe286751830c5c16448e8ed04a85a68e91fc314 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Tue, 20 Jul 2021 19:56:26 -0400 Subject: [PATCH 13/21] lossless decimal128 serialization via bytes --- serde-tests/test.rs | 1 - src/bson.rs | 16 ++++++++- src/ser/serde.rs | 5 ++- src/tests/spec/corpus.rs | 74 +++++++++++++++++++++++++--------------- 4 files changed, 65 insertions(+), 31 deletions(-) diff --git a/serde-tests/test.rs b/serde-tests/test.rs index e7adab18..a2300d45 100644 --- a/serde-tests/test.rs +++ b/serde-tests/test.rs @@ -12,7 +12,6 @@ use serde::{ use std::{ borrow::Cow, collections::{BTreeMap, HashSet}, - convert::TryInto, }; use bson::{ diff --git a/src/bson.rs b/src/bson.rs index 03b8bee0..c70fdb94 100644 --- a/src/bson.rs +++ b/src/bson.rs @@ -22,7 +22,7 @@ //! BSON definition use std::{ - convert::TryFrom, + convert::{TryFrom, TryInto}, fmt::{self, Debug, Display, Formatter}, }; @@ -698,6 +698,20 @@ impl Bson { } } + ["$numberDecimalBytes"] => { + if let Ok(bytes) = doc.get_binary_generic("$numberDecimalBytes") { + if let Ok(b) = bytes.clone().try_into() { + #[cfg(not(feature = "decimal128"))] + return Bson::Decimal128(Decimal128 { bytes: b }); + + #[cfg(feature = "decimal128")] + unsafe { + return Bson::Decimal128(Decimal128::from_raw_bytes_le(b)); + } + } + } + } + ["$binary"] => { if let Some(binary) = Binary::from_extended_doc(&doc) { return Bson::Binary(binary); diff --git a/src/ser/serde.rs b/src/ser/serde.rs index 4d9e4287..4e8c587c 100644 --- a/src/ser/serde.rs +++ b/src/ser/serde.rs @@ -610,7 +610,10 @@ impl Serialize for Decimal128 { S: ser::Serializer, { let mut state = serializer.serialize_struct("$numberDecimal", 1)?; - state.serialize_field("$numberDecimal", self.to_string().as_str())?; + state.serialize_field( + "$numberDecimalBytes", + serde_bytes::Bytes::new(&self.to_raw_bytes_le()), + )?; state.end() } } diff --git a/src/tests/spec/corpus.rs b/src/tests/spec/corpus.rs index b23494d0..3291da00 100644 --- a/src/tests/spec/corpus.rs +++ b/src/tests/spec/corpus.rs @@ -61,62 +61,78 @@ fn run_test(test: TestFile) { let canonical_bson = hex::decode(&valid.canonical_bson).expect(&description); - let bson_to_native_cb = + // these four cover the four ways to create a `Document` from the provided BSON. + let documentfromreader_cb = Document::from_reader(canonical_bson.as_slice()).expect(&description); - let bson_to_native_cb_serde: Document = + let fromreader_cb: Document = crate::from_reader(canonical_bson.as_slice()).expect(&description); - let native_to_native_cb_serde: Document = - crate::from_document(bson_to_native_cb.clone()).expect(&description); + let fromdocument_documentfromreader_cb: Document = + crate::from_document(documentfromreader_cb.clone()).expect(&description); - let mut native_to_bson_bson_to_native_cb = Vec::new(); - bson_to_native_cb - .to_writer(&mut native_to_bson_bson_to_native_cb) + let todocument_documentfromreader_cb: Document = + crate::to_document(&documentfromreader_cb).expect(&description); + + // These cover the ways to serialize those `Documents` back to BSON. + let mut documenttowriter_documentfromreader_cb = Vec::new(); + documentfromreader_cb + .to_writer(&mut documenttowriter_documentfromreader_cb) + .expect(&description); + + let mut documenttowriter_fromreader_cb = Vec::new(); + fromreader_cb + .to_writer(&mut documenttowriter_fromreader_cb) .expect(&description); - let mut native_to_bson_bson_to_native_cb_serde = Vec::new(); - bson_to_native_cb_serde - .to_writer(&mut native_to_bson_bson_to_native_cb_serde) + let mut documenttowriter_fromdocument_documentfromreader_cb = Vec::new(); + fromdocument_documentfromreader_cb + .to_writer(&mut documenttowriter_fromdocument_documentfromreader_cb) .expect(&description); - let mut native_to_bson_native_to_native_cb_serde = Vec::new(); - native_to_native_cb_serde - .to_writer(&mut native_to_bson_native_to_native_cb_serde) + let mut documenttowriter_todocument_documentfromreader_cb = Vec::new(); + todocument_documentfromreader_cb + .to_writer(&mut documenttowriter_todocument_documentfromreader_cb) .expect(&description); - let mut native_to_bson_serde_bson_to_native_cb = Vec::new(); - crate::to_writer( - &bson_to_native_cb, - &mut native_to_bson_serde_bson_to_native_cb, - ) - .expect(&description); + let mut towriter_documentfromreader_cb = Vec::new(); + crate::to_writer(&documentfromreader_cb, &mut towriter_documentfromreader_cb) + .expect(&description); // native_to_bson( bson_to_native(cB) ) = cB + // now we ensure the hex for all 5 are equivalent to the canonical BSON provided by the + // test. assert_eq!( - hex::encode(native_to_bson_bson_to_native_cb).to_lowercase(), + hex::encode(documenttowriter_documentfromreader_cb).to_lowercase(), valid.canonical_bson.to_lowercase(), "{}", description, ); assert_eq!( - hex::encode(native_to_bson_bson_to_native_cb_serde).to_lowercase(), + hex::encode(documenttowriter_fromreader_cb).to_lowercase(), valid.canonical_bson.to_lowercase(), "{}", description, ); assert_eq!( - hex::encode(native_to_bson_serde_bson_to_native_cb).to_lowercase(), + hex::encode(documenttowriter_fromdocument_documentfromreader_cb).to_lowercase(), valid.canonical_bson.to_lowercase(), "{}", description, ); assert_eq!( - hex::encode(native_to_bson_native_to_native_cb_serde).to_lowercase(), + hex::encode(documenttowriter_todocument_documentfromreader_cb).to_lowercase(), + valid.canonical_bson.to_lowercase(), + "{}", + description, + ); + + assert_eq!( + hex::encode(towriter_documentfromreader_cb).to_lowercase(), valid.canonical_bson.to_lowercase(), "{}", description, @@ -124,14 +140,16 @@ fn run_test(test: TestFile) { // NaN == NaN is false, so we skip document comparisons that contain NaN if !description.to_ascii_lowercase().contains("nan") && !description.contains("decq541") { + assert_eq!(documentfromreader_cb, fromreader_cb, "{}", description); + assert_eq!( - bson_to_native_cb, bson_to_native_cb_serde, + documentfromreader_cb, fromdocument_documentfromreader_cb, "{}", description ); assert_eq!( - bson_to_native_cb, native_to_native_cb_serde, + documentfromreader_cb, todocument_documentfromreader_cb, "{}", description ); @@ -170,7 +188,7 @@ fn run_test(test: TestFile) { // NaN == NaN is false, so we skip document comparisons that contain NaN if !description.contains("NaN") { assert_eq!( - bson_to_native_db_serde, bson_to_native_cb, + bson_to_native_db_serde, documentfromreader_cb, "{}", description ); @@ -215,7 +233,7 @@ fn run_test(test: TestFile) { // TODO RUST-36: Enable decimal128 tests. if test.bson_type != "0x13" { assert_eq!( - Bson::Document(bson_to_native_cb.clone()).into_canonical_extjson(), + Bson::Document(documentfromreader_cb.clone()).into_canonical_extjson(), cej_updated_float, "{}", description @@ -228,7 +246,7 @@ fn run_test(test: TestFile) { let rej: serde_json::Value = serde_json::from_str(relaxed_extjson).expect(&description); assert_eq!( - Bson::Document(bson_to_native_cb.clone()).into_relaxed_extjson(), + Bson::Document(documentfromreader_cb.clone()).into_relaxed_extjson(), rej, "{}", description From d9ccb018caf8780006f9445445c69862c015ae02 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Wed, 21 Jul 2021 19:09:21 -0400 Subject: [PATCH 14/21] various cleanup --- .evergreen/config.yml | 15 --------------- src/extjson/models.rs | 12 ------------ src/ser/raw/mod.rs | 11 +++-------- src/spec.rs | 8 -------- 4 files changed, 3 insertions(+), 43 deletions(-) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index f1f871f2..8fe823de 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -92,16 +92,6 @@ functions: ${PREPARE_SHELL} .evergreen/run-tests-u2i.sh - "run serde tests": - - command: shell.exec - type: test - params: - shell: bash - working_dir: "src" - script: | - ${PREPARE_SHELL} - .evergreen/run-tests-serde.sh - "run decimal128 tests": - command: shell.exec type: test @@ -170,10 +160,6 @@ tasks: commands: - func: "run u2i tests" - - name: "test-serde" - commands: - - func: "run serde tests" - - name: "test-decimal128" commands: - func: "run decimal128 tests" @@ -211,7 +197,6 @@ buildvariants: tasks: - name: "test" - name: "test-u2i" - - name: "test-serde" - name: "test-decimal128" - matrix_name: "compile only" diff --git a/src/extjson/models.rs b/src/extjson/models.rs index 75fd71f2..79065c19 100644 --- a/src/extjson/models.rs +++ b/src/extjson/models.rs @@ -233,18 +233,6 @@ impl Timestamp { } } -// impl Serialize for TimestampBody { -// fn serialize(&self, serializer: S) -> Result -// where -// S: serde::Serializer -// { -// let mut state = serializer.serialize_struct("TimestampBody", 2)?; -// state.serialize_field("t", Bson::from(self.t))?; -// state.serialize_field("i", Bson::from(self.i))?; -// state.end() -// } -// } - #[derive(Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub(crate) struct DateTime { diff --git a/src/ser/raw/mod.rs b/src/ser/raw/mod.rs index 11170617..6f472dd0 100644 --- a/src/ser/raw/mod.rs +++ b/src/ser/raw/mod.rs @@ -90,15 +90,11 @@ impl<'a> serde::Serializer for &'a mut Serializer { } fn serialize_i8(self, v: i8) -> Result { - self.update_element_type(ElementType::Int32)?; - write_i32(&mut self.bytes, v.into())?; - Ok(()) + self.serialize_i32(v.into()) } fn serialize_i16(self, v: i16) -> Result { - self.update_element_type(ElementType::Int32)?; - write_i32(&mut self.bytes, v.into())?; - Ok(()) + self.serialize_i32(v.into()) } fn serialize_i32(self, v: i32) -> Result { @@ -159,8 +155,7 @@ impl<'a> serde::Serializer for &'a mut Serializer { } fn serialize_f32(self, v: f32) -> Result { - self.update_element_type(ElementType::Double)?; - write_f64(&mut self.bytes, v.into()) + self.serialize_f64(v.into()) } fn serialize_f64(self, v: f64) -> Result { diff --git a/src/spec.rs b/src/spec.rs index fb05f81b..7fed83bb 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -140,14 +140,6 @@ impl ElementType { } } -// impl From for u8 { -// fn from(et: ElementType) -> Self { -// match et { -// ElementType::Array => -// } -// } -// } - /// The available binary subtypes, plus a user-defined slot. #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] #[non_exhaustive] From 7b9eb51410991a96c4621a47369e47fda0fd4fce Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Thu, 22 Jul 2021 16:10:57 -0400 Subject: [PATCH 15/21] add u2i tests to serde-tests --- serde-tests/test.rs | 80 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/serde-tests/test.rs b/serde-tests/test.rs index a2300d45..43693fd4 100644 --- a/serde-tests/test.rs +++ b/serde-tests/test.rs @@ -870,3 +870,83 @@ fn borrowed() { bson::from_slice(bson.as_slice()).expect("deserialization should succeed"); assert_eq!(deserialized, v); } + +#[cfg(feature = "u2i")] +#[test] +fn u2i() { + #[derive(Serialize, Deserialize, Debug, PartialEq)] + struct Foo { + u_8: u8, + u_16: u16, + u_32: u32, + u_32_max: u32, + u_64: u64, + i_64_max: u64, + } + + let v = Foo { + u_8: 15, + u_16: 123, + u_32: 1234, + u_32_max: u32::MAX, + u_64: 12345, + i_64_max: i64::MAX as u64, + }; + + let expected = doc! { + "u_8": 15_i32, + "u_16": 123_i32, + "u_32": 1234_i64, + "u_32_max": u32::MAX as i64, + "u_64": 12345_i64, + "i_64_max": i64::MAX as u64, + }; + + run_test(&v, &expected, "u2i - valid"); + + #[derive(Serialize, Debug)] + struct TooBig { + u_64: u64 + } + let v = TooBig { + u_64: i64::MAX as u64 + 1, + }; + bson::to_document(&v).unwrap_err(); + bson::to_vec(&v).unwrap_err(); +} + +#[cfg(not(feature = "u2i"))] +#[test] +fn unsigned() { + #[derive(Serialize, Debug)] + struct U8 { + v: u8, + } + let v = U8 { v: 1 }; + bson::to_document(&v).unwrap_err(); + bson::to_vec(&v).unwrap_err(); + + #[derive(Serialize, Debug)] + struct U16 { + v: u16, + } + let v = U16 { v: 1 }; + bson::to_document(&v).unwrap_err(); + bson::to_vec(&v).unwrap_err(); + + #[derive(Serialize, Debug)] + struct U32 { + v: u32, + } + let v = U32 { v: 1 }; + bson::to_document(&v).unwrap_err(); + bson::to_vec(&v).unwrap_err(); + + #[derive(Serialize, Debug)] + struct U64 { + v: u64, + } + let v = U64 { v: 1 }; + bson::to_document(&v).unwrap_err(); + bson::to_vec(&v).unwrap_err(); +} From 3617b48396df88aba5eecfa2ec3b8afd66721331 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Thu, 22 Jul 2021 16:14:44 -0400 Subject: [PATCH 16/21] add docstrings --- src/ser/mod.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ser/mod.rs b/src/ser/mod.rs index 60dead67..b13840f8 100644 --- a/src/ser/mod.rs +++ b/src/ser/mod.rs @@ -214,6 +214,7 @@ where } } +/// Serialize the given `T` as BSON bytes into the provided writer. pub fn to_writer(value: &T, mut writer: W) -> Result<()> where T: Serialize, @@ -225,6 +226,7 @@ where Ok(()) } +/// Serialize the given `T` as a BSON byte vector. pub fn to_vec(value: &T) -> Result> where T: Serialize, From acfd4bc9c89cc10d046927ee23e21d01f9baa939 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Thu, 22 Jul 2021 16:29:15 -0400 Subject: [PATCH 17/21] cleanup --- src/ser/serde.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/ser/serde.rs b/src/ser/serde.rs index 4e8c587c..04f61ba0 100644 --- a/src/ser/serde.rs +++ b/src/ser/serde.rs @@ -557,8 +557,6 @@ impl Serialize for Regex { where S: ser::Serializer, { - // let value = Bson::RegularExpression(self.clone()); - // value.serialize(serializer) let mut state = serializer.serialize_struct("$regularExpression", 1)?; let body = extjson::models::RegexBody { pattern: self.pattern.clone(), From d2b22120aa4e823088416aba00995ac9b1fafc1c Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Thu, 22 Jul 2021 16:57:29 -0400 Subject: [PATCH 18/21] unify binary serialization impls --- src/ser/mod.rs | 15 +++++++++------ src/ser/raw/mod.rs | 9 ++------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/ser/mod.rs b/src/ser/mod.rs index b13840f8..6ece46ca 100644 --- a/src/ser/mod.rs +++ b/src/ser/mod.rs @@ -36,9 +36,10 @@ use std::{io::Write, iter::FromIterator, mem}; use crate::decimal128::Decimal128; use crate::{ bson::{Binary, Bson, DbPointer, Document, JavaScriptCodeWithScope, Regex}, + de::MAX_BSON_SIZE, spec::BinarySubtype, }; -use ::serde::Serialize; +use ::serde::{ser::Error as SerdeError, Serialize}; fn write_string(writer: &mut W, s: &str) -> Result<()> { writer.write_all(&(s.len() as i32 + 1).to_le_bytes())?; @@ -53,11 +54,6 @@ fn write_cstring(writer: &mut W, s: &str) -> Result<()> { Ok(()) } -#[inline] -pub(crate) fn write_u8(writer: &mut W, val: u8) -> Result<()> { - writer.write_all(&[val]).map(|_| ()).map_err(From::from) -} - #[inline] pub(crate) fn write_i32(writer: &mut W, val: i32) -> Result<()> { writer @@ -97,6 +93,13 @@ fn write_binary(mut writer: W, bytes: &[u8], subtype: BinarySubtype) - bytes.len() }; + if len > MAX_BSON_SIZE as usize { + return Err(Error::custom(format!( + "binary length {} exceeded maximum size", + bytes.len() + ))); + } + write_i32(&mut writer, len as i32)?; writer.write_all(&[subtype.into()])?; diff --git a/src/ser/raw/mod.rs b/src/ser/raw/mod.rs index 6f472dd0..095de874 100644 --- a/src/ser/raw/mod.rs +++ b/src/ser/raw/mod.rs @@ -1,8 +1,6 @@ mod document_serializer; mod value_serializer; -use std::io::Write; - use serde::{ ser::{Error as SerdeError, SerializeMap, SerializeStruct}, Serialize, @@ -10,7 +8,7 @@ use serde::{ use self::value_serializer::{ValueSerializer, ValueType}; -use super::{write_cstring, write_f64, write_i32, write_i64, write_string, write_u8}; +use super::{write_binary, write_cstring, write_f64, write_i32, write_i64, write_string}; use crate::{ ser::{Error, Result}, spec::{BinarySubtype, ElementType}, @@ -176,10 +174,7 @@ impl<'a> serde::Serializer for &'a mut Serializer { fn serialize_bytes(self, v: &[u8]) -> Result { self.update_element_type(ElementType::Binary)?; - let len = v.len() as i32; - write_i32(&mut self.bytes, len)?; - write_u8(&mut self.bytes, BinarySubtype::Generic.into())?; - self.bytes.write_all(v)?; + write_binary(&mut self.bytes, v, BinarySubtype::Generic)?; Ok(()) } From 4ce691332b96814a1349409cf79b7d6399ed4fcb Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Thu, 22 Jul 2021 16:57:40 -0400 Subject: [PATCH 19/21] inline everything --- src/ser/mod.rs | 2 ++ src/ser/raw/document_serializer.rs | 38 ++++++++++++++++++++++++++++++ src/ser/raw/mod.rs | 32 +++++++++++++++++++++++++ src/ser/raw/value_serializer.rs | 32 +++++++++++++++++++++++++ 4 files changed, 104 insertions(+) diff --git a/src/ser/mod.rs b/src/ser/mod.rs index 6ece46ca..34904365 100644 --- a/src/ser/mod.rs +++ b/src/ser/mod.rs @@ -218,6 +218,7 @@ where } /// Serialize the given `T` as BSON bytes into the provided writer. +#[inline] pub fn to_writer(value: &T, mut writer: W) -> Result<()> where T: Serialize, @@ -230,6 +231,7 @@ where } /// Serialize the given `T` as a BSON byte vector. +#[inline] pub fn to_vec(value: &T) -> Result> where T: Serialize, diff --git a/src/ser/raw/document_serializer.rs b/src/ser/raw/document_serializer.rs index 7350ba27..ba2447f0 100644 --- a/src/ser/raw/document_serializer.rs +++ b/src/ser/raw/document_serializer.rs @@ -58,6 +58,7 @@ impl<'a> serde::ser::SerializeSeq for DocumentSerializer<'a> { type Ok = (); type Error = Error; + #[inline] fn serialize_element(&mut self, value: &T) -> Result<()> where T: serde::Serialize, @@ -66,6 +67,7 @@ impl<'a> serde::ser::SerializeSeq for DocumentSerializer<'a> { value.serialize(&mut *self.root_serializer) } + #[inline] fn end(self) -> Result { self.end_doc().map(|_| ()) } @@ -76,6 +78,7 @@ impl<'a> serde::ser::SerializeMap for DocumentSerializer<'a> { type Error = Error; + #[inline] fn serialize_key(&mut self, key: &T) -> Result<()> where T: serde::Serialize, @@ -83,6 +86,7 @@ impl<'a> serde::ser::SerializeMap for DocumentSerializer<'a> { self.serialize_doc_key(key) } + #[inline] fn serialize_value(&mut self, value: &T) -> Result<()> where T: serde::Serialize, @@ -100,6 +104,7 @@ impl<'a> serde::ser::SerializeStruct for DocumentSerializer<'a> { type Error = Error; + #[inline] fn serialize_field(&mut self, key: &'static str, value: &T) -> Result<()> where T: serde::Serialize, @@ -108,6 +113,7 @@ impl<'a> serde::ser::SerializeStruct for DocumentSerializer<'a> { value.serialize(&mut *self.root_serializer) } + #[inline] fn end(self) -> Result { self.end_doc().map(|_| ()) } @@ -118,6 +124,7 @@ impl<'a> serde::ser::SerializeTuple for DocumentSerializer<'a> { type Error = Error; + #[inline] fn serialize_element(&mut self, value: &T) -> Result<()> where T: serde::Serialize, @@ -126,6 +133,7 @@ impl<'a> serde::ser::SerializeTuple for DocumentSerializer<'a> { value.serialize(&mut *self.root_serializer) } + #[inline] fn end(self) -> Result { self.end_doc().map(|_| ()) } @@ -136,6 +144,7 @@ impl<'a> serde::ser::SerializeTupleStruct for DocumentSerializer<'a> { type Error = Error; + #[inline] fn serialize_field(&mut self, value: &T) -> Result<()> where T: serde::Serialize, @@ -144,6 +153,7 @@ impl<'a> serde::ser::SerializeTupleStruct for DocumentSerializer<'a> { value.serialize(&mut *self.root_serializer) } + #[inline] fn end(self) -> Result { self.end_doc().map(|_| ()) } @@ -173,66 +183,82 @@ impl<'a> serde::Serializer for KeySerializer<'a> { type SerializeStruct = Impossible<(), Error>; type SerializeStructVariant = Impossible<(), Error>; + #[inline] fn serialize_bool(self, v: bool) -> Result { Err(Self::invalid_key(v)) } + #[inline] fn serialize_i8(self, v: i8) -> Result { Err(Self::invalid_key(v)) } + #[inline] fn serialize_i16(self, v: i16) -> Result { Err(Self::invalid_key(v)) } + #[inline] fn serialize_i32(self, v: i32) -> Result { Err(Self::invalid_key(v)) } + #[inline] fn serialize_i64(self, v: i64) -> Result { Err(Self::invalid_key(v)) } + #[inline] fn serialize_u8(self, v: u8) -> Result { Err(Self::invalid_key(v)) } + #[inline] fn serialize_u16(self, v: u16) -> Result { Err(Self::invalid_key(v)) } + #[inline] fn serialize_u32(self, v: u32) -> Result { Err(Self::invalid_key(v)) } + #[inline] fn serialize_u64(self, v: u64) -> Result { Err(Self::invalid_key(v)) } + #[inline] fn serialize_f32(self, v: f32) -> Result { Err(Self::invalid_key(v)) } + #[inline] fn serialize_f64(self, v: f64) -> Result { Err(Self::invalid_key(v)) } + #[inline] fn serialize_char(self, v: char) -> Result { Err(Self::invalid_key(v)) } + #[inline] fn serialize_str(self, v: &str) -> Result { write_cstring(&mut self.root_serializer.bytes, v) } + #[inline] fn serialize_bytes(self, v: &[u8]) -> Result { Err(Self::invalid_key(v)) } + #[inline] fn serialize_none(self) -> Result { Err(Self::invalid_key(Bson::Null)) } + #[inline] fn serialize_some(self, value: &T) -> Result where T: Serialize, @@ -240,14 +266,17 @@ impl<'a> serde::Serializer for KeySerializer<'a> { value.serialize(self) } + #[inline] fn serialize_unit(self) -> Result { Err(Self::invalid_key(Bson::Null)) } + #[inline] fn serialize_unit_struct(self, _name: &'static str) -> Result { Err(Self::invalid_key(Bson::Null)) } + #[inline] fn serialize_unit_variant( self, _name: &'static str, @@ -257,6 +286,7 @@ impl<'a> serde::Serializer for KeySerializer<'a> { self.serialize_str(variant) } + #[inline] fn serialize_newtype_struct(self, _name: &'static str, value: &T) -> Result where T: Serialize, @@ -264,6 +294,7 @@ impl<'a> serde::Serializer for KeySerializer<'a> { value.serialize(self) } + #[inline] fn serialize_newtype_variant( self, _name: &'static str, @@ -277,14 +308,17 @@ impl<'a> serde::Serializer for KeySerializer<'a> { Err(Self::invalid_key(value)) } + #[inline] fn serialize_seq(self, _len: Option) -> Result { Err(Self::invalid_key(Bson::Array(vec![]))) } + #[inline] fn serialize_tuple(self, _len: usize) -> Result { Err(Self::invalid_key(Bson::Array(vec![]))) } + #[inline] fn serialize_tuple_struct( self, _name: &'static str, @@ -293,6 +327,7 @@ impl<'a> serde::Serializer for KeySerializer<'a> { Err(Self::invalid_key(Bson::Document(doc! {}))) } + #[inline] fn serialize_tuple_variant( self, _name: &'static str, @@ -303,14 +338,17 @@ impl<'a> serde::Serializer for KeySerializer<'a> { Err(Self::invalid_key(Bson::Array(vec![]))) } + #[inline] fn serialize_map(self, _len: Option) -> Result { Err(Self::invalid_key(Bson::Document(doc! {}))) } + #[inline] fn serialize_struct(self, _name: &'static str, _len: usize) -> Result { Err(Self::invalid_key(Bson::Document(doc! {}))) } + #[inline] fn serialize_struct_variant( self, _name: &'static str, diff --git a/src/ser/raw/mod.rs b/src/ser/raw/mod.rs index 095de874..e38d5ac2 100644 --- a/src/ser/raw/mod.rs +++ b/src/ser/raw/mod.rs @@ -39,12 +39,14 @@ impl Serializer { } /// Reserve a spot for the element type to be set retroactively via `update_element_type`. + #[inline] fn reserve_element_type(&mut self) { self.type_index = self.bytes.len(); // record index self.bytes.push(0); // push temporary placeholder } /// Retroactively set the element type of the most recently serialized element. + #[inline] fn update_element_type(&mut self, t: ElementType) -> Result<()> { if self.type_index == 0 { if matches!(t, ElementType::EmbeddedDocument) { @@ -63,6 +65,7 @@ impl Serializer { } /// Replace an i32 value at the given index with the given value. + #[inline] fn replace_i32(&mut self, at: usize, with: i32) { self.bytes .splice(at..at + 4, with.to_le_bytes().iter().cloned()); @@ -81,32 +84,38 @@ impl<'a> serde::Serializer for &'a mut Serializer { type SerializeStruct = StructSerializer<'a>; type SerializeStructVariant = VariantSerializer<'a>; + #[inline] fn serialize_bool(self, v: bool) -> Result { self.update_element_type(ElementType::Boolean)?; self.bytes.push(if v { 1 } else { 0 }); Ok(()) } + #[inline] fn serialize_i8(self, v: i8) -> Result { self.serialize_i32(v.into()) } + #[inline] fn serialize_i16(self, v: i16) -> Result { self.serialize_i32(v.into()) } + #[inline] fn serialize_i32(self, v: i32) -> Result { self.update_element_type(ElementType::Int32)?; write_i32(&mut self.bytes, v)?; Ok(()) } + #[inline] fn serialize_i64(self, v: i64) -> Result { self.update_element_type(ElementType::Int64)?; write_i64(&mut self.bytes, v)?; Ok(()) } + #[inline] fn serialize_u8(self, v: u8) -> Result { #[cfg(feature = "u2i")] { @@ -117,6 +126,7 @@ impl<'a> serde::Serializer for &'a mut Serializer { Err(Error::UnsupportedUnsignedInteger(v.into())) } + #[inline] fn serialize_u16(self, v: u16) -> Result { #[cfg(feature = "u2i")] { @@ -127,6 +137,7 @@ impl<'a> serde::Serializer for &'a mut Serializer { Err(Error::UnsupportedUnsignedInteger(v.into())) } + #[inline] fn serialize_u32(self, v: u32) -> Result { #[cfg(feature = "u2i")] { @@ -137,6 +148,7 @@ impl<'a> serde::Serializer for &'a mut Serializer { Err(Error::UnsupportedUnsignedInteger(v.into())) } + #[inline] fn serialize_u64(self, v: u64) -> Result { #[cfg(feature = "u2i")] { @@ -152,37 +164,44 @@ impl<'a> serde::Serializer for &'a mut Serializer { Err(Error::UnsupportedUnsignedInteger(v)) } + #[inline] fn serialize_f32(self, v: f32) -> Result { self.serialize_f64(v.into()) } + #[inline] fn serialize_f64(self, v: f64) -> Result { self.update_element_type(ElementType::Double)?; write_f64(&mut self.bytes, v) } + #[inline] fn serialize_char(self, v: char) -> Result { let mut s = String::new(); s.push(v); self.serialize_str(&s) } + #[inline] fn serialize_str(self, v: &str) -> Result { self.update_element_type(ElementType::String)?; write_string(&mut self.bytes, v) } + #[inline] fn serialize_bytes(self, v: &[u8]) -> Result { self.update_element_type(ElementType::Binary)?; write_binary(&mut self.bytes, v, BinarySubtype::Generic)?; Ok(()) } + #[inline] fn serialize_none(self) -> Result { self.update_element_type(ElementType::Null)?; Ok(()) } + #[inline] fn serialize_some(self, value: &T) -> Result where T: serde::Serialize, @@ -210,6 +229,7 @@ impl<'a> serde::Serializer for &'a mut Serializer { self.serialize_str(variant) } + #[inline] fn serialize_newtype_struct(self, _name: &'static str, value: &T) -> Result where T: serde::Serialize, @@ -217,6 +237,7 @@ impl<'a> serde::Serializer for &'a mut Serializer { value.serialize(self) } + #[inline] fn serialize_newtype_variant( self, _name: &'static str, @@ -266,11 +287,13 @@ impl<'a> serde::Serializer for &'a mut Serializer { VariantSerializer::start(&mut *self, variant, VariantInnerType::Tuple) } + #[inline] fn serialize_map(self, _len: Option) -> Result { self.update_element_type(ElementType::EmbeddedDocument)?; DocumentSerializer::start(&mut *self) } + #[inline] fn serialize_struct(self, name: &'static str, _len: usize) -> Result { let value_type = match name { "$oid" => Some(ValueType::ObjectId), @@ -300,6 +323,7 @@ impl<'a> serde::Serializer for &'a mut Serializer { } } + #[inline] fn serialize_struct_variant( self, _name: &'static str, @@ -324,6 +348,7 @@ impl<'a> SerializeStruct for StructSerializer<'a> { type Ok = (); type Error = Error; + #[inline] fn serialize_field(&mut self, key: &'static str, value: &T) -> Result<()> where T: Serialize, @@ -334,6 +359,7 @@ impl<'a> SerializeStruct for StructSerializer<'a> { } } + #[inline] fn end(self) -> Result { match self { StructSerializer::Document(d) => SerializeStruct::end(d), @@ -391,6 +417,7 @@ impl<'a> VariantSerializer<'a> { }) } + #[inline] fn serialize_element(&mut self, k: &str, v: &T) -> Result<()> where T: Serialize + ?Sized, @@ -403,6 +430,7 @@ impl<'a> VariantSerializer<'a> { Ok(()) } + #[inline] fn end_both(self) -> Result<()> { // null byte for the inner self.root_serializer.bytes.push(0); @@ -423,6 +451,7 @@ impl<'a> serde::ser::SerializeTupleVariant for VariantSerializer<'a> { type Error = Error; + #[inline] fn serialize_field(&mut self, value: &T) -> Result<()> where T: Serialize, @@ -430,6 +459,7 @@ impl<'a> serde::ser::SerializeTupleVariant for VariantSerializer<'a> { self.serialize_element(format!("{}", self.num_elements_serialized).as_str(), value) } + #[inline] fn end(self) -> Result { self.end_both() } @@ -440,6 +470,7 @@ impl<'a> serde::ser::SerializeStructVariant for VariantSerializer<'a> { type Error = Error; + #[inline] fn serialize_field(&mut self, key: &'static str, value: &T) -> Result<()> where T: Serialize, @@ -447,6 +478,7 @@ impl<'a> serde::ser::SerializeStructVariant for VariantSerializer<'a> { self.serialize_element(key, value) } + #[inline] fn end(self) -> Result { self.end_both() } diff --git a/src/ser/raw/value_serializer.rs b/src/ser/raw/value_serializer.rs index d410bb44..ace276b1 100644 --- a/src/ser/raw/value_serializer.rs +++ b/src/ser/raw/value_serializer.rs @@ -147,22 +147,27 @@ impl<'a, 'b> serde::Serializer for &'b mut ValueSerializer<'a> { type SerializeStruct = Self; type SerializeStructVariant = Impossible<(), Error>; + #[inline] fn serialize_bool(self, _v: bool) -> Result { Err(self.invalid_step("bool")) } + #[inline] fn serialize_i8(self, _v: i8) -> Result { Err(self.invalid_step("i8")) } + #[inline] fn serialize_i16(self, _v: i16) -> Result { Err(self.invalid_step("i16")) } + #[inline] fn serialize_i32(self, _v: i32) -> Result { Err(self.invalid_step("i32")) } + #[inline] fn serialize_i64(self, v: i64) -> Result { match self.state { SerializationStep::TimestampTime => { @@ -181,30 +186,37 @@ impl<'a, 'b> serde::Serializer for &'b mut ValueSerializer<'a> { } } + #[inline] fn serialize_u8(self, _v: u8) -> Result { Err(self.invalid_step("u8")) } + #[inline] fn serialize_u16(self, _v: u16) -> Result { Err(self.invalid_step("u16")) } + #[inline] fn serialize_u32(self, _v: u32) -> Result { Err(self.invalid_step("u32")) } + #[inline] fn serialize_u64(self, _v: u64) -> Result { Err(self.invalid_step("u64")) } + #[inline] fn serialize_f32(self, _v: f32) -> Result { Err(self.invalid_step("f32")) } + #[inline] fn serialize_f64(self, _v: f64) -> Result { Err(self.invalid_step("f64")) } + #[inline] fn serialize_char(self, _v: char) -> Result { Err(self.invalid_step("char")) } @@ -261,6 +273,7 @@ impl<'a, 'b> serde::Serializer for &'b mut ValueSerializer<'a> { Ok(()) } + #[inline] fn serialize_bytes(self, v: &[u8]) -> Result { match self.state { SerializationStep::Decimal128Value => { @@ -271,10 +284,12 @@ impl<'a, 'b> serde::Serializer for &'b mut ValueSerializer<'a> { } } + #[inline] fn serialize_none(self) -> Result { Err(self.invalid_step("none")) } + #[inline] fn serialize_some(self, _value: &T) -> Result where T: Serialize, @@ -282,14 +297,17 @@ impl<'a, 'b> serde::Serializer for &'b mut ValueSerializer<'a> { Err(self.invalid_step("some")) } + #[inline] fn serialize_unit(self) -> Result { Err(self.invalid_step("unit")) } + #[inline] fn serialize_unit_struct(self, _name: &'static str) -> Result { Err(self.invalid_step("unit_struct")) } + #[inline] fn serialize_unit_variant( self, _name: &'static str, @@ -299,6 +317,7 @@ impl<'a, 'b> serde::Serializer for &'b mut ValueSerializer<'a> { Err(self.invalid_step("unit_variant")) } + #[inline] fn serialize_newtype_struct( self, _name: &'static str, @@ -310,6 +329,7 @@ impl<'a, 'b> serde::Serializer for &'b mut ValueSerializer<'a> { Err(self.invalid_step("newtype_struct")) } + #[inline] fn serialize_newtype_variant( self, _name: &'static str, @@ -323,14 +343,17 @@ impl<'a, 'b> serde::Serializer for &'b mut ValueSerializer<'a> { Err(self.invalid_step("newtype_variant")) } + #[inline] fn serialize_seq(self, _len: Option) -> Result { Err(self.invalid_step("newtype_seq")) } + #[inline] fn serialize_tuple(self, _len: usize) -> Result { Err(self.invalid_step("newtype_tuple")) } + #[inline] fn serialize_tuple_struct( self, _name: &'static str, @@ -339,6 +362,7 @@ impl<'a, 'b> serde::Serializer for &'b mut ValueSerializer<'a> { Err(self.invalid_step("tuple_struct")) } + #[inline] fn serialize_tuple_variant( self, _name: &'static str, @@ -349,6 +373,7 @@ impl<'a, 'b> serde::Serializer for &'b mut ValueSerializer<'a> { Err(self.invalid_step("tuple_variant")) } + #[inline] fn serialize_map(self, _len: Option) -> Result { match self.state { SerializationStep::CodeWithScopeScope { ref code } => { @@ -358,10 +383,12 @@ impl<'a, 'b> serde::Serializer for &'b mut ValueSerializer<'a> { } } + #[inline] fn serialize_struct(self, _name: &'static str, _len: usize) -> Result { Ok(self) } + #[inline] fn serialize_struct_variant( self, _name: &'static str, @@ -493,6 +520,7 @@ impl<'a, 'b> SerializeStruct for &'b mut ValueSerializer<'a> { Ok(()) } + #[inline] fn end(self) -> Result { Ok(()) } @@ -504,6 +532,7 @@ pub(crate) struct CodeWithScopeSerializer<'a> { } impl<'a> CodeWithScopeSerializer<'a> { + #[inline] fn start(code: &str, rs: &'a mut Serializer) -> Result { let start = rs.bytes.len(); write_i32(&mut rs.bytes, 0)?; // placeholder length @@ -518,6 +547,7 @@ impl<'a> SerializeMap for CodeWithScopeSerializer<'a> { type Ok = (); type Error = Error; + #[inline] fn serialize_key(&mut self, key: &T) -> Result<()> where T: Serialize, @@ -525,6 +555,7 @@ impl<'a> SerializeMap for CodeWithScopeSerializer<'a> { self.doc.serialize_key(key) } + #[inline] fn serialize_value(&mut self, value: &T) -> Result<()> where T: Serialize, @@ -532,6 +563,7 @@ impl<'a> SerializeMap for CodeWithScopeSerializer<'a> { self.doc.serialize_value(value) } + #[inline] fn end(self) -> Result { let result = self.doc.end_doc()?; From a55c519178a7fd3ae53ff5579c7dfcfa3300e85d Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Thu, 22 Jul 2021 17:26:31 -0400 Subject: [PATCH 20/21] fix fmt in serde-tests --- serde-tests/test.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/serde-tests/test.rs b/serde-tests/test.rs index 43693fd4..9386b370 100644 --- a/serde-tests/test.rs +++ b/serde-tests/test.rs @@ -53,7 +53,11 @@ where let mut expected_bytes_from_doc_serde = Vec::new(); bson::to_writer(&expected_doc, &mut expected_bytes_from_doc_serde).expect(description); - assert_eq!(expected_bytes_from_doc_serde, expected_bytes, "{}", description); + assert_eq!( + expected_bytes_from_doc_serde, expected_bytes, + "{}", + description + ); let serialized_doc = bson::to_document(&expected_value).expect(description); assert_eq!(&serialized_doc, expected_doc, "{}", description); @@ -906,7 +910,7 @@ fn u2i() { #[derive(Serialize, Debug)] struct TooBig { - u_64: u64 + u_64: u64, } let v = TooBig { u_64: i64::MAX as u64 + 1, From f5125906d0b979584feb5dbfbda961e8ee4ca536 Mon Sep 17 00:00:00 2001 From: Patrick Freed Date: Mon, 26 Jul 2021 19:14:19 -0400 Subject: [PATCH 21/21] remove bson::to_writer --- serde-tests/test.rs | 9 ++++----- src/lib.rs | 2 +- src/ser/mod.rs | 13 ------------- src/tests/spec/corpus.rs | 7 +++---- 4 files changed, 8 insertions(+), 23 deletions(-) diff --git a/serde-tests/test.rs b/serde-tests/test.rs index 9386b370..6e46dddd 100644 --- a/serde-tests/test.rs +++ b/serde-tests/test.rs @@ -14,6 +14,8 @@ use std::{ collections::{BTreeMap, HashSet}, }; +#[cfg(feature = "decimal128")] +use bson::Decimal128; use bson::{ doc, oid::ObjectId, @@ -21,7 +23,6 @@ use bson::{ Binary, Bson, DateTime, - Decimal128, Deserializer, Document, JavaScriptCodeWithScope, @@ -47,12 +48,10 @@ where .to_writer(&mut expected_bytes) .expect(description); - let mut expected_bytes_serde = Vec::new(); - bson::to_writer(&expected_value, &mut expected_bytes_serde).expect(description); + let expected_bytes_serde = bson::to_vec(&expected_value).expect(description); assert_eq!(expected_bytes_serde, expected_bytes, "{}", description); - let mut expected_bytes_from_doc_serde = Vec::new(); - bson::to_writer(&expected_doc, &mut expected_bytes_from_doc_serde).expect(description); + let expected_bytes_from_doc_serde = bson::to_vec(&expected_doc).expect(description); assert_eq!( expected_bytes_from_doc_serde, expected_bytes, "{}", diff --git a/src/lib.rs b/src/lib.rs index 86466c37..a0ae75ba 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -198,7 +198,7 @@ pub use self::{ Deserializer, }, decimal128::Decimal128, - ser::{to_bson, to_document, to_vec, to_writer, Serializer}, + ser::{to_bson, to_document, to_vec, Serializer}, }; #[macro_use] diff --git a/src/ser/mod.rs b/src/ser/mod.rs index 34904365..f18dd155 100644 --- a/src/ser/mod.rs +++ b/src/ser/mod.rs @@ -217,19 +217,6 @@ where } } -/// Serialize the given `T` as BSON bytes into the provided writer. -#[inline] -pub fn to_writer(value: &T, mut writer: W) -> Result<()> -where - T: Serialize, - W: Write, -{ - let mut serializer = raw::Serializer::new(); - value.serialize(&mut serializer)?; - writer.write_all(serializer.into_vec().as_slice())?; - Ok(()) -} - /// Serialize the given `T` as a BSON byte vector. #[inline] pub fn to_vec(value: &T) -> Result> diff --git a/src/tests/spec/corpus.rs b/src/tests/spec/corpus.rs index 3291da00..4623f5dc 100644 --- a/src/tests/spec/corpus.rs +++ b/src/tests/spec/corpus.rs @@ -95,9 +95,8 @@ fn run_test(test: TestFile) { .to_writer(&mut documenttowriter_todocument_documentfromreader_cb) .expect(&description); - let mut towriter_documentfromreader_cb = Vec::new(); - crate::to_writer(&documentfromreader_cb, &mut towriter_documentfromreader_cb) - .expect(&description); + let tovec_documentfromreader_cb = + crate::to_vec(&documentfromreader_cb).expect(&description); // native_to_bson( bson_to_native(cB) ) = cB @@ -132,7 +131,7 @@ fn run_test(test: TestFile) { ); assert_eq!( - hex::encode(towriter_documentfromreader_cb).to_lowercase(), + hex::encode(tovec_documentfromreader_cb).to_lowercase(), valid.canonical_bson.to_lowercase(), "{}", description,