diff --git a/Cargo.toml b/Cargo.toml index b653b668..414ec8d9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -55,8 +55,10 @@ serde_bytes = "0.11.5" [dev-dependencies] assert_matches = "1.2" -serde_bytes = "0.11" +criterion = "0.3.0" pretty_assertions = "0.6.1" +proptest = "1.0.0" +serde_bytes = "0.11" chrono = { version = "0.4", features = ["serde"] } [package.metadata.docs.rs] diff --git a/src/bson.rs b/src/bson.rs index 6582bb7b..e52ef744 100644 --- a/src/bson.rs +++ b/src/bson.rs @@ -730,13 +730,10 @@ impl Bson { if let Ok(regex) = doc.get_document("$regularExpression") { if let Ok(pattern) = regex.get_str("pattern") { if let Ok(options) = regex.get_str("options") { - let mut options: Vec<_> = options.chars().collect(); - options.sort_unstable(); - - return Bson::RegularExpression(Regex { - pattern: pattern.into(), - options: options.into_iter().collect(), - }); + return Bson::RegularExpression(Regex::new( + pattern.into(), + options.into(), + )); } } } @@ -1014,6 +1011,15 @@ pub struct Regex { pub options: String, } +impl Regex { + pub(crate) fn new(pattern: String, options: String) -> Self { + let mut chars: Vec<_> = options.chars().collect(); + chars.sort_unstable(); + let options: String = chars.into_iter().collect(); + Self { pattern, options } + } +} + impl Display for Regex { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { write!(fmt, "/{}/{}", self.pattern, self.options) diff --git a/src/de/mod.rs b/src/de/mod.rs index 8bb977a2..8a20a342 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -108,7 +108,7 @@ pub(crate) fn read_string(reader: &mut R, utf8_lossy: bool) -> Ok(s) } -fn read_bool(mut reader: R) -> Result { +pub(crate) fn read_bool(mut reader: R) -> Result { let val = read_u8(&mut reader)?; if val > 1 { return Err(Error::invalid_value( diff --git a/src/de/raw.rs b/src/de/raw.rs index 9fafda5c..c48ba3c7 100644 --- a/src/de/raw.rs +++ b/src/de/raw.rs @@ -625,7 +625,7 @@ impl<'de> serde::de::MapAccess<'de> for Decimal128Access { where V: 
serde::de::DeserializeSeed<'de>, { - seed.deserialize(Decimal128Deserializer(self.decimal.clone())) + seed.deserialize(Decimal128Deserializer(self.decimal)) } } diff --git a/src/decimal128.rs b/src/decimal128.rs index e4f343c0..c217bb5d 100644 --- a/src/decimal128.rs +++ b/src/decimal128.rs @@ -6,7 +6,7 @@ use std::fmt; /// /// Currently, this type can only be used to round-trip through BSON. See /// [RUST-36](https://jira.mongodb.org/browse/RUST-36) to track the progress towards a complete implementation. -#[derive(Clone, PartialEq)] +#[derive(Copy, Clone, PartialEq)] pub struct Decimal128 { /// BSON bytes containing the decimal128. Stored for round tripping. pub(crate) bytes: [u8; 128 / 8], diff --git a/src/extjson/models.rs b/src/extjson/models.rs index 41851f21..0b98f0f3 100644 --- a/src/extjson/models.rs +++ b/src/extjson/models.rs @@ -116,14 +116,7 @@ pub(crate) struct RegexBody { impl Regex { pub(crate) fn parse(self) -> crate::Regex { - let mut chars: Vec<_> = self.body.options.chars().collect(); - chars.sort_unstable(); - let options: String = chars.into_iter().collect(); - - crate::Regex { - pattern: self.body.pattern, - options, - } + crate::Regex::new(self.body.pattern, self.body.options) } } diff --git a/src/lib.rs b/src/lib.rs index 6795bef0..d9379dca 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -280,6 +280,7 @@ pub use self::{ Deserializer, }, decimal128::Decimal128, + raw::{RawDocument, RawDocumentBuf, RawArray}, ser::{to_bson, to_document, to_vec, Serializer}, uuid::{Uuid, UuidRepresentation}, }; @@ -293,6 +294,7 @@ pub mod decimal128; pub mod document; pub mod extjson; pub mod oid; +pub mod raw; pub mod ser; pub mod serde_helpers; pub mod spec; diff --git a/src/raw/array.rs b/src/raw/array.rs new file mode 100644 index 00000000..684a4a4c --- /dev/null +++ b/src/raw/array.rs @@ -0,0 +1,242 @@ +use std::convert::TryFrom; + +use super::{ + error::{ValueAccessError, ValueAccessErrorKind, ValueAccessResult}, + Error, + Iter, + RawBinary, + RawBson, + 
RawDocument, + RawRegex, + Result, +}; +use crate::{oid::ObjectId, spec::ElementType, Bson, DateTime, Timestamp}; + +/// A slice of a BSON document containing a BSON array value (akin to [`std::str`]). This can be +/// retrieved from a [`RawDocument`] via [`RawDocument::get`]. +/// +/// This is an _unsized_ type, meaning that it must always be used behind a pointer like `&`. +/// +/// Accessing elements within a [`RawArray`] is similar to element access in [`crate::Document`], +/// but because the contents are parsed during iteration instead of at creation time, format errors +/// can happen at any time during use. +/// +/// Iterating over a [`RawArray`] yields either an error or a value that borrows from the +/// original document without making any additional allocations. +/// +/// ``` +/// use bson::{doc, raw::RawDocument}; +/// +/// let doc = doc! { +/// "x": [1, true, "two", 5.5] +/// }; +/// let bytes = bson::to_vec(&doc)?; +/// +/// let rawdoc = RawDocument::new(bytes.as_slice())?; +/// let rawarray = rawdoc.get_array("x")?; +/// +/// for v in rawarray { +/// println!("{:?}", v?); +/// } +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +/// +/// Individual elements can be accessed using [`RawArray::get`] or any of +/// the type-specific getters, such as [`RawArray::get_object_id`] or +/// [`RawArray::get_str`]. Note that accessing elements is an O(N) operation, as it +/// requires iterating through the array from the beginning to find the requested index. +/// +/// ``` +/// # use bson::raw::{ValueAccessError}; +/// use bson::{doc, raw::RawDocument}; +/// +/// let doc = doc! 
{
+///     "x": [1, true, "two", 5.5]
+/// };
+/// let bytes = bson::to_vec(&doc)?;
+///
+/// let rawdoc = RawDocument::new(bytes.as_slice())?;
+/// let rawarray = rawdoc.get_array("x")?;
+///
+/// assert_eq!(rawarray.get_bool(1)?, true);
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+#[derive(PartialEq)]
+#[repr(transparent)]
+pub struct RawArray {
+    pub(crate) doc: RawDocument,
+}
+
+impl RawArray {
+    /// Reinterprets a borrowed [`RawDocument`] as a [`RawArray`] without copying any bytes.
+    pub(crate) fn from_doc(doc: &RawDocument) -> &RawArray {
+        // SAFETY:
+        //
+        // Dereferencing a raw pointer requires unsafe due to the potential that the pointer is
+        // null, dangling, or misaligned. We know the pointer is not null or dangling due to the
+        // fact that it's created by a safe reference. Converting &RawDocument to *const
+        // RawDocument will be properly aligned due to them being references to the same type,
+        // and converting *const RawDocument to *const RawArray is aligned due to the fact that
+        // RawArray is `#[repr(transparent)]` over its only field, a RawDocument, meaning the
+        // structs are represented identically at the byte level.
+        unsafe { &*(doc as *const RawDocument as *const RawArray) }
+    }
+
+    /// Gets a reference to the value at the given index.
+    ///
+    /// Returns `Ok(None)` when `index` is past the last element and `Err` when the element at
+    /// `index` fails to parse.
+    pub fn get(&self, index: usize) -> Result<Option<RawBson<'_>>> {
+        self.into_iter().nth(index).transpose()
+    }
+
+    /// Looks up the element at `index` and converts it with `f`.
+    ///
+    /// The error kind is `InvalidBson` when the lookup itself fails, `NotPresent` when there is
+    /// no element at `index`, and `UnexpectedType` (carrying `expected_type` and the element's
+    /// actual type) when `f` returns `None`. The index, rendered as a string, is used as the
+    /// error key.
+    fn get_with<'a, T>(
+        &'a self,
+        index: usize,
+        expected_type: ElementType,
+        f: impl FnOnce(RawBson<'a>) -> Option<T>,
+    ) -> ValueAccessResult<T> {
+        let bson = self
+            .get(index)
+            .map_err(|e| ValueAccessError {
+                key: index.to_string(),
+                kind: ValueAccessErrorKind::InvalidBson(e),
+            })?
+            // Built lazily so a successful lookup does not allocate the key string.
+            .ok_or_else(|| ValueAccessError {
+                key: index.to_string(),
+                kind: ValueAccessErrorKind::NotPresent,
+            })?;
+        match f(bson) {
+            Some(t) => Ok(t),
+            None => Err(ValueAccessError {
+                key: index.to_string(),
+                kind: ValueAccessErrorKind::UnexpectedType {
+                    expected: expected_type,
+                    actual: bson.element_type(),
+                },
+            }),
+        }
+    }
+
+    /// Gets the BSON double at the given index or returns an error if the value at that index isn't
+    /// a double.
+ pub fn get_f64(&self, index: usize) -> ValueAccessResult { + self.get_with(index, ElementType::Double, RawBson::as_f64) + } + + /// Gets a reference to the string at the given index or returns an error if the + /// value at that index isn't a string. + pub fn get_str(&self, index: usize) -> ValueAccessResult<&str> { + self.get_with(index, ElementType::String, RawBson::as_str) + } + + /// Gets a reference to the document at the given index or returns an error if the + /// value at that index isn't a document. + pub fn get_document(&self, index: usize) -> ValueAccessResult<&RawDocument> { + self.get_with(index, ElementType::EmbeddedDocument, RawBson::as_document) + } + + /// Gets a reference to the array at the given index or returns an error if the + /// value at that index isn't an array. + pub fn get_array(&self, index: usize) -> ValueAccessResult<&RawArray> { + self.get_with(index, ElementType::Array, RawBson::as_array) + } + + /// Gets a reference to the BSON binary value at the given index or returns an error if the + /// value at that index isn't a binary. + pub fn get_binary(&self, index: usize) -> ValueAccessResult> { + self.get_with(index, ElementType::Binary, RawBson::as_binary) + } + + /// Gets the ObjectId at the given index or returns an error if the value at that index isn't an + /// ObjectId. + pub fn get_object_id(&self, index: usize) -> ValueAccessResult { + self.get_with(index, ElementType::ObjectId, RawBson::as_object_id) + } + + /// Gets the boolean at the given index or returns an error if the value at that index isn't a + /// boolean. + pub fn get_bool(&self, index: usize) -> ValueAccessResult { + self.get_with(index, ElementType::Boolean, RawBson::as_bool) + } + + /// Gets the DateTime at the given index or returns an error if the value at that index isn't a + /// DateTime. 
+ pub fn get_datetime(&self, index: usize) -> ValueAccessResult { + self.get_with(index, ElementType::DateTime, RawBson::as_datetime) + } + + /// Gets a reference to the BSON regex at the given index or returns an error if the + /// value at that index isn't a regex. + pub fn get_regex(&self, index: usize) -> ValueAccessResult> { + self.get_with(index, ElementType::RegularExpression, RawBson::as_regex) + } + + /// Gets a reference to the BSON timestamp at the given index or returns an error if the + /// value at that index isn't a timestamp. + pub fn get_timestamp(&self, index: usize) -> ValueAccessResult { + self.get_with(index, ElementType::Timestamp, RawBson::as_timestamp) + } + + /// Gets the BSON int32 at the given index or returns an error if the value at that index isn't + /// a 32-bit integer. + pub fn get_i32(&self, index: usize) -> ValueAccessResult { + self.get_with(index, ElementType::Int32, RawBson::as_i32) + } + + /// Gets BSON int64 at the given index or returns an error if the value at that index isn't a + /// 64-bit integer. + pub fn get_i64(&self, index: usize) -> ValueAccessResult { + self.get_with(index, ElementType::Int64, RawBson::as_i64) + } + + /// Gets a reference to the raw bytes of the [`RawArray`]. + pub fn as_bytes(&self) -> &[u8] { + self.doc.as_bytes() + } +} + +impl std::fmt::Debug for RawArray { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("RawArray") + .field("data", &hex::encode(self.doc.as_bytes())) + .finish() + } +} + +impl TryFrom<&RawArray> for Vec { + type Error = Error; + + fn try_from(arr: &RawArray) -> Result> { + arr.into_iter() + .map(|result| { + let rawbson = result?; + Bson::try_from(rawbson) + }) + .collect() + } +} + +impl<'a> IntoIterator for &'a RawArray { + type IntoIter = RawArrayIter<'a>; + type Item = Result>; + + fn into_iter(self) -> RawArrayIter<'a> { + RawArrayIter { + inner: self.doc.into_iter(), + } + } +} + +/// An iterator over borrowed raw BSON array values. 
+pub struct RawArrayIter<'a> { + inner: Iter<'a>, +} + +impl<'a> Iterator for RawArrayIter<'a> { + type Item = Result>; + + fn next(&mut self) -> Option>> { + match self.inner.next() { + Some(Ok((_, v))) => Some(Ok(v)), + Some(Err(e)) => Some(Err(e)), + None => None, + } + } +} diff --git a/src/raw/bson.rs b/src/raw/bson.rs new file mode 100644 index 00000000..05ae4e19 --- /dev/null +++ b/src/raw/bson.rs @@ -0,0 +1,364 @@ +use std::convert::{TryFrom, TryInto}; + +use super::{Error, RawArray, RawDocument, Result}; +use crate::{ + oid::{self, ObjectId}, + spec::{BinarySubtype, ElementType}, + Bson, + DbPointer, + Decimal128, + Timestamp, +}; + +/// A BSON value referencing raw bytes stored elsewhere. +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum RawBson<'a> { + /// 64-bit binary floating point + Double(f64), + /// UTF-8 string + String(&'a str), + /// Array + Array(&'a RawArray), + /// Embedded document + Document(&'a RawDocument), + /// Boolean value + Boolean(bool), + /// Null value + Null, + /// Regular expression + RegularExpression(RawRegex<'a>), + /// JavaScript code + JavaScriptCode(&'a str), + /// JavaScript code w/ scope + JavaScriptCodeWithScope(RawJavaScriptCodeWithScope<'a>), + /// 32-bit signed integer + Int32(i32), + /// 64-bit signed integer + Int64(i64), + /// Timestamp + Timestamp(Timestamp), + /// Binary data + Binary(RawBinary<'a>), + /// [ObjectId](http://dochub.mongodb.org/core/objectids) + ObjectId(oid::ObjectId), + /// UTC datetime + DateTime(crate::DateTime), + /// Symbol (Deprecated) + Symbol(&'a str), + /// [128-bit decimal floating point](https://github.com/mongodb/specifications/blob/master/source/bson-decimal128/decimal128.rst) + Decimal128(Decimal128), + /// Undefined value (Deprecated) + Undefined, + /// Max key + MaxKey, + /// Min key + MinKey, + /// DBPointer (Deprecated) + DbPointer(RawDbPointer<'a>), +} + +impl<'a> RawBson<'a> { + /// Get the [`ElementType`] of this value. 
+ pub fn element_type(&self) -> ElementType { + match *self { + RawBson::Double(..) => ElementType::Double, + RawBson::String(..) => ElementType::String, + RawBson::Array(..) => ElementType::Array, + RawBson::Document(..) => ElementType::EmbeddedDocument, + RawBson::Boolean(..) => ElementType::Boolean, + RawBson::Null => ElementType::Null, + RawBson::RegularExpression(..) => ElementType::RegularExpression, + RawBson::JavaScriptCode(..) => ElementType::JavaScriptCode, + RawBson::JavaScriptCodeWithScope(..) => ElementType::JavaScriptCodeWithScope, + RawBson::Int32(..) => ElementType::Int32, + RawBson::Int64(..) => ElementType::Int64, + RawBson::Timestamp(..) => ElementType::Timestamp, + RawBson::Binary(..) => ElementType::Binary, + RawBson::ObjectId(..) => ElementType::ObjectId, + RawBson::DateTime(..) => ElementType::DateTime, + RawBson::Symbol(..) => ElementType::Symbol, + RawBson::Decimal128(..) => ElementType::Decimal128, + RawBson::Undefined => ElementType::Undefined, + RawBson::MaxKey => ElementType::MaxKey, + RawBson::MinKey => ElementType::MinKey, + RawBson::DbPointer(..) => ElementType::DbPointer, + } + } + + /// Gets the `f64` that's referenced or returns `None` if the referenced value isn't a BSON + /// double. + pub fn as_f64(self) -> Option { + match self { + RawBson::Double(d) => Some(d), + _ => None, + } + } + + /// Gets the `&str` that's referenced or returns `None` if the referenced value isn't a BSON + /// String. + pub fn as_str(self) -> Option<&'a str> { + match self { + RawBson::String(s) => Some(s), + _ => None, + } + } + + /// Gets the [`RawArray`] that's referenced or returns `None` if the referenced value + /// isn't a BSON array. + pub fn as_array(self) -> Option<&'a RawArray> { + match self { + RawBson::Array(v) => Some(v), + _ => None, + } + } + + /// Gets the [`RawDocument`] that's referenced or returns `None` if the referenced value + /// isn't a BSON document. 
+ pub fn as_document(self) -> Option<&'a RawDocument> { + match self { + RawBson::Document(v) => Some(v), + _ => None, + } + } + + /// Gets the `bool` that's referenced or returns `None` if the referenced value isn't a BSON + /// boolean. + pub fn as_bool(self) -> Option { + match self { + RawBson::Boolean(v) => Some(v), + _ => None, + } + } + + /// Gets the `i32` that's referenced or returns `None` if the referenced value isn't a BSON + /// Int32. + pub fn as_i32(self) -> Option { + match self { + RawBson::Int32(v) => Some(v), + _ => None, + } + } + + /// Gets the `i64` that's referenced or returns `None` if the referenced value isn't a BSON + /// Int64. + pub fn as_i64(self) -> Option { + match self { + RawBson::Int64(v) => Some(v), + _ => None, + } + } + + /// Gets the [`crate::oid::ObjectId`] that's referenced or returns `None` if the referenced + /// value isn't a BSON ObjectID. + pub fn as_object_id(self) -> Option { + match self { + RawBson::ObjectId(v) => Some(v), + _ => None, + } + } + + /// Gets the [`RawBinary`] that's referenced or returns `None` if the referenced value isn't a + /// BSON binary. + pub fn as_binary(self) -> Option> { + match self { + RawBson::Binary(v) => Some(v), + _ => None, + } + } + + /// Gets the [`RawRegex`] that's referenced or returns `None` if the referenced value isn't a + /// BSON regular expression. + pub fn as_regex(self) -> Option> { + match self { + RawBson::RegularExpression(v) => Some(v), + _ => None, + } + } + + /// Gets the [`crate::DateTime`] that's referenced or returns `None` if the referenced value + /// isn't a BSON datetime. + pub fn as_datetime(self) -> Option { + match self { + RawBson::DateTime(v) => Some(v), + _ => None, + } + } + + /// Gets the symbol that's referenced or returns `None` if the referenced value isn't a BSON + /// symbol. 
+ pub fn as_symbol(self) -> Option<&'a str> { + match self { + RawBson::Symbol(v) => Some(v), + _ => None, + } + } + + /// Gets the [`crate::Timestamp`] that's referenced or returns `None` if the referenced value + /// isn't a BSON timestamp. + pub fn as_timestamp(self) -> Option { + match self { + RawBson::Timestamp(timestamp) => Some(timestamp), + _ => None, + } + } + + /// Gets the null value that's referenced or returns `None` if the referenced value isn't a BSON + /// null. + pub fn as_null(self) -> Option<()> { + match self { + RawBson::Null => Some(()), + _ => None, + } + } + + /// Gets the [`RawDbPointer`] that's referenced or returns `None` if the referenced value isn't + /// a BSON DB pointer. + pub fn as_db_pointer(self) -> Option> { + match self { + RawBson::DbPointer(d) => Some(d), + _ => None, + } + } + + /// Gets the code that's referenced or returns `None` if the referenced value isn't a BSON + /// JavaScript. + pub fn as_javascript(self) -> Option<&'a str> { + match self { + RawBson::JavaScriptCode(s) => Some(s), + _ => None, + } + } + + /// Gets the [`RawJavaScriptCodeWithScope`] that's referenced or returns `None` if the + /// referenced value isn't a BSON JavaScript with scope. 
+ pub fn as_javascript_with_scope(self) -> Option> { + match self { + RawBson::JavaScriptCodeWithScope(s) => Some(s), + _ => None, + } + } +} + +impl<'a> TryFrom> for Bson { + type Error = Error; + + fn try_from(rawbson: RawBson<'a>) -> Result { + Ok(match rawbson { + RawBson::Double(d) => Bson::Double(d), + RawBson::String(s) => Bson::String(s.to_string()), + RawBson::Document(rawdoc) => { + let doc = rawdoc.try_into()?; + Bson::Document(doc) + } + RawBson::Array(rawarray) => { + let mut items = Vec::new(); + for v in rawarray { + let bson: Bson = v?.try_into()?; + items.push(bson); + } + Bson::Array(items) + } + RawBson::Binary(rawbson) => { + let RawBinary { + subtype, + bytes: data, + } = rawbson; + Bson::Binary(crate::Binary { + subtype, + bytes: data.to_vec(), + }) + } + RawBson::ObjectId(rawbson) => Bson::ObjectId(rawbson), + RawBson::Boolean(rawbson) => Bson::Boolean(rawbson), + RawBson::DateTime(rawbson) => Bson::DateTime(rawbson), + RawBson::Null => Bson::Null, + RawBson::RegularExpression(rawregex) => Bson::RegularExpression(crate::Regex::new( + rawregex.pattern.to_string(), + rawregex.options.to_string(), + )), + RawBson::JavaScriptCode(rawbson) => Bson::JavaScriptCode(rawbson.to_string()), + RawBson::Int32(rawbson) => Bson::Int32(rawbson), + RawBson::Timestamp(rawbson) => Bson::Timestamp(rawbson), + RawBson::Int64(rawbson) => Bson::Int64(rawbson), + RawBson::Undefined => Bson::Undefined, + RawBson::DbPointer(rawbson) => Bson::DbPointer(DbPointer { + namespace: rawbson.namespace.to_string(), + id: rawbson.id, + }), + RawBson::Symbol(rawbson) => Bson::Symbol(rawbson.to_string()), + RawBson::JavaScriptCodeWithScope(rawbson) => { + Bson::JavaScriptCodeWithScope(crate::JavaScriptCodeWithScope { + code: rawbson.code.to_string(), + scope: rawbson.scope.try_into()?, + }) + } + RawBson::Decimal128(rawbson) => Bson::Decimal128(rawbson), + RawBson::MaxKey => Bson::MaxKey, + RawBson::MinKey => Bson::MinKey, + }) + } +} + +/// A BSON binary value referencing raw 
bytes stored elsewhere. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct RawBinary<'a> { + pub(super) subtype: BinarySubtype, + pub(super) bytes: &'a [u8], +} + +impl<'a> RawBinary<'a> { + /// Gets the subtype of the binary value. + pub fn subtype(self) -> BinarySubtype { + self.subtype + } + + /// Gets the contained bytes of the binary value. + pub fn as_bytes(self) -> &'a [u8] { + self.bytes + } +} + +/// A BSON regex referencing raw bytes stored elsewhere. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct RawRegex<'a> { + pub(super) pattern: &'a str, + pub(super) options: &'a str, +} + +impl<'a> RawRegex<'a> { + /// Gets the pattern portion of the regex. + pub fn pattern(self) -> &'a str { + self.pattern + } + + /// Gets the options portion of the regex. + pub fn options(self) -> &'a str { + self.options + } +} + +/// A BSON "code with scope" value referencing raw bytes stored elsewhere. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct RawJavaScriptCodeWithScope<'a> { + pub(crate) code: &'a str, + pub(crate) scope: &'a RawDocument, +} + +impl<'a> RawJavaScriptCodeWithScope<'a> { + /// Gets the code in the value. + pub fn code(self) -> &'a str { + self.code + } + + /// Gets the scope in the value. + pub fn scope(self) -> &'a RawDocument { + self.scope + } +} + +/// A BSON DB pointer value referencing raw bytes stored elsewhere. 
+#[derive(Debug, Clone, Copy, PartialEq)] +pub struct RawDbPointer<'a> { + pub(crate) namespace: &'a str, + pub(crate) id: ObjectId, +} diff --git a/src/raw/document.rs b/src/raw/document.rs new file mode 100644 index 00000000..e2141bcc --- /dev/null +++ b/src/raw/document.rs @@ -0,0 +1,531 @@ +use std::{ + borrow::Cow, + convert::{TryFrom, TryInto}, +}; + +use crate::{raw::error::ErrorKind, DateTime, Timestamp}; + +use super::{ + error::{ValueAccessError, ValueAccessErrorKind, ValueAccessResult}, + i32_from_slice, + Error, + Iter, + RawArray, + RawBinary, + RawBson, + RawDocumentBuf, + RawRegex, + Result, +}; +use crate::{oid::ObjectId, spec::ElementType, Document}; + +/// A slice of a BSON document (akin to [`std::str`]). This can be created from a +/// [`RawDocumentBuf`] or any type that contains valid BSON data, including static binary literals, +/// [Vec](std::vec::Vec), or arrays. +/// +/// This is an _unsized_ type, meaning that it must always be used behind a pointer like `&`. For an +/// owned version of this type, see [`RawDocumentBuf`]. +/// +/// Accessing elements within a [`RawDocument`] is similar to element access in [`crate::Document`], +/// but because the contents are parsed during iteration instead of at creation time, format errors +/// can happen at any time during use. +/// +/// Iterating over a [`RawDocument`] yields either an error or a key-value pair that borrows from +/// the original document without making any additional allocations. 
+/// ``` +/// # use bson::raw::{Error}; +/// use bson::raw::RawDocument; +/// +/// let doc = RawDocument::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00")?; +/// let mut iter = doc.into_iter(); +/// let (key, value) = iter.next().unwrap()?; +/// assert_eq!(key, "hi"); +/// assert_eq!(value.as_str(), Some("y'all")); +/// assert!(iter.next().is_none()); +/// # Ok::<(), Error>(()) +/// ``` +/// +/// Individual elements can be accessed using [`RawDocument::get`] or any of +/// the type-specific getters, such as [`RawDocument::get_object_id`] or +/// [`RawDocument::get_str`]. Note that accessing elements is an O(N) operation, as it +/// requires iterating through the document from the beginning to find the requested key. +/// +/// ``` +/// use bson::raw::RawDocument; +/// +/// let doc = RawDocument::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00")?; +/// assert_eq!(doc.get_str("hi")?, "y'all"); +/// # Ok::<(), Box>(()) +/// ``` +#[derive(PartialEq)] +#[repr(transparent)] +pub struct RawDocument { + data: [u8], +} + +impl RawDocument { + /// Constructs a new [`RawDocument`], validating _only_ the + /// following invariants: + /// * `data` is at least five bytes long (the minimum for a valid BSON document) + /// * the initial four bytes of `data` accurately represent the length of the bytes as + /// required by the BSON spec. + /// * the last byte of `data` is a 0 + /// + /// Note that the internal structure of the bytes representing the + /// BSON elements is _not_ validated at all by this method. If the + /// bytes do not conform to the BSON spec, then method calls on + /// the [`RawDocument`] will return Errors where appropriate. 
+    ///
+    /// ```
+    /// use bson::raw::RawDocument;
+    ///
+    /// let doc = RawDocument::new(b"\x05\0\0\0\0")?;
+    /// # Ok::<(), bson::raw::Error>(())
+    /// ```
+    pub fn new<D: AsRef<[u8]> + ?Sized>(data: &D) -> Result<&RawDocument> {
+        let data = data.as_ref();
+
+        if data.len() < 5 {
+            return Err(Error {
+                key: None,
+                kind: ErrorKind::MalformedValue {
+                    message: "document too short".into(),
+                },
+            });
+        }
+
+        let length = i32_from_slice(data)?;
+
+        // Compare in `usize` space with a checked conversion. Casting `data.len()` down to `i32`
+        // would truncate, letting an oversized slice spuriously match the declared length; a
+        // negative declared length fails the conversion and is rejected as well.
+        if usize::try_from(length).ok() != Some(data.len()) {
+            return Err(Error {
+                key: None,
+                kind: ErrorKind::MalformedValue {
+                    message: "document length incorrect".into(),
+                },
+            });
+        }
+
+        if data[data.len() - 1] != 0 {
+            return Err(Error {
+                key: None,
+                kind: ErrorKind::MalformedValue {
+                    message: "document not null-terminated".into(),
+                },
+            });
+        }
+
+        Ok(RawDocument::new_unchecked(data))
+    }
+
+    /// Creates a new `RawDocument` referencing the provided data slice.
+    ///
+    /// No validation is performed here; callers are expected to pass bytes that already satisfy
+    /// the invariants checked by [`RawDocument::new`].
+    pub(crate) fn new_unchecked<D: AsRef<[u8]> + ?Sized>(data: &D) -> &RawDocument {
+        // SAFETY:
+        //
+        // Dereferencing a raw pointer requires unsafe due to the potential that the pointer is
+        // null, dangling, or misaligned. We know the pointer is not null or dangling due to the
+        // fact that it's created by a safe reference. Converting &[u8] to *const [u8] will be
+        // properly aligned due to them being references to the same type, and converting *const
+        // [u8] to *const RawDocument is aligned due to the fact that the only field in a
+        // RawDocument is a [u8] and it is `#[repr(transparent)]`, meaning the structs are
+        // represented identically at the byte level.
+        unsafe { &*(data.as_ref() as *const [u8] as *const RawDocument) }
+    }
+
+    /// Creates a new [`RawDocumentBuf`] with an owned copy of the BSON bytes.
+    ///
+    /// ```
+    /// use bson::raw::{RawDocument, RawDocumentBuf, Error};
+    ///
+    /// let data = b"\x05\0\0\0\0";
+    /// let doc_ref = RawDocument::new(data)?;
+    /// let doc: RawDocumentBuf = doc_ref.to_raw_document_buf();
+    /// # Ok::<(), Error>(())
+    /// ```
+    pub fn to_raw_document_buf(&self) -> RawDocumentBuf {
+        // unwrap is ok here because we already verified the bytes in `RawDocument::new`
+        RawDocumentBuf::new(self.data.to_owned()).unwrap()
+    }
+
+    /// Gets a reference to the value corresponding to the given key by iterating until the key is
+    /// found.
+    ///
+    /// Returns `Ok(None)` when no element has the given key, and `Err` as soon as an element
+    /// encountered before a match fails to parse.
+    ///
+    /// ```
+    /// # use bson::raw::Error;
+    /// use bson::{doc, oid::ObjectId, raw::{RawDocumentBuf, RawBson}};
+    ///
+    /// let doc = RawDocumentBuf::from_document(&doc! {
+    ///     "_id": ObjectId::new(),
+    ///     "f64": 2.5,
+    /// })?;
+    ///
+    /// let element = doc.get("f64")?.expect("finding key f64");
+    /// assert_eq!(element.as_f64(), Some(2.5));
+    /// assert!(doc.get("unknown")?.is_none());
+    /// # Ok::<(), Error>(())
+    /// ```
+    pub fn get(&self, key: impl AsRef<str>) -> Result<Option<RawBson<'_>>> {
+        for result in self.into_iter() {
+            let (k, v) = result?;
+            if key.as_ref() == k {
+                return Ok(Some(v));
+            }
+        }
+        Ok(None)
+    }
+
+    /// Looks up the element stored under `key` and converts it with `f`.
+    ///
+    /// The error kind is `InvalidBson` when the lookup itself fails, `NotPresent` when the key is
+    /// absent, and `UnexpectedType` (carrying `expected_type` and the element's actual type) when
+    /// `f` returns `None`.
+    fn get_with<'a, T>(
+        &'a self,
+        key: impl AsRef<str>,
+        expected_type: ElementType,
+        f: impl FnOnce(RawBson<'a>) -> Option<T>,
+    ) -> ValueAccessResult<T> {
+        let key = key.as_ref();
+
+        let bson = self
+            .get(key)
+            .map_err(|e| ValueAccessError {
+                key: key.to_string(),
+                kind: ValueAccessErrorKind::InvalidBson(e),
+            })?
+            // Built lazily so a successful lookup does not allocate a copy of the key.
+            .ok_or_else(|| ValueAccessError {
+                key: key.to_string(),
+                kind: ValueAccessErrorKind::NotPresent,
+            })?;
+        match f(bson) {
+            Some(t) => Ok(t),
+            None => Err(ValueAccessError {
+                key: key.to_string(),
+                kind: ValueAccessErrorKind::UnexpectedType {
+                    expected: expected_type,
+                    actual: bson.element_type(),
+                },
+            }),
+        }
+    }
+
+    /// Gets a reference to the BSON double value corresponding to a given key or returns an error
+    /// if the key corresponds to a value which isn't a double.
+ /// + /// ``` + /// # use bson::raw::Error; + /// use bson::raw::{ValueAccessErrorKind, RawDocumentBuf}; + /// use bson::doc; + /// + /// let doc = RawDocumentBuf::from_document(&doc! { + /// "bool": true, + /// "f64": 2.5, + /// })?; + /// + /// assert_eq!(doc.get_f64("f64")?, 2.5); + /// assert!(matches!(doc.get_f64("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. })); + /// assert!(matches!(doc.get_f64("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) + /// ``` + pub fn get_f64(&self, key: impl AsRef) -> ValueAccessResult { + self.get_with(key, ElementType::Double, RawBson::as_f64) + } + + /// Gets a reference to the string value corresponding to a given key or returns an error if the + /// key corresponds to a value which isn't a string. + /// + /// ``` + /// use bson::{doc, raw::{RawDocumentBuf, ValueAccessErrorKind}}; + /// + /// let doc = RawDocumentBuf::from_document(&doc! { + /// "string": "hello", + /// "bool": true, + /// })?; + /// + /// assert_eq!(doc.get_str("string")?, "hello"); + /// assert!(matches!(doc.get_str("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. })); + /// assert!(matches!(doc.get_str("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) + /// ``` + pub fn get_str(&self, key: impl AsRef) -> ValueAccessResult<&'_ str> { + self.get_with(key, ElementType::String, RawBson::as_str) + } + + /// Gets a reference to the document value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't a document. + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, raw::{ValueAccessErrorKind, RawDocumentBuf}}; + /// + /// let doc = RawDocumentBuf::from_document(&doc! 
{ + /// "doc": { "key": "value"}, + /// "bool": true, + /// })?; + /// + /// assert_eq!(doc.get_document("doc")?.get_str("key")?, "value"); + /// assert!(matches!(doc.get_document("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. })); + /// assert!(matches!(doc.get_document("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) + /// ``` + pub fn get_document(&self, key: impl AsRef) -> ValueAccessResult<&'_ RawDocument> { + self.get_with(key, ElementType::EmbeddedDocument, RawBson::as_document) + } + + /// Gets a reference to the array value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't an array. + /// + /// ``` + /// use bson::{doc, raw::{RawDocumentBuf, ValueAccessErrorKind}}; + /// + /// let doc = RawDocumentBuf::from_document(&doc! { + /// "array": [true, 3], + /// "bool": true, + /// })?; + /// + /// let mut arr_iter = doc.get_array("array")?.into_iter(); + /// let _: bool = arr_iter.next().unwrap()?.as_bool().unwrap(); + /// let _: i32 = arr_iter.next().unwrap()?.as_i32().unwrap(); + /// + /// assert!(arr_iter.next().is_none()); + /// assert!(doc.get_array("bool").is_err()); + /// assert!(matches!(doc.get_array("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) + /// ``` + pub fn get_array(&self, key: impl AsRef) -> ValueAccessResult<&'_ RawArray> { + self.get_with(key, ElementType::Array, RawBson::as_array) + } + + /// Gets a reference to the BSON binary value corresponding to a given key or returns an error + /// if the key corresponds to a value which isn't a binary value. + /// + /// ``` + /// use bson::{ + /// doc, + /// raw::{ValueAccessErrorKind, RawDocumentBuf, RawBinary}, + /// spec::BinarySubtype, + /// Binary, + /// }; + /// + /// let doc = RawDocumentBuf::from_document(&doc! 
{ + /// "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1, 2, 3] }, + /// "bool": true, + /// })?; + /// + /// assert_eq!(doc.get_binary("binary")?.as_bytes(), &[1, 2, 3][..]); + /// assert!(matches!(doc.get_binary("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. })); + /// assert!(matches!(doc.get_binary("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) + /// ``` + pub fn get_binary(&self, key: impl AsRef) -> ValueAccessResult> { + self.get_with(key, ElementType::Binary, RawBson::as_binary) + } + + /// Gets a reference to the ObjectId value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't an ObjectId. + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, oid::ObjectId, raw::{ValueAccessErrorKind, RawDocumentBuf}}; + /// + /// let doc = RawDocumentBuf::from_document(&doc! { + /// "_id": ObjectId::new(), + /// "bool": true, + /// })?; + /// + /// let oid = doc.get_object_id("_id")?; + /// assert!(matches!(doc.get_object_id("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. })); + /// assert!(matches!(doc.get_object_id("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) + /// ``` + pub fn get_object_id(&self, key: impl AsRef) -> ValueAccessResult { + self.get_with(key, ElementType::ObjectId, RawBson::as_object_id) + } + + /// Gets a reference to the boolean value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't a boolean. + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, oid::ObjectId, raw::{RawDocumentBuf, ValueAccessErrorKind}}; + /// + /// let doc = RawDocumentBuf::from_document(&doc! { + /// "_id": ObjectId::new(), + /// "bool": true, + /// })?; + /// + /// assert!(doc.get_bool("bool")?); + /// assert!(matches!(doc.get_bool("_id").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. 
})); + /// assert!(matches!(doc.get_bool("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) + /// ``` + pub fn get_bool(&self, key: impl AsRef) -> ValueAccessResult { + self.get_with(key, ElementType::Boolean, RawBson::as_bool) + } + + /// Gets a reference to the BSON DateTime value corresponding to a given key or returns an + /// error if the key corresponds to a value which isn't a DateTime. + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, raw::{ValueAccessErrorKind, RawDocumentBuf}, DateTime}; + /// + /// let dt = DateTime::now(); + /// let doc = RawDocumentBuf::from_document(&doc! { + /// "created_at": dt, + /// "bool": true, + /// })?; + /// + /// assert_eq!(doc.get_datetime("created_at")?, dt); + /// assert!(matches!(doc.get_datetime("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. })); + /// assert!(matches!(doc.get_datetime("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) + /// ``` + pub fn get_datetime(&self, key: impl AsRef) -> ValueAccessResult { + self.get_with(key, ElementType::DateTime, RawBson::as_datetime) + } + + /// Gets a reference to the BSON regex value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't a regex. + /// + /// ``` + /// use bson::{doc, Regex, raw::{RawDocumentBuf, ValueAccessErrorKind}}; + /// + /// let doc = RawDocumentBuf::from_document(&doc! { + /// "regex": Regex { + /// pattern: r"end\s*$".into(), + /// options: "i".into(), + /// }, + /// "bool": true, + /// })?; + /// + /// assert_eq!(doc.get_regex("regex")?.pattern(), r"end\s*$"); + /// assert_eq!(doc.get_regex("regex")?.options(), "i"); + /// assert!(matches!(doc.get_regex("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. 
})); + /// assert!(matches!(doc.get_regex("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) + /// ``` + pub fn get_regex(&self, key: impl AsRef) -> ValueAccessResult> { + self.get_with(key, ElementType::RegularExpression, RawBson::as_regex) + } + + /// Gets a reference to the BSON timestamp value corresponding to a given key or returns an + /// error if the key corresponds to a value which isn't a timestamp. + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, Timestamp, raw::{RawDocumentBuf, ValueAccessErrorKind}}; + /// + /// let doc = RawDocumentBuf::from_document(&doc! { + /// "bool": true, + /// "ts": Timestamp { time: 649876543, increment: 9 }, + /// })?; + /// + /// let timestamp = doc.get_timestamp("ts")?; + /// + /// assert_eq!(timestamp.time, 649876543); + /// assert_eq!(timestamp.increment, 9); + /// assert!(matches!(doc.get_timestamp("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. })); + /// assert!(matches!(doc.get_timestamp("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) + /// ``` + pub fn get_timestamp(&self, key: impl AsRef) -> ValueAccessResult { + self.get_with(key, ElementType::Timestamp, RawBson::as_timestamp) + } + + /// Gets a reference to the BSON int32 value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't a 32-bit integer. + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, raw::{RawDocumentBuf, ValueAccessErrorKind}}; + /// + /// let doc = RawDocumentBuf::from_document(&doc! 
{ + /// "bool": true, + /// "i32": 1_000_000, + /// })?; + /// + /// assert_eq!(doc.get_i32("i32")?, 1_000_000); + /// assert!(matches!(doc.get_i32("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { ..})); + /// assert!(matches!(doc.get_i32("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) + /// ``` + pub fn get_i32(&self, key: impl AsRef) -> ValueAccessResult { + self.get_with(key, ElementType::Int32, RawBson::as_i32) + } + + /// Gets a reference to the BSON int64 value corresponding to a given key or returns an error if + /// the key corresponds to a value which isn't a 64-bit integer. + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, raw::{ValueAccessErrorKind, RawDocumentBuf}}; + /// + /// let doc = RawDocumentBuf::from_document(&doc! { + /// "bool": true, + /// "i64": 9223372036854775807_i64, + /// })?; + /// + /// assert_eq!(doc.get_i64("i64")?, 9223372036854775807); + /// assert!(matches!(doc.get_i64("bool").unwrap_err().kind, ValueAccessErrorKind::UnexpectedType { .. 
})); + /// assert!(matches!(doc.get_i64("unknown").unwrap_err().kind, ValueAccessErrorKind::NotPresent)); + /// # Ok::<(), Box>(()) + /// ``` + pub fn get_i64(&self, key: impl AsRef) -> ValueAccessResult { + self.get_with(key, ElementType::Int64, RawBson::as_i64) + } + + /// Return a reference to the contained data as a `&[u8]` + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, raw::RawDocumentBuf}; + /// let docbuf = RawDocumentBuf::from_document(&doc!{})?; + /// assert_eq!(docbuf.as_bytes(), b"\x05\x00\x00\x00\x00"); + /// # Ok::<(), Error>(()) + /// ``` + pub fn as_bytes(&self) -> &[u8] { + &self.data + } +} + +impl std::fmt::Debug for RawDocument { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("RawDocument") + .field("data", &hex::encode(&self.data)) + .finish() + } +} + +impl AsRef for RawDocument { + fn as_ref(&self) -> &RawDocument { + self + } +} + +impl ToOwned for RawDocument { + type Owned = RawDocumentBuf; + + fn to_owned(&self) -> Self::Owned { + self.to_raw_document_buf() + } +} + +impl<'a> From<&'a RawDocument> for Cow<'a, RawDocument> { + fn from(rdr: &'a RawDocument) -> Self { + Cow::Borrowed(rdr) + } +} + +impl TryFrom<&RawDocument> for crate::Document { + type Error = Error; + + fn try_from(rawdoc: &RawDocument) -> Result { + rawdoc + .into_iter() + .map(|res| res.and_then(|(k, v)| Ok((k.to_owned(), v.try_into()?)))) + .collect() + } +} + +impl<'a> IntoIterator for &'a RawDocument { + type IntoIter = Iter<'a>; + type Item = Result<(&'a str, RawBson<'a>)>; + + fn into_iter(self) -> Iter<'a> { + Iter::new(self) + } +} diff --git a/src/raw/document_buf.rs b/src/raw/document_buf.rs new file mode 100644 index 00000000..019a6e25 --- /dev/null +++ b/src/raw/document_buf.rs @@ -0,0 +1,197 @@ +use std::{ + borrow::{Borrow, Cow}, + convert::TryFrom, + ops::Deref, +}; + +use crate::Document; + +use super::{Error, ErrorKind, Iter, RawBson, RawDocument, Result}; + +/// An owned BSON document (akin to 
[`std::path::PathBuf`]), backed by a buffer of raw BSON bytes. +/// This can be created from a `Vec` or a [`crate::Document`]. +/// +/// Accessing elements within a [`RawDocumentBuf`] is similar to element access in +/// [`crate::Document`], but because the contents are parsed during iteration instead of at creation +/// time, format errors can happen at any time during use. +/// +/// Iterating over a [`RawDocumentBuf`] yields either an error or a key-value pair that borrows from +/// the original document without making any additional allocations. +/// +/// ``` +/// # use bson::raw::Error; +/// use bson::raw::RawDocumentBuf; +/// +/// let doc = RawDocumentBuf::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; +/// let mut iter = doc.iter(); +/// let (key, value) = iter.next().unwrap()?; +/// assert_eq!(key, "hi"); +/// assert_eq!(value.as_str(), Some("y'all")); +/// assert!(iter.next().is_none()); +/// # Ok::<(), Error>(()) +/// ``` +/// +/// This type implements `Deref` to [`RawDocument`], meaning that all methods on [`RawDocument`] are +/// available on [`RawDocumentBuf`] values as well. This includes [`RawDocument::get`] or any of the +/// type-specific getters, such as [`RawDocument::get_object_id`] or [`RawDocument::get_str`]. Note +/// that accessing elements is an O(N) operation, as it requires iterating through the document from +/// the beginning to find the requested key. 
+/// +/// ``` +/// use bson::raw::RawDocumentBuf; +/// +/// let doc = RawDocumentBuf::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?; +/// assert_eq!(doc.get_str("hi")?, "y'all"); +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone, PartialEq)] +pub struct RawDocumentBuf { + data: Vec, +} + +impl RawDocumentBuf { + /// Constructs a new [`RawDocumentBuf`], validating _only_ the + /// following invariants: + /// * `data` is at least five bytes long (the minimum for a valid BSON document) + /// * the initial four bytes of `data` accurately represent the length of the bytes as + /// required by the BSON spec. + /// * the last byte of `data` is a 0 + /// + /// Note that the internal structure of the bytes representing the + /// BSON elements is _not_ validated at all by this method. If the + /// bytes do not conform to the BSON spec, then method calls on + /// the RawDocument will return Errors where appropriate. + /// + /// ``` + /// # use bson::raw::{RawDocumentBuf, Error}; + /// let doc = RawDocumentBuf::new(b"\x05\0\0\0\0".to_vec())?; + /// # Ok::<(), Error>(()) + /// ``` + pub fn new(data: Vec) -> Result { + let _ = RawDocument::new(data.as_slice())?; + Ok(Self { data }) + } + + /// Create a [`RawDocumentBuf`] from a [`Document`]. + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, oid::ObjectId, raw::RawDocumentBuf}; + /// + /// let document = doc! { + /// "_id": ObjectId::new(), + /// "name": "Herman Melville", + /// "title": "Moby-Dick", + /// }; + /// let doc = RawDocumentBuf::from_document(&document)?; + /// # Ok::<(), Error>(()) + /// ``` + pub fn from_document(doc: &Document) -> Result { + let mut data = Vec::new(); + doc.to_writer(&mut data).map_err(|e| Error { + key: None, + kind: ErrorKind::MalformedValue { + message: e.to_string(), + }, + })?; + + Ok(Self { data }) + } + + /// Gets an iterator over the elements in the [`RawDocumentBuf`], which yields + /// `Result<(&str, RawBson<'_>)>`. 
+ /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, raw::RawDocumentBuf}; + /// + /// let doc = RawDocumentBuf::from_document(&doc! { "ferris": true })?; + /// + /// for element in doc.iter() { + /// let (key, value) = element?; + /// assert_eq!(key, "ferris"); + /// assert_eq!(value.as_bool(), Some(true)); + /// } + /// # Ok::<(), Error>(()) + /// ``` + /// + /// # Note: + /// + /// There is no owning iterator for [`RawDocumentBuf`]. If you need ownership over + /// elements that might need to allocate, you must explicitly convert + /// them to owned types yourself. + pub fn iter(&self) -> Iter<'_> { + self.into_iter() + } + + /// Return the contained data as a `Vec` + /// + /// ``` + /// # use bson::raw::Error; + /// use bson::{doc, raw::RawDocumentBuf}; + /// + /// let doc = RawDocumentBuf::from_document(&doc!{})?; + /// assert_eq!(doc.into_vec(), b"\x05\x00\x00\x00\x00".to_vec()); + /// # Ok::<(), Error>(()) + /// ``` + pub fn into_vec(self) -> Vec { + self.data + } +} + +impl std::fmt::Debug for RawDocumentBuf { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("RawDocumentBuf") + .field("data", &hex::encode(&self.data)) + .finish() + } +} + +impl<'a> From for Cow<'a, RawDocument> { + fn from(rd: RawDocumentBuf) -> Self { + Cow::Owned(rd) + } +} + +impl<'a> From<&'a RawDocumentBuf> for Cow<'a, RawDocument> { + fn from(rd: &'a RawDocumentBuf) -> Self { + Cow::Borrowed(rd.as_ref()) + } +} + +impl TryFrom for Document { + type Error = Error; + + fn try_from(raw: RawDocumentBuf) -> Result { + Document::try_from(raw.as_ref()) + } +} + +impl<'a> IntoIterator for &'a RawDocumentBuf { + type IntoIter = Iter<'a>; + type Item = Result<(&'a str, RawBson<'a>)>; + + fn into_iter(self) -> Iter<'a> { + Iter::new(self) + } +} + +impl AsRef for RawDocumentBuf { + fn as_ref(&self) -> &RawDocument { + RawDocument::new_unchecked(&self.data) + } +} + +impl Deref for RawDocumentBuf { + type Target = RawDocument; + + fn 
deref(&self) -> &Self::Target {
+        RawDocument::new_unchecked(&self.data)
+    }
+}
+
+impl Borrow<RawDocument> for RawDocumentBuf {
+    fn borrow(&self) -> &RawDocument {
+        &*self
+    }
+}
diff --git a/src/raw/error.rs b/src/raw/error.rs
new file mode 100644
index 00000000..556b7fa0
--- /dev/null
+++ b/src/raw/error.rs
@@ -0,0 +1,139 @@
+use std::str::Utf8Error;
+
+use crate::spec::ElementType;
+
+/// An error that occurs when attempting to parse raw BSON bytes.
+#[derive(Debug, PartialEq, Clone)]
+#[non_exhaustive]
+pub struct Error {
+    /// The type of error that was encountered.
+    pub kind: ErrorKind,
+
+    /// The key associated with the error, if any.
+    pub(crate) key: Option<String>,
+}
+
+impl Error {
+    pub(crate) fn new_with_key(key: impl Into<String>, kind: ErrorKind) -> Self {
+        Self {
+            kind,
+            key: Some(key.into()),
+        }
+    }
+
+    pub(crate) fn new_without_key(kind: ErrorKind) -> Self {
+        Self { key: None, kind }
+    }
+
+    pub(crate) fn with_key(mut self, key: impl AsRef<str>) -> Self {
+        self.key = Some(key.as_ref().to_string());
+        self
+    }
+
+    /// The key at which the error was encountered, if any.
+    pub fn key(&self) -> Option<&str> {
+        self.key.as_deref()
+    }
+}
+
+/// The different categories of errors that can be returned when reading from raw BSON.
+#[derive(Clone, Debug, PartialEq)]
+#[non_exhaustive]
+pub enum ErrorKind {
+    /// A BSON value did not fit the proper format.
+    #[non_exhaustive]
+    MalformedValue { message: String },
+
+    /// Improper UTF-8 bytes were found when proper UTF-8 was expected.
+ Utf8EncodingError(Utf8Error), +} + +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + let p = self + .key + .as_ref() + .map(|k| format!("error at key \"{}\": ", k)); + + let prefix = p.as_ref().map_or("", |p| p.as_str()); + + match &self.kind { + ErrorKind::MalformedValue { message } => { + write!(f, "{}malformed value: {:?}", prefix, message) + } + ErrorKind::Utf8EncodingError(e) => write!(f, "{}utf-8 encoding error: {}", prefix, e), + } + } +} + +impl std::error::Error for Error {} + +pub type Result = std::result::Result; + +/// Execute the provided closure, mapping the key of the returned error (if any) to the provided +/// key. +pub(crate) fn try_with_key Result>(key: impl AsRef, f: F) -> Result { + f().map_err(|e| e.with_key(key)) +} + +pub type ValueAccessResult = std::result::Result; + +/// Error to indicate that either a value was empty or it contained an unexpected +/// type, for use with the direct getters (e.g. [`crate::RawDocument::get_str`]). +#[derive(Debug, PartialEq, Clone)] +#[non_exhaustive] +pub struct ValueAccessError { + /// The type of error that was encountered. + pub kind: ValueAccessErrorKind, + + /// The key at which the error was encountered. + pub(crate) key: String, +} + +impl ValueAccessError { + /// The key at which the error was encountered. + pub fn key(&self) -> &str { + self.key.as_str() + } +} + +/// The type of error encountered when using a direct getter (e.g. [`crate::RawDocument::get_str`]). +#[derive(Debug, PartialEq, Clone)] +#[non_exhaustive] +pub enum ValueAccessErrorKind { + /// Cannot find the expected field with the specified key + NotPresent, + + /// Found a Bson value with the specified key, but not with the expected type + #[non_exhaustive] + UnexpectedType { + /// The type that was expected. + expected: ElementType, + + /// The actual type that was encountered. + actual: ElementType, + }, + + /// An error was encountered attempting to decode the document. 
+    InvalidBson(super::Error),
+}
+
+impl std::fmt::Display for ValueAccessError {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        let prefix = format!("error at key: \"{}\": ", self.key); // already ends in ": "
+
+        match &self.kind {
+            ValueAccessErrorKind::UnexpectedType { actual, expected } => write!(
+                f,
+                "{}unexpected element type: {:?}, expected: {:?}",
+                prefix, actual, expected
+            ),
+            ValueAccessErrorKind::InvalidBson(error) => {
+                write!(f, "{}{}", prefix, error)
+            }
+            ValueAccessErrorKind::NotPresent => write!(f, "{}value not present", prefix),
+        }
+    }
+}
+
+impl std::error::Error for ValueAccessError {}
diff --git a/src/raw/iter.rs b/src/raw/iter.rs
new file mode 100644
index 00000000..2e46da52
--- /dev/null
+++ b/src/raw/iter.rs
@@ -0,0 +1,314 @@
+use std::convert::TryInto;
+
+use crate::{
+    de::{read_bool, MIN_BSON_DOCUMENT_SIZE, MIN_CODE_WITH_SCOPE_SIZE},
+    oid::ObjectId,
+    raw::{Error, ErrorKind, Result},
+    spec::{BinarySubtype, ElementType},
+    DateTime,
+    Decimal128,
+    Timestamp,
+};
+
+use super::{
+    bson::RawDbPointer,
+    checked_add,
+    error::try_with_key,
+    f64_from_slice,
+    i32_from_slice,
+    i64_from_slice,
+    read_lenencoded,
+    read_nullterminated,
+    RawArray,
+    RawBinary,
+    RawBson,
+    RawDocument,
+    RawJavaScriptCodeWithScope,
+    RawRegex,
+};
+
+/// An iterator over the document's entries.
+pub struct Iter<'a> {
+    doc: &'a RawDocument,
+    offset: usize,
+
+    /// Whether the underlying doc is assumed to be valid or if an error has been encountered.
+    /// After an error, all subsequent iterations will return None.
+ valid: bool, +} + +impl<'a> Iter<'a> { + pub(crate) fn new(doc: &'a RawDocument) -> Self { + Self { + doc, + offset: 4, + valid: true, + } + } + + fn verify_enough_bytes(&self, start: usize, num_bytes: usize) -> Result<()> { + let end = checked_add(start, num_bytes)?; + if self.doc.as_bytes().get(start..end).is_none() { + return Err(Error::new_without_key(ErrorKind::MalformedValue { + message: format!( + "length exceeds remaining length of buffer: {} vs {}", + num_bytes, + self.doc.as_bytes().len() - start + ), + })); + } + Ok(()) + } + + fn next_oid(&self, starting_at: usize) -> Result { + self.verify_enough_bytes(starting_at, 12)?; + let oid = ObjectId::from_bytes( + self.doc.as_bytes()[starting_at..(starting_at + 12)] + .try_into() + .unwrap(), // ok because we know slice is 12 bytes long + ); + Ok(oid) + } + + fn next_document(&self, starting_at: usize) -> Result<&'a RawDocument> { + self.verify_enough_bytes(starting_at, MIN_BSON_DOCUMENT_SIZE as usize)?; + let size = i32_from_slice(&self.doc.as_bytes()[starting_at..])? 
as usize; + + if size < MIN_BSON_DOCUMENT_SIZE as usize { + return Err(Error::new_without_key(ErrorKind::MalformedValue { + message: format!("document too small: {} bytes", size), + })); + } + + self.verify_enough_bytes(starting_at, size)?; + let end = starting_at + size; + + if self.doc.as_bytes()[end - 1] != 0 { + return Err(Error { + key: None, + kind: ErrorKind::MalformedValue { + message: "not null terminated".into(), + }, + }); + } + RawDocument::new(&self.doc.as_bytes()[starting_at..end]) + } +} + +impl<'a> Iterator for Iter<'a> { + type Item = Result<(&'a str, RawBson<'a>)>; + + fn next(&mut self) -> Option)>> { + if !self.valid { + return None; + } else if self.offset == self.doc.as_bytes().len() - 1 { + if self.doc.as_bytes()[self.offset] == 0 { + // end of document marker + return None; + } else { + self.valid = false; + return Some(Err(Error { + key: None, + kind: ErrorKind::MalformedValue { + message: "document not null terminated".into(), + }, + })); + } + } else if self.offset >= self.doc.as_bytes().len() { + self.valid = false; + return Some(Err(Error::new_without_key(ErrorKind::MalformedValue { + message: "iteration overflowed document".to_string(), + }))); + } + + let key = match read_nullterminated(&self.doc.as_bytes()[self.offset + 1..]) { + Ok(k) => k, + Err(e) => { + self.valid = false; + return Some(Err(e)); + } + }; + + let kvp_result = try_with_key(key, || { + let valueoffset = self.offset + 1 + key.len() + 1; // type specifier + key + \0 + + let element_type = match ElementType::from(self.doc.as_bytes()[self.offset]) { + Some(et) => et, + None => { + return Err(Error::new_with_key( + key, + ErrorKind::MalformedValue { + message: format!("invalid tag: {}", self.doc.as_bytes()[self.offset]), + }, + )) + } + }; + + let (element, element_size) = match element_type { + ElementType::Int32 => { + let i = i32_from_slice(&self.doc.as_bytes()[valueoffset..])?; + (RawBson::Int32(i), 4) + } + ElementType::Int64 => { + let i = 
i64_from_slice(&self.doc.as_bytes()[valueoffset..])?;
+                    (RawBson::Int64(i), 8)
+                }
+                ElementType::Double => {
+                    let f = f64_from_slice(&self.doc.as_bytes()[valueoffset..])?;
+                    (RawBson::Double(f), 8)
+                }
+                ElementType::String => {
+                    let s = read_lenencoded(&self.doc.as_bytes()[valueoffset..])?;
+                    (RawBson::String(s), 4 + s.len() + 1)
+                }
+                ElementType::EmbeddedDocument => {
+                    let doc = self.next_document(valueoffset)?;
+                    (RawBson::Document(doc), doc.as_bytes().len())
+                }
+                ElementType::Array => {
+                    let doc = self.next_document(valueoffset)?;
+                    (
+                        RawBson::Array(RawArray::from_doc(doc)),
+                        doc.as_bytes().len(),
+                    )
+                }
+                ElementType::Binary => {
+                    let len = i32_from_slice(&self.doc.as_bytes()[valueoffset..])? as usize;
+                    let data_start = valueoffset + 4 + 1; // past the i32 length + subtype byte
+                    self.verify_enough_bytes(valueoffset, checked_add(len, 4 + 1)?)?;
+                    let subtype = BinarySubtype::from(self.doc.as_bytes()[valueoffset + 4]);
+                    let data = match subtype {
+                        BinarySubtype::BinaryOld => {
+                            if len < 4 {
+                                return Err(Error::new_without_key(ErrorKind::MalformedValue {
+                                    message: "old binary subtype has no inner declared length"
+                                        .into(),
+                                }));
+                            }
+                            let oldlength =
+                                i32_from_slice(&self.doc.as_bytes()[data_start..])? as usize;
+                            if checked_add(oldlength, 4)?
!= len { + return Err(Error::new_without_key(ErrorKind::MalformedValue { + message: "old binary subtype has wrong inner declared length" + .into(), + })); + } + &self.doc.as_bytes()[(data_start + 4)..(data_start + len)] + } + _ => &self.doc.as_bytes()[data_start..(data_start + len)], + }; + ( + RawBson::Binary(RawBinary { + subtype, + bytes: data, + }), + 4 + 1 + len, + ) + } + ElementType::ObjectId => { + let oid = self.next_oid(valueoffset)?; + (RawBson::ObjectId(oid), 12) + } + ElementType::Boolean => { + let b = read_bool(&self.doc.as_bytes()[valueoffset..]).map_err(|e| { + Error::new_with_key( + key, + ErrorKind::MalformedValue { + message: e.to_string(), + }, + ) + })?; + (RawBson::Boolean(b), 1) + } + ElementType::DateTime => { + let ms = i64_from_slice(&self.doc.as_bytes()[valueoffset..])?; + (RawBson::DateTime(DateTime::from_millis(ms)), 8) + } + ElementType::RegularExpression => { + let pattern = read_nullterminated(&self.doc.as_bytes()[valueoffset..])?; + let options = read_nullterminated( + &self.doc.as_bytes()[(valueoffset + pattern.len() + 1)..], + )?; + ( + RawBson::RegularExpression(RawRegex { pattern, options }), + pattern.len() + 1 + options.len() + 1, + ) + } + ElementType::Null => (RawBson::Null, 0), + ElementType::Undefined => (RawBson::Undefined, 0), + ElementType::Timestamp => { + let ts = Timestamp::from_reader(&self.doc.as_bytes()[valueoffset..]).map_err( + |e| { + Error::new_without_key(ErrorKind::MalformedValue { + message: e.to_string(), + }) + }, + )?; + (RawBson::Timestamp(ts), 8) + } + ElementType::JavaScriptCode => { + let code = read_lenencoded(&self.doc.as_bytes()[valueoffset..])?; + (RawBson::JavaScriptCode(code), 4 + code.len() + 1) + } + ElementType::JavaScriptCodeWithScope => { + let length = i32_from_slice(&self.doc.as_bytes()[valueoffset..])? 
as usize; + + if length < MIN_CODE_WITH_SCOPE_SIZE as usize { + return Err(Error::new_without_key(ErrorKind::MalformedValue { + message: "code with scope length too small".to_string(), + })); + } + + self.verify_enough_bytes(valueoffset, length)?; + let slice = &&self.doc.as_bytes()[valueoffset..(valueoffset + length)]; + let code = read_lenencoded(&slice[4..])?; + let scope_start = 4 + 4 + code.len() + 1; + let scope = RawDocument::new(&slice[scope_start..])?; + ( + RawBson::JavaScriptCodeWithScope(RawJavaScriptCodeWithScope { + code, + scope, + }), + length, + ) + } + ElementType::DbPointer => { + let namespace = read_lenencoded(&self.doc.as_bytes()[valueoffset..])?; + let id = self.next_oid(valueoffset + 4 + namespace.len() + 1)?; + ( + RawBson::DbPointer(RawDbPointer { namespace, id }), + 4 + namespace.len() + 1 + 12, + ) + } + ElementType::Symbol => { + let s = read_lenencoded(&self.doc.as_bytes()[valueoffset..])?; + (RawBson::Symbol(s), 4 + s.len() + 1) + } + ElementType::Decimal128 => { + self.verify_enough_bytes(valueoffset, 16)?; + ( + RawBson::Decimal128(Decimal128::from_bytes( + self.doc.as_bytes()[valueoffset..(valueoffset + 16)] + .try_into() + .unwrap(), + )), + 16, + ) + } + ElementType::MinKey => (RawBson::MinKey, 0), + ElementType::MaxKey => (RawBson::MaxKey, 0), + }; + + self.offset = valueoffset + element_size; + self.verify_enough_bytes(valueoffset, element_size)?; + + Ok((key, element)) + }); + + if kvp_result.is_err() { + self.valid = false; + } + + Some(kvp_result) + } +} diff --git a/src/raw/mod.rs b/src/raw/mod.rs new file mode 100644 index 00000000..59f36595 --- /dev/null +++ b/src/raw/mod.rs @@ -0,0 +1,246 @@ +//! An API for interacting with raw BSON bytes. +//! +//! This module provides two document types, [`RawDocumentBuf`] and [`RawDocument`] (akin to +//! [`std::string::String`] and [`str`]), for working with raw BSON documents. These types differ +//! 
from the regular [`crate::Document`] type in that their storage is BSON bytes rather than a
+//! hash-map like Rust type. In certain circumstances, these types can be leveraged for increased
+//! performance.
+//!
+//! This module also provides a [`RawBson`] type for modeling any borrowed BSON element and a
+//! [`RawArray`] type for modeling a borrowed slice of a document containing a BSON array element.
+//!
+//! A [`RawDocumentBuf`] can be created from a `Vec<u8>` containing raw BSON data. A
+//! [`RawDocument`] can be created from anything that can be borrowed as a `&[u8]`. Both types
+//! can access elements via methods similar to those available on the [`crate::Document`] type.
+//! Note that [`RawDocument::get`] (which [`RawDocumentBuf`] calls through to via its `Deref`
+//! implementation) returns a `Result`, since the bytes contained in the document are not fully
+//! validated until trying to access the contained data.
+//!
+//! ```rust
+//! use bson::raw::{
+//!     RawBson,
+//!     RawDocumentBuf,
+//! };
+//!
+//! // See http://bsonspec.org/spec.html for details on the binary encoding of BSON.
+//! let doc = RawDocumentBuf::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?;
+//! let elem = doc.get("hi")?.unwrap();
+//!
+//! assert_eq!(
+//!     elem.as_str(),
+//!     Some("y'all"),
+//! );
+//! # Ok::<(), bson::raw::Error>(())
+//! ```
+//!
+//! ### [`crate::Document`] interop
+//!
+//! A [`RawDocumentBuf`] can be created from a [`crate::Document`]. Internally, this
+//! serializes the [`crate::Document`] to a `Vec<u8>`, and then includes those bytes in the
+//! [`RawDocumentBuf`].
+//!
+//! ```rust
+//! use bson::{
+//!     raw::RawDocumentBuf,
+//!     doc,
+//! };
+//!
+//! let document = doc! {
+//!     "goodbye": {
+//!         "cruel": "world"
+//!     }
+//! };
+//!
+//! let raw = RawDocumentBuf::from_document(&document)?;
+//! let value = raw
+//!     .get_document("goodbye")?
+//!     .get_str("cruel")?;
+//!
+//! assert_eq!(
+//!     value,
+//!     "world",
+//! );
+//! 
# Ok::<(), Box<dyn std::error::Error>>(())
+//! ```
+//!
+//! ### Reference type ([`RawDocument`])
+//!
+//! A BSON document can also be accessed with the [`RawDocument`] type, which is an
+//! unsized type that represents the BSON payload as a `[u8]`. This allows accessing nested
+//! documents without reallocation. [`RawDocument`] must always be accessed via a pointer type,
+//! similar to `[T]` and `str`.
+//!
+//! The example below builds a BSON document from a byte-string literal
+//! and extracts a `&str` from it, performing no heap allocation.
+//! ```rust
+//! use bson::raw::RawDocument;
+//!
+//! let bytes = b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00";
+//! assert_eq!(RawDocument::new(bytes)?.get_str("hi")?, "y'all");
+//! # Ok::<(), Box<dyn std::error::Error>>(())
+//! ```
+//!
+//! ### Iteration
+//!
+//! [`RawDocument`] implements [`IntoIterator`](std::iter::IntoIterator), which can also be
+//! accessed via [`RawDocumentBuf::iter`].
+//!
+//! ```rust
+//! use bson::{
+//!     raw::{
+//!         RawBson,
+//!         RawDocumentBuf,
+//!     },
+//!     doc,
+//! };
+//!
+//! let original_doc = doc! {
+//!     "crate": "bson",
+//!     "year": "2021",
+//! };
+//!
+//! let doc = RawDocumentBuf::from_document(&original_doc)?;
+//! let mut doc_iter = doc.iter();
+//!
+//! let (key, value): (&str, RawBson) = doc_iter.next().unwrap()?;
+//! assert_eq!(key, "crate");
+//! assert_eq!(value.as_str(), Some("bson"));
+//!
+//! let (key, value): (&str, RawBson) = doc_iter.next().unwrap()?;
+//! assert_eq!(key, "year");
+//! assert_eq!(value.as_str(), Some("2021"));
+//! # Ok::<(), bson::raw::Error>(())
+//! 
```
+
+mod array;
+mod bson;
+mod document;
+mod document_buf;
+mod error;
+mod iter;
+#[cfg(test)]
+mod test;
+
+use std::convert::{TryFrom, TryInto};
+
+use crate::de::MIN_BSON_STRING_SIZE;
+
+pub use self::{
+    array::{RawArray, RawArrayIter},
+    bson::{RawBinary, RawBson, RawDbPointer, RawJavaScriptCodeWithScope, RawRegex},
+    document::RawDocument,
+    document_buf::RawDocumentBuf,
+    error::{Error, ErrorKind, Result, ValueAccessError, ValueAccessErrorKind, ValueAccessResult},
+    iter::Iter,
+};
+
+/// Given a u8 slice, return an f64 calculated from the first eight bytes in
+/// little endian order.
+fn f64_from_slice(val: &[u8]) -> Result<f64> {
+    let arr = val
+        .get(0..8)
+        .and_then(|s| s.try_into().ok())
+        .ok_or_else(|| {
+            Error::new_without_key(ErrorKind::MalformedValue {
+                message: format!("expected 8 bytes to read double, instead got {}", val.len()),
+            })
+        })?;
+    Ok(f64::from_le_bytes(arr))
+}
+
+/// Given a u8 slice, return an i32 calculated from the first four bytes in
+/// little endian order.
+fn i32_from_slice(val: &[u8]) -> Result<i32> {
+    let arr = val
+        .get(0..4)
+        .and_then(|s| s.try_into().ok())
+        .ok_or_else(|| {
+            Error::new_without_key(ErrorKind::MalformedValue {
+                message: format!("expected 4 bytes to read i32, instead got {}", val.len()),
+            })
+        })?;
+    Ok(i32::from_le_bytes(arr))
+}
+
+/// Given a u8 slice, return an i64 calculated from the first eight bytes in
+/// little endian order.
+fn i64_from_slice(val: &[u8]) -> Result<i64> {
+    let arr = val
+        .get(0..8)
+        .and_then(|s| s.try_into().ok())
+        .ok_or_else(|| {
+            Error::new_without_key(ErrorKind::MalformedValue {
+                message: format!("expected 8 bytes to read i64, instead got {}", val.len()),
+            })
+        })?;
+    Ok(i64::from_le_bytes(arr))
+}
+
+fn read_nullterminated(buf: &[u8]) -> Result<&str> {
+    let mut splits = buf.splitn(2, |x| *x == 0);
+    let value = splits.next().ok_or_else(|| {
+        Error::new_without_key(ErrorKind::MalformedValue {
+            message: "no value".into(),
+        })
+    })?;
+    if splits.next().is_some() {
+        Ok(try_to_str(value)?)
+    } else {
+        Err(Error::new_without_key(ErrorKind::MalformedValue {
+            message: "expected null terminator".into(),
+        }))
+    }
+}
+
+fn read_lenencoded(buf: &[u8]) -> Result<&str> {
+    let length = i32_from_slice(buf)?; // checked: a short buffer is an error, not a panic
+    let end = checked_add(usize_try_from_i32(length)?, 4)?;
+
+    if end < MIN_BSON_STRING_SIZE as usize {
+        return Err(Error::new_without_key(ErrorKind::MalformedValue {
+            message: format!(
+                "BSON length encoded string needs to be at least {} bytes, instead got {}",
+                MIN_BSON_STRING_SIZE, end
+            ),
+        }));
+    }
+
+    if buf.len() < end {
+        return Err(Error::new_without_key(ErrorKind::MalformedValue {
+            message: format!(
+                "expected buffer to contain at least {} bytes, but it only has {}",
+                end,
+                buf.len()
+            ),
+        }));
+    }
+
+    if buf[end - 1] != 0 {
+        return Err(Error::new_without_key(ErrorKind::MalformedValue {
+            message: "expected string to be null-terminated".to_string(),
+        }));
+    }
+
+    // exclude null byte
+    try_to_str(&buf[4..(end - 1)])
+}
+
+fn try_to_str(data: &[u8]) -> Result<&str> {
+    std::str::from_utf8(data).map_err(|e| Error::new_without_key(ErrorKind::Utf8EncodingError(e)))
+}
+
+fn usize_try_from_i32(i: i32) -> Result<usize> {
+    usize::try_from(i).map_err(|e| {
+        Error::new_without_key(ErrorKind::MalformedValue {
+            message: e.to_string(),
+        })
+    })
+}
+
+fn checked_add(lhs: usize, rhs: usize) -> Result<usize> {
+    lhs.checked_add(rhs).ok_or_else(|| {
+        
Error::new_without_key(ErrorKind::MalformedValue { + message: "attempted to add with overflow".to_string(), + }) + }) +} diff --git a/src/raw/test/mod.rs b/src/raw/test/mod.rs new file mode 100644 index 00000000..234b326d --- /dev/null +++ b/src/raw/test/mod.rs @@ -0,0 +1,504 @@ +mod props; + +use super::*; +use crate::{ + doc, + oid::ObjectId, + raw::error::ValueAccessErrorKind, + spec::BinarySubtype, + Binary, + Bson, + DateTime, + JavaScriptCodeWithScope, + Regex, + Timestamp, +}; +use chrono::{TimeZone, Utc}; + +fn to_bytes(doc: &crate::Document) -> Vec { + let mut docbytes = Vec::new(); + doc.to_writer(&mut docbytes).unwrap(); + docbytes +} + +#[test] +fn string_from_document() { + let docbytes = to_bytes(&doc! { + "this": "first", + "that": "second", + "something": "else", + }); + let rawdoc = RawDocument::new(&docbytes).unwrap(); + assert_eq!( + rawdoc.get("that").unwrap().unwrap().as_str().unwrap(), + "second", + ); +} + +#[test] +fn nested_document() { + let docbytes = to_bytes(&doc! { + "outer": { + "inner": "surprise", + "i64": 6_i64, + }, + }); + let rawdoc = RawDocument::new(&docbytes).unwrap(); + let subdoc = rawdoc + .get("outer") + .expect("get doc result") + .expect("get doc option") + .as_document() + .expect("as doc"); + assert_eq!( + subdoc + .get("inner") + .expect("get str result") + .expect("get str option") + .as_str() + .expect("as str"), + "surprise", + ); + + assert_eq!( + subdoc + .get("i64") + .expect("get i64 result") + .expect("get i64 option") + .as_i64() + .expect("as i64 result"), + 6 + ); +} + +#[test] +fn iterate() { + let docbytes = to_bytes(&doc! 
{ + "apples": "oranges", + "peanut butter": "chocolate", + "easy as": {"do": 1, "re": 2, "mi": 3}, + }); + let rawdoc = RawDocument::new(&docbytes).expect("malformed bson document"); + let mut dociter = rawdoc.into_iter(); + let next = dociter.next().expect("no result").expect("invalid bson"); + assert_eq!(next.0, "apples"); + assert_eq!(next.1.as_str().expect("result was not a str"), "oranges"); + let next = dociter.next().expect("no result").expect("invalid bson"); + assert_eq!(next.0, "peanut butter"); + assert_eq!(next.1.as_str().expect("result was not a str"), "chocolate"); + let next = dociter.next().expect("no result").expect("invalid bson"); + assert_eq!(next.0, "easy as"); + let _doc = next.1.as_document().expect("result was a not a document"); + let next = dociter.next(); + assert!(next.is_none()); +} + +#[test] +fn rawdoc_to_doc() { + let docbytes = to_bytes(&doc! { + "f64": 2.5, + "string": "hello", + "document": {}, + "array": ["binary", "serialized", "object", "notation"], + "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1, 2, 3] }, + "object_id": ObjectId::from_bytes([1, 2, 3, 4, 5,6,7,8,9,10, 11,12]), + "boolean": true, + "datetime": DateTime::now(), + "null": Bson::Null, + "regex": Bson::RegularExpression(Regex { pattern: String::from(r"end\s*$"), options: String::from("i")}), + "javascript": Bson::JavaScriptCode(String::from("console.log(console);")), + "symbol": Bson::Symbol(String::from("artist-formerly-known-as")), + "javascript_with_scope": Bson::JavaScriptCodeWithScope(JavaScriptCodeWithScope{ code: String::from("console.log(msg);"), scope: doc!{"ok": true}}), + "int32": 23i32, + "timestamp": Bson::Timestamp(Timestamp { time: 3542578, increment: 0 }), + "int64": 46i64, + "end": "END", + }); + + let rawdoc = RawDocument::new(&docbytes).expect("invalid document"); + let doc: crate::Document = rawdoc.try_into().expect("invalid bson"); + let round_tripped_bytes = crate::to_vec(&doc).expect("serialize should work"); + 
assert_eq!(round_tripped_bytes, docbytes); + + let mut vec_writer_bytes = vec![]; + doc.to_writer(&mut vec_writer_bytes) + .expect("to writer should work"); + assert_eq!(vec_writer_bytes, docbytes); +} + +#[test] +fn f64() { + #![allow(clippy::float_cmp)] + + let rawdoc = RawDocumentBuf::from_document(&doc! { "f64": 2.5 }).unwrap(); + assert_eq!( + rawdoc + .get("f64") + .expect("error finding key f64") + .expect("no key f64") + .as_f64() + .expect("result was not a f64"), + 2.5, + ); +} + +#[test] +fn string() { + let rawdoc = RawDocumentBuf::from_document(&doc! {"string": "hello"}).unwrap(); + + assert_eq!( + rawdoc + .get("string") + .expect("error finding key string") + .expect("no key string") + .as_str() + .expect("result was not a string"), + "hello", + ); +} + +#[test] +fn document() { + let rawdoc = RawDocumentBuf::from_document(&doc! {"document": {}}).unwrap(); + + let doc = rawdoc + .get("document") + .expect("error finding key document") + .expect("no key document") + .as_document() + .expect("result was not a document"); + assert_eq!(doc.as_bytes(), [5u8, 0, 0, 0, 0].as_ref()); // Empty document +} + +#[test] +fn array() { + let rawdoc = RawDocumentBuf::from_document( + &doc! { "array": ["binary", "serialized", "object", "notation"]}, + ) + .unwrap(); + + let array = rawdoc + .get("array") + .expect("error finding key array") + .expect("no key array") + .as_array() + .expect("result was not an array"); + assert_eq!(array.get_str(0), Ok("binary")); + assert_eq!(array.get_str(3), Ok("notation")); + assert_eq!( + array.get_str(4).unwrap_err().kind, + ValueAccessErrorKind::NotPresent + ); +} + +#[test] +fn binary() { + let rawdoc = RawDocumentBuf::from_document(&doc! 
{ + "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1u8, 2, 3] } + }) + .unwrap(); + let binary: bson::RawBinary<'_> = rawdoc + .get("binary") + .expect("error finding key binary") + .expect("no key binary") + .as_binary() + .expect("result was not a binary object"); + assert_eq!(binary.subtype, BinarySubtype::Generic); + assert_eq!(binary.bytes, &[1, 2, 3]); +} + +#[test] +fn object_id() { + let rawdoc = RawDocumentBuf::from_document(&doc! { + "object_id": ObjectId::from_bytes([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]), + }) + .unwrap(); + let oid = rawdoc + .get("object_id") + .expect("error finding key object_id") + .expect("no key object_id") + .as_object_id() + .expect("result was not an object id"); + assert_eq!(oid.to_hex(), "0102030405060708090a0b0c"); +} + +#[test] +fn boolean() { + let rawdoc = RawDocumentBuf::from_document(&doc! { + "boolean": true, + }) + .unwrap(); + + let boolean = rawdoc + .get("boolean") + .expect("error finding key boolean") + .expect("no key boolean") + .as_bool() + .expect("result was not boolean"); + + assert!(boolean); +} + +#[test] +fn datetime() { + let rawdoc = RawDocumentBuf::from_document(&doc! { + "boolean": true, + "datetime": DateTime::from_chrono(Utc.ymd(2000,10,31).and_hms(12, 30, 45)), + }) + .unwrap(); + let datetime = rawdoc + .get("datetime") + .expect("error finding key datetime") + .expect("no key datetime") + .as_datetime() + .expect("result was not datetime"); + assert_eq!(datetime.to_rfc3339(), "2000-10-31T12:30:45Z"); +} + +#[test] +fn null() { + let rawdoc = RawDocumentBuf::from_document(&doc! { + "null": null, + }) + .unwrap(); + let () = rawdoc + .get("null") + .expect("error finding key null") + .expect("no key null") + .as_null() + .expect("was not null"); +} + +#[test] +fn regex() { + let rawdoc = RawDocumentBuf::from_document(&doc! 
{ + "regex": Bson::RegularExpression(Regex { pattern: String::from(r"end\s*$"), options: String::from("i")}), + }).unwrap(); + let regex = rawdoc + .get("regex") + .expect("error finding key regex") + .expect("no key regex") + .as_regex() + .expect("was not regex"); + assert_eq!(regex.pattern, r"end\s*$"); + assert_eq!(regex.options, "i"); +} +#[test] +fn javascript() { + let rawdoc = RawDocumentBuf::from_document(&doc! { + "javascript": Bson::JavaScriptCode(String::from("console.log(console);")), + }) + .unwrap(); + let js = rawdoc + .get("javascript") + .expect("error finding key javascript") + .expect("no key javascript") + .as_javascript() + .expect("was not javascript"); + assert_eq!(js, "console.log(console);"); +} + +#[test] +fn symbol() { + let rawdoc = RawDocumentBuf::from_document(&doc! { + "symbol": Bson::Symbol(String::from("artist-formerly-known-as")), + }) + .unwrap(); + + let symbol = rawdoc + .get("symbol") + .expect("error finding key symbol") + .expect("no key symbol") + .as_symbol() + .expect("was not symbol"); + assert_eq!(symbol, "artist-formerly-known-as"); +} + +#[test] +fn javascript_with_scope() { + let rawdoc = RawDocumentBuf::from_document(&doc! { + "javascript_with_scope": Bson::JavaScriptCodeWithScope(JavaScriptCodeWithScope { + code: String::from("console.log(msg);"), + scope: doc! 
{ "ok": true } + }), + }) + .unwrap(); + let js_with_scope = rawdoc + .get("javascript_with_scope") + .expect("error finding key javascript_with_scope") + .expect("no key javascript_with_scope") + .as_javascript_with_scope() + .expect("was not javascript with scope"); + assert_eq!(js_with_scope.code(), "console.log(msg);"); + let (scope_key, scope_value_bson) = js_with_scope + .scope() + .into_iter() + .next() + .expect("no next value in scope") + .expect("invalid element"); + assert_eq!(scope_key, "ok"); + let scope_value = scope_value_bson.as_bool().expect("not a boolean"); + assert!(scope_value); +} + +#[test] +fn int32() { + let rawdoc = RawDocumentBuf::from_document(&doc! { + "int32": 23i32, + }) + .unwrap(); + let int32 = rawdoc + .get("int32") + .expect("error finding key int32") + .expect("no key int32") + .as_i32() + .expect("was not int32"); + assert_eq!(int32, 23i32); +} + +#[test] +fn timestamp() { + let rawdoc = RawDocumentBuf::from_document(&doc! { + "timestamp": Bson::Timestamp(Timestamp { time: 3542578, increment: 7 }), + }) + .unwrap(); + let ts = rawdoc + .get("timestamp") + .expect("error finding key timestamp") + .expect("no key timestamp") + .as_timestamp() + .expect("was not a timestamp"); + + assert_eq!(ts.increment, 7); + assert_eq!(ts.time, 3542578); +} + +#[test] +fn int64() { + let rawdoc = RawDocumentBuf::from_document(&doc! { + "int64": 46i64, + }) + .unwrap(); + let int64 = rawdoc + .get("int64") + .expect("error finding key int64") + .expect("no key int64") + .as_i64() + .expect("was not int64"); + assert_eq!(int64, 46i64); +} +#[test] +fn document_iteration() { + let doc = doc! 
{ + "f64": 2.5, + "string": "hello", + "document": {}, + "array": ["binary", "serialized", "object", "notation"], + "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1u8, 2, 3] }, + "object_id": ObjectId::from_bytes([1, 2, 3, 4, 5,6,7,8,9,10, 11,12]), + "boolean": true, + "datetime": DateTime::now(), + "null": Bson::Null, + "regex": Bson::RegularExpression(Regex { pattern: String::from(r"end\s*$"), options: String::from("i")}), + "javascript": Bson::JavaScriptCode(String::from("console.log(console);")), + "symbol": Bson::Symbol(String::from("artist-formerly-known-as")), + "javascript_with_scope": Bson::JavaScriptCodeWithScope(JavaScriptCodeWithScope{ code: String::from("console.log(msg);"), scope: doc!{"ok": true}}), + "int32": 23i32, + "timestamp": Bson::Timestamp(Timestamp { time: 3542578, increment: 0 }), + "int64": 46i64, + "end": "END", + }; + let rawdoc = RawDocumentBuf::from_document(&doc).unwrap(); + let rawdocref = rawdoc.as_ref(); + + assert_eq!( + rawdocref + .into_iter() + .collect::>>() + .expect("collecting iterated doc") + .len(), + 17 + ); + let end = rawdoc + .get("end") + .expect("error finding key end") + .expect("no key end") + .as_str() + .expect("was not str"); + assert_eq!(end, "END"); +} + +#[test] +fn into_bson_conversion() { + let docbytes = to_bytes(&doc! 
{ + "f64": 2.5, + "string": "hello", + "document": {}, + "array": ["binary", "serialized", "object", "notation"], + "object_id": ObjectId::from_bytes([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]), + "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1u8, 2, 3] }, + "boolean": false, + }); + let rawbson = RawBson::Document(RawDocument::new(docbytes.as_slice()).unwrap()); + let b: Bson = rawbson.try_into().expect("invalid bson"); + let doc = b.as_document().expect("not a document"); + assert_eq!(*doc.get("f64").expect("f64 not found"), Bson::Double(2.5)); + assert_eq!( + *doc.get("string").expect("string not found"), + Bson::String(String::from("hello")) + ); + assert_eq!( + *doc.get("document").expect("document not found"), + Bson::Document(doc! {}) + ); + assert_eq!( + *doc.get("array").expect("array not found"), + Bson::Array( + vec!["binary", "serialized", "object", "notation"] + .into_iter() + .map(|s| Bson::String(String::from(s))) + .collect() + ) + ); + assert_eq!( + *doc.get("object_id").expect("object_id not found"), + Bson::ObjectId(ObjectId::from_bytes([ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 + ])) + ); + assert_eq!( + *doc.get("binary").expect("binary not found"), + Bson::Binary(Binary { + subtype: BinarySubtype::Generic, + bytes: vec![1, 2, 3] + }) + ); + assert_eq!( + *doc.get("boolean").expect("boolean not found"), + Bson::Boolean(false) + ); +} + +use props::arbitrary_bson; +use proptest::prelude::*; +use std::convert::TryInto; + +proptest! 
{ + #[test] + fn no_crashes(s: Vec) { + let _ = RawDocumentBuf::new(s); + } + + #[test] + fn roundtrip_bson(bson in arbitrary_bson()) { + let doc = doc!{"bson": bson}; + let raw = to_bytes(&doc); + let raw = RawDocumentBuf::new(raw); + prop_assert!(raw.is_ok()); + let raw = raw.unwrap(); + let roundtrip: Result = raw.try_into(); + prop_assert!(roundtrip.is_ok()); + let roundtrip = roundtrip.unwrap(); + prop_assert_eq!(doc, roundtrip); + } +} diff --git a/src/raw/test/props.rs b/src/raw/test/props.rs new file mode 100644 index 00000000..850dcade --- /dev/null +++ b/src/raw/test/props.rs @@ -0,0 +1,64 @@ +use crate::{spec::BinarySubtype, Binary, Bson, Document, JavaScriptCodeWithScope, Regex}; + +use proptest::prelude::*; + +fn arbitrary_binary_subtype() -> impl Strategy { + prop_oneof![ + Just(BinarySubtype::Generic), + Just(BinarySubtype::Function), + Just(BinarySubtype::BinaryOld), + Just(BinarySubtype::UuidOld), + Just(BinarySubtype::Uuid), + Just(BinarySubtype::Md5), + ] +} + +pub(crate) fn arbitrary_bson() -> impl Strategy { + let leaf = prop_oneof![ + Just(Bson::Null), + any::().prop_map(Bson::String), + any::().prop_map(Bson::Boolean), + any::().prop_map(Bson::Double), + any::().prop_map(Bson::Int32), + any::().prop_map(Bson::Int64), + any::<(String, String)>().prop_map(|(pattern, options)| { + let mut chars: Vec<_> = options.chars().collect(); + chars.sort_unstable(); + + let options: String = chars.into_iter().collect(); + Bson::RegularExpression(Regex { pattern, options }) + }), + any::<[u8; 12]>().prop_map(|bytes| Bson::ObjectId(crate::oid::ObjectId::from_bytes(bytes))), + (arbitrary_binary_subtype(), any::>()).prop_map(|(subtype, bytes)| { + let bytes = if let BinarySubtype::BinaryOld = subtype { + // BinarySubtype::BinaryOld expects a four byte prefix, which the bson::Bson type + // leaves up to the caller. 
+ + let mut newbytes = Vec::with_capacity(bytes.len() + 4); + newbytes.extend_from_slice(&(bytes.len() as i32).to_le_bytes()); + newbytes.extend_from_slice(&bytes); + newbytes + } else { + bytes + }; + Bson::Binary(Binary { subtype, bytes }) + }), + any::().prop_map(Bson::JavaScriptCode), + ]; + + leaf.prop_recursive(4, 256, 10, |inner| { + prop_oneof![ + prop::collection::hash_map("[^\0]*", inner.clone(), 0..12) + .prop_map(|map| Bson::Document(map.into_iter().collect())), + prop::collection::vec(inner.clone(), 0..12).prop_map(Bson::Array), + ( + prop::collection::hash_map("[^\0]*", inner, 0..12) + .prop_map(|map| map.into_iter().collect::()), + any::() + ) + .prop_map(|(scope, code)| Bson::JavaScriptCodeWithScope( + JavaScriptCodeWithScope { code, scope } + )), + ] + }) +} diff --git a/src/tests/spec/corpus.rs b/src/tests/spec/corpus.rs index 330a091f..9c6844dd 100644 --- a/src/tests/spec/corpus.rs +++ b/src/tests/spec/corpus.rs @@ -3,7 +3,7 @@ use std::{ str::FromStr, }; -use crate::{tests::LOCK, Bson, Document}; +use crate::{raw::RawDocument, tests::LOCK, Bson, Document}; use pretty_assertions::assert_eq; use serde::Deserialize; @@ -79,6 +79,11 @@ fn run_test(test: TestFile) { let todocument_documentfromreader_cb: Document = crate::to_document(&documentfromreader_cb).expect(&description); + let document_from_raw_document: Document = RawDocument::new(canonical_bson.as_slice()) + .expect(&description) + .try_into() + .expect(&description); + // These cover the ways to serialize those `Documents` back to BSON. 
let mut documenttowriter_documentfromreader_cb = Vec::new(); documentfromreader_cb @@ -103,6 +108,11 @@ fn run_test(test: TestFile) { let tovec_documentfromreader_cb = crate::to_vec(&documentfromreader_cb).expect(&description); + let mut documenttowriter_document_from_raw_document = Vec::new(); + document_from_raw_document + .to_writer(&mut documenttowriter_document_from_raw_document) + .expect(&description); + // native_to_bson( bson_to_native(cB) ) = cB // now we ensure the hex for all 5 are equivalent to the canonical BSON provided by the @@ -142,6 +152,13 @@ fn run_test(test: TestFile) { description, ); + assert_eq!( + hex::encode(documenttowriter_document_from_raw_document).to_lowercase(), + valid.canonical_bson.to_lowercase(), + "{}", + description, + ); + // NaN == NaN is false, so we skip document comparisons that contain NaN if !description.to_ascii_lowercase().contains("nan") && !description.contains("decq541") { assert_eq!(documentfromreader_cb, fromreader_cb, "{}", description); @@ -157,6 +174,12 @@ fn run_test(test: TestFile) { "{}", description ); + + assert_eq!( + document_from_raw_document, documentfromreader_cb, + "{}", + description + ); } // native_to_bson( bson_to_native(dB) ) = cB @@ -189,6 +212,21 @@ fn run_test(test: TestFile) { description, ); + let document_from_raw_document: Document = RawDocument::new(db.as_slice()) + .expect(&description) + .try_into() + .expect(&description); + let mut documenttowriter_document_from_raw_document = Vec::new(); + document_from_raw_document + .to_writer(&mut documenttowriter_document_from_raw_document) + .expect(&description); + assert_eq!( + hex::encode(documenttowriter_document_from_raw_document).to_lowercase(), + valid.canonical_bson.to_lowercase(), + "{}", + description, + ); + // NaN == NaN is false, so we skip document comparisons that contain NaN if !description.contains("NaN") { assert_eq!( @@ -196,6 +234,12 @@ fn run_test(test: TestFile) { "{}", description ); + + assert_eq!( + 
document_from_raw_document, documentfromreader_cb, + "{}", + description + ); } } @@ -352,6 +396,16 @@ fn run_test(test: TestFile) { } for decode_error in test.decode_errors.iter() { + let description = format!( + "{} decode error: {}", + test.bson_type, decode_error.description + ); + let bson = hex::decode(&decode_error.bson).expect("should decode from hex"); + + if let Ok(doc) = RawDocument::new(bson.as_slice()) { + Document::try_from(doc).expect_err(description.as_str()); + } + // No meaningful definition of "byte count" for an arbitrary reader. if decode_error.description == "Stated length less than byte count, with garbage after envelope" @@ -359,11 +413,6 @@ fn run_test(test: TestFile) { continue; } - let description = format!( - "{} decode error: {}", - test.bson_type, decode_error.description - ); - let bson = hex::decode(&decode_error.bson).expect("should decode from hex"); Document::from_reader(bson.as_slice()).expect_err(&description); crate::from_reader::<_, Document>(bson.as_slice()).expect_err(description.as_str());