Skip to content

RUST-284: Incorporate rawbson code from rawbson = "0.2.1" in mod raw #229

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 48 commits into from
Oct 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
6403c4a
RUST-284 Add raw types for more efficient usage of BSON
jcdyer Feb 7, 2021
56299b3
RUST-284 Updates to raw BSON API, implementation, and documentation
saghm Feb 10, 2021
4dda9b7
add raw code with scope type
saghm Feb 11, 2021
7770cf1
add documentation to raw regex type
saghm Feb 11, 2021
77adbdb
revise doc comments on raw timestamp getters
saghm Feb 11, 2021
bd43739
name field in raw malformed value error
saghm Feb 11, 2021
1c8e856
use raw code with scope type in RawBson
saghm Feb 16, 2021
12ddf7f
document RawArrayIter
saghm Feb 16, 2021
43485f7
temp
saghm Feb 16, 2021
0658a81
refactor top-level module into submodules
saghm Feb 16, 2021
07b4e57
code compiling, tests passing
patrickfreed Oct 4, 2021
e02b45b
fix clippy
patrickfreed Oct 4, 2021
23e8d6d
include expected and unexpected types in Error::UnexpectedType
patrickfreed Oct 6, 2021
1769a14
fix typo
patrickfreed Oct 6, 2021
25a68c1
move props definitions to test submodule
patrickfreed Oct 6, 2021
8578553
return errors instead of panicking in certain places
patrickfreed Oct 6, 2021
c9c14c4
share read_bool code
patrickfreed Oct 6, 2021
4fc4b65
wrap Utf8EncodingError
patrickfreed Oct 6, 2021
4eda6de
rename into_inner -> into_vec
patrickfreed Oct 6, 2021
db3f8e7
remove unwrap from `RawDocument::from_document`
patrickfreed Oct 6, 2021
7dca3a3
add Cow From implementations
patrickfreed Oct 6, 2021
b707387
mark RawArray and RawDocumentRef as repr(transparent)
patrickfreed Oct 6, 2021
8714cc1
use a vec internally in RawDocument, fix tests
patrickfreed Oct 6, 2021
ffe0453
rename RawDocumentIter to Iter, add docstring
patrickfreed Oct 6, 2021
7f641d8
store the key in the error, reduce possibility of panics
patrickfreed Oct 7, 2021
308e6d4
wip struct -> enum
patrickfreed Oct 7, 2021
24dc86a
all but dbpointer done
patrickfreed Oct 12, 2021
4e48b01
implement dbpointer, cleanup
patrickfreed Oct 12, 2021
be6b9fe
corpus wip
patrickfreed Oct 12, 2021
bba68ed
finish corpus decode errors
patrickfreed Oct 12, 2021
ddd120a
wip typed helpers
patrickfreed Oct 12, 2021
e59b0a7
finish typed helpers, add key to ValueAccessError, various cleanup
patrickfreed Oct 13, 2021
e7ff8fb
rename RawDocumentRef to RawDoc, update documentation
patrickfreed Oct 13, 2021
69fc427
rename RawArray to RawArr
patrickfreed Oct 13, 2021
f67f9b1
split code into different files
patrickfreed Oct 13, 2021
f71b02e
various cleanup
patrickfreed Oct 13, 2021
1ef11d5
add debug impl for rawdoc wrappers
patrickfreed Oct 13, 2021
164de7c
fix tests
patrickfreed Oct 13, 2021
c064857
fix clippy
patrickfreed Oct 13, 2021
454c05a
fix rustdoc
patrickfreed Oct 13, 2021
a06068e
fix test
patrickfreed Oct 13, 2021
2379298
minor cleanup
patrickfreed Oct 13, 2021
2220a3f
test roundtrip
patrickfreed Oct 14, 2021
bd2daee
use map_err
patrickfreed Oct 14, 2021
617235d
use impl Intoi<String> in error constructor
patrickfreed Oct 14, 2021
82099e5
fix up docstrings
patrickfreed Oct 18, 2021
260c245
bump proptest to 1.0.0
patrickfreed Oct 18, 2021
eecedc8
rename `RawDoc` -> `RawDocument`, `RawDocument` -> `RawDocumentBuf`
patrickfreed Oct 22, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,10 @@ serde_bytes = "0.11.5"

[dev-dependencies]
assert_matches = "1.2"
serde_bytes = "0.11"
criterion = "0.3.0"
pretty_assertions = "0.6.1"
proptest = "1.0.0"
serde_bytes = "0.11"
chrono = { version = "0.4", features = ["serde"] }

[package.metadata.docs.rs]
Expand Down
20 changes: 13 additions & 7 deletions src/bson.rs
Original file line number Diff line number Diff line change
Expand Up @@ -730,13 +730,10 @@ impl Bson {
if let Ok(regex) = doc.get_document("$regularExpression") {
if let Ok(pattern) = regex.get_str("pattern") {
if let Ok(options) = regex.get_str("options") {
let mut options: Vec<_> = options.chars().collect();
options.sort_unstable();

return Bson::RegularExpression(Regex {
pattern: pattern.into(),
options: options.into_iter().collect(),
});
return Bson::RegularExpression(Regex::new(
pattern.into(),
options.into(),
));
}
}
}
Expand Down Expand Up @@ -1014,6 +1011,15 @@ pub struct Regex {
pub options: String,
}

impl Regex {
    /// Builds a `Regex` from a pattern and an options string, storing the option characters
    /// in sorted order so that equivalent option sets always produce the same `options` value.
    pub(crate) fn new(pattern: String, options: String) -> Self {
        let mut flags = options.chars().collect::<Vec<char>>();
        flags.sort_unstable();
        Self {
            pattern,
            options: flags.iter().collect(),
        }
    }
}

impl Display for Regex {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
write!(fmt, "/{}/{}", self.pattern, self.options)
Expand Down
2 changes: 1 addition & 1 deletion src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ pub(crate) fn read_string<R: Read + ?Sized>(reader: &mut R, utf8_lossy: bool) ->
Ok(s)
}

fn read_bool<R: Read>(mut reader: R) -> Result<bool> {
pub(crate) fn read_bool<R: Read>(mut reader: R) -> Result<bool> {
let val = read_u8(&mut reader)?;
if val > 1 {
return Err(Error::invalid_value(
Expand Down
2 changes: 1 addition & 1 deletion src/de/raw.rs
Original file line number Diff line number Diff line change
Expand Up @@ -625,7 +625,7 @@ impl<'de> serde::de::MapAccess<'de> for Decimal128Access {
where
V: serde::de::DeserializeSeed<'de>,
{
seed.deserialize(Decimal128Deserializer(self.decimal.clone()))
seed.deserialize(Decimal128Deserializer(self.decimal))
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/decimal128.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::fmt;
///
/// Currently, this type can only be used to round-trip through BSON. See
/// [RUST-36](https://jira.mongodb.org/browse/RUST-36) to track the progress towards a complete implementation.
#[derive(Clone, PartialEq)]
#[derive(Copy, Clone, PartialEq)]
pub struct Decimal128 {
/// BSON bytes containing the decimal128. Stored for round tripping.
pub(crate) bytes: [u8; 128 / 8],
Expand Down
9 changes: 1 addition & 8 deletions src/extjson/models.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,14 +116,7 @@ pub(crate) struct RegexBody {

impl Regex {
pub(crate) fn parse(self) -> crate::Regex {
let mut chars: Vec<_> = self.body.options.chars().collect();
chars.sort_unstable();
let options: String = chars.into_iter().collect();

crate::Regex {
pattern: self.body.pattern,
options,
}
crate::Regex::new(self.body.pattern, self.body.options)
}
}

Expand Down
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,7 @@ pub use self::{
Deserializer,
},
decimal128::Decimal128,
raw::{RawDocument, RawDocumentBuf, RawArray},
ser::{to_bson, to_document, to_vec, Serializer},
uuid::{Uuid, UuidRepresentation},
};
Expand All @@ -293,6 +294,7 @@ pub mod decimal128;
pub mod document;
pub mod extjson;
pub mod oid;
pub mod raw;
pub mod ser;
pub mod serde_helpers;
pub mod spec;
Expand Down
242 changes: 242 additions & 0 deletions src/raw/array.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
use std::convert::TryFrom;

use super::{
error::{ValueAccessError, ValueAccessErrorKind, ValueAccessResult},
Error,
Iter,
RawBinary,
RawBson,
RawDocument,
RawRegex,
Result,
};
use crate::{oid::ObjectId, spec::ElementType, Bson, DateTime, Timestamp};

/// A slice of a BSON document containing a BSON array value (akin to [`std::str`]). This can be
/// retrieved from a [`RawDocument`] via [`RawDocument::get`].
///
/// This is an _unsized_ type, meaning that it must always be used behind a pointer like `&`.
///
/// Accessing elements within a [`RawArray`] is similar to element access in [`crate::Document`],
/// but because the contents are parsed during iteration instead of at creation time, format errors
/// can happen at any time during use.
///
/// Iterating over a [`RawArray`] yields either an error or a value that borrows from the
/// original document without making any additional allocations.
///
/// ```
/// use bson::{doc, raw::RawDocument};
///
/// let doc = doc! {
/// "x": [1, true, "two", 5.5]
/// };
/// let bytes = bson::to_vec(&doc)?;
///
/// let rawdoc = RawDocument::new(bytes.as_slice())?;
/// let rawarray = rawdoc.get_array("x")?;
///
/// for v in rawarray {
/// println!("{:?}", v?);
/// }
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```
///
/// Individual elements can be accessed using [`RawArray::get`] or any of
/// the type-specific getters, such as [`RawArray::get_object_id`] or
/// [`RawArray::get_str`]. Note that accessing elements is an O(N) operation, as it
/// requires iterating through the array from the beginning to find the requested index.
///
/// ```
/// # use bson::raw::{ValueAccessError};
/// use bson::{doc, raw::RawDocument};
///
/// let doc = doc! {
/// "x": [1, true, "two", 5.5]
/// };
/// let bytes = bson::to_vec(&doc)?;
///
/// let rawdoc = RawDocument::new(bytes.as_slice())?;
/// let rawarray = rawdoc.get_array("x")?;
///
/// assert_eq!(rawarray.get_bool(1)?, true);
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```
// `#[repr(transparent)]` gives `RawArray` the same layout as its single `RawDocument` field;
// `RawArray::from_doc` relies on this when it casts a `&RawDocument` to a `&RawArray`.
#[derive(PartialEq)]
#[repr(transparent)]
pub struct RawArray {
// The document holding the array's data. Iterating the array walks this document and yields
// only the values, discarding the keys.
pub(crate) doc: RawDocument,
}

impl RawArray {
pub(crate) fn from_doc(doc: &RawDocument) -> &RawArray {
// SAFETY:
//
// Dereferencing a raw pointer requires unsafe due to the potential that the pointer is
// null, dangling, or misaligned. We know the pointer is not null or dangling due to the
// fact that it's created by a safe reference. Converting &RawDocument to *const
// RawDocument will be properly aligned due to them being references to the same type,
// and converting *const RawDocument to *const RawArray is aligned due to the fact that
// the only field in a RawArray is a RawDocument, meaning the structs are represented
// identically at the byte level.
unsafe { &*(doc as *const RawDocument as *const RawArray) }
}

/// Gets a reference to the value at the given index.
pub fn get(&self, index: usize) -> Result<Option<RawBson<'_>>> {
self.into_iter().nth(index).transpose()
}

fn get_with<'a, T>(
&'a self,
index: usize,
expected_type: ElementType,
f: impl FnOnce(RawBson<'a>) -> Option<T>,
) -> ValueAccessResult<T> {
let bson = self
.get(index)
.map_err(|e| ValueAccessError {
key: index.to_string(),
kind: ValueAccessErrorKind::InvalidBson(e),
})?
.ok_or(ValueAccessError {
key: index.to_string(),
kind: ValueAccessErrorKind::NotPresent,
})?;
match f(bson) {
Some(t) => Ok(t),
None => Err(ValueAccessError {
key: index.to_string(),
kind: ValueAccessErrorKind::UnexpectedType {
expected: expected_type,
actual: bson.element_type(),
},
}),
}
}

/// Gets the BSON double at the given index or returns an error if the value at that index isn't
/// a double.
pub fn get_f64(&self, index: usize) -> ValueAccessResult<f64> {
self.get_with(index, ElementType::Double, RawBson::as_f64)
}

/// Gets a reference to the string at the given index or returns an error if the
/// value at that index isn't a string.
pub fn get_str(&self, index: usize) -> ValueAccessResult<&str> {
self.get_with(index, ElementType::String, RawBson::as_str)
}

/// Gets a reference to the document at the given index or returns an error if the
/// value at that index isn't a document.
pub fn get_document(&self, index: usize) -> ValueAccessResult<&RawDocument> {
self.get_with(index, ElementType::EmbeddedDocument, RawBson::as_document)
}

/// Gets a reference to the array at the given index or returns an error if the
/// value at that index isn't a array.
pub fn get_array(&self, index: usize) -> ValueAccessResult<&RawArray> {
self.get_with(index, ElementType::Array, RawBson::as_array)
}

/// Gets a reference to the BSON binary value at the given index or returns an error if the
/// value at that index isn't a binary.
pub fn get_binary(&self, index: usize) -> ValueAccessResult<RawBinary<'_>> {
self.get_with(index, ElementType::Binary, RawBson::as_binary)
}

/// Gets the ObjectId at the given index or returns an error if the value at that index isn't an
/// ObjectId.
pub fn get_object_id(&self, index: usize) -> ValueAccessResult<ObjectId> {
self.get_with(index, ElementType::ObjectId, RawBson::as_object_id)
}

/// Gets the boolean at the given index or returns an error if the value at that index isn't a
/// boolean.
pub fn get_bool(&self, index: usize) -> ValueAccessResult<bool> {
self.get_with(index, ElementType::Boolean, RawBson::as_bool)
}

/// Gets the DateTime at the given index or returns an error if the value at that index isn't a
/// DateTime.
pub fn get_datetime(&self, index: usize) -> ValueAccessResult<DateTime> {
self.get_with(index, ElementType::DateTime, RawBson::as_datetime)
}

/// Gets a reference to the BSON regex at the given index or returns an error if the
/// value at that index isn't a regex.
pub fn get_regex(&self, index: usize) -> ValueAccessResult<RawRegex<'_>> {
self.get_with(index, ElementType::RegularExpression, RawBson::as_regex)
}

/// Gets a reference to the BSON timestamp at the given index or returns an error if the
/// value at that index isn't a timestamp.
pub fn get_timestamp(&self, index: usize) -> ValueAccessResult<Timestamp> {
self.get_with(index, ElementType::Timestamp, RawBson::as_timestamp)
}

/// Gets the BSON int32 at the given index or returns an error if the value at that index isn't
/// a 32-bit integer.
pub fn get_i32(&self, index: usize) -> ValueAccessResult<i32> {
self.get_with(index, ElementType::Int32, RawBson::as_i32)
}

/// Gets BSON int64 at the given index or returns an error if the value at that index isn't a
/// 64-bit integer.
pub fn get_i64(&self, index: usize) -> ValueAccessResult<i64> {
self.get_with(index, ElementType::Int64, RawBson::as_i64)
}

/// Gets a reference to the raw bytes of the [`RawArray`].
pub fn as_bytes(&self) -> &[u8] {
self.doc.as_bytes()
}
}

impl std::fmt::Debug for RawArray {
    /// Formats the array as `RawArray { data: "<hex>" }`, where `<hex>` is the hex encoding of
    /// the underlying document bytes.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let hex_data = hex::encode(self.doc.as_bytes());
        let mut builder = f.debug_struct("RawArray");
        builder.field("data", &hex_data);
        builder.finish()
    }
}

impl TryFrom<&RawArray> for Vec<Bson> {
    type Error = Error;

    /// Converts every element of the raw array into an owned [`Bson`] value, stopping at the
    /// first element that fails to parse or convert and returning that error.
    fn try_from(arr: &RawArray) -> Result<Vec<Bson>> {
        let mut values = Vec::new();
        for element in arr {
            values.push(Bson::try_from(element?)?);
        }
        Ok(values)
    }
}

impl<'a> IntoIterator for &'a RawArray {
    type Item = Result<RawBson<'a>>;
    type IntoIter = RawArrayIter<'a>;

    /// Creates an iterator over the array's values by wrapping an iterator over the
    /// underlying document.
    fn into_iter(self) -> Self::IntoIter {
        let inner = self.doc.into_iter();
        RawArrayIter { inner }
    }
}

/// An iterator over borrowed raw BSON array values.
///
/// Obtained by calling `into_iter` on a `&RawArray`. Each item is either a parsed value
/// borrowing from the array or the error encountered while reading it.
pub struct RawArrayIter<'a> {
// Iterator over the wrapped document's (key, value) entries; only the values are yielded.
inner: Iter<'a>,
}

impl<'a> Iterator for RawArrayIter<'a> {
    type Item = Result<RawBson<'a>>;

    /// Advances the underlying document iterator and yields only the value of the next
    /// entry, dropping its key. Errors are passed through unchanged.
    fn next(&mut self) -> Option<Self::Item> {
        self.inner
            .next()
            .map(|entry| entry.map(|(_, value)| value))
    }
}
Loading