From 75d544c474d17c3531c5633e79f3de6adf909d6d Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Mon, 18 Mar 2024 12:12:33 -0400 Subject: [PATCH 1/4] add append_ref --- src/raw/document_buf.rs | 184 +++++++++++++++++++++++++++++----------- 1 file changed, 134 insertions(+), 50 deletions(-) diff --git a/src/raw/document_buf.rs b/src/raw/document_buf.rs index 23981064..90d54df0 100644 --- a/src/raw/document_buf.rs +++ b/src/raw/document_buf.rs @@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize}; use crate::{ de::MIN_BSON_DOCUMENT_SIZE, - spec::BinarySubtype, + spec::{BinarySubtype, ElementType}, Document, RawBinaryRef, RawJavaScriptCodeWithScopeRef, @@ -215,42 +215,19 @@ impl RawDocumentBuf { /// # Ok::<(), Error>(()) /// ``` pub fn append(&mut self, key: impl AsRef, value: impl Into) { - fn append_string(doc: &mut RawDocumentBuf, value: &str) { - doc.data - .extend(((value.as_bytes().len() + 1) as i32).to_le_bytes()); - doc.data.extend(value.as_bytes()); - doc.data.push(0); - } - - fn append_cstring(doc: &mut RawDocumentBuf, value: &str) { - if value.contains('\0') { - panic!("cstr includes interior null byte: {}", value) - } - doc.data.extend(value.as_bytes()); - doc.data.push(0); - } - - let original_len = self.data.len(); - - // write the key for the next value to the end - // the element type will replace the previous null byte terminator of the document - append_cstring(self, key.as_ref()); - let value = value.into(); - let element_type = value.element_type(); - - match value { + self.append_to_data(key, value.element_type(), |data| match value { RawBson::Int32(i) => { - self.data.extend(i.to_le_bytes()); + data.extend(i.to_le_bytes()); } RawBson::String(s) => { - append_string(self, s.as_str()); + append_string(data, s.as_str()); } RawBson::Document(d) => { - self.data.extend(d.into_bytes()); + data.extend(d.into_bytes()); } RawBson::Array(a) => { - self.data.extend(a.into_vec()); + data.extend(a.into_vec()); } RawBson::Binary(b) => { let len = RawBinaryRef { @@ -258,38 +235,38 @@ impl RawDocumentBuf { subtype: b.subtype, } .len(); - self.data.extend(len.to_le_bytes()); - self.data.push(b.subtype.into()); + data.extend(len.to_le_bytes()); + data.push(b.subtype.into()); if let BinarySubtype::BinaryOld = b.subtype { - self.data.extend((len - 4).to_le_bytes()) + data.extend((len - 4).to_le_bytes()) } - self.data.extend(b.bytes); + data.extend(b.bytes); } RawBson::Boolean(b) => { - self.data.push(b as u8); + data.push(b as u8); } RawBson::DateTime(dt) => { - self.data.extend(dt.timestamp_millis().to_le_bytes()); + data.extend(dt.timestamp_millis().to_le_bytes()); } RawBson::DbPointer(dbp) => { - append_string(self, dbp.namespace.as_str()); - self.data.extend(dbp.id.bytes()); + append_string(data, dbp.namespace.as_str()); + data.extend(dbp.id.bytes()); } RawBson::Decimal128(d) => { - self.data.extend(d.bytes()); + data.extend(d.bytes()); } RawBson::Double(d) => { - self.data.extend(d.to_le_bytes()); + data.extend(d.to_le_bytes()); } RawBson::Int64(i) => { - self.data.extend(i.to_le_bytes()); + data.extend(i.to_le_bytes()); } RawBson::RegularExpression(re) => { - append_cstring(self, re.pattern.as_str()); - append_cstring(self, re.options.as_str()); + append_cstring(data, re.pattern.as_str()); + append_cstring(data, re.options.as_str()); } RawBson::JavaScriptCode(js) => { - append_string(self, js.as_str()); + append_string(data, js.as_str()); } RawBson::JavaScriptCodeWithScope(code_w_scope) => { let len = RawJavaScriptCodeWithScopeRef { @@ -297,21 +274,113 @@ impl RawDocumentBuf { scope: &code_w_scope.scope, } .len(); - self.data.extend(len.to_le_bytes()); - append_string(self, code_w_scope.code.as_str()); - self.data.extend(code_w_scope.scope.into_bytes()); + data.extend(len.to_le_bytes()); + append_string(data, code_w_scope.code.as_str()); + data.extend(code_w_scope.scope.into_bytes()); } RawBson::Timestamp(ts) => { - self.data.extend(ts.to_le_i64().to_le_bytes()); + data.extend(ts.to_le_i64().to_le_bytes()); } RawBson::ObjectId(oid) => { - self.data.extend(oid.bytes()); + data.extend(oid.bytes()); } RawBson::Symbol(s) => { - append_string(self, s.as_str()); + append_string(data, s.as_str()); } RawBson::Null | RawBson::Undefined | RawBson::MinKey | RawBson::MaxKey => {} - } + }) + } + + /// Append a key value pair to the end of the document without checking to see if + /// the key already exists. + /// + /// It is a user error to append the same key more than once to the same document, and it may + /// result in errors when communicating with MongoDB. + /// + /// If the provided key contains an interior null byte, this method will panic. + pub fn append_ref<'a>(&mut self, key: impl AsRef, value: impl Into>) { + let value = value.into(); + self.append_to_data(key, value.element_type(), |data| match value { + RawBsonRef::Int32(i) => { + data.extend(i.to_le_bytes()); + } + RawBsonRef::String(s) => { + append_string(data, s); + } + RawBsonRef::Document(d) => { + data.extend(d.as_bytes()); + } + RawBsonRef::Array(a) => { + data.extend(a.as_bytes()); + } + RawBsonRef::Binary(b) => { + let len = b.len(); + data.extend(len.to_le_bytes()); + data.push(b.subtype.into()); + if let BinarySubtype::BinaryOld = b.subtype { + data.extend((len - 4).to_le_bytes()) + } + data.extend(b.bytes); + } + RawBsonRef::Boolean(b) => { + data.push(b as u8); + } + RawBsonRef::DateTime(dt) => { + data.extend(dt.timestamp_millis().to_le_bytes()); + } + RawBsonRef::DbPointer(dbp) => { + append_string(data, dbp.namespace); + data.extend(dbp.id.bytes()); + } + RawBsonRef::Decimal128(d) => { + data.extend(d.bytes()); + } + RawBsonRef::Double(d) => { + data.extend(d.to_le_bytes()); + } + RawBsonRef::Int64(i) => { + data.extend(i.to_le_bytes()); + } + RawBsonRef::RegularExpression(re) => { + append_cstring(data, re.pattern); + append_cstring(data, re.options); + } + RawBsonRef::JavaScriptCode(js) => { + append_string(data, js); + } + RawBsonRef::JavaScriptCodeWithScope(code_w_scope) => { + let len = RawJavaScriptCodeWithScopeRef { + code: code_w_scope.code, + scope: &code_w_scope.scope, + } + .len(); + data.extend(len.to_le_bytes()); + append_string(data, code_w_scope.code); + data.extend(code_w_scope.scope.as_bytes()); + } + RawBsonRef::Timestamp(ts) => { + data.extend(ts.to_le_i64().to_le_bytes()); + } + RawBsonRef::ObjectId(oid) => { + data.extend(oid.bytes()); + } + RawBsonRef::Symbol(s) => { + append_string(data, s); + } + RawBsonRef::Null | RawBsonRef::Undefined | RawBsonRef::MinKey | RawBsonRef::MaxKey => {} + }) + } + + fn append_to_data(&mut self, key: impl AsRef, element_type: ElementType, apply: impl FnOnce(&mut Vec)) { + let original_len = self.data.len(); + + // write the key for the next value to the end + // the element type will replace the previous null byte terminator of the document + append_cstring(&mut self.data, key.as_ref()); + + // execute the append + apply(&mut self.data); + // update element type self.data[original_len - 1] = element_type as u8; // append trailing null byte @@ -328,6 +397,21 @@ impl RawDocumentBuf { } } +fn append_string(data: &mut Vec, value: &str) { + data + .extend(((value.as_bytes().len() + 1) as i32).to_le_bytes()); + data.extend(value.as_bytes()); + data.push(0); +} + +fn append_cstring(data: &mut Vec, value: &str) { + if value.contains('\0') { + panic!("cstr includes interior null byte: {}", value) + } + data.extend(value.as_bytes()); + data.push(0); +} + impl Default for RawDocumentBuf { fn default() -> Self { Self::new() From 5d776f5508d0b5b68cff195c3595c417783b7d33 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Mon, 18 Mar 2024 12:14:55 -0400 Subject: [PATCH 2/4] rustfmt --- src/raw/document_buf.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/raw/document_buf.rs b/src/raw/document_buf.rs index 90d54df0..29448f1f 100644 --- a/src/raw/document_buf.rs +++ b/src/raw/document_buf.rs @@ -371,7 +371,12 @@ impl RawDocumentBuf { }) } - fn append_to_data(&mut self, key: impl AsRef, element_type: ElementType, apply: impl FnOnce(&mut Vec)) { + fn append_to_data( + &mut self, + key: impl AsRef, + element_type: ElementType, + apply: impl FnOnce(&mut Vec), + ) { let original_len = self.data.len(); // write the key for the next value to the end @@ -398,8 +403,7 @@ impl RawDocumentBuf { } fn append_string(data: &mut Vec, value: &str) { - data - .extend(((value.as_bytes().len() + 1) as i32).to_le_bytes()); + data.extend(((value.as_bytes().len() + 1) as i32).to_le_bytes()); data.extend(value.as_bytes()); data.push(0); } From fdff85c2a3a2e447c7e144a3ca7cecf963b5e29a Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Mon, 18 Mar 2024 12:36:38 -0400 Subject: [PATCH 3/4] simpler --- src/raw/document_buf.rs | 193 +++++++++++----------------------------- 1 file changed, 52 insertions(+), 141 deletions(-) diff --git a/src/raw/document_buf.rs b/src/raw/document_buf.rs index 29448f1f..63e4f4bf 100644 --- a/src/raw/document_buf.rs +++ b/src/raw/document_buf.rs @@ -7,13 +7,7 @@ use std::{ use serde::{Deserialize, Serialize}; -use crate::{ - de::MIN_BSON_DOCUMENT_SIZE, - spec::{BinarySubtype, ElementType}, - Document, - RawBinaryRef, - RawJavaScriptCodeWithScopeRef, -}; +use crate::{de::MIN_BSON_DOCUMENT_SIZE, spec::BinarySubtype, Document}; use super::{ bson::RawBson, @@ -216,79 +210,7 @@ impl RawDocumentBuf { /// ``` pub fn append(&mut self, key: impl AsRef, value: impl Into) { let value = value.into(); - self.append_to_data(key, value.element_type(), |data| match value { - RawBson::Int32(i) => { - data.extend(i.to_le_bytes()); - } - RawBson::String(s) => { - append_string(data, s.as_str()); - } - RawBson::Document(d) => { - data.extend(d.into_bytes()); - } - RawBson::Array(a) => { - data.extend(a.into_vec()); - } - RawBson::Binary(b) => { - let len = RawBinaryRef { - bytes: b.bytes.as_slice(), - subtype: b.subtype, - } - .len(); - data.extend(len.to_le_bytes()); - data.push(b.subtype.into()); - if let BinarySubtype::BinaryOld = b.subtype { - data.extend((len - 4).to_le_bytes()) - } - data.extend(b.bytes); - } - RawBson::Boolean(b) => { - data.push(b as u8); - } - RawBson::DateTime(dt) => { - data.extend(dt.timestamp_millis().to_le_bytes()); - } - RawBson::DbPointer(dbp) => { - append_string(data, dbp.namespace.as_str()); - data.extend(dbp.id.bytes()); - } - RawBson::Decimal128(d) => { - data.extend(d.bytes()); - } - RawBson::Double(d) => { - data.extend(d.to_le_bytes()); - } - RawBson::Int64(i) => { - data.extend(i.to_le_bytes()); - } - RawBson::RegularExpression(re) => { - append_cstring(data, re.pattern.as_str()); - append_cstring(data, re.options.as_str()); - } - RawBson::JavaScriptCode(js) => { - append_string(data, js.as_str()); - } - RawBson::JavaScriptCodeWithScope(code_w_scope) => { - let len = RawJavaScriptCodeWithScopeRef { - code: code_w_scope.code.as_str(), - scope: &code_w_scope.scope, - } - .len(); - data.extend(len.to_le_bytes()); - append_string(data, code_w_scope.code.as_str()); - data.extend(code_w_scope.scope.into_bytes()); - } - RawBson::Timestamp(ts) => { - data.extend(ts.to_le_i64().to_le_bytes()); - } - RawBson::ObjectId(oid) => { - data.extend(oid.bytes()); - } - RawBson::Symbol(s) => { - append_string(data, s.as_str()); - } - RawBson::Null | RawBson::Undefined | RawBson::MinKey | RawBson::MaxKey => {} - }) + self.append_ref(key, value.as_raw_bson_ref()) } /// Append a key value pair to the end of the document without checking to see if @@ -299,92 +221,95 @@ impl RawDocumentBuf { /// /// If the provided key contains an interior null byte, this method will panic. pub fn append_ref<'a>(&mut self, key: impl AsRef, value: impl Into>) { + fn append_string(doc: &mut RawDocumentBuf, value: &str) { + doc.data + .extend(((value.as_bytes().len() + 1) as i32).to_le_bytes()); + doc.data.extend(value.as_bytes()); + doc.data.push(0); + } + + fn append_cstring(doc: &mut RawDocumentBuf, value: &str) { + if value.contains('\0') { + panic!("cstr includes interior null byte: {}", value) + } + doc.data.extend(value.as_bytes()); + doc.data.push(0); + } + + let original_len = self.data.len(); + + // write the key for the next value to the end + // the element type will replace the previous null byte terminator of the document + append_cstring(self, key.as_ref()); + let value = value.into(); - self.append_to_data(key, value.element_type(), |data| match value { + let element_type = value.element_type(); + + match value { RawBsonRef::Int32(i) => { - data.extend(i.to_le_bytes()); + self.data.extend(i.to_le_bytes()); } RawBsonRef::String(s) => { - append_string(data, s); + append_string(self, s); } RawBsonRef::Document(d) => { - data.extend(d.as_bytes()); + self.data.extend(d.as_bytes()); } RawBsonRef::Array(a) => { - data.extend(a.as_bytes()); + self.data.extend(a.as_bytes()); } RawBsonRef::Binary(b) => { let len = b.len(); - data.extend(len.to_le_bytes()); - data.push(b.subtype.into()); + self.data.extend(len.to_le_bytes()); + self.data.push(b.subtype.into()); if let BinarySubtype::BinaryOld = b.subtype { - data.extend((len - 4).to_le_bytes()) + self.data.extend((len - 4).to_le_bytes()) } - data.extend(b.bytes); + self.data.extend(b.bytes); } RawBsonRef::Boolean(b) => { - data.push(b as u8); + self.data.push(b as u8); } RawBsonRef::DateTime(dt) => { - data.extend(dt.timestamp_millis().to_le_bytes()); + self.data.extend(dt.timestamp_millis().to_le_bytes()); } RawBsonRef::DbPointer(dbp) => { - append_string(data, dbp.namespace); - data.extend(dbp.id.bytes()); + append_string(self, dbp.namespace); + self.data.extend(dbp.id.bytes()); } RawBsonRef::Decimal128(d) => { - data.extend(d.bytes()); + self.data.extend(d.bytes()); } RawBsonRef::Double(d) => { - data.extend(d.to_le_bytes()); + self.data.extend(d.to_le_bytes()); } RawBsonRef::Int64(i) => { - data.extend(i.to_le_bytes()); + self.data.extend(i.to_le_bytes()); } RawBsonRef::RegularExpression(re) => { - append_cstring(data, re.pattern); - append_cstring(data, re.options); + append_cstring(self, re.pattern); + append_cstring(self, re.options); } RawBsonRef::JavaScriptCode(js) => { - append_string(data, js); + append_string(self, js); } RawBsonRef::JavaScriptCodeWithScope(code_w_scope) => { - let len = RawJavaScriptCodeWithScopeRef { - code: code_w_scope.code, - scope: &code_w_scope.scope, - } - .len(); - data.extend(len.to_le_bytes()); - append_string(data, code_w_scope.code); - data.extend(code_w_scope.scope.as_bytes()); + let len = code_w_scope.len(); + self.data.extend(len.to_le_bytes()); + append_string(self, code_w_scope.code); + self.data.extend(code_w_scope.scope.as_bytes()); } RawBsonRef::Timestamp(ts) => { - data.extend(ts.to_le_i64().to_le_bytes()); + self.data.extend(ts.to_le_i64().to_le_bytes()); } RawBsonRef::ObjectId(oid) => { - data.extend(oid.bytes()); + self.data.extend(oid.bytes()); } RawBsonRef::Symbol(s) => { - append_string(data, s); + append_string(self, s); } RawBsonRef::Null | RawBsonRef::Undefined | RawBsonRef::MinKey | RawBsonRef::MaxKey => {} - }) - } - - fn append_to_data( - &mut self, - key: impl AsRef, - element_type: ElementType, - apply: impl FnOnce(&mut Vec), - ) { - let original_len = self.data.len(); - - // write the key for the next value to the end - // the element type will replace the previous null byte terminator of the document - append_cstring(&mut self.data, key.as_ref()); - - // execute the append - apply(&mut self.data); + } // update element type self.data[original_len - 1] = element_type as u8; @@ -402,20 +327,6 @@ impl RawDocumentBuf { } } -fn append_string(data: &mut Vec, value: &str) { - data.extend(((value.as_bytes().len() + 1) as i32).to_le_bytes()); - data.extend(value.as_bytes()); - data.push(0); -} - -fn append_cstring(data: &mut Vec, value: &str) { - if value.contains('\0') { - panic!("cstr includes interior null byte: {}", value) - } - data.extend(value.as_bytes()); - data.push(0); -} - impl Default for RawDocumentBuf { fn default() -> Self { Self::new() From 6e24208711976006504c9e8455c5606314baea85 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Mon, 18 Mar 2024 12:39:44 -0400 Subject: [PATCH 4/4] fix check --- src/raw/array_buf.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/raw/array_buf.rs b/src/raw/array_buf.rs index fa696783..96797996 100644 --- a/src/raw/array_buf.rs +++ b/src/raw/array_buf.rs @@ -96,10 +96,6 @@ impl RawArrayBuf { self.inner.append(self.len.to_string(), value); self.len += 1; } - - pub(crate) fn into_vec(self) -> Vec { - self.inner.into_bytes() - } } impl Debug for RawArrayBuf {