Skip to content

Commit 098e1a1

Browse files
RUST-2003 Binary vector subtype support (#513)
1 parent 04d1549 commit 098e1a1

File tree

10 files changed

+799
-9
lines changed

10 files changed

+799
-9
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ criterion = "0.3.0"
8282
pretty_assertions = "0.6.1"
8383
proptest = "1.0.0"
8484
serde_bytes = "0.11"
85+
serde_path_to_error = "0.1.16"
8586
chrono = { version = "0.4", features = ["serde", "clock", "std"], default-features = false }
8687
[target.'cfg(all(target_arch = "wasm32", target_os = "unknown"))'.dev-dependencies]
8788
getrandom = { version = "0.2", features = ["js"] }

src/binary.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,16 @@
1+
//! Module containing functionality related to BSON binary values.
2+
3+
mod vector;
4+
15
use crate::{spec::BinarySubtype, Document, RawBinaryRef};
26
use std::{
37
convert::TryFrom,
48
error,
59
fmt::{self, Display},
610
};
711

12+
pub use vector::{PackedBitVector, Vector};
13+
814
/// Represents a BSON binary value.
915
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
1016
pub struct Binary {
@@ -98,14 +104,18 @@ impl Binary {
98104
pub enum Error {
99105
/// While trying to decode from base64, an error was returned.
100106
DecodingError { message: String },
107+
108+
/// A [`Vector`]-related error occurred.
109+
Vector { message: String },
101110
}
102111

103112
impl error::Error for Error {}
104113

105114
impl std::fmt::Display for Error {
106115
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
107116
match self {
108-
Error::DecodingError { message: m } => fmt.write_str(m),
117+
Error::DecodingError { message } => fmt.write_str(message),
118+
Error::Vector { message } => fmt.write_str(message),
109119
}
110120
}
111121
}

src/binary/vector.rs

Lines changed: 288 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,288 @@
1+
use std::{
2+
convert::{TryFrom, TryInto},
3+
mem::size_of,
4+
};
5+
6+
use serde::{Deserialize, Serialize};
7+
8+
use super::{Binary, Error, Result};
9+
use crate::{spec::BinarySubtype, Bson, RawBson};
10+
11+
/// Data type byte written for (and expected from) a [`Vector::Int8`].
const INT8: u8 = 0x03;
/// Data type byte written for (and expected from) a [`Vector::Float32`].
const FLOAT32: u8 = 0x27;
/// Data type byte written for (and expected from) a [`Vector::PackedBit`].
const PACKED_BIT: u8 = 0x10;
14+
15+
/// A vector of numeric values. This type can be converted into a [`Binary`] of subtype
16+
/// [`BinarySubtype::Vector`].
17+
///
18+
/// ```rust
19+
/// # use bson::binary::{Binary, Vector};
20+
/// let vector = Vector::Int8(vec![0, 1, 2]);
21+
/// let binary = Binary::from(vector);
22+
/// ```
23+
///
24+
/// `Vector` serializes to and deserializes from a `Binary`.
25+
///
26+
/// ```rust
27+
/// # use serde::{Serialize, Deserialize};
28+
/// # use bson::{binary::{Result, Vector}, spec::ElementType};
29+
/// #[derive(Serialize, Deserialize)]
30+
/// struct Data {
31+
/// vector: Vector,
32+
/// }
33+
///
34+
/// let data = Data { vector: Vector::Int8(vec![0, 1, 2]) };
35+
/// let document = bson::to_document(&data).unwrap();
36+
/// assert_eq!(document.get("vector").unwrap().element_type(), ElementType::Binary);
37+
///
38+
/// let data: Data = bson::from_document(document).unwrap();
39+
/// assert_eq!(data.vector, Vector::Int8(vec![0, 1, 2]));
40+
/// ```
41+
///
42+
/// See the
43+
/// [specification](https://github.com/mongodb/specifications/blob/master/source/bson-binary-vector/bson-binary-vector.md)
44+
/// for more details.
45+
#[derive(Clone, Debug, PartialEq)]
46+
pub enum Vector {
47+
/// A vector of `i8` values.
48+
Int8(Vec<i8>),
49+
50+
/// A vector of `f32` values.
51+
Float32(Vec<f32>),
52+
53+
/// A vector of packed bits. See [`PackedBitVector::new`] for more details.
54+
PackedBit(PackedBitVector),
55+
}
56+
57+
/// A vector of packed bits. This type can be constructed by calling [`PackedBitVector::new`].
58+
#[derive(Clone, Debug, PartialEq)]
59+
pub struct PackedBitVector {
60+
vector: Vec<u8>,
61+
padding: u8,
62+
}
63+
64+
impl PackedBitVector {
65+
/// Construct a new `PackedBitVector`. Each `u8` value in the provided `vector` represents 8
66+
/// single-bit elements in little-endian format. For example, the following vector:
67+
///
68+
/// ```rust
69+
/// # use bson::binary::{Result, PackedBitVector};
70+
/// # fn main() -> Result<()> {
71+
/// let packed_bits = vec![238, 224];
72+
/// let vector = PackedBitVector::new(packed_bits, 0)?;
73+
/// # Ok(())
74+
/// # }
75+
/// ```
76+
///
77+
/// represents a 16-bit vector containing the following values:
78+
///
79+
/// ```text
80+
/// [1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0]
81+
/// ```
82+
///
83+
/// Padding can optionally be specified to ignore a number of least-significant bits in the
84+
/// final byte. For example, the vector in the previous example with a padding of 4 would
85+
/// represent a 12-bit vector containing the following values:
86+
///
87+
/// ```text
88+
/// [1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0]
89+
/// ```
90+
///
91+
/// Padding must be within 0-7 inclusive. Padding must be 0 or unspecified if the provided
92+
/// vector is empty.
93+
pub fn new(vector: Vec<u8>, padding: impl Into<Option<u8>>) -> Result<Self> {
94+
let padding = padding.into().unwrap_or(0);
95+
if !(0..8).contains(&padding) {
96+
return Err(Error::Vector {
97+
message: format!("padding must be within 0-7 inclusive, got {}", padding),
98+
});
99+
}
100+
if padding != 0 && vector.is_empty() {
101+
return Err(Error::Vector {
102+
message: format!(
103+
"cannot specify non-zero padding if the provided vector is empty, got {}",
104+
padding
105+
),
106+
});
107+
}
108+
Ok(Self { vector, padding })
109+
}
110+
}
111+
112+
impl Vector {
113+
/// Construct a [`Vector`] from the given bytes. See the
114+
/// [specification](https://github.com/mongodb/specifications/blob/master/source/bson-binary-vector/bson-binary-vector.md#specification)
115+
/// for details on the expected byte format.
116+
pub fn from_bytes(bytes: impl AsRef<[u8]>) -> Result<Self> {
117+
let bytes = bytes.as_ref();
118+
119+
if bytes.len() < 2 {
120+
return Err(Error::Vector {
121+
message: format!(
122+
"the provided bytes must have a length of at least 2, got {}",
123+
bytes.len()
124+
),
125+
});
126+
}
127+
128+
let d_type = bytes[0];
129+
let padding = bytes[1];
130+
if d_type != PACKED_BIT && padding != 0 {
131+
return Err(Error::Vector {
132+
message: format!(
133+
"padding can only be specified for a packed bit vector (data type {}), got \
134+
type {}",
135+
PACKED_BIT, d_type
136+
),
137+
});
138+
}
139+
let number_bytes = &bytes[2..];
140+
141+
match d_type {
142+
INT8 => {
143+
let vector = number_bytes
144+
.iter()
145+
.map(|n| i8::from_le_bytes([*n]))
146+
.collect();
147+
Ok(Self::Int8(vector))
148+
}
149+
FLOAT32 => {
150+
const F32_BYTES: usize = size_of::<f32>();
151+
152+
let mut vector = Vec::new();
153+
for chunk in number_bytes.chunks(F32_BYTES) {
154+
let bytes: [u8; F32_BYTES] = chunk.try_into().map_err(|_| Error::Vector {
155+
message: format!(
156+
"f32 vector values must be {} bytes, got {:?}",
157+
F32_BYTES, chunk,
158+
),
159+
})?;
160+
vector.push(f32::from_le_bytes(bytes));
161+
}
162+
Ok(Self::Float32(vector))
163+
}
164+
PACKED_BIT => {
165+
let packed_bit_vector = PackedBitVector::new(number_bytes.to_vec(), padding)?;
166+
Ok(Self::PackedBit(packed_bit_vector))
167+
}
168+
other => Err(Error::Vector {
169+
message: format!("unsupported vector data type: {}", other),
170+
}),
171+
}
172+
}
173+
174+
fn d_type(&self) -> u8 {
175+
match self {
176+
Self::Int8(_) => INT8,
177+
Self::Float32(_) => FLOAT32,
178+
Self::PackedBit(_) => PACKED_BIT,
179+
}
180+
}
181+
182+
fn padding(&self) -> u8 {
183+
match self {
184+
Self::Int8(_) => 0,
185+
Self::Float32(_) => 0,
186+
Self::PackedBit(PackedBitVector { padding, .. }) => *padding,
187+
}
188+
}
189+
}
190+
191+
impl From<&Vector> for Binary {
192+
fn from(vector: &Vector) -> Self {
193+
let d_type = vector.d_type();
194+
let padding = vector.padding();
195+
let mut bytes = vec![d_type, padding];
196+
197+
match vector {
198+
Vector::Int8(vector) => {
199+
for n in vector {
200+
bytes.extend_from_slice(&n.to_le_bytes());
201+
}
202+
}
203+
Vector::Float32(vector) => {
204+
for n in vector {
205+
bytes.extend_from_slice(&n.to_le_bytes());
206+
}
207+
}
208+
Vector::PackedBit(PackedBitVector { vector, .. }) => {
209+
for n in vector {
210+
bytes.extend_from_slice(&n.to_le_bytes());
211+
}
212+
}
213+
}
214+
215+
Self {
216+
subtype: BinarySubtype::Vector,
217+
bytes,
218+
}
219+
}
220+
}
221+
222+
impl From<Vector> for Binary {
    /// Encodes an owned vector by delegating to the `From<&Vector>` implementation.
    fn from(vector: Vector) -> Binary {
        Self::from(&vector)
    }
}
227+
228+
impl TryFrom<&Binary> for Vector {
229+
type Error = Error;
230+
231+
fn try_from(binary: &Binary) -> Result<Self> {
232+
if binary.subtype != BinarySubtype::Vector {
233+
return Err(Error::Vector {
234+
message: format!("expected vector binary subtype, got {:?}", binary.subtype),
235+
});
236+
}
237+
Self::from_bytes(&binary.bytes)
238+
}
239+
}
240+
241+
impl TryFrom<Binary> for Vector {
242+
type Error = Error;
243+
244+
fn try_from(binary: Binary) -> std::result::Result<Self, Self::Error> {
245+
Self::try_from(&binary)
246+
}
247+
}
248+
249+
// Convenience impl to allow passing a Vector directly into the doc! macro. From<Vector> is already
250+
// implemented by a blanket impl in src/bson.rs.
251+
impl From<&Vector> for Bson {
252+
fn from(vector: &Vector) -> Self {
253+
Self::Binary(Binary::from(vector))
254+
}
255+
}
256+
257+
// Convenience impls to allow passing a Vector directly into the rawdoc! macro
258+
impl From<&Vector> for RawBson {
259+
fn from(vector: &Vector) -> Self {
260+
Self::Binary(Binary::from(vector))
261+
}
262+
}
263+
264+
impl From<Vector> for RawBson {
    /// Converts an owned vector by delegating to the `From<&Vector>` implementation.
    fn from(vector: Vector) -> Self {
        Self::from(&vector)
    }
}
269+
270+
impl Serialize for Vector {
271+
fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
272+
where
273+
S: serde::Serializer,
274+
{
275+
let binary = Binary::from(self);
276+
binary.serialize(serializer)
277+
}
278+
}
279+
280+
impl<'de> Deserialize<'de> for Vector {
281+
fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
282+
where
283+
D: serde::Deserializer<'de>,
284+
{
285+
let binary = Binary::deserialize(deserializer)?;
286+
Self::try_from(binary).map_err(serde::de::Error::custom)
287+
}
288+
}

src/spec.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ const BINARY_SUBTYPE_MD5: u8 = 0x05;
6262
const BINARY_SUBTYPE_ENCRYPTED: u8 = 0x06;
6363
const BINARY_SUBTYPE_COLUMN: u8 = 0x07;
6464
const BINARY_SUBTYPE_SENSITIVE: u8 = 0x08;
65+
const BINARY_SUBTYPE_VECTOR: u8 = 0x09;
6566
const BINARY_SUBTYPE_USER_DEFINED: u8 = 0x80;
6667

6768
/// All available BSON element types.
@@ -162,6 +163,7 @@ pub enum BinarySubtype {
162163
Encrypted,
163164
Column,
164165
Sensitive,
166+
Vector,
165167
UserDefined(u8),
166168
Reserved(u8),
167169
}
@@ -179,6 +181,7 @@ impl From<BinarySubtype> for u8 {
179181
BinarySubtype::Encrypted => BINARY_SUBTYPE_ENCRYPTED,
180182
BinarySubtype::Column => BINARY_SUBTYPE_COLUMN,
181183
BinarySubtype::Sensitive => BINARY_SUBTYPE_SENSITIVE,
184+
BinarySubtype::Vector => BINARY_SUBTYPE_VECTOR,
182185
BinarySubtype::UserDefined(x) => x,
183186
BinarySubtype::Reserved(x) => x,
184187
}
@@ -198,6 +201,7 @@ impl From<u8> for BinarySubtype {
198201
BINARY_SUBTYPE_ENCRYPTED => BinarySubtype::Encrypted,
199202
BINARY_SUBTYPE_COLUMN => BinarySubtype::Column,
200203
BINARY_SUBTYPE_SENSITIVE => BinarySubtype::Sensitive,
204+
BINARY_SUBTYPE_VECTOR => BinarySubtype::Vector,
201205
_ if t < BINARY_SUBTYPE_USER_DEFINED => BinarySubtype::Reserved(t),
202206
_ => BinarySubtype::UserDefined(t),
203207
}

0 commit comments

Comments
 (0)