Skip to content

RUST-2003 Binary vector subtype support #513

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ criterion = "0.3.0"
pretty_assertions = "0.6.1"
proptest = "1.0.0"
serde_bytes = "0.11"
serde_path_to_error = "0.1.16"
chrono = { version = "0.4", features = ["serde", "clock", "std"], default-features = false }
[target.'cfg(all(target_arch = "wasm32", target_os = "unknown"))'.dev-dependencies]
getrandom = { version = "0.2", features = ["js"] }
Expand Down
12 changes: 11 additions & 1 deletion src/binary.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
//! Module containing functionality related to BSON binary values.

mod vector;

use crate::{spec::BinarySubtype, Document, RawBinaryRef};
use std::{
convert::TryFrom,
error,
fmt::{self, Display},
};

pub use vector::{PackedBitVector, Vector};

/// Represents a BSON binary value.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct Binary {
Expand Down Expand Up @@ -98,14 +104,18 @@ impl Binary {
pub enum Error {
/// While trying to decode from base64, an error was returned.
///
/// `message` is the text that this error's `Display` implementation writes out.
DecodingError { message: String },

/// A [`Vector`]-related error occurred.
///
/// Produced when constructing a [`PackedBitVector`] with invalid padding or when converting
/// bytes or a `Binary` into a [`Vector`] fails. `message` is the text that this error's
/// `Display` implementation writes out.
Vector { message: String },
}

// No trait methods are overridden here, so the default `std::error::Error` behavior applies.
impl error::Error for Error {}

impl std::fmt::Display for Error {
    /// Writes the `message` carried by the error variant verbatim, without any prefix.
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        // Exactly one arm per variant: a second `DecodingError` arm would be an unreachable
        // pattern, and the absence of a wildcard keeps the match exhaustive when variants are
        // added.
        match self {
            Error::DecodingError { message } => fmt.write_str(message),
            Error::Vector { message } => fmt.write_str(message),
        }
    }
}
Expand Down
288 changes: 288 additions & 0 deletions src/binary/vector.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,288 @@
use std::{
convert::{TryFrom, TryInto},
mem::size_of,
};

use serde::{Deserialize, Serialize};

use super::{Binary, Error, Result};
use crate::{spec::BinarySubtype, Bson, RawBson};

// Data type identifiers. `Vector::from_bytes` reads one of these from the first byte of the
// binary payload, and `Vector::d_type` writes it back out when converting to a `Binary`.

// Identifies a vector of `i8` values.
const INT8: u8 = 0x03;
// Identifies a vector of `f32` values.
const FLOAT32: u8 = 0x27;
// Identifies a vector of packed bits; the only data type that may carry non-zero padding.
const PACKED_BIT: u8 = 0x10;

/// A vector of numeric values. This type can be converted into a [`Binary`] of subtype
/// [`BinarySubtype::Vector`].
///
/// ```rust
/// # use bson::binary::{Binary, Vector};
/// let vector = Vector::Int8(vec![0, 1, 2]);
/// let binary = Binary::from(vector);
/// ```
///
/// `Vector` serializes to and deserializes from a `Binary`.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think there's much value in treating this type as anything other than a convenience wrapper around Binary when it comes to serde, but we can revisit if a use case arises

///
/// ```rust
/// # use serde::{Serialize, Deserialize};
/// # use bson::{binary::{Result, Vector}, spec::ElementType};
/// #[derive(Serialize, Deserialize)]
/// struct Data {
/// vector: Vector,
/// }
///
/// let data = Data { vector: Vector::Int8(vec![0, 1, 2]) };
/// let document = bson::to_document(&data).unwrap();
/// assert_eq!(document.get("vector").unwrap().element_type(), ElementType::Binary);
///
/// let data: Data = bson::from_document(document).unwrap();
/// assert_eq!(data.vector, Vector::Int8(vec![0, 1, 2]));
/// ```
///
/// See the
/// [specification](https://github.com/mongodb/specifications/blob/master/source/bson-binary-vector/bson-binary-vector.md)
/// for more details.
// `Eq` and `Hash` are not derived because the `Float32` variant holds `f32` values, which
// implement neither.
#[derive(Clone, Debug, PartialEq)]
pub enum Vector {
/// A vector of `i8` values. Encoded with the `INT8` data type byte and a padding of 0.
Int8(Vec<i8>),

/// A vector of `f32` values. Encoded with the `FLOAT32` data type byte and a padding of 0;
/// each value is written as little-endian bytes.
Float32(Vec<f32>),

/// A vector of packed bits. See [`PackedBitVector::new`] for more details. Encoded with the
/// `PACKED_BIT` data type byte and the padding stored in the [`PackedBitVector`].
PackedBit(PackedBitVector),
}

/// A vector of packed bits. This type can be constructed by calling [`PackedBitVector::new`].
///
/// The fields are private so that values are built through [`PackedBitVector::new`], which
/// validates the padding before constructing the type.
#[derive(Clone, Debug, PartialEq)]
pub struct PackedBitVector {
// The packed bytes; each byte holds eight single-bit elements.
vector: Vec<u8>,
// Number of least-significant bits to ignore in the final byte; `new` only accepts 0-7, and
// only 0 when `vector` is empty.
padding: u8,
}

impl PackedBitVector {
/// Construct a new `PackedBitVector`. Each `u8` value in the provided `vector` represents 8
/// single-bit elements in little-endian format. For example, the following vector:
///
/// ```rust
/// # use bson::binary::{Result, PackedBitVector};
/// # fn main() -> Result<()> {
/// let packed_bits = vec![238, 224];
/// let vector = PackedBitVector::new(packed_bits, 0)?;
/// # Ok(())
/// # }
/// ```
///
/// represents a 16-bit vector containing the following values:
///
/// ```text
/// [1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0]
/// ```
///
/// Padding can optionally be specified to ignore a number of least-significant bits in the
/// final byte. For example, the vector in the previous example with a padding of 4 would
/// represent a 12-bit vector containing the following values:
///
/// ```text
/// [1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0]
/// ```
///
/// Padding must be within 0-7 inclusive. Padding must be 0 or unspecified if the provided
/// vector is empty.
pub fn new(vector: Vec<u8>, padding: impl Into<Option<u8>>) -> Result<Self> {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the spec requires this type to be valid upon construction, hence the opaque type and result-returning new method

let padding = padding.into().unwrap_or(0);
if !(0..8).contains(&padding) {
return Err(Error::Vector {
message: format!("padding must be within 0-7 inclusive, got {}", padding),
});
}
if padding != 0 && vector.is_empty() {
return Err(Error::Vector {
message: format!(
"cannot specify non-zero padding if the provided vector is empty, got {}",
padding
),
});
}
Ok(Self { vector, padding })
}
}

impl Vector {
/// Construct a [`Vector`] from the given bytes. See the
/// [specification](https://github.com/mongodb/specifications/blob/master/source/bson-binary-vector/bson-binary-vector.md#specification)
/// for details on the expected byte format.
pub fn from_bytes(bytes: impl AsRef<[u8]>) -> Result<Self> {
let bytes = bytes.as_ref();

if bytes.len() < 2 {
return Err(Error::Vector {
message: format!(
"the provided bytes must have a length of at least 2, got {}",
bytes.len()
),
});
}

let d_type = bytes[0];
let padding = bytes[1];
if d_type != PACKED_BIT && padding != 0 {
return Err(Error::Vector {
message: format!(
"padding can only be specified for a packed bit vector (data type {}), got \
type {}",
PACKED_BIT, d_type
),
});
}
let number_bytes = &bytes[2..];

match d_type {
INT8 => {
let vector = number_bytes
.iter()
.map(|n| i8::from_le_bytes([*n]))
.collect();
Ok(Self::Int8(vector))
}
FLOAT32 => {
const F32_BYTES: usize = size_of::<f32>();

let mut vector = Vec::new();
for chunk in number_bytes.chunks(F32_BYTES) {
let bytes: [u8; F32_BYTES] = chunk.try_into().map_err(|_| Error::Vector {
message: format!(
"f32 vector values must be {} bytes, got {:?}",
F32_BYTES, chunk,
),
})?;
vector.push(f32::from_le_bytes(bytes));
}
Ok(Self::Float32(vector))
}
PACKED_BIT => {
let packed_bit_vector = PackedBitVector::new(number_bytes.to_vec(), padding)?;
Ok(Self::PackedBit(packed_bit_vector))
}
other => Err(Error::Vector {
message: format!("unsupported vector data type: {}", other),
}),
}
}

fn d_type(&self) -> u8 {
match self {
Self::Int8(_) => INT8,
Self::Float32(_) => FLOAT32,
Self::PackedBit(_) => PACKED_BIT,
}
}

fn padding(&self) -> u8 {
match self {
Self::Int8(_) => 0,
Self::Float32(_) => 0,
Self::PackedBit(PackedBitVector { padding, .. }) => *padding,
}
}
}

impl From<&Vector> for Binary {
fn from(vector: &Vector) -> Self {
let d_type = vector.d_type();
let padding = vector.padding();
let mut bytes = vec![d_type, padding];

match vector {
Vector::Int8(vector) => {
for n in vector {
bytes.extend_from_slice(&n.to_le_bytes());
}
}
Vector::Float32(vector) => {
for n in vector {
bytes.extend_from_slice(&n.to_le_bytes());
}
}
Vector::PackedBit(PackedBitVector { vector, .. }) => {
for n in vector {
bytes.extend_from_slice(&n.to_le_bytes());
}
}
}

Self {
subtype: BinarySubtype::Vector,
bytes,
}
}
}

impl From<Vector> for Binary {
fn from(vector: Vector) -> Binary {
Self::from(&vector)
}
}

impl TryFrom<&Binary> for Vector {
type Error = Error;

fn try_from(binary: &Binary) -> Result<Self> {
if binary.subtype != BinarySubtype::Vector {
return Err(Error::Vector {
message: format!("expected vector binary subtype, got {:?}", binary.subtype),
});
}
Self::from_bytes(&binary.bytes)
}
}

impl TryFrom<Binary> for Vector {
type Error = Error;

fn try_from(binary: Binary) -> std::result::Result<Self, Self::Error> {
Self::try_from(&binary)
}
}

// Convenience impl to allow passing a Vector directly into the doc! macro. From<Vector> is already
// implemented by a blanket impl in src/bson.rs.
impl From<&Vector> for Bson {
fn from(vector: &Vector) -> Self {
Self::Binary(Binary::from(vector))
}
}

// Convenience impls to allow passing a Vector directly into the rawdoc! macro
impl From<&Vector> for RawBson {
fn from(vector: &Vector) -> Self {
Self::Binary(Binary::from(vector))
}
}

impl From<Vector> for RawBson {
fn from(vector: Vector) -> Self {
Self::from(&vector)
}
}

impl Serialize for Vector {
    /// Serializes the vector by converting it to a [`Binary`] and serializing that value.
    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        Binary::from(self).serialize(serializer)
    }
}

impl<'de> Deserialize<'de> for Vector {
fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
let binary = Binary::deserialize(deserializer)?;
Self::try_from(binary).map_err(serde::de::Error::custom)
}
}
4 changes: 4 additions & 0 deletions src/spec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ const BINARY_SUBTYPE_MD5: u8 = 0x05;
const BINARY_SUBTYPE_ENCRYPTED: u8 = 0x06;
const BINARY_SUBTYPE_COLUMN: u8 = 0x07;
const BINARY_SUBTYPE_SENSITIVE: u8 = 0x08;
const BINARY_SUBTYPE_VECTOR: u8 = 0x09;
const BINARY_SUBTYPE_USER_DEFINED: u8 = 0x80;

/// All available BSON element types.
Expand Down Expand Up @@ -162,6 +163,7 @@ pub enum BinarySubtype {
Encrypted,
Column,
Sensitive,
Vector,
UserDefined(u8),
Reserved(u8),
}
Expand All @@ -179,6 +181,7 @@ impl From<BinarySubtype> for u8 {
BinarySubtype::Encrypted => BINARY_SUBTYPE_ENCRYPTED,
BinarySubtype::Column => BINARY_SUBTYPE_COLUMN,
BinarySubtype::Sensitive => BINARY_SUBTYPE_SENSITIVE,
BinarySubtype::Vector => BINARY_SUBTYPE_VECTOR,
BinarySubtype::UserDefined(x) => x,
BinarySubtype::Reserved(x) => x,
}
Expand All @@ -198,6 +201,7 @@ impl From<u8> for BinarySubtype {
BINARY_SUBTYPE_ENCRYPTED => BinarySubtype::Encrypted,
BINARY_SUBTYPE_COLUMN => BinarySubtype::Column,
BINARY_SUBTYPE_SENSITIVE => BinarySubtype::Sensitive,
BINARY_SUBTYPE_VECTOR => BinarySubtype::Vector,
_ if t < BINARY_SUBTYPE_USER_DEFINED => BinarySubtype::Reserved(t),
_ => BinarySubtype::UserDefined(t),
}
Expand Down
Loading