Skip to content

Commit b34c0a6

Browse files
vbabanin and stIncMale authored
Add BSON Binary Subtype 9 support for vector storage and retrieval. (#1528)
- Implement encoding and decoding logic for vectors using new BSON binary subtype 9.
- Add support for INT8, FLOAT32, and PACKED_BIT data types with padding.
- Provide API methods for converting vectors to BSON binary and BSON binary to vectors.

JAVA-5544

---------

Co-authored-by: Valentin Kovalenko <[email protected]>
1 parent e0d978d commit b34c0a6

34 files changed

+2563
-57
lines changed

bson-scala/src/main/scala/org/mongodb/scala/bson/codecs/macrocodecs/MacroCodec.scala

+1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import scala.collection.mutable
2222
import org.bson._
2323
import org.bson.codecs.configuration.{ CodecRegistries, CodecRegistry }
2424
import org.bson.codecs.{ Codec, DecoderContext, Encoder, EncoderContext }
25+
import scala.collection.immutable.Vector
2526

2627
import org.mongodb.scala.bson.BsonNull
2728

bson-scala/src/test/scala/org/mongodb/scala/bson/codecs/MacrosSpec.scala

+1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import org.mongodb.scala.bson.annotations.{ BsonIgnore, BsonProperty }
3030
import org.mongodb.scala.bson.codecs.Macros.{ createCodecProvider, createCodecProviderIgnoreNone }
3131
import org.mongodb.scala.bson.codecs.Registry.DEFAULT_CODEC_REGISTRY
3232
import org.mongodb.scala.bson.collection.immutable.Document
33+
import scala.collection.immutable.Vector
3334

3435
import scala.collection.JavaConverters._
3536
import scala.reflect.ClassTag

bson/src/main/org/bson/BsonBinary.java

+32
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,13 @@
1818

1919
import org.bson.assertions.Assertions;
2020
import org.bson.internal.UuidHelper;
21+
import org.bson.internal.vector.VectorHelper;
2122

2223
import java.util.Arrays;
2324
import java.util.UUID;
2425

26+
import static org.bson.internal.vector.VectorHelper.encodeVectorToBinary;
27+
2528
/**
2629
* A representation of the BSON Binary type. Note that for performance reasons instances of this class are not immutable,
2730
* so care should be taken to only modify the underlying byte array if you know what you're doing, or else make a defensive copy.
@@ -89,6 +92,20 @@ public BsonBinary(final UUID uuid) {
8992
this(uuid, UuidRepresentation.STANDARD);
9093
}
9194

95+
/**
96+
* Constructs a {@linkplain BsonBinarySubType#VECTOR subtype 9} {@link BsonBinary} from the given {@link Vector}.
97+
*
98+
* @param vector the {@link Vector}
99+
* @since 5.3
100+
*/
101+
public BsonBinary(final Vector vector) {
    // Fail fast on a missing vector before anything is handed to the encoder.
    if (vector == null) {
        throw new IllegalArgumentException("Vector must not be null");
    }
    // The subtype is always VECTOR for binaries built through this constructor.
    this.type = BsonBinarySubType.VECTOR.getValue();
    this.data = encodeVectorToBinary(vector);
}
108+
92109
/**
93110
* Construct a new instance from the given UUID and UuidRepresentation
94111
*
@@ -127,6 +144,21 @@ public UUID asUuid() {
127144
return UuidHelper.decodeBinaryToUuid(this.data.clone(), this.type, UuidRepresentation.STANDARD);
128145
}
129146

147+
/**
148+
* Returns the binary as a {@link Vector}. The {@linkplain #getType() subtype} must be {@linkplain BsonBinarySubType#VECTOR 9}.
149+
*
150+
* @return the vector
151+
* @throws BsonInvalidOperationException if the binary subtype is not {@link BsonBinarySubType#VECTOR}.
152+
* @since 5.3
153+
*/
154+
public Vector asVector() {
155+
if (type != BsonBinarySubType.VECTOR.getValue()) {
156+
throw new BsonInvalidOperationException("type must be a Vector subtype.");
157+
}
158+
159+
return VectorHelper.decodeBinaryToVector(this.data);
160+
}
161+
130162
/**
131163
* Returns the binary as a UUID.
132164
*

bson/src/main/org/bson/BsonBinarySubType.java

+14-5
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
package org.bson;
1818

1919
/**
20-
* The Binary subtype
20+
* The Binary subtype.
2121
*
2222
* @since 3.0
2323
*/
@@ -60,7 +60,7 @@ public enum BsonBinarySubType {
6060
ENCRYPTED((byte) 0x06),
6161

6262
/**
63-
* Columnar data
63+
* Columnar data.
6464
*
6565
* @since 4.4
6666
*/
@@ -73,6 +73,15 @@ public enum BsonBinarySubType {
7373
*/
7474
SENSITIVE((byte) 0x08),
7575

76+
/**
77+
* Vector data.
78+
*
79+
* @mongodb.server.release 6.0
80+
* @since 5.3
81+
* @see Vector
82+
*/
83+
VECTOR((byte) 0x09),
84+
7685
/**
7786
* User defined binary data.
7887
*/
@@ -81,10 +90,10 @@ public enum BsonBinarySubType {
8190
private final byte value;
8291

8392
/**
84-
* Returns true if the given value is a UUID subtype
93+
* Returns true if the given value is a UUID subtype.
8594
*
86-
* @param value the subtype value as a byte
87-
* @return true if value is a UUID subtype
95+
* @param value the subtype value as a byte.
96+
* @return true if value is a UUID subtype.
8897
* @since 3.4
8998
*/
9099
public static boolean isUuid(final byte value) {
bson/src/main/org/bson/Float32Vector.java

+79
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
/*
2+
* Copyright 2008-present MongoDB, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.bson;
18+
19+
import java.util.Arrays;
20+
21+
import static org.bson.assertions.Assertions.assertNotNull;
22+
23+
/**
24+
* Represents a vector of 32-bit floating-point numbers, where each element in the vector is a float.
25+
* <p>
26+
* The {@link Float32Vector} is used to store and retrieve data efficiently using the BSON Binary Subtype 9 format.
27+
*
28+
* @mongodb.server.release 6.0
29+
* @see Vector#floatVector(float[])
30+
* @see BsonBinary#BsonBinary(Vector)
31+
* @see BsonBinary#asVector()
32+
* @since 5.3
33+
*/
34+
public final class Float32Vector extends Vector {
35+
36+
private final float[] data;
37+
38+
Float32Vector(final float[] vectorData) {
39+
super(DataType.FLOAT32);
40+
this.data = assertNotNull(vectorData);
41+
}
42+
43+
/**
44+
* Retrieve the underlying float array representing this {@link Float32Vector}, where each float
45+
* represents an element of a vector.
46+
* <p>
47+
* NOTE: The underlying float array is not copied; changes to the returned array will be reflected in this instance.
48+
*
49+
* @return the underlying float array representing this {@link Float32Vector} vector.
50+
*/
51+
public float[] getData() {
52+
return assertNotNull(data);
53+
}
54+
55+
@Override
56+
public boolean equals(final Object o) {
57+
if (this == o) {
58+
return true;
59+
}
60+
if (o == null || getClass() != o.getClass()) {
61+
return false;
62+
}
63+
Float32Vector that = (Float32Vector) o;
64+
return Arrays.equals(data, that.data);
65+
}
66+
67+
@Override
68+
public int hashCode() {
69+
return Arrays.hashCode(data);
70+
}
71+
72+
@Override
73+
public String toString() {
74+
return "Float32Vector{"
75+
+ "data=" + Arrays.toString(data)
76+
+ ", dataType=" + getDataType()
77+
+ '}';
78+
}
79+
}
bson/src/main/org/bson/Int8Vector.java

+80
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/*
2+
* Copyright 2008-present MongoDB, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.bson;
18+
19+
import java.util.Arrays;
20+
import java.util.Objects;
21+
22+
import static org.bson.assertions.Assertions.assertNotNull;
23+
24+
/**
25+
* Represents a vector of 8-bit signed integers, where each element in the vector is a byte.
26+
* <p>
27+
* The {@link Int8Vector} is used to store and retrieve data efficiently using the BSON Binary Subtype 9 format.
28+
*
29+
* @mongodb.server.release 6.0
30+
* @see Vector#int8Vector(byte[])
31+
* @see BsonBinary#BsonBinary(Vector)
32+
* @see BsonBinary#asVector()
33+
* @since 5.3
34+
*/
35+
public final class Int8Vector extends Vector {
36+
37+
private byte[] data;
38+
39+
Int8Vector(final byte[] data) {
40+
super(DataType.INT8);
41+
this.data = assertNotNull(data);
42+
}
43+
44+
/**
45+
* Retrieve the underlying byte array representing this {@link Int8Vector} vector, where each byte represents
46+
* an element of a vector.
47+
* <p>
48+
* NOTE: The underlying byte array is not copied; changes to the returned array will be reflected in this instance.
49+
*
50+
* @return the underlying byte array representing this {@link Int8Vector} vector.
51+
*/
52+
public byte[] getData() {
53+
return assertNotNull(data);
54+
}
55+
56+
@Override
57+
public boolean equals(final Object o) {
58+
if (this == o) {
59+
return true;
60+
}
61+
if (o == null || getClass() != o.getClass()) {
62+
return false;
63+
}
64+
Int8Vector that = (Int8Vector) o;
65+
return Objects.deepEquals(data, that.data);
66+
}
67+
68+
@Override
69+
public int hashCode() {
70+
return Arrays.hashCode(data);
71+
}
72+
73+
@Override
74+
public String toString() {
75+
return "Int8Vector{"
76+
+ "data=" + Arrays.toString(data)
77+
+ ", dataType=" + getDataType()
78+
+ '}';
79+
}
80+
}
bson/src/main/org/bson/PackedBitVector.java

+101
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
/*
2+
* Copyright 2008-present MongoDB, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.bson;
18+
19+
import java.util.Arrays;
20+
import java.util.Objects;
21+
22+
import static org.bson.assertions.Assertions.assertNotNull;
23+
24+
/**
25+
* Represents a packed bit vector, where each element of the vector is represented by a single bit (0 or 1).
26+
* <p>
27+
* The {@link PackedBitVector} is used to store data efficiently using the BSON Binary Subtype 9 format.
28+
*
29+
* @mongodb.server.release 6.0
30+
* @see Vector#packedBitVector(byte[], byte)
31+
* @see BsonBinary#BsonBinary(Vector)
32+
* @see BsonBinary#asVector()
33+
* @since 5.3
34+
*/
35+
public final class PackedBitVector extends Vector {
36+
37+
private final byte padding;
38+
private final byte[] data;
39+
40+
PackedBitVector(final byte[] data, final byte padding) {
41+
super(DataType.PACKED_BIT);
42+
this.data = assertNotNull(data);
43+
this.padding = padding;
44+
}
45+
46+
/**
47+
* Retrieve the underlying byte array representing this {@link PackedBitVector} vector, where
48+
* each bit represents an element of the vector (either 0 or 1).
49+
* <p>
50+
* Note that the {@linkplain #getPadding() padding value} should be considered when interpreting the final byte of the array,
51+
* as it indicates how many least-significant bits are to be ignored.
52+
*
53+
* @return the underlying byte array representing this {@link PackedBitVector} vector.
54+
* @see #getPadding()
55+
*/
56+
public byte[] getData() {
57+
return assertNotNull(data);
58+
}
59+
60+
/**
61+
* Returns the padding value for this vector.
62+
*
63+
* <p>Padding refers to the number of least-significant bits in the final byte that are ignored when retrieving
64+
* {@linkplain #getData() the vector array}. For instance, if the padding value is 3, this means that the last byte contains
65+
* 3 least-significant unused bits, which should be disregarded during operations.</p>
66+
* <p>
67+
*
68+
* NOTE: The underlying byte array is not copied; changes to the returned array will be reflected in this instance.
69+
*
70+
* @return the padding value (between 0 and 7).
71+
*/
72+
public byte getPadding() {
73+
return this.padding;
74+
}
75+
76+
@Override
77+
public boolean equals(final Object o) {
78+
if (this == o) {
79+
return true;
80+
}
81+
if (o == null || getClass() != o.getClass()) {
82+
return false;
83+
}
84+
PackedBitVector that = (PackedBitVector) o;
85+
return padding == that.padding && Arrays.equals(data, that.data);
86+
}
87+
88+
@Override
89+
public int hashCode() {
90+
return Objects.hash(padding, Arrays.hashCode(data));
91+
}
92+
93+
@Override
94+
public String toString() {
95+
return "PackedBitVector{"
96+
+ "padding=" + padding
97+
+ ", data=" + Arrays.toString(data)
98+
+ ", dataType=" + getDataType()
99+
+ '}';
100+
}
101+
}

0 commit comments

Comments
 (0)