Skip to content

Commit 859a517

Browse files
authored
Merge branch 'main' into feat/scalar/int
2 parents ebf635c + b7a2a6e commit 859a517

File tree

3 files changed

+216
-0
lines changed

3 files changed

+216
-0
lines changed

Diff for: lib/build.gradle

+2
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ dependencies {
4141
testImplementation('org.junit.jupiter:junit-jupiter-api:5.10.0')
4242
testImplementation('org.mockito:mockito-core:5.4.0')
4343
testImplementation('org.mockito:mockito-junit-jupiter:5.4.0')
44+
testImplementation('org.apache.arrow:arrow-memory-netty:12.0.1')
4445
testImplementation('nl.jqno.equalsverifier:equalsverifier:3.15')
4546
testRuntimeOnly('org.junit.jupiter:junit-jupiter-engine:5.10.0')
4647

@@ -52,6 +53,7 @@ test {
5253
testLogging {
5354
events "passed", "skipped", "failed"
5455
}
56+
jvmArgs("--add-opens=java.base/java.nio=ALL-UNNAMED")
5557
}
5658

5759
// Apply a specific Java toolchain to ease working on different environments.

Diff for: lib/src/main/java/io/cloudquery/types/UUIDType.java

+87
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
package io.cloudquery.types;
2+
3+
import org.apache.arrow.memory.BufferAllocator;
4+
import org.apache.arrow.memory.util.hash.ArrowBufHasher;
5+
import org.apache.arrow.vector.ExtensionTypeVector;
6+
import org.apache.arrow.vector.FieldVector;
7+
import org.apache.arrow.vector.FixedSizeBinaryVector;
8+
import org.apache.arrow.vector.types.pojo.ArrowType;
9+
import org.apache.arrow.vector.types.pojo.FieldType;
10+
11+
import java.nio.ByteBuffer;
12+
import java.util.UUID;
13+
14+
import static org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType;
15+
16+
public class UUIDType extends ExtensionType {
17+
public static final int BYTE_WIDTH = 16;
18+
19+
@Override
20+
public ArrowType storageType() {
21+
return new FixedSizeBinary(BYTE_WIDTH);
22+
}
23+
24+
@Override
25+
public String extensionName() {
26+
return "uuid";
27+
}
28+
29+
@Override
30+
public boolean extensionEquals(ExtensionType other) {
31+
return other instanceof UUIDType;
32+
}
33+
34+
@Override
35+
public String serialize() {
36+
return "uuid-serialized";
37+
}
38+
39+
@Override
40+
public ArrowType deserialize(ArrowType storageType, String serializedData) {
41+
if (!serializedData.equals("uuid-serialized")) {
42+
throw new IllegalArgumentException("Type identifier did not match: " + serializedData);
43+
}
44+
if (!storageType.equals(storageType())) {
45+
throw new IllegalArgumentException("invalid storage type for UUIDType: " + storageType.getTypeID());
46+
}
47+
return new UUIDType();
48+
}
49+
50+
@Override
51+
public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) {
52+
return new UUIDVector(name, allocator, new FixedSizeBinaryVector(name, allocator, BYTE_WIDTH));
53+
}
54+
55+
public static class UUIDVector extends ExtensionTypeVector<FixedSizeBinaryVector> {
56+
public UUIDVector(String name, BufferAllocator allocator, FixedSizeBinaryVector valueVectors) {
57+
super(name, allocator, valueVectors);
58+
}
59+
60+
@Override
61+
public Object getObject(int index) {
62+
final ByteBuffer bb = ByteBuffer.wrap(getUnderlyingVector().getObject(index));
63+
return new UUID(bb.getLong(), bb.getLong());
64+
}
65+
66+
@Override
67+
public int hashCode(int index) {
68+
return hashCode(index, null);
69+
}
70+
71+
@Override
72+
public int hashCode(int index, ArrowBufHasher hasher) {
73+
return getUnderlyingVector().hashCode(index, hasher);
74+
}
75+
76+
public UUID get(int index) {
77+
return (UUID) getObject(index);
78+
}
79+
80+
public void set(int index, UUID uuid) {
81+
ByteBuffer bb = ByteBuffer.allocate(BYTE_WIDTH);
82+
bb.putLong(uuid.getMostSignificantBits());
83+
bb.putLong(uuid.getLeastSignificantBits());
84+
getUnderlyingVector().set(index, bb.array());
85+
}
86+
}
87+
}
Diff for: lib/src/test/java/io/cloudquery/types/UUIDTypeTest.java

+127
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
package io.cloudquery.types;
2+
3+
import io.cloudquery.types.UUIDType.UUIDVector;
4+
import org.apache.arrow.memory.BufferAllocator;
5+
import org.apache.arrow.memory.RootAllocator;
6+
import org.apache.arrow.vector.FieldVector;
7+
import org.apache.arrow.vector.VectorSchemaRoot;
8+
import org.apache.arrow.vector.ipc.ArrowFileReader;
9+
import org.apache.arrow.vector.ipc.ArrowFileWriter;
10+
import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType;
11+
import org.apache.arrow.vector.types.pojo.ExtensionTypeRegistry;
12+
import org.apache.arrow.vector.types.pojo.Field;
13+
import org.apache.arrow.vector.types.pojo.Schema;
14+
import org.junit.jupiter.api.AfterAll;
15+
import org.junit.jupiter.api.BeforeAll;
16+
import org.junit.jupiter.api.BeforeEach;
17+
import org.junit.jupiter.api.Test;
18+
19+
import java.io.File;
20+
import java.io.IOException;
21+
import java.nio.channels.FileChannel;
22+
import java.nio.channels.WritableByteChannel;
23+
import java.nio.file.Files;
24+
import java.nio.file.Paths;
25+
import java.nio.file.StandardOpenOption;
26+
import java.util.Collections;
27+
import java.util.List;
28+
import java.util.UUID;
29+
import java.util.stream.IntStream;
30+
31+
import static org.junit.jupiter.api.Assertions.assertEquals;
32+
import static org.junit.jupiter.api.Assertions.assertTrue;
33+
34+
class UUIDTypeTest {
35+
private static final String FIELD_NAME = "uuid";
36+
private static final List<UUID> UUIDS = IntStream.range(0, 10).mapToObj(i -> UUID.randomUUID()).toList();
37+
38+
private File file;
39+
40+
@BeforeAll
41+
public static void setUpTest() {
42+
ExtensionTypeRegistry.register(new UUIDType());
43+
}
44+
45+
@AfterAll
46+
public static void tearDown() {
47+
ExtensionTypeRegistry.unregister(new UUIDType());
48+
}
49+
50+
51+
@BeforeEach
52+
void setUp() throws IOException {
53+
file = File.createTempFile("uuid_test", ".arrow");
54+
}
55+
56+
@Test
57+
public void shouldSetUUIDsOnUUIDVector() {
58+
UUID uuid1 = UUID.randomUUID();
59+
UUID uuid2 = UUID.randomUUID();
60+
61+
try (BufferAllocator allocator = new RootAllocator()) {
62+
ExtensionType uuidType = ExtensionTypeRegistry.lookup("uuid");
63+
try (UUIDVector vector = (UUIDVector) uuidType.getNewVector("vector", null, allocator)) {
64+
vector.setValueCount(4);
65+
vector.set(0, uuid1);
66+
vector.setNull(1);
67+
vector.set(2, uuid2);
68+
vector.setNull(3);
69+
70+
// Assert that the values were set correctly
71+
assertEquals(uuid1, vector.get(0), "UUIDs should match");
72+
assertTrue(vector.isNull(1), "Should be null");
73+
assertEquals(uuid2, vector.get(2), "UUIDs should match");
74+
assertTrue(vector.isNull(3), "Should be null");
75+
76+
// Assert that the value count and null count are correct
77+
assertEquals(4, vector.getValueCount(), "Value count should match");
78+
assertEquals(2, vector.getNullCount(), "Null count should match");
79+
}
80+
}
81+
}
82+
83+
@Test
84+
public void roundTripUUID() throws IOException {
85+
// Generate some data and write it to a file
86+
try (BufferAllocator allocator = new RootAllocator(); VectorSchemaRoot root = createVectorSchemaRoot(allocator)) {
87+
generateDataAndWriteToFile(root);
88+
}
89+
90+
// Read the data back from the file and assert that it matches what we wrote
91+
try (BufferAllocator allocator = new RootAllocator();
92+
ArrowFileReader reader = new ArrowFileReader(Files.newByteChannel(Paths.get(file.getAbsolutePath())), allocator)) {
93+
94+
reader.loadNextBatch();
95+
96+
FieldVector fieldVector = reader.getVectorSchemaRoot().getVector(FIELD_NAME);
97+
assertEquals(UUIDS.size(), fieldVector.getValueCount(), "Value count should match");
98+
for (int i = 0; i < UUIDS.size(); i++) {
99+
assertEquals(UUIDS.get(i), fieldVector.getObject(i), "UUIDs should match");
100+
}
101+
}
102+
}
103+
104+
private static VectorSchemaRoot createVectorSchemaRoot(BufferAllocator allocator) {
105+
return VectorSchemaRoot.create(new Schema(Collections.singletonList(Field.nullable(FIELD_NAME, new UUIDType()))), allocator);
106+
}
107+
108+
private void generateDataAndWriteToFile(VectorSchemaRoot root) throws IOException {
109+
// Get the vector representing the column
110+
UUIDVector vector = (UUIDVector) root.getVector(FIELD_NAME);
111+
112+
// Generate some UUIDs
113+
vector.setValueCount(UUIDS.size());
114+
for (int i = 0; i < UUIDS.size(); i++) {
115+
vector.set(i, UUIDS.get(i));
116+
}
117+
root.setRowCount(UUIDS.size());
118+
119+
// Write the data to a file
120+
try (WritableByteChannel channel = FileChannel.open(Paths.get(file.getAbsolutePath()), StandardOpenOption.WRITE);
121+
ArrowFileWriter writer = new ArrowFileWriter(root, null, channel)) {
122+
writer.start();
123+
writer.writeBatch();
124+
writer.end();
125+
}
126+
}
127+
}

0 commit comments

Comments
 (0)