8
8
import io .cloudquery .schema .Resource ;
9
9
import io .cloudquery .schema .Table ;
10
10
import io .cloudquery .schema .Table .TableBuilder ;
11
+ import io .cloudquery .types .JSONType ;
11
12
import io .cloudquery .types .JSONType .JSONVector ;
13
+ import io .cloudquery .types .UUIDType ;
12
14
import io .cloudquery .types .UUIDType .UUIDVector ;
13
15
import java .io .ByteArrayOutputStream ;
14
16
import java .io .IOException ;
15
17
import java .nio .channels .Channels ;
16
- import java .util .ArrayList ;
17
- import java .util .HashMap ;
18
- import java .util .List ;
19
- import java .util .Map ;
20
- import java .util .Objects ;
18
+ import java .time .Duration ;
19
+ import java .util .*;
21
20
import org .apache .arrow .memory .BufferAllocator ;
22
21
import org .apache .arrow .memory .RootAllocator ;
23
- import org .apache .arrow .vector .BigIntVector ;
24
- import org .apache .arrow .vector .BitVector ;
25
- import org .apache .arrow .vector .DateDayVector ;
26
- import org .apache .arrow .vector .FieldVector ;
27
- import org .apache .arrow .vector .FixedSizeBinaryVector ;
28
- import org .apache .arrow .vector .Float4Vector ;
29
- import org .apache .arrow .vector .Float8Vector ;
30
- import org .apache .arrow .vector .IntVector ;
31
- import org .apache .arrow .vector .LargeVarBinaryVector ;
32
- import org .apache .arrow .vector .LargeVarCharVector ;
33
- import org .apache .arrow .vector .SmallIntVector ;
34
- import org .apache .arrow .vector .TimeStampVector ;
35
- import org .apache .arrow .vector .TinyIntVector ;
36
- import org .apache .arrow .vector .UInt1Vector ;
37
- import org .apache .arrow .vector .UInt2Vector ;
38
- import org .apache .arrow .vector .UInt4Vector ;
39
- import org .apache .arrow .vector .UInt8Vector ;
40
- import org .apache .arrow .vector .VarBinaryVector ;
41
- import org .apache .arrow .vector .VarCharVector ;
42
- import org .apache .arrow .vector .VectorSchemaRoot ;
22
+ import org .apache .arrow .vector .*;
43
23
import org .apache .arrow .vector .ipc .ArrowReader ;
44
24
import org .apache .arrow .vector .ipc .ArrowStreamReader ;
45
25
import org .apache .arrow .vector .ipc .ArrowStreamWriter ;
26
+ import org .apache .arrow .vector .types .pojo .ArrowType ;
46
27
import org .apache .arrow .vector .types .pojo .Field ;
47
28
import org .apache .arrow .vector .types .pojo .FieldType ;
48
29
import org .apache .arrow .vector .types .pojo .Schema ;
49
30
import org .apache .arrow .vector .util .Text ;
31
+ import org .joou .UByte ;
32
+ import org .joou .UInteger ;
33
+ import org .joou .ULong ;
34
+ import org .joou .UShort ;
50
35
51
36
public class ArrowHelper {
52
37
public static final String CQ_EXTENSION_INCREMENTAL = "cq:extension:incremental" ;
@@ -72,6 +57,32 @@ private static void setVectorData(FieldVector vector, Object data) {
72
57
bitVector .set (0 , (boolean ) data ? 1 : 0 );
73
58
return ;
74
59
}
60
+ if (vector instanceof DateDayVector dayDateVector ) {
61
+ dayDateVector .set (0 , (int ) data );
62
+ return ;
63
+ }
64
+ if (vector instanceof DateMilliVector dateMilliVector ) {
65
+ dateMilliVector .set (0 , (long ) data );
66
+ return ;
67
+ }
68
+ if (vector instanceof DurationVector durationVector ) {
69
+ Duration duration = (Duration ) data ;
70
+ switch (durationVector .getUnit ()) {
71
+ case SECOND -> {
72
+ durationVector .set (0 , duration .toSeconds ());
73
+ }
74
+ case MILLISECOND -> {
75
+ durationVector .set (0 , duration .toMillis ());
76
+ }
77
+ case MICROSECOND -> {
78
+ durationVector .set (0 , duration .toNanos () / 1000 );
79
+ }
80
+ case NANOSECOND -> {
81
+ durationVector .set (0 , duration .toNanos ());
82
+ }
83
+ }
84
+ return ;
85
+ }
75
86
if (vector instanceof FixedSizeBinaryVector fixedSizeBinaryVector ) {
76
87
fixedSizeBinaryVector .set (0 , (byte []) data );
77
88
return ;
@@ -100,6 +111,22 @@ private static void setVectorData(FieldVector vector, Object data) {
100
111
smallIntVector .set (0 , (short ) data );
101
112
return ;
102
113
}
114
+ if (vector instanceof TimeMicroVector timeMicroVector ) {
115
+ timeMicroVector .set (0 , (long ) data );
116
+ return ;
117
+ }
118
+ if (vector instanceof TimeMilliVector timeMilliVector ) {
119
+ timeMilliVector .set (0 , (int ) data );
120
+ return ;
121
+ }
122
+ if (vector instanceof TimeNanoVector timeNanoVector ) {
123
+ timeNanoVector .set (0 , (long ) data );
124
+ return ;
125
+ }
126
+ if (vector instanceof TimeSecVector timeSecVector ) {
127
+ timeSecVector .set (0 , (int ) data );
128
+ return ;
129
+ }
103
130
if (vector instanceof TimeStampVector timeStampVector ) {
104
131
timeStampVector .set (0 , (long ) data );
105
132
return ;
@@ -109,19 +136,19 @@ private static void setVectorData(FieldVector vector, Object data) {
109
136
return ;
110
137
}
111
138
if (vector instanceof UInt1Vector uInt1Vector ) {
112
- uInt1Vector .set (0 , (byte ) data );
139
+ uInt1Vector .set (0 , (( UByte ) data ). shortValue () );
113
140
return ;
114
141
}
115
142
if (vector instanceof UInt2Vector uInt2Vector ) {
116
- uInt2Vector .set (0 , (short ) data );
143
+ uInt2Vector .set (0 , (( UShort ) data ). intValue () );
117
144
return ;
118
145
}
119
146
if (vector instanceof UInt4Vector uInt4Vector ) {
120
- uInt4Vector .set (0 , (int ) data );
147
+ uInt4Vector .set (0 , (( UInteger ) data ). intValue () );
121
148
return ;
122
149
}
123
150
if (vector instanceof UInt8Vector uInt8Vector ) {
124
- uInt8Vector .set (0 , (long ) data );
151
+ uInt8Vector .set (0 , (( ULong ) data ). longValue () );
125
152
return ;
126
153
}
127
154
if (vector instanceof VarBinaryVector varBinaryVector ) {
@@ -132,16 +159,14 @@ private static void setVectorData(FieldVector vector, Object data) {
132
159
vectorCharVector .set (0 , (Text ) data );
133
160
return ;
134
161
}
135
- if (vector instanceof UUIDVector uuidVector ) {
136
- uuidVector .set (0 , (java .util .UUID ) data );
137
- return ;
138
- }
162
+ // CloudQuery-specific
139
163
if (vector instanceof JSONVector jsonVector ) {
140
164
jsonVector .setSafe (0 , (byte []) data );
141
165
return ;
142
166
}
143
- if (vector instanceof DateDayVector dayDateVector ) {
144
- dayDateVector .set (0 , (int ) data );
167
+ // CloudQuery-specific
168
+ if (vector instanceof UUIDVector uuidVector ) {
169
+ uuidVector .set (0 , (java .util .UUID ) data );
145
170
return ;
146
171
}
147
172
@@ -177,17 +202,7 @@ public static Schema toArrowSchema(Table table) {
177
202
List <Column > columns = table .getColumns ();
178
203
Field [] fields = new Field [columns .size ()];
179
204
for (int i = 0 ; i < columns .size (); i ++) {
180
- Column column = columns .get (i );
181
- Map <String , String > metadata = new HashMap <>();
182
- metadata .put (CQ_EXTENSION_UNIQUE , Boolean .toString (column .isUnique ()));
183
- metadata .put (CQ_EXTENSION_PRIMARY_KEY , Boolean .toString (column .isPrimaryKey ()));
184
- metadata .put (CQ_EXTENSION_INCREMENTAL , Boolean .toString (column .isIncrementalKey ()));
185
- Field field =
186
- new Field (
187
- column .getName (),
188
- new FieldType (!column .isNotNull (), column .getType (), null , metadata ),
189
- null );
190
- fields [i ] = field ;
205
+ fields [i ] = getField (columns .get (i ));
191
206
}
192
207
Map <String , String > metadata = new HashMap <>();
193
208
metadata .put (CQ_TABLE_NAME , table .getName ());
@@ -204,23 +219,21 @@ public static Schema toArrowSchema(Table table) {
204
219
return new Schema (asList (fields ), metadata );
205
220
}
206
221
222
+ private static Field getField (Column column ) {
223
+ Map <String , String > metadata = new HashMap <>();
224
+ metadata .put (CQ_EXTENSION_UNIQUE , Boolean .toString (column .isUnique ()));
225
+ metadata .put (CQ_EXTENSION_PRIMARY_KEY , Boolean .toString (column .isPrimaryKey ()));
226
+ metadata .put (CQ_EXTENSION_INCREMENTAL , Boolean .toString (column .isIncrementalKey ()));
227
+ return new Field (
228
+ column .getName (),
229
+ new FieldType (!column .isNotNull (), column .getType (), null , metadata ),
230
+ null );
231
+ }
232
+
207
233
public static Table fromArrowSchema (Schema schema ) {
208
234
List <Column > columns = new ArrayList <>();
209
235
for (Field field : schema .getFields ()) {
210
- boolean isUnique = Objects .equals (field .getMetadata ().get (CQ_EXTENSION_UNIQUE ), "true" );
211
- boolean isPrimaryKey =
212
- Objects .equals (field .getMetadata ().get (CQ_EXTENSION_PRIMARY_KEY ), "true" );
213
- boolean isIncrementalKey =
214
- Objects .equals (field .getMetadata ().get (CQ_EXTENSION_INCREMENTAL ), "true" );
215
-
216
- columns .add (
217
- Column .builder ()
218
- .name (field .getName ())
219
- .unique (isUnique )
220
- .primaryKey (isPrimaryKey )
221
- .incrementalKey (isIncrementalKey )
222
- .type (field .getType ())
223
- .build ());
236
+ columns .add (getColumn (field ));
224
237
}
225
238
226
239
Map <String , String > metaData = schema .getCustomMetadata ();
@@ -244,6 +257,40 @@ public static Table fromArrowSchema(Schema schema) {
244
257
return tableBuilder .build ();
245
258
}
246
259
260
+ private static Column getColumn (Field field ) {
261
+ boolean isUnique = Objects .equals (field .getMetadata ().get (CQ_EXTENSION_UNIQUE ), "true" );
262
+ boolean isPrimaryKey =
263
+ Objects .equals (field .getMetadata ().get (CQ_EXTENSION_PRIMARY_KEY ), "true" );
264
+ boolean isIncrementalKey =
265
+ Objects .equals (field .getMetadata ().get (CQ_EXTENSION_INCREMENTAL ), "true" );
266
+
267
+ ArrowType fieldType = field .getType ();
268
+ String extensionName =
269
+ field .getMetadata ().get (ArrowType .ExtensionType .EXTENSION_METADATA_KEY_NAME );
270
+ String extensionMetadata =
271
+ field .getMetadata ().get (ArrowType .ExtensionType .EXTENSION_METADATA_KEY_METADATA );
272
+
273
+ // We need to scan our extension types manually because of
274
+ // https://github.com/apache/arrow/issues/38891
275
+ if (JSONType .EXTENSION_NAME .equals (extensionName )
276
+ && JSONType .INSTANCE .serialize ().equals (extensionMetadata )
277
+ && JSONType .INSTANCE .storageType ().equals (fieldType )) {
278
+ fieldType = JSONType .INSTANCE ;
279
+ } else if (UUIDType .EXTENSION_NAME .equals (extensionName )
280
+ && UUIDType .INSTANCE .serialize ().equals (extensionMetadata )
281
+ && UUIDType .INSTANCE .storageType ().equals (fieldType )) {
282
+ fieldType = UUIDType .INSTANCE ;
283
+ }
284
+
285
+ return Column .builder ()
286
+ .name (field .getName ())
287
+ .unique (isUnique )
288
+ .primaryKey (isPrimaryKey )
289
+ .incrementalKey (isIncrementalKey )
290
+ .type (fieldType )
291
+ .build ();
292
+ }
293
+
247
294
public static ByteString encode (Resource resource ) throws IOException {
248
295
try (BufferAllocator bufferAllocator = new RootAllocator ()) {
249
296
Table table = resource .getTable ();
0 commit comments