4
4
5
5
import com .google .protobuf .ByteString ;
6
6
import io .cloudquery .schema .Column ;
7
+ import io .cloudquery .schema .Resource ;
7
8
import io .cloudquery .schema .Table ;
8
9
import io .cloudquery .schema .Table .TableBuilder ;
10
+ import io .cloudquery .types .JSONType .JSONVector ;
11
+ import io .cloudquery .types .UUIDType .UUIDVector ;
9
12
import java .io .ByteArrayOutputStream ;
10
13
import java .io .IOException ;
11
14
import java .nio .channels .Channels ;
15
18
import java .util .Map ;
16
19
import org .apache .arrow .memory .BufferAllocator ;
17
20
import org .apache .arrow .memory .RootAllocator ;
21
+ import org .apache .arrow .vector .BigIntVector ;
22
+ import org .apache .arrow .vector .BitVector ;
23
+ import org .apache .arrow .vector .FieldVector ;
24
+ import org .apache .arrow .vector .FixedSizeBinaryVector ;
25
+ import org .apache .arrow .vector .Float4Vector ;
26
+ import org .apache .arrow .vector .Float8Vector ;
27
+ import org .apache .arrow .vector .IntVector ;
28
+ import org .apache .arrow .vector .LargeVarBinaryVector ;
29
+ import org .apache .arrow .vector .LargeVarCharVector ;
30
+ import org .apache .arrow .vector .SmallIntVector ;
31
+ import org .apache .arrow .vector .TimeStampVector ;
32
+ import org .apache .arrow .vector .TinyIntVector ;
33
+ import org .apache .arrow .vector .UInt1Vector ;
34
+ import org .apache .arrow .vector .UInt2Vector ;
35
+ import org .apache .arrow .vector .UInt4Vector ;
36
+ import org .apache .arrow .vector .UInt8Vector ;
37
+ import org .apache .arrow .vector .VarBinaryVector ;
38
+ import org .apache .arrow .vector .VarCharVector ;
18
39
import org .apache .arrow .vector .VectorSchemaRoot ;
19
40
import org .apache .arrow .vector .ipc .ArrowReader ;
20
41
import org .apache .arrow .vector .ipc .ArrowStreamReader ;
21
42
import org .apache .arrow .vector .ipc .ArrowStreamWriter ;
22
43
import org .apache .arrow .vector .types .pojo .Field ;
44
+ import org .apache .arrow .vector .types .pojo .FieldType ;
23
45
import org .apache .arrow .vector .types .pojo .Schema ;
46
+ import org .apache .arrow .vector .util .Text ;
24
47
25
48
public class ArrowHelper {
49
// Metadata keys for CloudQuery extensions. In this file the unique / primary_key / incremental
// keys are written per field with the values "true" or "false"; constraint_name is written on
// the schema.
+ public static final String CQ_EXTENSION_INCREMENTAL = "cq:extension:incremental" ;
50
+ public static final String CQ_EXTENSION_CONSTRAINT_NAME = "cq:extension:constraint_name" ;
51
+ public static final String CQ_EXTENSION_PRIMARY_KEY = "cq:extension:primary_key" ;
52
+ public static final String CQ_EXTENSION_UNIQUE = "cq:extension:unique" ;
26
53
// Schema-level metadata keys describing the table (name, title, description, parent table name).
public static final String CQ_TABLE_NAME = "cq:table_name" ;
27
54
public static final String CQ_TABLE_TITLE = "cq:table_title" ;
28
55
public static final String CQ_TABLE_DESCRIPTION = "cq:table_description" ;
30
57
58
+ private static void setVectorData (FieldVector vector , Object data ) {
59
+ vector .allocateNew ();
60
+ if (vector instanceof BigIntVector ) {
61
+ ((BigIntVector ) vector ).set (0 , (long ) data );
62
+ return ;
63
+ }
64
+ if (vector instanceof BitVector ) {
65
+ ((BitVector ) vector ).set (0 , (int ) data );
66
+ return ;
67
+ }
68
+ if (vector instanceof FixedSizeBinaryVector ) {
69
+ ((FixedSizeBinaryVector ) vector ).set (0 , (byte []) data );
70
+ return ;
71
+ }
72
+ if (vector instanceof Float4Vector ) {
73
+ ((Float4Vector ) vector ).set (0 , (float ) data );
74
+ return ;
75
+ }
76
+ if (vector instanceof Float8Vector ) {
77
+ ((Float8Vector ) vector ).set (0 , (double ) data );
78
+ return ;
79
+ }
80
+ if (vector instanceof IntVector ) {
81
+ ((IntVector ) vector ).set (0 , (int ) data );
82
+ return ;
83
+ }
84
+ if (vector instanceof LargeVarBinaryVector ) {
85
+ ((LargeVarBinaryVector ) vector ).set (0 , (byte []) data );
86
+ return ;
87
+ }
88
+ if (vector instanceof LargeVarCharVector ) {
89
+ ((LargeVarCharVector ) vector ).set (0 , (Text ) data );
90
+ return ;
91
+ }
92
+ if (vector instanceof SmallIntVector ) {
93
+ ((SmallIntVector ) vector ).set (0 , (short ) data );
94
+ return ;
95
+ }
96
+ if (vector instanceof TimeStampVector ) {
97
+ ((TimeStampVector ) vector ).set (0 , (long ) data );
98
+ return ;
99
+ }
100
+ if (vector instanceof TinyIntVector ) {
101
+ ((TinyIntVector ) vector ).set (0 , (byte ) data );
102
+ return ;
103
+ }
104
+ if (vector instanceof UInt1Vector ) {
105
+ ((UInt1Vector ) vector ).set (0 , (byte ) data );
106
+ return ;
107
+ }
108
+ if (vector instanceof UInt2Vector ) {
109
+ ((UInt2Vector ) vector ).set (0 , (short ) data );
110
+ return ;
111
+ }
112
+ if (vector instanceof UInt4Vector ) {
113
+ ((UInt4Vector ) vector ).set (0 , (int ) data );
114
+ return ;
115
+ }
116
+ if (vector instanceof UInt8Vector ) {
117
+ ((UInt8Vector ) vector ).set (0 , (long ) data );
118
+ return ;
119
+ }
120
+ if (vector instanceof VarBinaryVector ) {
121
+ ((VarBinaryVector ) vector ).set (0 , (byte []) data );
122
+ return ;
123
+ }
124
+ if (vector instanceof VarCharVector ) {
125
+ ((VarCharVector ) vector ).set (0 , (Text ) data );
126
+ return ;
127
+ }
128
+ if (vector instanceof UUIDVector ) {
129
+ ((UUIDVector ) vector ).set (0 , (java .util .UUID ) data );
130
+ return ;
131
+ }
132
+ if (vector instanceof JSONVector ) {
133
+ ((JSONVector ) vector ).setSafe (0 , (byte []) data );
134
+ return ;
135
+ }
136
+
137
+ throw new IllegalArgumentException ("Unsupported vector type: " + vector .getClass ());
138
+ }
139
+
31
140
public static ByteString encode (Table table ) throws IOException {
32
141
try (BufferAllocator bufferAllocator = new RootAllocator ()) {
33
142
Schema schema = toArrowSchema (table );
34
- VectorSchemaRoot schemaRoot = VectorSchemaRoot .create (schema , bufferAllocator );
35
- try (ByteArrayOutputStream out = new ByteArrayOutputStream ()) {
36
- try (ArrowStreamWriter writer =
37
- new ArrowStreamWriter (schemaRoot , null , Channels .newChannel (out ))) {
38
- writer .start ();
39
- writer .end ();
40
- return ByteString .copyFrom (out .toByteArray ());
143
+ try (VectorSchemaRoot schemaRoot = VectorSchemaRoot .create (schema , bufferAllocator )) {
144
+ try (ByteArrayOutputStream out = new ByteArrayOutputStream ()) {
145
+ try (ArrowStreamWriter writer =
146
+ new ArrowStreamWriter (schemaRoot , null , Channels .newChannel (out ))) {
147
+ writer .start ();
148
+ writer .end ();
149
+ return ByteString .copyFrom (out .toByteArray ());
150
+ }
41
151
}
42
152
}
43
153
}
@@ -57,7 +167,15 @@ public static Schema toArrowSchema(Table table) {
57
167
Field [] fields = new Field [columns .size ()];
58
168
for (int i = 0 ; i < columns .size (); i ++) {
59
169
Column column = columns .get (i );
60
- Field field = Field .nullable (column .getName (), column .getType ());
170
+ Map <String , String > metadata = new HashMap <>();
171
+ metadata .put (CQ_EXTENSION_UNIQUE , column .isUnique () ? "true" : "false" );
172
+ metadata .put (CQ_EXTENSION_PRIMARY_KEY , column .isPrimaryKey () ? "true" : "false" );
173
+ metadata .put (CQ_EXTENSION_INCREMENTAL , column .isIncrementalKey () ? "true" : "false" );
174
+ Field field =
175
+ new Field (
176
+ column .getName (),
177
+ new FieldType (!column .isNotNull (), column .getType (), null , metadata ),
178
+ null );
61
179
fields [i ] = field ;
62
180
}
63
181
Map <String , String > metadata = new HashMap <>();
@@ -71,22 +189,37 @@ public static Schema toArrowSchema(Table table) {
71
189
if (table .getParent () != null ) {
72
190
metadata .put (CQ_TABLE_DEPENDS_ON , table .getParent ().getName ());
73
191
}
192
+ metadata .put (CQ_EXTENSION_CONSTRAINT_NAME , table .getConstraintName ());
74
193
return new Schema (asList (fields ), metadata );
75
194
}
76
195
77
196
public static Table fromArrowSchema (Schema schema ) {
78
197
List <Column > columns = new ArrayList <>();
79
198
for (Field field : schema .getFields ()) {
80
- columns .add (Column .builder ().name (field .getName ()).type (field .getType ()).build ());
199
+ boolean isUnique = field .getMetadata ().get (CQ_EXTENSION_UNIQUE ) == "true" ;
200
+ boolean isPrimaryKey = field .getMetadata ().get (CQ_EXTENSION_PRIMARY_KEY ) == "true" ;
201
+ boolean isIncrementalKey = field .getMetadata ().get (CQ_EXTENSION_INCREMENTAL ) == "true" ;
202
+
203
+ columns .add (
204
+ Column .builder ()
205
+ .name (field .getName ())
206
+ .unique (isUnique )
207
+ .primaryKey (isPrimaryKey )
208
+ .incrementalKey (isIncrementalKey )
209
+ .type (field .getType ())
210
+ .build ());
81
211
}
82
212
83
213
Map <String , String > metaData = schema .getCustomMetadata ();
84
214
String name = metaData .get (CQ_TABLE_NAME );
85
215
String title = metaData .get (CQ_TABLE_TITLE );
86
216
String description = metaData .get (CQ_TABLE_DESCRIPTION );
87
217
String parent = metaData .get (CQ_TABLE_DEPENDS_ON );
218
+ String constraintName = metaData .get (CQ_EXTENSION_CONSTRAINT_NAME );
219
+
220
+ TableBuilder tableBuilder =
221
+ Table .builder ().name (name ).constraintName (constraintName ).columns (columns );
88
222
89
- TableBuilder tableBuilder = Table .builder ().name (name ).columns (columns );
90
223
if (title != null ) {
91
224
tableBuilder .title (title );
92
225
}
@@ -99,4 +232,29 @@ public static Table fromArrowSchema(Schema schema) {
99
232
100
233
return tableBuilder .build ();
101
234
}
235
+
236
+ public static ByteString encode (Resource resource ) throws IOException {
237
+ try (BufferAllocator bufferAllocator = new RootAllocator ()) {
238
+ Table table = resource .getTable ();
239
+ Schema schema = toArrowSchema (table );
240
+ try (VectorSchemaRoot vectorRoot = VectorSchemaRoot .create (schema , bufferAllocator )) {
241
+ for (int i = 0 ; i < table .getColumns ().size (); i ++) {
242
+ FieldVector vector = vectorRoot .getVector (i );
243
+ Object data = resource .getData ().get (i ).get ();
244
+ setVectorData (vector , data );
245
+ }
246
+ // TODO: Support encoding multiple resources
247
+ vectorRoot .setRowCount (1 );
248
+ try (ByteArrayOutputStream out = new ByteArrayOutputStream ()) {
249
+ try (ArrowStreamWriter writer =
250
+ new ArrowStreamWriter (vectorRoot , null , Channels .newChannel (out ))) {
251
+ writer .start ();
252
+ writer .writeBatch ();
253
+ writer .end ();
254
+ return ByteString .copyFrom (out .toByteArray ());
255
+ }
256
+ }
257
+ }
258
+ }
259
+ }
102
260
}
0 commit comments