
Commit 0a470b7

feat: Implement concurrency and relations resolving (#91)
1 parent 1fe0c7f commit 0a470b7

7 files changed: +142 −54 lines changed

Diff for: lib/src/main/java/io/cloudquery/memdb/MemDB.java

+21
@@ -54,6 +54,27 @@ public void resolve(
               }
             })
         .transform(TransformWithClass.builder(Table2Data.class).pkField("id").build())
+        .relations(
+            List.of(
+                Table.builder()
+                    .name("table2_child")
+                    .resolver(
+                        new TableResolver() {
+
+                          @Override
+                          public void resolve(
+                              ClientMeta clientMeta,
+                              Resource parent,
+                              TableOutputStream stream) {
+                            String parentName = parent.get("name").toString();
+                            stream.write(
+                                Table2ChildData.builder().name(parentName + "_name1").build());
+                            stream.write(
+                                Table2ChildData.builder().name(parentName + "_name2").build());
+                          }
+                        })
+                    .transform(TransformWithClass.builder(Table2ChildData.class).build())
+                    .build()))
         .build());
   }
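
A quick, hedged sketch of what the relation added above produces. This is not part of the commit: the parent value "foo" is assumed, and only the Table2ChildData class introduced elsewhere in this commit is used.

package io.cloudquery.memdb;

// Hedged sketch: the two child rows the resolver above would emit for a
// parent whose "name" column is "foo" (the parent value is an assumption).
public class Table2ChildExample {
  public static void main(String[] args) {
    String parentName = "foo";
    Table2ChildData first = Table2ChildData.builder().name(parentName + "_name1").build();
    Table2ChildData second = Table2ChildData.builder().name(parentName + "_name2").build();
    System.out.println(first.getName());  // foo_name1
    System.out.println(second.getName()); // foo_name2
  }
}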

Diff for: lib/src/main/java/io/cloudquery/memdb/Spec.java

+1 −1

@@ -13,7 +13,7 @@ public static Spec fromJSON(String json) throws JsonMappingException, JsonProcessingException
     ObjectMapper objectMapper = new ObjectMapper();
     Spec spec = objectMapper.readValue(json, Spec.class);
     if (spec.getConcurrency() == 0) {
-      spec.setConcurrency(10000);
+      spec.setConcurrency(100);
     }
     return spec;
   }
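
A hedged usage sketch of the effect of this change: an unset concurrency now defaults to 100 instead of 10000, while explicit values are preserved. The "concurrency" JSON field name and the assumption that Spec deserializes from an empty JSON object are inferred from the getter/setter pair shown in the diff, not confirmed by the commit.

package io.cloudquery.memdb;

import com.fasterxml.jackson.core.JsonProcessingException;

// Hedged sketch: assumes Jackson maps a "concurrency" JSON field onto
// Spec's getConcurrency/setConcurrency and that "{}" parses into a Spec.
public class SpecDefaultExample {
  public static void main(String[] args) throws JsonProcessingException {
    Spec defaults = Spec.fromJSON("{}");
    System.out.println(defaults.getConcurrency()); // 100 (new default, was 10000)

    Spec explicit = Spec.fromJSON("{\"concurrency\": 5}");
    System.out.println(explicit.getConcurrency()); // 5 (explicit values are kept)
  }
}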
Diff for: lib/src/main/java/io/cloudquery/memdb/Table2ChildData.java

+10 (new file)

@@ -0,0 +1,10 @@
+package io.cloudquery.memdb;
+
+import lombok.Builder;
+import lombok.Getter;
+
+@Builder
+@Getter
+public class Table2ChildData {
+  private String name;
+}
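
For readers unfamiliar with Lombok: @Builder and @Getter on the new class generate the builder() entry point and getName() accessor that the memdb resolver above relies on. Roughly equivalent hand-written code, as an approximation of Lombok's output rather than code from the commit:

// Approximation of what @Builder and @Getter expand to for Table2ChildData.
public class Table2ChildData {
  private String name;

  Table2ChildData(String name) {
    this.name = name;
  }

  public static Table2ChildDataBuilder builder() {
    return new Table2ChildDataBuilder();
  }

  public String getName() {
    return name;
  }

  public static class Table2ChildDataBuilder {
    private String name;

    public Table2ChildDataBuilder name(String name) {
      this.name = name;
      return this;
    }

    public Table2ChildData build() {
      return new Table2ChildData(name);
    }
  }
}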

Diff for: lib/src/main/java/io/cloudquery/scheduler/OnResourceResolved.java

-3
This file was deleted.

Diff for: lib/src/main/java/io/cloudquery/scheduler/Scheduler.java

+62 −21

@@ -1,11 +1,16 @@
 package io.cloudquery.scheduler;

+import com.google.protobuf.ByteString;
 import io.cloudquery.helper.ArrowHelper;
 import io.cloudquery.plugin.v3.Sync;
 import io.cloudquery.schema.ClientMeta;
+import io.cloudquery.schema.Resource;
 import io.cloudquery.schema.Table;
 import io.grpc.stub.StreamObserver;
 import java.util.List;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
 import lombok.Builder;
 import lombok.NonNull;
 import org.apache.logging.log4j.Logger;

@@ -20,8 +25,58 @@ public class Scheduler {
   private int concurrency;
   private boolean deterministicCqId;

-  public void sync() {
+  private void resolveTables(List<Table> tables, Resource parent, int concurrency)
+      throws InterruptedException {
+    if (tables == null || tables.isEmpty()) {
+      return;
+    }
+    ExecutorService executor = Executors.newFixedThreadPool(Math.min(tables.size(), concurrency));
     for (Table table : tables) {
+      final int nextLevelConcurrency = Math.max(1, concurrency / 2);
+      executor.submit(
+          new Runnable() {
+            @Override
+            public void run() {
+              try {
+                String tableMessage =
+                    parent != null
+                        ? "table " + table.getName() + " of parent" + parent.getTable().getName()
+                        : "table " + table.getName();
+
+                logger.info("resolving {}", tableMessage);
+                if (!table.getResolver().isPresent()) {
+                  logger.error("no resolver for {}", tableMessage);
+                  return;
+                }
+
+                SchedulerTableOutputStream schedulerTableOutputStream =
+                    new SchedulerTableOutputStream(table, parent, client, logger);
+                table.getResolver().get().resolve(client, parent, schedulerTableOutputStream);
+
+                for (Resource resource : schedulerTableOutputStream.getResources()) {
+                  ByteString record = resource.encode();
+                  Sync.MessageInsert insert =
+                      Sync.MessageInsert.newBuilder().setRecord(record).build();
+                  Sync.Response response = Sync.Response.newBuilder().setInsert(insert).build();
+                  syncStream.onNext(response);
+                  resolveTables(table.getRelations(), resource, nextLevelConcurrency);
+                }
+
+                logger.info("resolved {}", tableMessage);
+              } catch (Exception e) {
+                logger.error("Failed to resolve table: {}", table.getName(), e);
+                syncStream.onError(e);
+                return;
+              }
+            }
+          });
+    }
+    executor.shutdown();
+    executor.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS);
+  }
+
+  public void sync() {
+    for (Table table : Table.flattenTables(tables)) {
       try {
         logger.info("sending migrate message for table: {}", table.getName());
         Sync.MessageMigrateTable migrateTable =

@@ -34,26 +89,12 @@ public void sync() {
       }
     }

-    for (Table table : tables) {
-      try {
-        logger.info("resolving table: {}", table.getName());
-        if (!table.getResolver().isPresent()) {
-          logger.error("no resolver for table: {}", table.getName());
-          continue;
-        }
-        SchedulerTableOutputStream schedulerTableOutputStream =
-            SchedulerTableOutputStream.builder()
-                .table(table)
-                .client(client)
-                .logger(logger)
-                .syncStream(syncStream)
-                .build();
-        table.getResolver().get().resolve(client, null, schedulerTableOutputStream);
-        logger.info("resolved table: {}", table.getName());
-      } catch (Exception e) {
-        syncStream.onError(e);
-        return;
-      }
+    try {
+      resolveTables(this.tables, null, this.concurrency);
+    } catch (InterruptedException e) {
+      logger.error("Failed to resolve tables", e);
+      syncStream.onError(e);
+      return;
     }

     syncStream.onCompleted();
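
Note on the new resolveTables: each level of relations gets its own fixed thread pool, the pool size is capped by the number of tables at that level, and the concurrency budget is halved (with a floor of 1) on every recursion into child tables. A minimal sketch of that decay, assuming the default concurrency of 100; the class and variable names below are illustrative only:

// Minimal sketch (not from the commit) of how the per-level budget decays
// in Scheduler.resolveTables, starting from the default concurrency of 100.
public class ConcurrencyDecayExample {
  public static void main(String[] args) {
    int topLevel = 100;                                // budget for root tables
    int childLevel = Math.max(1, topLevel / 2);        // 50 for direct relations
    int grandchildLevel = Math.max(1, childLevel / 2); // 25 one level deeper
    System.out.println(topLevel + " -> " + childLevel + " -> " + grandchildLevel);
    // The actual pool is additionally capped by the table count at a level:
    // Executors.newFixedThreadPool(Math.min(tables.size(), concurrency)).
  }
}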
Diff for: lib/src/main/java/io/cloudquery/scheduler/SchedulerTableOutputStream.java

+46 −27

@@ -1,52 +1,71 @@
 package io.cloudquery.scheduler;

-import com.google.protobuf.ByteString;
 import io.cloudquery.plugin.TableOutputStream;
-import io.cloudquery.plugin.v3.Sync;
 import io.cloudquery.schema.ClientMeta;
 import io.cloudquery.schema.Column;
 import io.cloudquery.schema.Resource;
 import io.cloudquery.schema.Table;
 import io.cloudquery.transformers.TransformerException;
-import io.grpc.stub.StreamObserver;
-import lombok.Builder;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
 import lombok.NonNull;
 import org.apache.logging.log4j.Logger;

-@Builder
 public class SchedulerTableOutputStream implements TableOutputStream {
+  private static final int RESOURCE_RESOLVE_CONCURRENCY = 100;
+  private static final int RESOURCE_RESOLVE_TIMEOUT_MINUTES = 10;
   @NonNull private final Table table;
   private final Resource parent;
   @NonNull private final ClientMeta client;
   @NonNull private final Logger logger;
-  @NonNull private final StreamObserver<io.cloudquery.plugin.v3.Sync.Response> syncStream;
+
+  private List<Resource> resources = new ArrayList<Resource>();
+
+  private ExecutorService executor;
+
+  public SchedulerTableOutputStream(
+      @NonNull Table table, Resource parent, @NonNull ClientMeta client, @NonNull Logger logger) {
+    this.table = table;
+    this.parent = parent;
+    this.client = client;
+    this.logger = logger;
+    this.executor = Executors.newFixedThreadPool(RESOURCE_RESOLVE_CONCURRENCY);
+  }

   @Override
   public void write(Object data) {
     Resource resource = Resource.builder().table(table).parent(parent).item(data).build();
     for (Column column : table.getColumns()) {
-      try {
-        logger.info("resolving column: {}", column.getName());
-        if (column.getResolver() == null) {
-          logger.error("no resolver for column: {}", column.getName());
-          continue;
-        }
-        column.getResolver().resolve(client, resource, column);
-        logger.info("resolved column: {}", column.getName());
-      } catch (TransformerException e) {
-        logger.error("Failed to resolve column: {}", column.getName(), e);
-        return;
-      }
+      executor.submit(
+          new Runnable() {
+            @Override
+            public void run() {
+              try {
+                logger.debug("resolving column: {}", column.getName());
+                if (column.getResolver() == null) {
+                  logger.error("no resolver for column: {}", column.getName());
+                  return;
+                }
+                column.getResolver().resolve(client, resource, column);
+                logger.debug("resolved column: {}", column.getName());
+                return;
+              } catch (TransformerException e) {
+                logger.error("Failed to resolve column: {}", column.getName(), e);
+                return;
+              }
+            }
+          });
     }
+    resources.add(resource);
+  }

-    try {
-      ByteString record = resource.encode();
-      Sync.MessageInsert insert = Sync.MessageInsert.newBuilder().setRecord(record).build();
-      Sync.Response response = Sync.Response.newBuilder().setInsert(insert).build();
-      syncStream.onNext(response);
-    } catch (Exception e) {
-      logger.error("Failed to encode resource: {}", resource, e);
-      return;
-    }
+  public List<Resource> getResources() throws InterruptedException {
+    // TODO: Optimize this to not wait for all resources to complete
+    executor.shutdown();
+    executor.awaitTermination(RESOURCE_RESOLVE_TIMEOUT_MINUTES, TimeUnit.MINUTES);
+    return this.resources;
   }
 }
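
The rewritten SchedulerTableOutputStream boils down to one pattern: write() queues column resolution on a bounded pool and buffers the resource, and getResources() drains the pool before returning the buffer to the scheduler. A standalone sketch of that pattern with generic types, as an illustration only, not the plugin API:

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

// Standalone sketch (not the plugin API) of the write/getResources pattern:
// work is queued on a bounded pool, and the buffered results are only read
// after the pool has been shut down and drained.
public class CollectAfterDrainExample {
  private final ExecutorService executor = Executors.newFixedThreadPool(100);
  private final List<String> items = new ArrayList<>();

  public void write(String item) {
    executor.submit(() -> {
      // Stands in for resolving the columns of one resource.
      System.out.println("resolving " + item);
    });
    items.add(item); // buffered; read only after the pool is drained
  }

  public List<String> drain() throws InterruptedException {
    executor.shutdown();
    executor.awaitTermination(10, TimeUnit.MINUTES);
    return items;
  }

  public static void main(String[] args) throws InterruptedException {
    CollectAfterDrainExample stream = new CollectAfterDrainExample();
    stream.write("row1");
    stream.write("row2");
    System.out.println(stream.drain());
  }
}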

Diff for: lib/src/main/java/io/cloudquery/schema/Table.java

+2 −2

@@ -9,7 +9,7 @@
 import io.cloudquery.transformers.TransformerException;
 import java.util.ArrayList;
 import java.util.Collections;
-import java.util.HashMap;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;

@@ -28,7 +28,7 @@ public interface Transform {
   }

   public static List<Table> flattenTables(List<Table> tables) {
-    Map<String, Table> flattenMap = new HashMap<>();
+    Map<String, Table> flattenMap = new LinkedHashMap<>();
     for (Table table : tables) {
       Table newTable = table.toBuilder().relations(Collections.emptyList()).build();
       flattenMap.put(newTable.name, newTable);
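
The HashMap to LinkedHashMap switch in flattenTables is purely about ordering: LinkedHashMap iterates in insertion order, so the flattened table list (and therefore the migrate messages sent in sync()) comes out in a deterministic, declaration-based order. A small sketch of the difference, with illustrative table names:

import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;

// Sketch (illustrative names) of why flattenTables switched map types:
// LinkedHashMap preserves insertion order, HashMap guarantees no order.
public class FlattenOrderExample {
  public static void main(String[] args) {
    Map<String, String> ordered = new LinkedHashMap<>();
    ordered.put("table2", "parent");
    ordered.put("table2_child", "relation");
    System.out.println(ordered.keySet()); // [table2, table2_child], always

    Map<String, String> unordered = new HashMap<>();
    unordered.put("table2", "parent");
    unordered.put("table2_child", "relation");
    System.out.println(unordered.keySet()); // iteration order is unspecified
  }
}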
