From bd6655c91e4c49c42b71ebb5f4d96c8aadefefdb Mon Sep 17 00:00:00 2001 From: Martin Norbury Date: Wed, 9 Aug 2023 09:59:43 +0100 Subject: [PATCH] Adding Table filterDFS functionality Implementing the same tests as we have in the Go [plugin-sdk](https://github.com/cloudquery/plugin-sdk/blob/main/schema/table_test.go#L58-L276) supporting filtering tables by including and skipping tables based on glob patterns. --- lib/build.gradle | 2 + .../io/cloudquery/helper/GlobMatcher.java | 23 ++ .../io/cloudquery/schema/SchemaException.java | 22 ++ .../main/java/io/cloudquery/schema/Table.java | 91 ++++++++ .../io/cloudquery/helper/GlobMatcherTest.java | 41 ++++ .../cloudquery/schema/TableFilterDFSTest.java | 199 ++++++++++++++++++ .../{TableTest.java => TableFlattenTest.java} | 30 +-- .../io/cloudquery/schema/TableMaxTest.java | 26 +++ 8 files changed, 412 insertions(+), 22 deletions(-) create mode 100644 lib/src/main/java/io/cloudquery/helper/GlobMatcher.java create mode 100644 lib/src/main/java/io/cloudquery/schema/SchemaException.java create mode 100644 lib/src/test/java/io/cloudquery/helper/GlobMatcherTest.java create mode 100644 lib/src/test/java/io/cloudquery/schema/TableFilterDFSTest.java rename lib/src/test/java/io/cloudquery/schema/{TableTest.java => TableFlattenTest.java} (51%) create mode 100644 lib/src/test/java/io/cloudquery/schema/TableMaxTest.java diff --git a/lib/build.gradle b/lib/build.gradle index ecf8598..d0ddfd2 100644 --- a/lib/build.gradle +++ b/lib/build.gradle @@ -34,6 +34,8 @@ dependencies { implementation "io.grpc:grpc-testing:1.57.1" implementation "io.cloudquery:plugin-pb-java:0.0.5" implementation "org.apache.arrow:arrow-vector:12.0.1" + + testImplementation 'org.assertj:assertj-core:3.24.2' } testing { diff --git a/lib/src/main/java/io/cloudquery/helper/GlobMatcher.java b/lib/src/main/java/io/cloudquery/helper/GlobMatcher.java new file mode 100644 index 0000000..36ecad4 --- /dev/null +++ b/lib/src/main/java/io/cloudquery/helper/GlobMatcher.java 
@@ -0,0 +1,23 @@ +package io.cloudquery.helper; + +import lombok.Getter; + +import java.nio.file.FileSystems; +import java.nio.file.Path; +import java.nio.file.PathMatcher; + +public class GlobMatcher { + private final PathMatcher pathMatcher; + + @Getter + private final String stringMatch; + + public GlobMatcher(String stringMatch) { + this.stringMatch = stringMatch; + this.pathMatcher = FileSystems.getDefault().getPathMatcher("glob:" + stringMatch); + } + + public boolean matches(String name) { + return pathMatcher.matches(Path.of(name)); + } +} diff --git a/lib/src/main/java/io/cloudquery/schema/SchemaException.java b/lib/src/main/java/io/cloudquery/schema/SchemaException.java new file mode 100644 index 0000000..c56fb1a --- /dev/null +++ b/lib/src/main/java/io/cloudquery/schema/SchemaException.java @@ -0,0 +1,22 @@ +package io.cloudquery.schema; + +public class SchemaException extends Exception { + public SchemaException() { + } + + public SchemaException(String message) { + super(message); + } + + public SchemaException(String message, Throwable cause) { + super(message, cause); + } + + public SchemaException(Throwable cause) { + super(cause); + } + + public SchemaException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } +} diff --git a/lib/src/main/java/io/cloudquery/schema/Table.java b/lib/src/main/java/io/cloudquery/schema/Table.java index 78bbe18..d2c6fac 100644 --- a/lib/src/main/java/io/cloudquery/schema/Table.java +++ b/lib/src/main/java/io/cloudquery/schema/Table.java @@ -1,12 +1,16 @@ package io.cloudquery.schema; +import io.cloudquery.helper.GlobMatcher; import lombok.Builder; import lombok.Getter; +import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Optional; +import java.util.function.Predicate; @Builder(toBuilder = true) @Getter @@ -23,6 +27,67 @@ 
public static List flattenTables(List
tables) { return flattenMap.values().stream().toList(); } + public static List
filterDFS(List
tables, List includeConfiguration, List skipConfiguration, boolean skipDependentTables) throws SchemaException { + List includes = includeConfiguration.stream().map(GlobMatcher::new).toList(); + List excludes = skipConfiguration.stream().map(GlobMatcher::new).toList(); + + List
flattenedTables = flattenTables(tables); + for (GlobMatcher includeMatcher : includes) { + boolean includeMatch = false; + for (Table table : flattenedTables) { + if (includeMatcher.matches(table.getName())) { + includeMatch = true; + break; + } + } + if (!includeMatch) { + throw new SchemaException("table configuration includes a pattern \"" + includeMatcher.getStringMatch() + "\" with no matches"); + } + } + for (GlobMatcher excludeMatcher : excludes) { + boolean excludeMatch = false; + for (Table table : flattenedTables) { + if (excludeMatcher.matches(table.getName())) { + excludeMatch = true; + break; + } + } + if (!excludeMatch) { + throw new SchemaException("skip configuration includes a pattern \"" + excludeMatcher.getStringMatch() + "\" with no matches"); + } + } + + Predicate
include = table -> { + for (GlobMatcher matcher : includes) { + if (matcher.matches(table.getName())) { + return true; + } + } + return false; + }; + + Predicate
exclude = table -> { + for (GlobMatcher matcher : excludes) { + if (matcher.matches(table.getName())) { + return true; + } + } + return false; + }; + + return filterDFSFunc(tables, include, exclude, skipDependentTables); + } + + private static List
filterDFSFunc(List
tables, Predicate
include, Predicate
exclude, boolean skipDependentTables) { + List
filteredTables = new ArrayList<>(); + for (Table table : tables) { + Table filteredTable = table.toBuilder().parent(null).build(); + Optional
optionalFilteredTable = filteredTable.filterDfs(false, include, exclude, skipDependentTables); + optionalFilteredTable.ifPresent(filteredTables::add); + } + return filteredTables; + } + public static int maxDepth(List
tables) { int depth = 0; if (tables.isEmpty()) { @@ -39,6 +104,32 @@ public static int maxDepth(List
tables) { private String name; + private Table parent; + @Builder.Default private List
relations = Collections.emptyList(); + + private Optional
filterDfs(boolean parentMatched, Predicate
include, Predicate
exclude, boolean skipDependentTables) { + if (exclude.test(this)) { + return Optional.empty(); + } + boolean matched = parentMatched && !skipDependentTables; + if (include.test(this)) { + matched = true; + } + List
filteredRelations = new ArrayList<>(); + for (Table relation : relations) { + Optional
filteredChild = relation.filterDfs(matched, include, exclude, skipDependentTables); + if (filteredChild.isPresent()) { + matched = true; + filteredRelations.add(filteredChild.get()); + } + } + this.relations = filteredRelations; + if (matched) { + return Optional.of(this); + } + return Optional.empty(); + } + } diff --git a/lib/src/test/java/io/cloudquery/helper/GlobMatcherTest.java b/lib/src/test/java/io/cloudquery/helper/GlobMatcherTest.java new file mode 100644 index 0000000..e62c622 --- /dev/null +++ b/lib/src/test/java/io/cloudquery/helper/GlobMatcherTest.java @@ -0,0 +1,41 @@ +package io.cloudquery.helper; + +import org.junit.Test; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +public class GlobMatcherTest { + @Test + public void shouldMatchWildcard() { + GlobMatcher globMatcher = new GlobMatcher("*"); + + assertTrue(globMatcher.matches("aws_ec2_vpc")); + assertTrue(globMatcher.matches("aws_ec2_eip")); + assertTrue(globMatcher.matches("aws_ec2_instance")); + } + + @Test + public void shouldMatchWildcardSuffix() { + GlobMatcher globMatcher = new GlobMatcher("aws_*"); + + assertTrue(globMatcher.matches("aws_ec2_vpc")); + assertTrue(globMatcher.matches("aws_ec2_eip")); + assertTrue(globMatcher.matches("aws_ec2_instance")); + + assertFalse(globMatcher.matches("gcp_project")); + assertFalse(globMatcher.matches("other_aws_resource")); + } + + @Test + public void shouldMatchWildcardPrefixAndSuffix() { + GlobMatcher globMatcher = new GlobMatcher("*ec2*"); + + assertTrue(globMatcher.matches("aws_ec2_vpc")); + assertTrue(globMatcher.matches("aws_ec2_eip")); + assertTrue(globMatcher.matches("aws_ec2_instance")); + + assertFalse(globMatcher.matches("gcp_project")); + assertFalse(globMatcher.matches("other_aws_resource")); + } +} \ No newline at end of file diff --git a/lib/src/test/java/io/cloudquery/schema/TableFilterDFSTest.java b/lib/src/test/java/io/cloudquery/schema/TableFilterDFSTest.java new file mode 100644 index 
0000000..afbe7cc --- /dev/null +++ b/lib/src/test/java/io/cloudquery/schema/TableFilterDFSTest.java @@ -0,0 +1,199 @@ +package io.cloudquery.schema; + +import org.junit.Test; + +import java.util.Collections; +import java.util.List; +import java.util.stream.Stream; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; + +public class TableFilterDFSTest { + public static final List
BASIC_TABLES = Stream.of("table1", "table2", "table3").map( + name -> Table.builder().name(name).build() + ).toList(); + + public static final List
NESTED_TABLE = List.of( + Table.builder().name("main_table").relations( + List.of( + Table.builder().name("sub_table").relations( + List.of( + Table.builder().name("sub_sub_table").build() + ) + ).build() + ) + ).build() + ); + + public static final List EMPTY_CONFIGURATION = Collections.emptyList(); + + @Test + public void shouldReturnAllTables() throws SchemaException { + List includeConfiguration = List.of("*"); + + List
filteredTables = Table.filterDFS(BASIC_TABLES, includeConfiguration, EMPTY_CONFIGURATION, false); + + assertThat(extractTableNames(filteredTables)).containsOnly("table1", "table2", "table3"); + } + + @Test + public void shouldFilterTables() throws SchemaException { + List includeConfiguration = List.of("*"); + List skipConfiguration = List.of("table1", "table3"); + + List
filteredTables = Table.filterDFS(BASIC_TABLES, includeConfiguration, skipConfiguration, false); + + assertThat(extractTableNames(filteredTables)).containsOnly("table2"); + } + + @Test + public void shouldFilterSpecificTableWhenProvided() throws SchemaException { + List includeConfiguration = List.of("table2"); + + List
filteredTables = Table.filterDFS(BASIC_TABLES, includeConfiguration, EMPTY_CONFIGURATION, false); + + assertThat(extractTableNames(filteredTables)).containsOnly("table2"); + } + + @Test + public void shouldFilterTablesMatchingGlobPattern() throws SchemaException { + List includeConfiguration = List.of("table*"); + List skipConfiguration = List.of("table2", "table3"); + + List
filteredTables = Table.filterDFS(BASIC_TABLES, includeConfiguration, skipConfiguration, false); + + assertThat(extractTableNames(filteredTables)).containsOnly("table1"); + } + + @Test + public void shouldReturnTableOnlyOnceEvenIfMatchedByMultiplePatterns() throws SchemaException { + List includeConfiguration = List.of("*", "table1", "table*", "table2"); + + List
filteredTables = Table.filterDFS(BASIC_TABLES, includeConfiguration, EMPTY_CONFIGURATION, false); + + assertThat(extractTableNames(filteredTables)).containsOnly("table1", "table2", "table3"); + } + + @Test + public void shouldMatchPrefixGlobs() throws SchemaException { + List includeConfiguration = List.of("*2"); + + List
filteredTables = Table.filterDFS(BASIC_TABLES, includeConfiguration, EMPTY_CONFIGURATION, false); + + assertThat(extractTableNames(filteredTables)).containsOnly("table2"); + } + + @Test + public void shouldMatchSuffixGlobs() throws SchemaException { + List includeConfiguration = List.of("table*"); + + List
filteredTables = Table.filterDFS(BASIC_TABLES, includeConfiguration, EMPTY_CONFIGURATION, false); + + assertThat(extractTableNames(filteredTables)).containsOnly("table1", "table2", "table3"); + } + + @Test + public void shouldSkipGlobs() throws SchemaException { + List includeConfiguration = List.of("*"); + List skipConfiguration = List.of("t*1"); + + List
filteredTables = Table.filterDFS(BASIC_TABLES, includeConfiguration, skipConfiguration, false); + + assertThat(extractTableNames(filteredTables)).containsOnly("table2", "table3"); + } + + @Test + public void shouldReturnTheParentAndAllDescendants() throws SchemaException { + List includeConfiguration = List.of("main_table"); + + List
filteredTables = Table.filterDFS(NESTED_TABLE, includeConfiguration, EMPTY_CONFIGURATION, false); + + assertThat(extractTableNames(filteredTables)).containsOnly("main_table", "sub_sub_table", "sub_table"); + } + + @Test + public void shouldThrowExceptionIfNoIncludeMatches() { + String tableMatch = "bad_match"; + List includeConfiguration = List.of(tableMatch); + + SchemaException schemaException = assertThrows(SchemaException.class, () -> Table.filterDFS(NESTED_TABLE, includeConfiguration, EMPTY_CONFIGURATION, false)); + assertEquals("table configuration includes a pattern \"" + tableMatch + "\" with no matches", schemaException.getMessage()); + } + + @Test + public void shouldThrowExceptionIfNoExcludeMatches() { + String tableMatch = "bad_match"; + List includeConfiguration = List.of("*"); + List skipConfiguration = List.of(tableMatch); + + SchemaException schemaException = assertThrows(SchemaException.class, () -> Table.filterDFS(NESTED_TABLE, includeConfiguration, skipConfiguration, false)); + assertEquals("skip configuration includes a pattern \"" + tableMatch + "\" with no matches", schemaException.getMessage()); + } + + @Test + public void shouldSkipChildTableButReturnSiblings() throws SchemaException { + List
tables = List.of( + Table.builder().name("main_table").relations( + List.of( + Table.builder().name("sub_table_1").parent(Table.builder().name("main_table").build()).build(), + Table.builder().name("sub_table_2").parent(Table.builder().name("main_table").build()).build() + ) + ).build() + ); + + List includeTables = List.of("main_table"); + List skipTables = List.of("sub_table_2"); + + List
filteredTables = Table.filterDFS(tables, includeTables, skipTables, false); + + assertThat(extractTableNames(filteredTables)).containsOnly("main_table", "sub_table_1"); + } + + @Test + public void shouldSkipChildTablesIfSkipDependentTrue() throws SchemaException { + List
tables = List.of( + Table.builder().name("main_table").relations( + List.of( + Table.builder().name("sub_table_1").parent(Table.builder().name("main_table").build()).build(), + Table.builder().name("sub_table_2").parent(Table.builder().name("main_table").build()).build() + ) + ).build() + ); + + List includeTables = List.of("main_table"); + List skipTables = List.of("sub_table_2"); + + List
filteredTables = Table.filterDFS(tables, includeTables, skipTables, true); + + assertThat(extractTableNames(filteredTables)).containsOnly("main_table"); + } + + @Test + public void shouldSkipChildTablesIfSkipDependentTablesIsTrueButNotIfExplicitlyIncluded() throws SchemaException { + List
tables = List.of( + Table.builder().name("main_table_1").relations( + List.of( + Table.builder().name("sub_table_1").parent(Table.builder().name("main_table_1").build()).build() + ) + ).build(), + Table.builder().name("main_table_2").relations( + List.of( + Table.builder().name("sub_table_2").parent(Table.builder().name("main_table_2").build()).build(), + Table.builder().name("sub_table_3").parent(Table.builder().name("main_table_2").build()).build() + ) + ).build() + ); + + List includeTables = List.of("main_table_1", "sub_table_2"); + + List
filteredTables = Table.filterDFS(tables, includeTables, EMPTY_CONFIGURATION, true); + + assertThat(extractTableNames(filteredTables)).containsOnly("main_table_1", "main_table_2", "sub_table_2"); + } + + private List extractTableNames(List
filteredTables) { + return Table.flattenTables(filteredTables).stream().map(Table::getName).toList(); + } +} diff --git a/lib/src/test/java/io/cloudquery/schema/TableTest.java b/lib/src/test/java/io/cloudquery/schema/TableFlattenTest.java similarity index 51% rename from lib/src/test/java/io/cloudquery/schema/TableTest.java rename to lib/src/test/java/io/cloudquery/schema/TableFlattenTest.java index 1ffb420..f402a52 100644 --- a/lib/src/test/java/io/cloudquery/schema/TableTest.java +++ b/lib/src/test/java/io/cloudquery/schema/TableFlattenTest.java @@ -1,27 +1,20 @@ package io.cloudquery.schema; -import org.junit.Before; import org.junit.Test; -import java.util.Collections; import java.util.List; import static org.junit.Assert.assertEquals; -public class TableTest { +public class TableFlattenTest { - public Table testTable; - - @Before - public void setUp() { - testTable = Table.builder(). - name("test"). - relations(List.of( - Table.builder().name("test2").build(), - Table.builder().name("test3").build(), - Table.builder().name("test4").build() - )).build(); - } + public Table testTable = Table.builder(). + name("test"). 
+ relations(List.of( + Table.builder().name("test2").build(), + Table.builder().name("test3").build(), + Table.builder().name("test4").build() + )).build(); @Test public void shouldFlattenTables() { @@ -42,11 +35,4 @@ public void shouldFlattenTablesWithDuplicates() { assertEquals(3, testTable.getRelations().size()); assertEquals(4, flattenedTables.size()); } - - @Test - public void shouldReturnMaxDepth() { - assertEquals(0, Table.maxDepth(Collections.emptyList())); - assertEquals(2, Table.maxDepth(List.of(testTable))); - assertEquals(3, Table.maxDepth(List.of(testTable.toBuilder().relations(List.of(testTable)).build()))); - } } diff --git a/lib/src/test/java/io/cloudquery/schema/TableMaxTest.java b/lib/src/test/java/io/cloudquery/schema/TableMaxTest.java new file mode 100644 index 0000000..2ba40d4 --- /dev/null +++ b/lib/src/test/java/io/cloudquery/schema/TableMaxTest.java @@ -0,0 +1,26 @@ +package io.cloudquery.schema; + +import org.junit.Test; + +import java.util.Collections; +import java.util.List; + +import static org.junit.Assert.assertEquals; + +public class TableMaxTest { + + public Table testTable = Table.builder(). + name("test"). + relations(List.of( + Table.builder().name("test2").build(), + Table.builder().name("test3").build(), + Table.builder().name("test4").build() + )).build(); + + @Test + public void shouldReturnMaxDepth() { + assertEquals(0, Table.maxDepth(Collections.emptyList())); + assertEquals(2, Table.maxDepth(List.of(testTable))); + assertEquals(3, Table.maxDepth(List.of(testTable.toBuilder().relations(List.of(testTable)).build()))); + } +}