diff --git a/docs/reference/aggregations/bucket/terms-aggregation.asciidoc b/docs/reference/aggregations/bucket/terms-aggregation.asciidoc index 31d552843e33a..fe2a700be92d6 100644 --- a/docs/reference/aggregations/bucket/terms-aggregation.asciidoc +++ b/docs/reference/aggregations/bucket/terms-aggregation.asciidoc @@ -599,6 +599,8 @@ expire then we may be missing accounts of interest and have set our numbers too Ultimately this is a balancing act between managing the Elasticsearch resources required to process a single request and the volume of requests that the client application must issue to complete a task. +WARNING: Partitions cannot be used together with an `exclude` parameter. + ==== Multi-field terms aggregation The `terms` aggregation does not support collecting terms from multiple fields diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/IncludeExclude.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/IncludeExclude.java index 30653f04a355a..eb63ff5c433df 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/IncludeExclude.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/IncludeExclude.java @@ -36,6 +36,7 @@ import org.apache.lucene.util.automaton.Operations; import org.apache.lucene.util.automaton.RegExp; import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.Version; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; @@ -78,17 +79,8 @@ public static IncludeExclude merge(IncludeExclude include, IncludeExclude exclud if (include.isPartitionBased()) { throw new IllegalArgumentException("Cannot specify any excludes when using a partition-based include"); } - String includeMethod = include.isRegexBased() ? "regex" : "set"; - String excludeMethod = exclude.isRegexBased() ? "regex" : "set"; - if (includeMethod.equals(excludeMethod) == false) { - throw new IllegalArgumentException("Cannot mix a " + includeMethod + "-based include with a " - + excludeMethod + "-based method"); - } - if (include.isRegexBased()) { - return new IncludeExclude(include.include, exclude.exclude); - } else { - return new IncludeExclude(include.includeValues, exclude.excludeValues); - } + + return new IncludeExclude(include.include, exclude.exclude, include.includeValues, exclude.excludeValues); } public static IncludeExclude parseInclude(XContentParser parser) throws IOException { @@ -196,46 +188,39 @@ public boolean accept(BytesRef value) { } } - static class AutomatonBackedStringFilter extends StringFilter { + class SetAndRegexStringFilter extends StringFilter { private final ByteRunAutomaton runAutomaton; - - private AutomatonBackedStringFilter(Automaton automaton) { - this.runAutomaton = new ByteRunAutomaton(automaton); - } - - /** - * Returns whether the given value is accepted based on the {@code include} & {@code exclude} patterns. - */ - @Override - public boolean accept(BytesRef value) { - return runAutomaton.run(value.bytes, value.offset, value.length); - } - } - - static class TermListBackedStringFilter extends StringFilter { - private final Set valids; private final Set invalids; - TermListBackedStringFilter(Set includeValues, Set excludeValues) { - this.valids = includeValues; - this.invalids = excludeValues; + private SetAndRegexStringFilter(DocValueFormat format) { + Automaton automaton = toAutomaton(); + this.runAutomaton = automaton == null ? null : new ByteRunAutomaton(automaton); + this.valids = parseForDocValues(includeValues, format); + this.invalids = parseForDocValues(excludeValues, format); } /** - * Returns whether the given value is accepted based on the - * {@code include} & {@code exclude} sets. + * Returns whether the given value is accepted based on the {@code includeValues} & {@code excludeValues} + * sets, as well as the {@code include} & {@code exclude} patterns. */ @Override public boolean accept(BytesRef value) { - return ((valids == null) || (valids.contains(value))) && ((invalids == null) || (!invalids.contains(value))); + if (valids != null && valids.contains(value) == false) { + return false; + } + + if (runAutomaton != null && runAutomaton.run(value.bytes, value.offset, value.length) == false) { + return false; + } + + return invalids == null || invalids.contains(value) == false; } } public abstract static class OrdinalsFilter extends Filter { public abstract LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) throws IOException; - } class PartitionedOrdinalsFilter extends OrdinalsFilter { @@ -258,59 +243,64 @@ public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) thro } } - static class AutomatonBackedOrdinalsFilter extends OrdinalsFilter { + class SetAndRegexOrdinalsFilter extends OrdinalsFilter { private final CompiledAutomaton compiled; + private final SortedSet valids; + private final SortedSet invalids; - private AutomatonBackedOrdinalsFilter(Automaton automaton) { - this.compiled = new CompiledAutomaton(automaton); + private SetAndRegexOrdinalsFilter(DocValueFormat format) { + Automaton automaton = toAutomaton(); + this.compiled = automaton == null ? null : new CompiledAutomaton(automaton); + this.valids = parseForDocValues(includeValues, format); + this.invalids = parseForDocValues(excludeValues, format); } /** - * Computes which global ordinals are accepted by this IncludeExclude instance. - * + * Computes which global ordinals are accepted by this IncludeExclude instance, based on the combination of + * the {@code includeValues} & {@code excludeValues} sets, as well as the {@code include} & + * {@code exclude} patterns. */ @Override public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) throws IOException { - LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount()); - TermsEnum globalTermsEnum; - Terms globalTerms = new DocValuesTerms(globalOrdinals); - // TODO: specialize based on compiled.type: for ALL and prefixes (sinkState >= 0 ) we can avoid i/o and just set bits. - globalTermsEnum = compiled.getTermsEnum(globalTerms); - for (BytesRef term = globalTermsEnum.next(); term != null; term = globalTermsEnum.next()) { - acceptedGlobalOrdinals.set(globalTermsEnum.ord()); - } - return acceptedGlobalOrdinals; - } - - } - - static class TermListBackedOrdinalsFilter extends OrdinalsFilter { - - private final SortedSet includeValues; - private final SortedSet excludeValues; - - TermListBackedOrdinalsFilter(SortedSet includeValues, SortedSet excludeValues) { - this.includeValues = includeValues; - this.excludeValues = excludeValues; - } - - @Override - public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) throws IOException { - LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount()); - if (includeValues != null) { - for (BytesRef term : includeValues) { + LongBitSet acceptedGlobalOrdinals = null; + if (valids != null) { + acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount()); + for (BytesRef term : valids) { long ord = globalOrdinals.lookupTerm(term); if (ord >= 0) { acceptedGlobalOrdinals.set(ord); } } - } else if (acceptedGlobalOrdinals.length() > 0) { - // default to all terms being acceptable - acceptedGlobalOrdinals.set(0, acceptedGlobalOrdinals.length()); } - if (excludeValues != null) { - for (BytesRef term : excludeValues) { + + if (compiled != null) { + LongBitSet automatonGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount()); + TermsEnum globalTermsEnum; + Terms globalTerms = new DocValuesTerms(globalOrdinals); + // TODO: specialize based on compiled.type: for ALL and prefixes (sinkState >= 0 ) we can avoid i/o and just set bits. + globalTermsEnum = compiled.getTermsEnum(globalTerms); + for (BytesRef term = globalTermsEnum.next(); term != null; term = globalTermsEnum.next()) { + automatonGlobalOrdinals.set(globalTermsEnum.ord()); + } + + if (acceptedGlobalOrdinals == null) { + acceptedGlobalOrdinals = automatonGlobalOrdinals; + } else { + acceptedGlobalOrdinals.and(automatonGlobalOrdinals); + } + } + + if (acceptedGlobalOrdinals == null) { + acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount()); + if (acceptedGlobalOrdinals.length() > 0) { + // default to all terms being acceptable + acceptedGlobalOrdinals.set(0, acceptedGlobalOrdinals.length()); + } + } + + if (invalids != null) { + for (BytesRef term : invalids) { long ord = globalOrdinals.lookupTerm(term); if (ord >= 0) { acceptedGlobalOrdinals.clear(ord); @@ -319,9 +309,9 @@ public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) thro } return acceptedGlobalOrdinals; } - } + private final RegExp include, exclude; private final SortedSet includeValues, excludeValues; private final int incZeroBasedPartition; @@ -332,17 +322,36 @@ public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) thro * @param exclude The regular expression pattern for the terms to be excluded */ public IncludeExclude(RegExp include, RegExp exclude) { - if (include == null && exclude == null) { + this(include, exclude, null, null); + } + + public IncludeExclude(RegExp include, RegExp exclude, SortedSet includeValues, SortedSet excludeValues) { + if (include == null && exclude == null && includeValues == null && excludeValues == null) { + throw new IllegalArgumentException(); + } + if (include != null && includeValues != null) { + throw new IllegalArgumentException(); + } + if (exclude != null && excludeValues != null) { throw new IllegalArgumentException(); } this.include = include; this.exclude = exclude; - this.includeValues = null; - this.excludeValues = null; + this.includeValues = includeValues; + this.excludeValues = excludeValues; this.incZeroBasedPartition = 0; this.incNumPartitions = 0; } + public IncludeExclude(String include, String exclude, String[] includeValues, String[] excludeValues) { + this( + include == null ? null : new RegExp(include), + exclude == null ? null : new RegExp(exclude), + convertToBytesRefSet(includeValues), + convertToBytesRefSet(excludeValues) + ); + } + public IncludeExclude(String include, String exclude) { this(include == null ? null : new RegExp(include), exclude == null ? null : new RegExp(exclude)); } @@ -352,15 +361,7 @@ public IncludeExclude(String include, String exclude) { * @param excludeValues The terms to be excluded */ public IncludeExclude(SortedSet includeValues, SortedSet excludeValues) { - if (includeValues == null && excludeValues == null) { - throw new IllegalArgumentException(); - } - this.include = null; - this.exclude = null; - this.incZeroBasedPartition = 0; - this.incNumPartitions = 0; - this.includeValues = includeValues; - this.excludeValues = excludeValues; + this(null, null, includeValues, excludeValues); } public IncludeExclude(String[] includeValues, String[] excludeValues) { @@ -395,18 +396,21 @@ public IncludeExclude(int partition, int numPartitions) { */ public IncludeExclude(StreamInput in) throws IOException { if (in.readBoolean()) { - includeValues = null; - excludeValues = null; - incZeroBasedPartition = 0; - incNumPartitions = 0; String includeString = in.readOptionalString(); include = includeString == null ? null : new RegExp(includeString); String excludeString = in.readOptionalString(); exclude = excludeString == null ? null : new RegExp(excludeString); - return; + if (in.getVersion().before(Version.V_8_0_0)) { + incZeroBasedPartition = 0; + incNumPartitions = 0; + includeValues = null; + excludeValues = null; + return; + } + } else { + include = null; + exclude = null; } - include = null; - exclude = null; if (in.readBoolean()) { int size = in.readVInt(); includeValues = new TreeSet<>(); @@ -436,26 +440,28 @@ public void writeTo(StreamOutput out) throws IOException { if (regexBased) { out.writeOptionalString(include == null ? null : include.getOriginalString()); out.writeOptionalString(exclude == null ? null : exclude.getOriginalString()); - } else { - boolean hasIncludes = includeValues != null; - out.writeBoolean(hasIncludes); - if (hasIncludes) { - out.writeVInt(includeValues.size()); - for (BytesRef value : includeValues) { - out.writeBytesRef(value); - } + if (out.getVersion().before(Version.V_8_0_0)) { + return; } - boolean hasExcludes = excludeValues != null; - out.writeBoolean(hasExcludes); - if (hasExcludes) { - out.writeVInt(excludeValues.size()); - for (BytesRef value : excludeValues) { - out.writeBytesRef(value); - } + } + boolean hasIncludes = includeValues != null; + out.writeBoolean(hasIncludes); + if (hasIncludes) { + out.writeVInt(includeValues.size()); + for (BytesRef value : includeValues) { + out.writeBytesRef(value); } - out.writeVInt(incNumPartitions); - out.writeVInt(incZeroBasedPartition); } + boolean hasExcludes = excludeValues != null; + out.writeBoolean(hasExcludes); + if (hasExcludes) { + out.writeVInt(excludeValues.size()); + for (BytesRef value : excludeValues) { + out.writeBytesRef(value); + } + } + out.writeVInt(incNumPartitions); + out.writeVInt(incZeroBasedPartition); } private static SortedSet convertToBytesRefSet(String[] values) { @@ -573,29 +579,25 @@ public boolean isPartitionBased() { private Automaton toAutomaton() { Automaton a = null; + if (include == null && exclude == null) { + return a; + } if (include != null) { a = include.toAutomaton(); - } else if (includeValues != null) { - a = Automata.makeStringUnion(includeValues); } else { a = Automata.makeAnyString(); } if (exclude != null) { a = Operations.minus(a, exclude.toAutomaton(), Operations.DEFAULT_MAX_DETERMINIZED_STATES); - } else if (excludeValues != null) { - a = Operations.minus(a, Automata.makeStringUnion(excludeValues), Operations.DEFAULT_MAX_DETERMINIZED_STATES); } return a; } public StringFilter convertToStringFilter(DocValueFormat format) { - if (isRegexBased()) { - return new AutomatonBackedStringFilter(toAutomaton()); - } if (isPartitionBased()){ return new PartitionedStringFilter(); } - return new TermListBackedStringFilter(parseForDocValues(includeValues, format), parseForDocValues(excludeValues, format)); + return new SetAndRegexStringFilter(format); } private static SortedSet parseForDocValues(SortedSet endUserFormattedValues, DocValueFormat format) { @@ -612,15 +614,11 @@ private static SortedSet parseForDocValues(SortedSet endUser } public OrdinalsFilter convertToOrdinalsFilter(DocValueFormat format) { - - if (isRegexBased()) { - return new AutomatonBackedOrdinalsFilter(toAutomaton()); - } if (isPartitionBased()){ return new PartitionedOrdinalsFilter(); } - return new TermListBackedOrdinalsFilter(parseForDocValues(includeValues, format), parseForDocValues(excludeValues, format)); + return new SetAndRegexOrdinalsFilter(format); } public LongFilter convertToLongFilter(DocValueFormat format) { diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/TermsTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/TermsTests.java index bbf8d1d81f80e..90cca12d0dea4 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/TermsTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/TermsTests.java @@ -102,55 +102,43 @@ protected TermsAggregationBuilder createTestAggregatorBuilder() { factory.format("###.##"); } if (randomBoolean()) { - IncludeExclude incExc = null; - switch (randomInt(6)) { - case 0: - incExc = new IncludeExclude(new RegExp("foobar"), null); - break; - case 1: - incExc = new IncludeExclude(null, new RegExp("foobaz")); - break; - case 2: - incExc = new IncludeExclude(new RegExp("foobar"), new RegExp("foobaz")); - break; - case 3: - SortedSet includeValues = new TreeSet<>(); - int numIncs = randomIntBetween(1, 20); - for (int i = 0; i < numIncs; i++) { - includeValues.add(new BytesRef(randomAlphaOfLengthBetween(1, 30))); - } - SortedSet excludeValues = null; - incExc = new IncludeExclude(includeValues, excludeValues); - break; - case 4: - SortedSet includeValues2 = null; - SortedSet excludeValues2 = new TreeSet<>(); - int numExcs2 = randomIntBetween(1, 20); - for (int i = 0; i < numExcs2; i++) { - excludeValues2.add(new BytesRef(randomAlphaOfLengthBetween(1, 30))); - } - incExc = new IncludeExclude(includeValues2, excludeValues2); - break; - case 5: - SortedSet includeValues3 = new TreeSet<>(); - int numIncs3 = randomIntBetween(1, 20); - for (int i = 0; i < numIncs3; i++) { - includeValues3.add(new BytesRef(randomAlphaOfLengthBetween(1, 30))); + RegExp includeRegexp = null, excludeRegexp = null; + SortedSet includeValues = null, excludeValues = null; + boolean hasIncludeOrExclude = false; + + if (randomBoolean()) { + hasIncludeOrExclude = true; + if (randomBoolean()) { + includeRegexp = new RegExp(randomAlphaOfLengthBetween(5, 10)); + } else { + includeValues = new TreeSet<>(); + int numIncs = randomIntBetween(1, 20); + for (int i = 0; i < numIncs; i++) { + includeValues.add(new BytesRef(randomAlphaOfLengthBetween(1, 30))); + } } - SortedSet excludeValues3 = new TreeSet<>(); - int numExcs3 = randomIntBetween(1, 20); - for (int i = 0; i < numExcs3; i++) { - excludeValues3.add(new BytesRef(randomAlphaOfLengthBetween(1, 30))); + } + + if (randomBoolean()) { + hasIncludeOrExclude = true; + if (randomBoolean()) { + excludeRegexp = new RegExp(randomAlphaOfLengthBetween(5, 10)); + } else { + excludeValues = new TreeSet<>(); + int numIncs = randomIntBetween(1, 20); + for (int i = 0; i < numIncs; i++) { + excludeValues.add(new BytesRef(randomAlphaOfLengthBetween(1, 30))); + } } - incExc = new IncludeExclude(includeValues3, excludeValues3); - break; - case 6: + } + + IncludeExclude incExc; + if (hasIncludeOrExclude) { + incExc = new IncludeExclude(includeRegexp, excludeRegexp, includeValues, excludeValues); + } else { final int numPartitions = randomIntBetween(1, 100); final int partition = randomIntBetween(0, numPartitions - 1); incExc = new IncludeExclude(partition, numPartitions); - break; - default: - fail(); } factory.includeExclude(incExc); } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java index 7187e25a7266b..e6f81ae6d2fdd 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java @@ -455,6 +455,42 @@ public void testStringIncludeExclude() throws Exception { assertEquals("val010", result.getBuckets().get(1).getKeyAsString()); assertEquals(1L, result.getBuckets().get(1).getDocCount()); assertTrue(AggregationInspectionHelper.hasValue((InternalTerms)result)); + + aggregationBuilder = new TermsAggregationBuilder("_name").userValueTypeHint(ValueType.STRING) + .executionHint(executionHint) + .includeExclude(new IncludeExclude("val00.+", null, null, + new String[]{"val001", "val002", "val003", "val004", "val005", "val006", "val007", "val008"})) + .field("mv_field") + .order(BucketOrder.key(true)); + aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType); + aggregator.preCollection(); + indexSearcher.search(new MatchAllDocsQuery(), aggregator); + aggregator.postCollection(); + result = reduce(aggregator); + assertEquals(2, result.getBuckets().size()); + assertEquals("val000", result.getBuckets().get(0).getKeyAsString()); + assertEquals(1L, result.getBuckets().get(0).getDocCount()); + assertEquals("val009", result.getBuckets().get(1).getKeyAsString()); + assertEquals(1L, result.getBuckets().get(1).getDocCount()); + assertTrue(AggregationInspectionHelper.hasValue((InternalTerms)result)); + + aggregationBuilder = new TermsAggregationBuilder("_name").userValueTypeHint(ValueType.STRING) + .executionHint(executionHint) + .includeExclude(new IncludeExclude(null, "val01.+", new String[]{"val001", + "val002", "val010"}, null)) + .field("mv_field") + .order(BucketOrder.key(true)); + aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType); + aggregator.preCollection(); + indexSearcher.search(new MatchAllDocsQuery(), aggregator); + aggregator.postCollection(); + result = reduce(aggregator); + assertEquals(2, result.getBuckets().size()); + assertEquals("val001", result.getBuckets().get(0).getKeyAsString()); + assertEquals(1L, result.getBuckets().get(0).getDocCount()); + assertEquals("val002", result.getBuckets().get(1).getKeyAsString()); + assertEquals(1L, result.getBuckets().get(1).getDocCount()); + assertTrue(AggregationInspectionHelper.hasValue((InternalTerms)result)); } } } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/support/IncludeExcludeTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/support/IncludeExcludeTests.java index dc2624dc39e40..259883847ea9b 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/support/IncludeExcludeTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/support/IncludeExcludeTests.java @@ -33,6 +33,7 @@ import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.aggregations.bucket.terms.IncludeExclude; import org.elasticsearch.search.aggregations.bucket.terms.IncludeExclude.OrdinalsFilter; +import org.elasticsearch.search.aggregations.bucket.terms.IncludeExclude.StringFilter; import org.elasticsearch.test.ESTestCase; import java.io.IOException; @@ -56,7 +57,9 @@ public void testEmptyTermsWithOrds() throws IOException { assertEquals(0, acceptedOrds.length()); } - public void testSingleTermWithOrds() throws IOException { + private void testCaseTermAccept(IncludeExclude inexcl, boolean accept) throws IOException { + BytesRef value = new BytesRef("foo"); + SortedSetDocValues ords = new AbstractSortedSetDocValues() { boolean consumed = true; @@ -80,7 +83,7 @@ public long nextOrd() { @Override public BytesRef lookupOrd(long ord) { assertEquals(0, ord); - return new BytesRef("foo"); + return value; } @Override @@ -89,37 +92,60 @@ public long getValueCount() { } }; - IncludeExclude inexcl = new IncludeExclude( - new TreeSet<>(Collections.singleton(new BytesRef("foo"))), - null); - OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW); - LongBitSet acceptedOrds = filter.acceptedGlobalOrdinals(ords); + OrdinalsFilter ordFilter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW); + LongBitSet acceptedOrds = ordFilter.acceptedGlobalOrdinals(ords); assertEquals(1, acceptedOrds.length()); - assertTrue(acceptedOrds.get(0)); + assertEquals(acceptedOrds.get(0), accept); - inexcl = new IncludeExclude( - new TreeSet<>(Collections.singleton(new BytesRef("bar"))), - null); - filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW); - acceptedOrds = filter.acceptedGlobalOrdinals(ords); - assertEquals(1, acceptedOrds.length()); - assertFalse(acceptedOrds.get(0)); - - inexcl = new IncludeExclude( - new TreeSet<>(Collections.singleton(new BytesRef("foo"))), - new TreeSet<>(Collections.singleton(new BytesRef("foo")))); - filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW); - acceptedOrds = filter.acceptedGlobalOrdinals(ords); - assertEquals(1, acceptedOrds.length()); - assertFalse(acceptedOrds.get(0)); + StringFilter strFilter = inexcl.convertToStringFilter(DocValueFormat.RAW); + assertEquals(strFilter.accept(value), accept); + } - inexcl = new IncludeExclude( - null, // means everything included - new TreeSet<>(Collections.singleton(new BytesRef("foo")))); - filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW); - acceptedOrds = filter.acceptedGlobalOrdinals(ords); - assertEquals(1, acceptedOrds.length()); - assertFalse(acceptedOrds.get(0)); + public void testTermAccept() throws IOException { + String[] fooSet = {"foo"}; + String[] barSet = {"bar"}; + String fooRgx = "f.*"; + String barRgx = "b.*"; + + // exclude foo: "foo" is not accepted + testCaseTermAccept(new IncludeExclude(null, null, null, fooSet), false); + testCaseTermAccept(new IncludeExclude(null, fooRgx, null, null), false); + + // exclude bar: "foo" is accepted + testCaseTermAccept(new IncludeExclude(null, null, null, barSet), true); + testCaseTermAccept(new IncludeExclude(null, barRgx, null, null), true); + + // include foo: "foo" is accepted + testCaseTermAccept(new IncludeExclude(null, null, fooSet, null), true); + testCaseTermAccept(new IncludeExclude(fooRgx, null, null, null), true); + + // include bar: "foo" is not accepted + testCaseTermAccept(new IncludeExclude(null, null, barSet, null), false); + testCaseTermAccept(new IncludeExclude(barRgx, null, null, null), false); + + // include foo, exclude foo: "foo" is not accepted + testCaseTermAccept(new IncludeExclude(null, null, fooSet, fooSet), false); + testCaseTermAccept(new IncludeExclude(null, fooRgx, fooSet, null), false); + testCaseTermAccept(new IncludeExclude(fooRgx, null, null, fooSet), false); + testCaseTermAccept(new IncludeExclude(fooRgx, fooRgx, null, null), false); + + // include foo, exclude bar: "foo" is accepted + testCaseTermAccept(new IncludeExclude(null, null, fooSet, barSet), true); + testCaseTermAccept(new IncludeExclude(null, barRgx, fooSet, null), true); + testCaseTermAccept(new IncludeExclude(fooRgx, null, null, barSet), true); + testCaseTermAccept(new IncludeExclude(fooRgx, barRgx, null, null), true); + + // include bar, exclude foo: "foo" is not accepted + testCaseTermAccept(new IncludeExclude(null, null, barSet, fooSet), false); + testCaseTermAccept(new IncludeExclude(null, fooRgx, barSet, null), false); + testCaseTermAccept(new IncludeExclude(barRgx, null, null, fooSet), false); + testCaseTermAccept(new IncludeExclude(barRgx, fooRgx, null, null), false); + + // include bar, exclude bar: "foo" is not accepted + testCaseTermAccept(new IncludeExclude(null, null, barSet, barSet), false); + testCaseTermAccept(new IncludeExclude(null, barRgx, barSet, null), false); + testCaseTermAccept(new IncludeExclude(barRgx, null, null, barSet), false); + testCaseTermAccept(new IncludeExclude(barRgx, barRgx, null, null), false); } public void testPartitionedEquals() throws IOException { @@ -250,6 +276,52 @@ public void testRegexIncludeAndExclude() throws IOException { assertTrue(serialized.hashCode() != different.hashCode()); } + public void testRegexIncludeAndSetExclude() throws IOException { + String incRegex = "foo.*"; + String[] excValues = { "a", "b" }; + String differentIncRegex = "foosball"; + String[] differentExcValues = { "a", "c" }; + + IncludeExclude serialized = serializeMixedRegex(new IncludeExclude(incRegex, null, null, excValues)); + assertFalse(serialized.isPartitionBased()); + assertTrue(serialized.isRegexBased()); + + IncludeExclude same = new IncludeExclude(incRegex, null, null, excValues); + assertEquals(serialized, same); + assertEquals(serialized.hashCode(), same.hashCode()); + + IncludeExclude differentInc = new IncludeExclude(differentIncRegex, null, null, excValues); + assertFalse(serialized.equals(differentInc)); + assertTrue(serialized.hashCode() != differentInc.hashCode()); + + IncludeExclude differentExc = new IncludeExclude(incRegex, null, null, differentExcValues); + assertFalse(serialized.equals(differentExc)); + assertTrue(serialized.hashCode() != differentExc.hashCode()); + } + + public void testSetIncludeAndRegexExclude() throws IOException { + String[] incValues = { "a", "b" }; + String excRegex = "foo.*"; + String[] differentIncValues = { "a", "c" }; + String differentExcRegex = "foosball"; + + IncludeExclude serialized = serializeMixedRegex(new IncludeExclude(null, excRegex, incValues, null)); + assertFalse(serialized.isPartitionBased()); + assertTrue(serialized.isRegexBased()); + + IncludeExclude same = new IncludeExclude(null, excRegex, incValues, null); + assertEquals(serialized, same); + assertEquals(serialized.hashCode(), same.hashCode()); + + IncludeExclude differentInc = new IncludeExclude(null, excRegex, differentIncValues, null); + assertFalse(serialized.equals(differentInc)); + assertTrue(serialized.hashCode() != differentInc.hashCode()); + + IncludeExclude differentExc = new IncludeExclude(null, differentExcRegex, incValues, null); + assertFalse(serialized.equals(differentExc)); + assertTrue(serialized.hashCode() != differentExc.hashCode()); + } + // Serializes/deserializes the IncludeExclude statement with include AND // exclude clauses private IncludeExclude serializeMixedRegex(IncludeExclude incExc) throws IOException { @@ -286,4 +358,12 @@ private IncludeExclude serializeMixedRegex(IncludeExclude incExc) throws IOExcep } } + public void testInvalidIncludeExcludeCombination() { + String[] values = {"foo"}; + String regex = "foo"; + + expectThrows(IllegalArgumentException.class, () -> new IncludeExclude((String) null, null, null, null)); + expectThrows(IllegalArgumentException.class, () -> new IncludeExclude(regex, null, values, null)); + expectThrows(IllegalArgumentException.class, () -> new IncludeExclude(null, regex, null, values)); + } }