Skip to content

Commit 02c82d8

Browse files
authored
Rewire terms agg to use new VS registry (#51182)
Also adds some basic AggTestCases for untested code paths (and boilerplate for future tests once the IT are converted over)
1 parent b36c79b commit 02c82d8

File tree

8 files changed

+747
-88
lines changed

8 files changed

+747
-88
lines changed

server/src/main/java/org/elasticsearch/search/SearchModule.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,8 @@ private void registerAggregations(List<SearchPlugin> plugins) {
381381
.addResultReader(StringTerms.NAME, StringTerms::new)
382382
.addResultReader(UnmappedTerms.NAME, UnmappedTerms::new)
383383
.addResultReader(LongTerms.NAME, LongTerms::new)
384-
.addResultReader(DoubleTerms.NAME, DoubleTerms::new));
384+
.addResultReader(DoubleTerms.NAME, DoubleTerms::new)
385+
.setAggregatorRegistrar(TermsAggregationBuilder::registerAggregators));
385386
registerAggregation(new AggregationSpec(RareTermsAggregationBuilder.NAME, RareTermsAggregationBuilder::new,
386387
RareTermsAggregationBuilder::parse)
387388
.addResultReader(StringRareTerms.NAME, StringRareTerms::new)

server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregationBuilder.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,14 @@
4242
import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory;
4343
import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;
4444
import org.elasticsearch.search.aggregations.support.ValuesSourceParserHelper;
45+
import org.elasticsearch.search.aggregations.support.ValuesSourceRegistry;
4546
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
4647

4748
import java.io.IOException;
4849
import java.util.List;
4950
import java.util.Map;
5051
import java.util.Objects;
52+
import java.util.concurrent.atomic.AtomicBoolean;
5153

5254
public class TermsAggregationBuilder extends ValuesSourceAggregationBuilder<TermsAggregationBuilder>
5355
implements MultiBucketAggregationBuilder {
@@ -100,6 +102,13 @@ public static AggregationBuilder parse(String aggregationName, XContentParser pa
100102
return PARSER.parse(parser, new TermsAggregationBuilder(aggregationName), null);
101103
}
102104

105+
private static AtomicBoolean wasRegistered = new AtomicBoolean(false);
106+
public static void registerAggregators(ValuesSourceRegistry valuesSourceRegistry) {
107+
if (wasRegistered.compareAndSet(false, true) == true) {
108+
TermsAggregatorFactory.registerAggregators(valuesSourceRegistry);
109+
}
110+
}
111+
103112
private BucketOrder order = BucketOrder.compound(BucketOrder.count(false)); // automatically adds tie-breaker key asc order
104113
private IncludeExclude includeExclude = null;
105114
private String executionHint = null;

server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java

Lines changed: 156 additions & 83 deletions
Large diffs are not rendered by default.
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.elasticsearch.search.aggregations.bucket.terms;
20+
21+
import org.elasticsearch.search.DocValueFormat;
22+
import org.elasticsearch.search.aggregations.Aggregator;
23+
import org.elasticsearch.search.aggregations.AggregatorFactories;
24+
import org.elasticsearch.search.aggregations.BucketOrder;
25+
import org.elasticsearch.search.aggregations.bucket.terms.IncludeExclude;
26+
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator;
27+
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
28+
import org.elasticsearch.search.aggregations.support.AggregatorSupplier;
29+
import org.elasticsearch.search.aggregations.support.ValuesSource;
30+
import org.elasticsearch.search.internal.SearchContext;
31+
32+
import java.io.IOException;
33+
import java.util.List;
34+
import java.util.Map;
35+
36+
interface TermsAggregatorSupplier extends AggregatorSupplier {
37+
Aggregator build(String name,
38+
AggregatorFactories factories,
39+
ValuesSource valuesSource,
40+
BucketOrder order,
41+
DocValueFormat format,
42+
TermsAggregator.BucketCountThresholds bucketCountThresholds,
43+
IncludeExclude includeExclude,
44+
String executionHint,
45+
SearchContext context,
46+
Aggregator parent,
47+
Aggregator.SubAggCollectionMode subAggCollectMode,
48+
boolean showTermDocCountError,
49+
List<PipelineAggregator> pipelineAggregators,
50+
Map<String, Object> metaData) throws IOException;
51+
}
Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.elasticsearch.search.aggregations.bucket.terms;
20+
21+
import org.apache.lucene.document.Document;
22+
import org.apache.lucene.index.DirectoryReader;
23+
import org.apache.lucene.index.IndexReader;
24+
import org.apache.lucene.index.RandomIndexWriter;
25+
import org.apache.lucene.search.IndexSearcher;
26+
import org.apache.lucene.search.MatchAllDocsQuery;
27+
import org.apache.lucene.search.MatchNoDocsQuery;
28+
import org.apache.lucene.search.Query;
29+
import org.apache.lucene.store.Directory;
30+
import org.apache.lucene.util.BytesRef;
31+
import org.apache.lucene.util.automaton.RegExp;
32+
import org.elasticsearch.common.Numbers;
33+
import org.elasticsearch.index.mapper.BinaryFieldMapper;
34+
import org.elasticsearch.index.mapper.MappedFieldType;
35+
import org.elasticsearch.search.DocValueFormat;
36+
import org.elasticsearch.search.aggregations.AggregationExecutionException;
37+
import org.elasticsearch.search.aggregations.AggregatorTestCase;
38+
import org.elasticsearch.search.aggregations.support.ValueType;
39+
40+
import java.io.IOException;
41+
import java.util.ArrayList;
42+
import java.util.List;
43+
import java.util.function.Consumer;
44+
45+
import static org.hamcrest.Matchers.equalTo;
46+
47+
public class BinaryTermsAggregatorTests extends AggregatorTestCase {
48+
private static final String BINARY_FIELD = "binary";
49+
50+
private static final List<Long> dataset;
51+
static {
52+
List<Long> d = new ArrayList<>(45);
53+
for (int i = 0; i < 10; i++) {
54+
for (int j = 0; j < i; j++) {
55+
d.add((long) i);
56+
}
57+
}
58+
dataset = d;
59+
}
60+
61+
public void testMatchNoDocs() throws IOException {
62+
testBothCases(new MatchNoDocsQuery(), dataset,
63+
aggregation -> aggregation.field(BINARY_FIELD),
64+
agg -> assertEquals(0, agg.getBuckets().size()), ValueType.STRING
65+
);
66+
}
67+
68+
public void testMatchAllDocs() throws IOException {
69+
Query query = new MatchAllDocsQuery();
70+
71+
testBothCases(query, dataset,
72+
aggregation -> aggregation.field(BINARY_FIELD),
73+
agg -> {
74+
assertEquals(9, agg.getBuckets().size());
75+
for (int i = 0; i < 9; i++) {
76+
StringTerms.Bucket bucket = (StringTerms.Bucket) agg.getBuckets().get(i);
77+
byte[] bytes = Numbers.longToBytes(9L - i);
78+
String bytesAsString = (String) DocValueFormat.BINARY.format(new BytesRef(bytes));
79+
assertThat(bucket.getKey(), equalTo(bytesAsString));
80+
assertThat(bucket.getDocCount(), equalTo(9L - i));
81+
}
82+
}, null);
83+
}
84+
85+
public void testBadIncludeExclude() throws IOException {
86+
IncludeExclude includeExclude = new IncludeExclude(new RegExp("foo"), null);
87+
88+
// Make sure the include/exclude fails regardless of how the user tries to type hint the agg
89+
AggregationExecutionException e = expectThrows(AggregationExecutionException.class,
90+
() -> testBothCases(new MatchNoDocsQuery(), dataset,
91+
aggregation -> aggregation.field(BINARY_FIELD).includeExclude(includeExclude).format("yyyy-MM-dd"),
92+
agg -> fail("test should have failed with exception"), null // default, no hint
93+
));
94+
assertThat(e.getMessage(), equalTo("Aggregation [_name] cannot support regular expression style include/exclude settings as " +
95+
"they can only be applied to string fields. Use an array of values for include/exclude clauses"));
96+
97+
e = expectThrows(AggregationExecutionException.class,
98+
() -> testBothCases(new MatchNoDocsQuery(), dataset,
99+
aggregation -> aggregation.field(BINARY_FIELD).includeExclude(includeExclude).format("yyyy-MM-dd"),
100+
agg -> fail("test should have failed with exception"), ValueType.STRING // string type hint
101+
));
102+
assertThat(e.getMessage(), equalTo("Aggregation [_name] cannot support regular expression style include/exclude settings as " +
103+
"they can only be applied to string fields. Use an array of values for include/exclude clauses"));
104+
105+
e = expectThrows(AggregationExecutionException.class, () -> testBothCases(new MatchNoDocsQuery(), dataset,
106+
aggregation -> aggregation.field(BINARY_FIELD).includeExclude(includeExclude),
107+
agg -> fail("test should have failed with exception"), ValueType.NUMERIC // numeric type hint
108+
));
109+
assertThat(e.getMessage(), equalTo("Aggregation [_name] cannot support regular expression style include/exclude settings as " +
110+
"they can only be applied to string fields. Use an array of values for include/exclude clauses"));
111+
}
112+
113+
private void testSearchCase(Query query, List<Long> dataset,
114+
Consumer<TermsAggregationBuilder> configure,
115+
Consumer<InternalMappedTerms> verify, ValueType valueType) throws IOException {
116+
executeTestCase(false, query, dataset, configure, verify, valueType);
117+
}
118+
119+
private void testSearchAndReduceCase(Query query, List<Long> dataset,
120+
Consumer<TermsAggregationBuilder> configure,
121+
Consumer<InternalMappedTerms> verify, ValueType valueType) throws IOException {
122+
executeTestCase(true, query, dataset, configure, verify, valueType);
123+
}
124+
125+
private void testBothCases(Query query, List<Long> dataset,
126+
Consumer<TermsAggregationBuilder> configure,
127+
Consumer<InternalMappedTerms> verify, ValueType valueType) throws IOException {
128+
testSearchCase(query, dataset, configure, verify, valueType);
129+
testSearchAndReduceCase(query, dataset, configure, verify, valueType);
130+
}
131+
132+
private void executeTestCase(boolean reduced, Query query, List<Long> dataset,
133+
Consumer<TermsAggregationBuilder> configure,
134+
Consumer<InternalMappedTerms> verify, ValueType valueType) throws IOException {
135+
136+
try (Directory directory = newDirectory()) {
137+
try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
138+
Document document = new Document();
139+
for (Long value : dataset) {
140+
if (frequently()) {
141+
indexWriter.commit();
142+
}
143+
144+
document.add(new BinaryFieldMapper.CustomBinaryDocValuesField(BINARY_FIELD, Numbers.longToBytes(value)));
145+
indexWriter.addDocument(document);
146+
document.clear();
147+
}
148+
}
149+
150+
try (IndexReader indexReader = DirectoryReader.open(directory)) {
151+
IndexSearcher indexSearcher = newIndexSearcher(indexReader);
152+
153+
TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name");
154+
if (valueType != null) {
155+
aggregationBuilder.userValueTypeHint(valueType);
156+
}
157+
if (configure != null) {
158+
configure.accept(aggregationBuilder);
159+
}
160+
161+
MappedFieldType binaryFieldType = new BinaryFieldMapper.Builder(BINARY_FIELD).fieldType();
162+
binaryFieldType.setName(BINARY_FIELD);
163+
binaryFieldType.setHasDocValues(true);
164+
165+
InternalMappedTerms rareTerms;
166+
if (reduced) {
167+
rareTerms = searchAndReduce(indexSearcher, query, aggregationBuilder, binaryFieldType);
168+
} else {
169+
rareTerms = search(indexSearcher, query, aggregationBuilder, binaryFieldType);
170+
}
171+
verify.accept(rareTerms);
172+
}
173+
}
174+
}
175+
176+
}

0 commit comments

Comments
 (0)