Skip to content

Commit b7cc6eb

Browse files
Hoholimotovelasticmachine
committed
Fix wrong error upper bound when performing incremental reductions (elastic#43874)
When performing incremental reductions, 0 value of docCountError may mean that the error was not previously calculated, or that the error was indeed previously calculated and its value was 0. We end up rejecting true values set to 0 this way. This may lead to wrong upper bound of error in result. To fix it, this PR makes docCountError nullable. null values mean that error was not calculated yet. Fixes elastic#40005 Co-authored-by: Igor Motov <[email protected]> Co-authored-by: Elastic Machine <[email protected]>
1 parent d3b853d commit b7cc6eb

File tree

21 files changed

+100
-59
lines changed

21 files changed

+100
-59
lines changed

benchmarks/src/main/java/org/elasticsearch/benchmark/search/aggregations/TermsReduceBenchmark.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ private StringTerms newTerms(Random rand, BytesRef[] dict, boolean withNested) {
154154
true,
155155
0,
156156
buckets,
157-
0
157+
0L
158158
);
159159
}
160160

benchmarks/src/main/java/org/elasticsearch/benchmark/search/aggregations/bucket/terms/StringTermsSerializationBenchmark.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ private StringTerms newTerms(boolean withNested) {
7272
false,
7373
100000,
7474
resultBuckets,
75-
0
75+
0L
7676
);
7777
}
7878

client/rest-high-level/src/test/java/org/elasticsearch/client/SearchIT.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -577,7 +577,7 @@ public void testSearchWithParentJoin() throws IOException {
577577
assertEquals(Float.NaN, searchResponse.getHits().getMaxScore(), 0f);
578578
assertEquals(1, searchResponse.getAggregations().asList().size());
579579
Terms terms = searchResponse.getAggregations().get("top-tags");
580-
assertEquals(0, terms.getDocCountError());
580+
assertEquals(0, terms.getDocCountError().longValue());
581581
assertEquals(0, terms.getSumOfOtherDocCounts());
582582
assertEquals(3, terms.getBuckets().size());
583583
for (Terms.Bucket bucket : terms.getBuckets()) {
@@ -589,7 +589,7 @@ public void testSearchWithParentJoin() throws IOException {
589589
assertEquals(2, children.getDocCount());
590590
assertEquals(1, children.getAggregations().asList().size());
591591
Terms leafTerms = children.getAggregations().get("top-names");
592-
assertEquals(0, leafTerms.getDocCountError());
592+
assertEquals(0, leafTerms.getDocCountError().longValue());
593593
assertEquals(0, leafTerms.getSumOfOtherDocCounts());
594594
assertEquals(2, leafTerms.getBuckets().size());
595595
assertEquals(2, leafTerms.getBuckets().size());

server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/TermsDocCountErrorIT.java

Lines changed: 56 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import org.elasticsearch.search.aggregations.BucketOrder;
2121
import org.elasticsearch.test.ESIntegTestCase;
2222

23+
import java.io.IOException;
2324
import java.util.ArrayList;
2425
import java.util.HashMap;
2526
import java.util.List;
@@ -89,8 +90,6 @@ public void setupSuiteScopeCluster() throws Exception {
8990
.field(DOUBLE_FIELD_NAME, 1.0 * randomInt(numUniqueTerms))
9091
.endObject()));
9192
}
92-
assertAcked(prepareCreate("idx_fixed_docs_0").addMapping("type", STRING_FIELD_NAME, "type=keyword")
93-
.setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)));
9493
Map<String, Integer> shard0DocsPerTerm = new HashMap<>();
9594
shard0DocsPerTerm.put("A", 25);
9695
shard0DocsPerTerm.put("B", 18);
@@ -102,16 +101,8 @@ public void setupSuiteScopeCluster() throws Exception {
102101
shard0DocsPerTerm.put("H", 2);
103102
shard0DocsPerTerm.put("I", 1);
104103
shard0DocsPerTerm.put("J", 1);
105-
for (Map.Entry<String, Integer> entry : shard0DocsPerTerm.entrySet()) {
106-
for (int i = 0; i < entry.getValue(); i++) {
107-
String term = entry.getKey();
108-
builders.add(client().prepareIndex("idx_fixed_docs_0", "type", term + "-" + i)
109-
.setSource(jsonBuilder().startObject().field(STRING_FIELD_NAME, term).endObject()));
110-
}
111-
}
104+
buildIndex(shard0DocsPerTerm, "idx_fixed_docs_0", 0, builders);
112105

113-
assertAcked(prepareCreate("idx_fixed_docs_1").addMapping("type", STRING_FIELD_NAME, "type=keyword")
114-
.setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)));
115106
Map<String, Integer> shard1DocsPerTerm = new HashMap<>();
116107
shard1DocsPerTerm.put("A", 30);
117108
shard1DocsPerTerm.put("B", 25);
@@ -123,17 +114,8 @@ public void setupSuiteScopeCluster() throws Exception {
123114
shard1DocsPerTerm.put("Q", 6);
124115
shard1DocsPerTerm.put("J", 8);
125116
shard1DocsPerTerm.put("C", 4);
126-
for (Map.Entry<String, Integer> entry : shard1DocsPerTerm.entrySet()) {
127-
for (int i = 0; i < entry.getValue(); i++) {
128-
String term = entry.getKey();
129-
builders.add(client().prepareIndex("idx_fixed_docs_1", "type", term + "-" + i)
130-
.setSource(jsonBuilder().startObject().field(STRING_FIELD_NAME, term).field("shard", 1).endObject()));
131-
}
132-
}
117+
buildIndex(shard1DocsPerTerm, "idx_fixed_docs_1", 1, builders);
133118

134-
assertAcked(prepareCreate("idx_fixed_docs_2")
135-
.addMapping("type", STRING_FIELD_NAME, "type=keyword")
136-
.setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)));
137119
Map<String, Integer> shard2DocsPerTerm = new HashMap<>();
138120
shard2DocsPerTerm.put("A", 45);
139121
shard2DocsPerTerm.put("C", 44);
@@ -143,16 +125,46 @@ public void setupSuiteScopeCluster() throws Exception {
143125
shard2DocsPerTerm.put("H", 28);
144126
shard2DocsPerTerm.put("Q", 2);
145127
shard2DocsPerTerm.put("D", 1);
146-
for (Map.Entry<String, Integer> entry : shard2DocsPerTerm.entrySet()) {
128+
buildIndex(shard2DocsPerTerm, "idx_fixed_docs_2", 2, builders);
129+
130+
Map<String, Integer> shard3DocsPerTerm = new HashMap<>();
131+
shard3DocsPerTerm.put("A", 1);
132+
shard3DocsPerTerm.put("B", 1);
133+
shard3DocsPerTerm.put("C", 1);
134+
buildIndex(shard3DocsPerTerm, "idx_fixed_docs_3", 3, builders);
135+
136+
Map<String, Integer> shard4DocsPerTerm = new HashMap<>();
137+
shard4DocsPerTerm.put("K", 1);
138+
shard4DocsPerTerm.put("L", 1);
139+
shard4DocsPerTerm.put("M", 1);
140+
buildIndex(shard4DocsPerTerm, "idx_fixed_docs_4", 4, builders);
141+
142+
Map<String, Integer> shard5DocsPerTerm = new HashMap<>();
143+
shard5DocsPerTerm.put("X", 1);
144+
shard5DocsPerTerm.put("Y", 1);
145+
shard5DocsPerTerm.put("Z", 1);
146+
buildIndex(shard5DocsPerTerm, "idx_fixed_docs_5", 5, builders);
147+
148+
indexRandom(true, builders);
149+
ensureSearchable();
150+
}
151+
152+
private void buildIndex(Map<String, Integer> docsPerTerm, String index, int shard, List<IndexRequestBuilder> builders)
153+
throws IOException {
154+
assertAcked(
155+
prepareCreate(index).addMapping("type", STRING_FIELD_NAME, "type=keyword")
156+
.setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1))
157+
);
158+
for (Map.Entry<String, Integer> entry : docsPerTerm.entrySet()) {
147159
for (int i = 0; i < entry.getValue(); i++) {
148160
String term = entry.getKey();
149-
builders.add(client().prepareIndex("idx_fixed_docs_2", "type", term + "-" + i)
150-
.setSource(jsonBuilder().startObject().field(STRING_FIELD_NAME, term).field("shard", 2).endObject()));
161+
builders.add(
162+
client().prepareIndex(index, "type")
163+
.setId(term + "-" + i)
164+
.setSource(jsonBuilder().startObject().field(STRING_FIELD_NAME, term).field("shard", shard).endObject())
165+
);
151166
}
152167
}
153-
154-
indexRandom(true, builders);
155-
ensureSearchable();
156168
}
157169

158170
private void assertDocCountErrorWithinBounds(int size, SearchResponse accurateResponse, SearchResponse testResponse) {
@@ -1015,4 +1027,21 @@ public void testFixedDocs() throws Exception {
10151027
assertThat(bucket.getDocCountError(), equalTo(29L));
10161028
}
10171029

1030+
/**
1031+
* Tests the upper bounds are correct when performing incremental reductions
1032+
* See https://github.com/elastic/elasticsearch/issues/40005 for more details
1033+
*/
1034+
public void testIncrementalReduction() {
1035+
SearchResponse response = client().prepareSearch("idx_fixed_docs_3", "idx_fixed_docs_4", "idx_fixed_docs_5")
1036+
.addAggregation(terms("terms")
1037+
.executionHint(randomExecutionHint())
1038+
.field(STRING_FIELD_NAME)
1039+
.showTermDocCountError(true)
1040+
.size(5).shardSize(5)
1041+
.collectMode(randomFrom(SubAggCollectionMode.values())))
1042+
.get();
1043+
assertSearchResponse(response);
1044+
Terms terms = response.getAggregations().get("terms");
1045+
assertThat(terms.getDocCountError(), equalTo(0L));
1046+
}
10181047
}

server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractInternalTerms.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ public abstract static class AbstractTermsBucket extends InternalMultiBucketAggr
7676

7777
protected abstract long getSumOfOtherDocCounts();
7878

79-
protected abstract long getDocCountError();
79+
protected abstract Long getDocCountError();
8080

8181
protected abstract void setDocCountError(long docCountError);
8282

@@ -133,7 +133,7 @@ private long getDocCountError(A terms) {
133133
if (size == 0 || size < terms.getShardSize() || isKeyOrder(terms.getOrder())) {
134134
return 0;
135135
} else if (InternalOrder.isCountDesc(terms.getOrder())) {
136-
if (terms.getDocCountError() > 0) {
136+
if (terms.getDocCountError() != null && terms.getDocCountError() > 0) {
137137
// If there is an existing docCountError for this agg then
138138
// use this as the error for this aggregation
139139
return terms.getDocCountError();

server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractStringTermsAggregator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ abstract class AbstractStringTermsAggregator extends TermsAggregator {
3333

3434
protected StringTerms buildEmptyTermsAggregation() {
3535
return new StringTerms(name, order, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(),
36-
metadata(), format, bucketCountThresholds.getShardSize(), showTermDocCountError, 0, emptyList(), 0);
36+
metadata(), format, bucketCountThresholds.getShardSize(), showTermDocCountError, 0, emptyList(), 0L);
3737
}
3838

3939
protected SignificantStringTerms buildEmptySignificantTermsAggregation(long subsetSize, SignificanceHeuristic significanceHeuristic) {

server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/DoubleTerms.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ public int hashCode() {
9191

9292
public DoubleTerms(String name, BucketOrder reduceOrder, BucketOrder order, int requiredSize, long minDocCount,
9393
Map<String, Object> metadata, DocValueFormat format, int shardSize, boolean showTermDocCountError, long otherDocCount,
94-
List<Bucket> buckets, long docCountError) {
94+
List<Bucket> buckets, Long docCountError) {
9595
super(name, reduceOrder, order, requiredSize, minDocCount, metadata, format, shardSize, showTermDocCountError,
9696
otherDocCount, buckets, docCountError);
9797
}

server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -765,7 +765,7 @@ StringTerms buildResult(long owningBucketOrd, long otherDocCount, StringTerms.Bu
765765
}
766766
return new StringTerms(name, reduceOrder, order, bucketCountThresholds.getRequiredSize(),
767767
bucketCountThresholds.getMinDocCount(), metadata(), format, bucketCountThresholds.getShardSize(), showTermDocCountError,
768-
otherDocCount, Arrays.asList(topBuckets), 0);
768+
otherDocCount, Arrays.asList(topBuckets), 0L);
769769
}
770770

771771
@Override

server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedTerms.java

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
package org.elasticsearch.search.aggregations.bucket.terms;
1010

11+
import org.elasticsearch.Version;
1112
import org.elasticsearch.common.io.stream.StreamInput;
1213
import org.elasticsearch.common.io.stream.StreamOutput;
1314
import org.elasticsearch.common.xcontent.XContentBuilder;
@@ -32,11 +33,11 @@ public abstract class InternalMappedTerms<A extends InternalTerms<A, B>, B exten
3233
protected final List<B> buckets;
3334
protected Map<String, B> bucketMap;
3435

35-
protected long docCountError;
36+
protected Long docCountError;
3637

3738
protected InternalMappedTerms(String name, BucketOrder reduceOrder, BucketOrder order, int requiredSize, long minDocCount,
3839
Map<String, Object> metadata, DocValueFormat format, int shardSize,
39-
boolean showTermDocCountError, long otherDocCount, List<B> buckets, long docCountError) {
40+
boolean showTermDocCountError, long otherDocCount, List<B> buckets, Long docCountError) {
4041
super(name, reduceOrder, order, requiredSize, minDocCount, metadata);
4142
this.format = format;
4243
this.shardSize = shardSize;
@@ -51,7 +52,14 @@ protected InternalMappedTerms(String name, BucketOrder reduceOrder, BucketOrder
5152
*/
5253
protected InternalMappedTerms(StreamInput in, Bucket.Reader<B> bucketReader) throws IOException {
5354
super(in);
54-
docCountError = in.readZLong();
55+
if (in.getVersion().onOrAfter(Version.V_7_15_0)) {
56+
docCountError = in.readOptionalLong();
57+
} else {
58+
docCountError = in.readZLong();
59+
if (docCountError == 0) {
60+
docCountError = null;
61+
}
62+
}
5563
format = in.readNamedWriteable(DocValueFormat.class);
5664
shardSize = readSize(in);
5765
showTermDocCountError = in.readBoolean();
@@ -61,7 +69,11 @@ protected InternalMappedTerms(StreamInput in, Bucket.Reader<B> bucketReader) thr
6169

6270
@Override
6371
protected final void writeTermTypeInfoTo(StreamOutput out) throws IOException {
64-
out.writeZLong(docCountError);
72+
if (out.getVersion().onOrAfter(Version.V_7_15_0)) {
73+
out.writeOptionalLong(docCountError);
74+
} else {
75+
out.writeZLong(docCountError == null ? 0 : docCountError);
76+
}
6577
out.writeNamedWriteable(format);
6678
writeSize(shardSize, out);
6779
out.writeBoolean(showTermDocCountError);
@@ -80,7 +92,7 @@ protected int getShardSize() {
8092
}
8193

8294
@Override
83-
public long getDocCountError() {
95+
public Long getDocCountError() {
8496
return docCountError;
8597
}
8698

server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongTerms.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ public int hashCode() {
103103

104104
public LongTerms(String name, BucketOrder reduceOrder, BucketOrder order, int requiredSize, long minDocCount,
105105
Map<String, Object> metadata, DocValueFormat format, int shardSize, boolean showTermDocCountError, long otherDocCount,
106-
List<Bucket> buckets, long docCountError) {
106+
List<Bucket> buckets, Long docCountError) {
107107
super(name, reduceOrder, order, requiredSize, minDocCount, metadata, format, shardSize, showTermDocCountError,
108108
otherDocCount, buckets, docCountError);
109109
}

server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -446,7 +446,7 @@ StringTerms buildResult(long owningBucketOrd, long otherDocCount, StringTerms.Bu
446446
}
447447
return new StringTerms(name, reduceOrder, order, bucketCountThresholds.getRequiredSize(),
448448
bucketCountThresholds.getMinDocCount(), metadata(), format, bucketCountThresholds.getShardSize(), showTermDocCountError,
449-
otherDocCount, Arrays.asList(topBuckets), 0);
449+
otherDocCount, Arrays.asList(topBuckets), 0L);
450450
}
451451

452452
@Override

server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/NumericTermsAggregator.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -372,7 +372,7 @@ LongTerms buildResult(long owningBucketOrd, long otherDocCount, LongTerms.Bucket
372372
showTermDocCountError,
373373
otherDocCount,
374374
List.of(topBuckets),
375-
0
375+
0L
376376
);
377377
}
378378

@@ -390,7 +390,7 @@ LongTerms buildEmptyResult() {
390390
showTermDocCountError,
391391
0,
392392
emptyList(),
393-
0
393+
0L
394394
);
395395
}
396396
}
@@ -454,7 +454,7 @@ DoubleTerms buildResult(long owningBucketOrd, long otherDocCount, DoubleTerms.Bu
454454
showTermDocCountError,
455455
otherDocCount,
456456
List.of(topBuckets),
457-
0
457+
0L
458458
);
459459
}
460460

@@ -472,7 +472,7 @@ DoubleTerms buildEmptyResult() {
472472
showTermDocCountError,
473473
0,
474474
emptyList(),
475-
0
475+
0L
476476
);
477477
}
478478
}

server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/ParsedTerms.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ public abstract class ParsedTerms extends ParsedMultiBucketAggregation<ParsedTer
3232
protected long sumOtherDocCount;
3333

3434
@Override
35-
public long getDocCountError() {
35+
public Long getDocCountError() {
3636
return docCountErrorUpperBound;
3737
}
3838

server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTerms.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ public int hashCode() {
9494

9595
public StringTerms(String name, BucketOrder reduceOrder, BucketOrder order, int requiredSize, long minDocCount,
9696
Map<String, Object> metadata, DocValueFormat format, int shardSize, boolean showTermDocCountError, long otherDocCount,
97-
List<Bucket> buckets, long docCountError) {
97+
List<Bucket> buckets, Long docCountError) {
9898
super(name, reduceOrder, order, requiredSize, minDocCount, metadata, format,
9999
shardSize, showTermDocCountError, otherDocCount, buckets, docCountError);
100100
}

server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregatorFromFilters.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,7 @@ protected boolean lessThan(OrdBucket a, OrdBucket b) {
227227
showTermDocCountError,
228228
otherDocsCount,
229229
buckets,
230-
0
230+
0L
231231
);
232232
}
233233

server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/Terms.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ interface Bucket extends MultiBucketsAggregation.Bucket {
4141
/**
4242
* Get an upper bound of the error on document counts in this aggregation.
4343
*/
44-
long getDocCountError();
44+
Long getDocCountError();
4545

4646
/**
4747
* Return the sum of the document counts of all buckets that did not make

server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedTerms.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,8 @@ protected int getShardSize() {
110110
}
111111

112112
@Override
113-
public long getDocCountError() {
114-
return 0;
113+
public Long getDocCountError() {
114+
return 0L;
115115
}
116116

117117
@Override

server/src/test/java/org/elasticsearch/search/aggregations/InternalAggregationsTests.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ public void testReduceEmptyAggs() {
5252

5353
public void testNonFinalReduceTopLevelPipelineAggs() {
5454
InternalAggregation terms = new StringTerms("name", BucketOrder.key(true), BucketOrder.key(true),
55-
10, 1, Collections.emptyMap(), DocValueFormat.RAW, 25, false, 10, Collections.emptyList(), 0);
55+
10, 1, Collections.emptyMap(), DocValueFormat.RAW, 25, false, 10, Collections.emptyList(), 0L);
5656
List<InternalAggregations> aggs = singletonList(InternalAggregations.from(Collections.singletonList(terms)));
5757
InternalAggregations reducedAggs = InternalAggregations.topLevelReduce(aggs, maxBucketReduceContext().forPartialReduction());
5858
assertEquals(1, reducedAggs.getTopLevelPipelineAggregators().size());
@@ -61,7 +61,7 @@ public void testNonFinalReduceTopLevelPipelineAggs() {
6161

6262
public void testFinalReduceTopLevelPipelineAggs() {
6363
InternalAggregation terms = new StringTerms("name", BucketOrder.key(true), BucketOrder.key(true),
64-
10, 1, Collections.emptyMap(), DocValueFormat.RAW, 25, false, 10, Collections.emptyList(), 0);
64+
10, 1, Collections.emptyMap(), DocValueFormat.RAW, 25, false, 10, Collections.emptyList(), 0L);
6565

6666
InternalAggregations aggs = InternalAggregations.from(Collections.singletonList(terms));
6767
InternalAggregations reducedAggs = InternalAggregations.topLevelReduce(Collections.singletonList(aggs),

0 commit comments

Comments
 (0)