From b7cc6eb74682a8439c75d5fc76959a674a031089 Mon Sep 17 00:00:00 2001 From: Nikita Glashenko Date: Thu, 22 Jul 2021 22:18:24 +0400 Subject: [PATCH 1/2] Fix wrong error upper bound when performing incremental reductions (#43874) When performing incremental reductions, 0 value of docCountError may mean that the error was not previously calculated, or that the error was indeed previously calculated and its value was 0. We end up rejecting true values set to 0 this way. This may lead to wrong upper bound of error in result. To fix it, this PR makes docCountError nullable. null values mean that error was not calculated yet. Fixes #40005 Co-authored-by: Igor Motov Co-authored-by: Elastic Machine --- .../aggregations/TermsReduceBenchmark.java | 2 +- .../StringTermsSerializationBenchmark.java | 2 +- .../org/elasticsearch/client/SearchIT.java | 4 +- .../bucket/TermsDocCountErrorIT.java | 83 +++++++++++++------ .../bucket/terms/AbstractInternalTerms.java | 4 +- .../terms/AbstractStringTermsAggregator.java | 2 +- .../bucket/terms/DoubleTerms.java | 2 +- .../GlobalOrdinalsStringTermsAggregator.java | 2 +- .../bucket/terms/InternalMappedTerms.java | 22 +++-- .../aggregations/bucket/terms/LongTerms.java | 2 +- .../terms/MapStringTermsAggregator.java | 2 +- .../bucket/terms/NumericTermsAggregator.java | 8 +- .../bucket/terms/ParsedTerms.java | 2 +- .../bucket/terms/StringTerms.java | 2 +- .../StringTermsAggregatorFromFilters.java | 2 +- .../aggregations/bucket/terms/Terms.java | 2 +- .../bucket/terms/UnmappedTerms.java | 4 +- .../InternalAggregationsTests.java | 4 +- .../InternalMultiBucketAggregationTests.java | 4 +- .../multiterms/InternalMultiTerms.java | 2 +- .../xpack/search/AsyncSearchTaskTests.java | 2 +- 21 files changed, 100 insertions(+), 59 deletions(-) diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/search/aggregations/TermsReduceBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/search/aggregations/TermsReduceBenchmark.java index 08e037aa24bbf..ab3a701db77f6 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/search/aggregations/TermsReduceBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/search/aggregations/TermsReduceBenchmark.java @@ -154,7 +154,7 @@ private StringTerms newTerms(Random rand, BytesRef[] dict, boolean withNested) { true, 0, buckets, - 0 + 0L ); } diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/search/aggregations/bucket/terms/StringTermsSerializationBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/search/aggregations/bucket/terms/StringTermsSerializationBenchmark.java index 29e9e885f436d..86826a2ca73de 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/search/aggregations/bucket/terms/StringTermsSerializationBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/search/aggregations/bucket/terms/StringTermsSerializationBenchmark.java @@ -72,7 +72,7 @@ private StringTerms newTerms(boolean withNested) { false, 100000, resultBuckets, - 0 + 0L ); } diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/SearchIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/SearchIT.java index 9587971feb8ed..12409c43a8277 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/SearchIT.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/SearchIT.java @@ -577,7 +577,7 @@ public void testSearchWithParentJoin() throws IOException { assertEquals(Float.NaN, searchResponse.getHits().getMaxScore(), 0f); assertEquals(1, searchResponse.getAggregations().asList().size()); Terms terms = searchResponse.getAggregations().get("top-tags"); - assertEquals(0, terms.getDocCountError()); + assertEquals(0, terms.getDocCountError().longValue()); assertEquals(0, terms.getSumOfOtherDocCounts()); assertEquals(3, terms.getBuckets().size()); for (Terms.Bucket bucket : terms.getBuckets()) { @@ -589,7 +589,7 @@ public void testSearchWithParentJoin() throws IOException { assertEquals(2, children.getDocCount()); assertEquals(1, children.getAggregations().asList().size()); Terms leafTerms = children.getAggregations().get("top-names"); - assertEquals(0, leafTerms.getDocCountError()); + assertEquals(0, leafTerms.getDocCountError().longValue()); assertEquals(0, leafTerms.getSumOfOtherDocCounts()); assertEquals(2, leafTerms.getBuckets().size()); assertEquals(2, leafTerms.getBuckets().size()); diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/TermsDocCountErrorIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/TermsDocCountErrorIT.java index 255e51115c5d9..4b9072c7d73a6 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/TermsDocCountErrorIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/TermsDocCountErrorIT.java @@ -20,6 +20,7 @@ import org.elasticsearch.search.aggregations.BucketOrder; import org.elasticsearch.test.ESIntegTestCase; +import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -89,8 +90,6 @@ public void setupSuiteScopeCluster() throws Exception { .field(DOUBLE_FIELD_NAME, 1.0 * randomInt(numUniqueTerms)) .endObject())); } - assertAcked(prepareCreate("idx_fixed_docs_0").addMapping("type", STRING_FIELD_NAME, "type=keyword") - .setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1))); Map shard0DocsPerTerm = new HashMap<>(); shard0DocsPerTerm.put("A", 25); shard0DocsPerTerm.put("B", 18); @@ -102,16 +101,8 @@ public void setupSuiteScopeCluster() throws Exception { shard0DocsPerTerm.put("H", 2); shard0DocsPerTerm.put("I", 1); shard0DocsPerTerm.put("J", 1); - for (Map.Entry entry : shard0DocsPerTerm.entrySet()) { - for (int i = 0; i < entry.getValue(); i++) { - String term = entry.getKey(); - builders.add(client().prepareIndex("idx_fixed_docs_0", "type", term + "-" + i) - .setSource(jsonBuilder().startObject().field(STRING_FIELD_NAME, term).endObject())); - } - } + buildIndex(shard0DocsPerTerm, "idx_fixed_docs_0", 0, builders); - assertAcked(prepareCreate("idx_fixed_docs_1").addMapping("type", STRING_FIELD_NAME, "type=keyword") - .setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1))); Map shard1DocsPerTerm = new HashMap<>(); shard1DocsPerTerm.put("A", 30); shard1DocsPerTerm.put("B", 25); @@ -123,17 +114,8 @@ public void setupSuiteScopeCluster() throws Exception { shard1DocsPerTerm.put("Q", 6); shard1DocsPerTerm.put("J", 8); shard1DocsPerTerm.put("C", 4); - for (Map.Entry entry : shard1DocsPerTerm.entrySet()) { - for (int i = 0; i < entry.getValue(); i++) { - String term = entry.getKey(); - builders.add(client().prepareIndex("idx_fixed_docs_1", "type", term + "-" + i) - .setSource(jsonBuilder().startObject().field(STRING_FIELD_NAME, term).field("shard", 1).endObject())); - } - } + buildIndex(shard1DocsPerTerm, "idx_fixed_docs_1", 1, builders); - assertAcked(prepareCreate("idx_fixed_docs_2") - .addMapping("type", STRING_FIELD_NAME, "type=keyword") - .setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1))); Map shard2DocsPerTerm = new HashMap<>(); shard2DocsPerTerm.put("A", 45); shard2DocsPerTerm.put("C", 44); @@ -143,16 +125,46 @@ public void setupSuiteScopeCluster() throws Exception { shard2DocsPerTerm.put("H", 28); shard2DocsPerTerm.put("Q", 2); shard2DocsPerTerm.put("D", 1); - for (Map.Entry entry : shard2DocsPerTerm.entrySet()) { + buildIndex(shard2DocsPerTerm, "idx_fixed_docs_2", 2, builders); + + Map shard3DocsPerTerm = new HashMap<>(); + shard3DocsPerTerm.put("A", 1); + shard3DocsPerTerm.put("B", 1); + shard3DocsPerTerm.put("C", 1); + buildIndex(shard3DocsPerTerm, "idx_fixed_docs_3", 3, builders); + + Map shard4DocsPerTerm = new HashMap<>(); + shard4DocsPerTerm.put("K", 1); + shard4DocsPerTerm.put("L", 1); + shard4DocsPerTerm.put("M", 1); + buildIndex(shard4DocsPerTerm, "idx_fixed_docs_4", 4, builders); + + Map shard5DocsPerTerm = new HashMap<>(); + shard5DocsPerTerm.put("X", 1); + shard5DocsPerTerm.put("Y", 1); + shard5DocsPerTerm.put("Z", 1); + buildIndex(shard5DocsPerTerm, "idx_fixed_docs_5", 5, builders); + + indexRandom(true, builders); + ensureSearchable(); + } + + private void buildIndex(Map docsPerTerm, String index, int shard, List builders) + throws IOException { + assertAcked( + prepareCreate(index).addMapping("type", STRING_FIELD_NAME, "type=keyword") + .setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)) + ); + for (Map.Entry entry : docsPerTerm.entrySet()) { for (int i = 0; i < entry.getValue(); i++) { String term = entry.getKey(); - builders.add(client().prepareIndex("idx_fixed_docs_2", "type", term + "-" + i) - .setSource(jsonBuilder().startObject().field(STRING_FIELD_NAME, term).field("shard", 2).endObject())); + builders.add( + client().prepareIndex(index, "type") + .setId(term + "-" + i) + .setSource(jsonBuilder().startObject().field(STRING_FIELD_NAME, term).field("shard", shard).endObject()) + ); } } - - indexRandom(true, builders); - ensureSearchable(); } private void assertDocCountErrorWithinBounds(int size, SearchResponse accurateResponse, SearchResponse testResponse) { @@ -1015,4 +1027,21 @@ public void testFixedDocs() throws Exception { assertThat(bucket.getDocCountError(), equalTo(29L)); } + /** + * Tests the upper bounds are correct when performing incremental reductions + * See https://github.com/elastic/elasticsearch/issues/40005 for more details + */ + public void testIncrementalReduction() { + SearchResponse response = client().prepareSearch("idx_fixed_docs_3", "idx_fixed_docs_4", "idx_fixed_docs_5") + .addAggregation(terms("terms") + .executionHint(randomExecutionHint()) + .field(STRING_FIELD_NAME) + .showTermDocCountError(true) + .size(5).shardSize(5) + .collectMode(randomFrom(SubAggCollectionMode.values()))) + .get(); + assertSearchResponse(response); + Terms terms = response.getAggregations().get("terms"); + assertThat(terms.getDocCountError(), equalTo(0L)); + } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractInternalTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractInternalTerms.java index 017fa354e2769..89707fcb06e3b 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractInternalTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractInternalTerms.java @@ -76,7 +76,7 @@ public abstract static class AbstractTermsBucket extends InternalMultiBucketAggr protected abstract long getSumOfOtherDocCounts(); - protected abstract long getDocCountError(); + protected abstract Long getDocCountError(); protected abstract void setDocCountError(long docCountError); @@ -133,7 +133,7 @@ private long getDocCountError(A terms) { if (size == 0 || size < terms.getShardSize() || isKeyOrder(terms.getOrder())) { return 0; } else if (InternalOrder.isCountDesc(terms.getOrder())) { - if (terms.getDocCountError() > 0) { + if (terms.getDocCountError() != null && terms.getDocCountError() > 0) { // If there is an existing docCountError for this agg then // use this as the error for this aggregation return terms.getDocCountError(); diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractStringTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractStringTermsAggregator.java index cc4995e0da326..aadec9b36e330 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractStringTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractStringTermsAggregator.java @@ -33,7 +33,7 @@ abstract class AbstractStringTermsAggregator extends TermsAggregator { protected StringTerms buildEmptyTermsAggregation() { return new StringTerms(name, order, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), - metadata(), format, bucketCountThresholds.getShardSize(), showTermDocCountError, 0, emptyList(), 0); + metadata(), format, bucketCountThresholds.getShardSize(), showTermDocCountError, 0, emptyList(), 0L); } protected SignificantStringTerms buildEmptySignificantTermsAggregation(long subsetSize, SignificanceHeuristic significanceHeuristic) { diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/DoubleTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/DoubleTerms.java index 74b5d3aaac603..439d4fb6e2df8 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/DoubleTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/DoubleTerms.java @@ -91,7 +91,7 @@ public int hashCode() { public DoubleTerms(String name, BucketOrder reduceOrder, BucketOrder order, int requiredSize, long minDocCount, Map metadata, DocValueFormat format, int shardSize, boolean showTermDocCountError, long otherDocCount, - List buckets, long docCountError) { + List buckets, Long docCountError) { super(name, reduceOrder, order, requiredSize, minDocCount, metadata, format, shardSize, showTermDocCountError, otherDocCount, buckets, docCountError); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index ea04db5a4d09e..7ca079f5aefdd 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -765,7 +765,7 @@ StringTerms buildResult(long owningBucketOrd, long otherDocCount, StringTerms.Bu } return new StringTerms(name, reduceOrder, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), metadata(), format, bucketCountThresholds.getShardSize(), showTermDocCountError, - otherDocCount, Arrays.asList(topBuckets), 0); + otherDocCount, Arrays.asList(topBuckets), 0L); } @Override diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedTerms.java index 5328c8c142a08..f8ce3d324bc44 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedTerms.java @@ -8,6 +8,7 @@ package org.elasticsearch.search.aggregations.bucket.terms; +import org.elasticsearch.Version; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -32,11 +33,11 @@ public abstract class InternalMappedTerms, B exten protected final List buckets; protected Map bucketMap; - protected long docCountError; + protected Long docCountError; protected InternalMappedTerms(String name, BucketOrder reduceOrder, BucketOrder order, int requiredSize, long minDocCount, Map metadata, DocValueFormat format, int shardSize, - boolean showTermDocCountError, long otherDocCount, List buckets, long docCountError) { + boolean showTermDocCountError, long otherDocCount, List buckets, Long docCountError) { super(name, reduceOrder, order, requiredSize, minDocCount, metadata); this.format = format; this.shardSize = shardSize; @@ -51,7 +52,14 @@ protected InternalMappedTerms(String name, BucketOrder reduceOrder, BucketOrder */ protected InternalMappedTerms(StreamInput in, Bucket.Reader bucketReader) throws IOException { super(in); - docCountError = in.readZLong(); + if (in.getVersion().onOrAfter(Version.V_7_15_0)) { + docCountError = in.readOptionalLong(); + } else { + docCountError = in.readZLong(); + if (docCountError == 0) { + docCountError = null; + } + } format = in.readNamedWriteable(DocValueFormat.class); shardSize = readSize(in); showTermDocCountError = in.readBoolean(); @@ -61,7 +69,11 @@ protected InternalMappedTerms(StreamInput in, Bucket.Reader bucketReader) thr @Override protected final void writeTermTypeInfoTo(StreamOutput out) throws IOException { - out.writeZLong(docCountError); + if (out.getVersion().onOrAfter(Version.V_7_15_0)) { + out.writeOptionalLong(docCountError); + } else { + out.writeZLong(docCountError == null ? 0 : docCountError); + } out.writeNamedWriteable(format); writeSize(shardSize, out); out.writeBoolean(showTermDocCountError); @@ -80,7 +92,7 @@ protected int getShardSize() { } @Override - public long getDocCountError() { + public Long getDocCountError() { return docCountError; } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongTerms.java index 6f7eef36b94dc..e87de401c8b0f 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongTerms.java @@ -103,7 +103,7 @@ public int hashCode() { public LongTerms(String name, BucketOrder reduceOrder, BucketOrder order, int requiredSize, long minDocCount, Map metadata, DocValueFormat format, int shardSize, boolean showTermDocCountError, long otherDocCount, - List buckets, long docCountError) { + List buckets, Long docCountError) { super(name, reduceOrder, order, requiredSize, minDocCount, metadata, format, shardSize, showTermDocCountError, otherDocCount, buckets, docCountError); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java index 7c704a75bb3d0..bb12d9049e65e 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java @@ -446,7 +446,7 @@ StringTerms buildResult(long owningBucketOrd, long otherDocCount, StringTerms.Bu } return new StringTerms(name, reduceOrder, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), metadata(), format, bucketCountThresholds.getShardSize(), showTermDocCountError, - otherDocCount, Arrays.asList(topBuckets), 0); + otherDocCount, Arrays.asList(topBuckets), 0L); } @Override diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/NumericTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/NumericTermsAggregator.java index 49ec3e04e4196..5b07f65abc3f2 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/NumericTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/NumericTermsAggregator.java @@ -372,7 +372,7 @@ LongTerms buildResult(long owningBucketOrd, long otherDocCount, LongTerms.Bucket showTermDocCountError, otherDocCount, List.of(topBuckets), - 0 + 0L ); } @@ -390,7 +390,7 @@ LongTerms buildEmptyResult() { showTermDocCountError, 0, emptyList(), - 0 + 0L ); } } @@ -454,7 +454,7 @@ DoubleTerms buildResult(long owningBucketOrd, long otherDocCount, DoubleTerms.Bu showTermDocCountError, otherDocCount, List.of(topBuckets), - 0 + 0L ); } @@ -472,7 +472,7 @@ DoubleTerms buildEmptyResult() { showTermDocCountError, 0, emptyList(), - 0 + 0L ); } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/ParsedTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/ParsedTerms.java index d258fb0fada58..6aa51a9a1b4ce 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/ParsedTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/ParsedTerms.java @@ -32,7 +32,7 @@ public abstract class ParsedTerms extends ParsedMultiBucketAggregation metadata, DocValueFormat format, int shardSize, boolean showTermDocCountError, long otherDocCount, - List buckets, long docCountError) { + List buckets, Long docCountError) { super(name, reduceOrder, order, requiredSize, minDocCount, metadata, format, shardSize, showTermDocCountError, otherDocCount, buckets, docCountError); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregatorFromFilters.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregatorFromFilters.java index 510faeb2071cb..cde2e22ccf5f3 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregatorFromFilters.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregatorFromFilters.java @@ -227,7 +227,7 @@ protected boolean lessThan(OrdBucket a, OrdBucket b) { showTermDocCountError, otherDocsCount, buckets, - 0 + 0L ); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/Terms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/Terms.java index 3a4d4f5ca9620..b4662b451449c 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/Terms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/Terms.java @@ -41,7 +41,7 @@ interface Bucket extends MultiBucketsAggregation.Bucket { /** * Get an upper bound of the error on document counts in this aggregation. */ - long getDocCountError(); + Long getDocCountError(); /** * Return the sum of the document counts of all buckets that did not make diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedTerms.java index 0ff8e9949f760..6376b8d161143 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedTerms.java @@ -110,8 +110,8 @@ protected int getShardSize() { } @Override - public long getDocCountError() { - return 0; + public Long getDocCountError() { + return 0L; } @Override diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/InternalAggregationsTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/InternalAggregationsTests.java index 9a568182c3b74..acb5ad0d4023e 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/InternalAggregationsTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/InternalAggregationsTests.java @@ -52,7 +52,7 @@ public void testReduceEmptyAggs() { public void testNonFinalReduceTopLevelPipelineAggs() { InternalAggregation terms = new StringTerms("name", BucketOrder.key(true), BucketOrder.key(true), - 10, 1, Collections.emptyMap(), DocValueFormat.RAW, 25, false, 10, Collections.emptyList(), 0); + 10, 1, Collections.emptyMap(), DocValueFormat.RAW, 25, false, 10, Collections.emptyList(), 0L); List aggs = singletonList(InternalAggregations.from(Collections.singletonList(terms))); InternalAggregations reducedAggs = InternalAggregations.topLevelReduce(aggs, maxBucketReduceContext().forPartialReduction()); assertEquals(1, reducedAggs.getTopLevelPipelineAggregators().size()); @@ -61,7 +61,7 @@ public void testNonFinalReduceTopLevelPipelineAggs() { public void testFinalReduceTopLevelPipelineAggs() { InternalAggregation terms = new StringTerms("name", BucketOrder.key(true), BucketOrder.key(true), - 10, 1, Collections.emptyMap(), DocValueFormat.RAW, 25, false, 10, Collections.emptyList(), 0); + 10, 1, Collections.emptyMap(), DocValueFormat.RAW, 25, false, 10, Collections.emptyList(), 0L); InternalAggregations aggs = InternalAggregations.from(Collections.singletonList(terms)); InternalAggregations reducedAggs = InternalAggregations.topLevelReduce(Collections.singletonList(aggs), diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/InternalMultiBucketAggregationTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/InternalMultiBucketAggregationTests.java index d2baf23c0ef1f..c41c6d7021077 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/InternalMultiBucketAggregationTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/InternalMultiBucketAggregationTests.java @@ -131,7 +131,7 @@ public void testResolveToSpecificBucket() { internalStringAggs, false, 0, DocValueFormat.RAW)); InternalTerms termsAgg = new StringTerms("string_terms", BucketOrder.count(false), BucketOrder.count(false), 1, 0, - Collections.emptyMap(), DocValueFormat.RAW, 1, false, 0, stringBuckets, 0); + Collections.emptyMap(), DocValueFormat.RAW, 1, false, 0, stringBuckets, 0L); InternalAggregations internalAggregations = InternalAggregations.from(Collections.singletonList(termsAgg)); LongTerms.Bucket bucket = new LongTerms.Bucket(19, 1, internalAggregations, false, 0, DocValueFormat.RAW); buckets.add(bucket); @@ -151,7 +151,7 @@ public void testResolveToMissingSpecificBucket() { internalStringAggs, false, 0, DocValueFormat.RAW)); InternalTerms termsAgg = new StringTerms("string_terms", BucketOrder.count(false), BucketOrder.count(false), 1, 0, - Collections.emptyMap(), DocValueFormat.RAW, 1, false, 0, stringBuckets, 0); + Collections.emptyMap(), DocValueFormat.RAW, 1, false, 0, stringBuckets, 0L); InternalAggregations internalAggregations = InternalAggregations.from(Collections.singletonList(termsAgg)); LongTerms.Bucket bucket = new LongTerms.Bucket(19, 1, internalAggregations, false, 0, DocValueFormat.RAW); buckets.add(bucket); diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/multiterms/InternalMultiTerms.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/multiterms/InternalMultiTerms.java index c833259749038..f2a6268f6e05d 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/multiterms/InternalMultiTerms.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/multiterms/InternalMultiTerms.java @@ -373,7 +373,7 @@ protected long getSumOfOtherDocCounts() { } @Override - protected long getDocCountError() { + protected Long getDocCountError() { return docCountError; } diff --git a/x-pack/plugin/async-search/src/test/java/org/elasticsearch/xpack/search/AsyncSearchTaskTests.java b/x-pack/plugin/async-search/src/test/java/org/elasticsearch/xpack/search/AsyncSearchTaskTests.java index da12dec4d55f6..ae2000cd54782 100644 --- a/x-pack/plugin/async-search/src/test/java/org/elasticsearch/xpack/search/AsyncSearchTaskTests.java +++ b/x-pack/plugin/async-search/src/test/java/org/elasticsearch/xpack/search/AsyncSearchTaskTests.java @@ -158,7 +158,7 @@ public void testWithFailureAndGetResponseFailureDuringReduction() throws Interru task.getSearchProgressActionListener().onListShards(Collections.emptyList(), Collections.emptyList(), SearchResponse.Clusters.EMPTY, false); InternalAggregations aggs = InternalAggregations.from(Collections.singletonList(new StringTerms("name", BucketOrder.key(true), - BucketOrder.key(true), 1, 1, Collections.emptyMap(), DocValueFormat.RAW, 1, false, 1, Collections.emptyList(), 0))); + BucketOrder.key(true), 1, 1, Collections.emptyMap(), DocValueFormat.RAW, 1, false, 1, Collections.emptyList(), 0L))); task.getSearchProgressActionListener().onPartialReduce(Collections.emptyList(), new TotalHits(0, TotalHits.Relation.EQUAL_TO), aggs, 1); task.getSearchProgressActionListener().onFailure(new CircuitBreakingException("boom", CircuitBreaker.Durability.TRANSIENT)); From 5db7196409fcee62a73a5b1eae36e2ac824106b7 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Thu, 12 Aug 2021 11:50:17 -1000 Subject: [PATCH 2/2] Fix docCountError calculation for multiple reduces (#76391) Fix docCountError calculation in case of multiple reduces. It fixes 2 mistakes in #43874. The first error was introduced in the original PR, where unknown doc count errors were initialized equal to 0, the second was introduced during in order to fix the first one by ignoring these 0s, which essentially disabled the original fix. Fixes #75667 --- .../aggregations/TermsReduceBenchmark.java | 2 +- .../terms/StringTermsSerializationBenchmark.java | 2 +- .../bucket/terms/AbstractInternalTerms.java | 4 ++-- .../GlobalOrdinalsStringTermsAggregator.java | 2 +- .../bucket/terms/InternalMappedTerms.java | 16 +++++++++++----- .../bucket/terms/MapStringTermsAggregator.java | 2 +- .../bucket/terms/NumericTermsAggregator.java | 4 ++-- .../terms/StringTermsAggregatorFromFilters.java | 2 +- .../aggregations/bucket/terms/UnmappedTerms.java | 2 +- 9 files changed, 21 insertions(+), 15 deletions(-) diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/search/aggregations/TermsReduceBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/search/aggregations/TermsReduceBenchmark.java index ab3a701db77f6..ea5134410e2d4 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/search/aggregations/TermsReduceBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/search/aggregations/TermsReduceBenchmark.java @@ -154,7 +154,7 @@ private StringTerms newTerms(Random rand, BytesRef[] dict, boolean withNested) { true, 0, buckets, - 0L + null ); } diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/search/aggregations/bucket/terms/StringTermsSerializationBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/search/aggregations/bucket/terms/StringTermsSerializationBenchmark.java index 86826a2ca73de..54167db695f6f 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/search/aggregations/bucket/terms/StringTermsSerializationBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/search/aggregations/bucket/terms/StringTermsSerializationBenchmark.java @@ -72,7 +72,7 @@ private StringTerms newTerms(boolean withNested) { false, 100000, resultBuckets, - 0L + null ); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractInternalTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractInternalTerms.java index 89707fcb06e3b..b5363e2c2c8a8 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractInternalTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractInternalTerms.java @@ -133,7 +133,7 @@ private long getDocCountError(A terms) { if (size == 0 || size < terms.getShardSize() || isKeyOrder(terms.getOrder())) { return 0; } else if (InternalOrder.isCountDesc(terms.getOrder())) { - if (terms.getDocCountError() != null && terms.getDocCountError() > 0) { + if (terms.getDocCountError() != null) { // If there is an existing docCountError for this agg then // use this as the error for this aggregation return terms.getDocCountError(); @@ -340,7 +340,7 @@ public InternalAggregation reduce(List aggregations, Intern protected static XContentBuilder doXContentCommon(XContentBuilder builder, Params params, - long docCountError, + Long docCountError, long otherDocCount, List buckets) throws IOException { builder.field(DOC_COUNT_ERROR_UPPER_BOUND_FIELD_NAME.getPreferredName(), docCountError); diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 7ca079f5aefdd..e4d44114166ca 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -765,7 +765,7 @@ StringTerms buildResult(long owningBucketOrd, long otherDocCount, StringTerms.Bu } return new StringTerms(name, reduceOrder, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), metadata(), format, bucketCountThresholds.getShardSize(), showTermDocCountError, - otherDocCount, Arrays.asList(topBuckets), 0L); + otherDocCount, Arrays.asList(topBuckets), null); } @Override diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedTerms.java index f8ce3d324bc44..b9a519c8f7f6b 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedTerms.java @@ -53,12 +53,13 @@ protected InternalMappedTerms(String name, BucketOrder reduceOrder, BucketOrder protected InternalMappedTerms(StreamInput in, Bucket.Reader bucketReader) throws IOException { super(in); if (in.getVersion().onOrAfter(Version.V_7_15_0)) { - docCountError = in.readOptionalLong(); - } else { - docCountError = in.readZLong(); - if (docCountError == 0) { + if (in.readBoolean()) { + docCountError = in.readZLong(); + } else { docCountError = null; } + } else { + docCountError = in.readZLong(); } format = in.readNamedWriteable(DocValueFormat.class); shardSize = readSize(in); @@ -70,7 +71,12 @@ protected InternalMappedTerms(StreamInput in, Bucket.Reader bucketReader) thr @Override protected final void writeTermTypeInfoTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(Version.V_7_15_0)) { - out.writeOptionalLong(docCountError); + if (docCountError != null) { + out.writeBoolean(true); + out.writeZLong(docCountError); + } else { + out.writeBoolean(false); + } } else { out.writeZLong(docCountError == null ? 0 : docCountError); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java index bb12d9049e65e..a7911bcdc3590 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java @@ -446,7 +446,7 @@ StringTerms buildResult(long owningBucketOrd, long otherDocCount, StringTerms.Bu } return new StringTerms(name, reduceOrder, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), metadata(), format, bucketCountThresholds.getShardSize(), showTermDocCountError, - otherDocCount, Arrays.asList(topBuckets), 0L); + otherDocCount, Arrays.asList(topBuckets), null); } @Override diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/NumericTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/NumericTermsAggregator.java index 5b07f65abc3f2..38338bc8e5668 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/NumericTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/NumericTermsAggregator.java @@ -372,7 +372,7 @@ LongTerms buildResult(long owningBucketOrd, long otherDocCount, LongTerms.Bucket showTermDocCountError, otherDocCount, List.of(topBuckets), - 0L + null ); } @@ -454,7 +454,7 @@ DoubleTerms buildResult(long owningBucketOrd, long otherDocCount, DoubleTerms.Bu showTermDocCountError, otherDocCount, List.of(topBuckets), - 0L + null ); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregatorFromFilters.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregatorFromFilters.java index cde2e22ccf5f3..6acd90a910547 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregatorFromFilters.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregatorFromFilters.java @@ -227,7 +227,7 @@ protected boolean lessThan(OrdBucket a, OrdBucket b) { showTermDocCountError, otherDocsCount, buckets, - 0L + null ); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedTerms.java index 6376b8d161143..a87680c215629 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedTerms.java @@ -97,7 +97,7 @@ public boolean isMapped() { @Override public final XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException { - return doXContentCommon(builder, params, 0, 0, Collections.emptyList()); + return doXContentCommon(builder, params, 0L, 0, Collections.emptyList()); } @Override