From a17d99ddb2da79a89153c410e1a3dcc70120e2a8 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Thu, 24 Jan 2019 14:15:34 +0100 Subject: [PATCH 1/2] initial --- .../test/search.aggregation/20_terms.yml | 71 +++++++++++++++++++ .../bucket/terms/TermsAggregatorFactory.java | 21 ++++-- .../bucket/terms/TermsAggregatorTests.java | 26 +++---- 3 files changed, 101 insertions(+), 17 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml index 379824211794a..54307dd92e957 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml @@ -737,3 +737,74 @@ setup: - is_false: aggregations.str_terms.buckets.1.key_as_string - match: { aggregations.str_terms.buckets.1.doc_count: 2 } + +--- +"Global ordinals are not loaded with the map execution hint": + + - skip: + version: " - 6.99.99" + reason: bug fixed in 7.0 + + - do: + index: + refresh: true + index: test_1 + id: 1 + routing: 1 + body: { "str": "abc" } + + - do: + index: + refresh: true + index: test_1 + id: 2 + routing: 1 + body: { "str": "abc" } + + - do: + index: + refresh: true + index: test_1 + id: 3 + routing: 1 + body: { "str": "bcd" } + + - do: + indices.refresh: {} + + - do: + search: + index: test_1 + body: { "size" : 0, "aggs" : { "str_terms" : { "terms" : { "field" : "str", "execution_hint" : "map" } } } } + + - match: { hits.total.value: 3} + - length: { aggregations.str_terms.buckets: 2 } + + - do: + indices.stats: + index: test_1 + metric: fielddata + fielddata_fields: str + + - match: { indices.test_1.total.fielddata.memory_size_in_bytes: 0} + + - do: + search: + index: test_1 + body: { "size" : 0, "aggs" : { "str_terms" : { "terms" : { "field" : "str", "execution_hint" : "global_ordinals" } } } } + + - match: { hits.total.value: 3} + - length: { aggregations.str_terms.buckets: 2 } + + - do: + indices.stats: + index: test_1 + metric: fielddata + fielddata_fields: str + + - gt: { indices.test_1.total.fielddata.memory_size_in_bytes: 0} + + + + + diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java index 1ff0efd3e8307..346da32763bd8 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java @@ -20,6 +20,7 @@ package org.elasticsearch.search.aggregations.bucket.terms; import org.apache.logging.log4j.LogManager; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.IndexSearcher; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.logging.DeprecationLogger; @@ -133,7 +134,7 @@ protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator pare if (valuesSource instanceof ValuesSource.Bytes.WithOrdinals == false) { execution = ExecutionMode.MAP; } - final long maxOrd = getMaxOrd(valuesSource, context.searcher()); + final long maxOrd = getMaxOrd(context.searcher(), valuesSource, execution); if (execution == null) { execution = ExecutionMode.GLOBAL_ORDINALS; } @@ -207,13 +208,23 @@ static SubAggCollectionMode subAggCollectionMode(int expectedSize, long maxOrd) } /** - * Get the maximum global ordinal value for the provided {@link ValuesSource} or -1 + * Get the maximum ordinal value for the provided {@link ValuesSource} or -1 * if the values source is not an instance of {@link ValuesSource.Bytes.WithOrdinals}. */ - static long getMaxOrd(ValuesSource source, IndexSearcher searcher) throws IOException { + static long getMaxOrd(IndexSearcher searcher, ValuesSource source, ExecutionMode executionMode) throws IOException { if (source instanceof ValuesSource.Bytes.WithOrdinals) { ValuesSource.Bytes.WithOrdinals valueSourceWithOrdinals = (ValuesSource.Bytes.WithOrdinals) source; - return valueSourceWithOrdinals.globalMaxOrd(searcher); + if (executionMode == ExecutionMode.MAP) { + // global ordinals are not requested so we don't load them + // and return the biggest cardinality per segment instead. + long maxOrd = -1; + for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) { + maxOrd = Math.max(maxOrd, valueSourceWithOrdinals.ordinalsValues(leaf).getValueCount()); + } + return maxOrd; + } else { + return valueSourceWithOrdinals.globalMaxOrd(searcher); + } } else { return -1; } @@ -258,7 +269,7 @@ Aggregator create(String name, List pipelineAggregators, Map metaData) throws IOException { - final long maxOrd = getMaxOrd(valuesSource, context.searcher()); + final long maxOrd = getMaxOrd(context.searcher(), valuesSource, ExecutionMode.GLOBAL_ORDINALS); assert maxOrd != -1; final double ratio = maxOrd / ((double) context.searcher().getIndexReader().numDocs()); diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java index 8acf78d301af5..909477d980070 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java @@ -90,6 +90,7 @@ import java.util.function.Function; import static org.elasticsearch.index.mapper.SeqNoFieldMapper.PRIMARY_TERM_NAME; +import static org.elasticsearch.mock.orig.Mockito.when; import static org.elasticsearch.search.aggregations.AggregationBuilders.terms; import static org.elasticsearch.search.aggregations.PipelineAggregatorBuilders.bucketScript; import static org.hamcrest.Matchers.equalTo; @@ -154,35 +155,35 @@ public void testGlobalOrdinalsExecutionHint() throws Exception { directory.close(); } - public void testSimple() throws Exception { + public void testSimpleKeyword() throws Exception { try (Directory directory = newDirectory()) { try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) { Document document = new Document(); - document.add(new SortedSetDocValuesField("string", new BytesRef("a"))); - document.add(new SortedSetDocValuesField("string", new BytesRef("b"))); + document.add(new SortedSetDocValuesField("keyword", new BytesRef("a"))); + document.add(new SortedSetDocValuesField("keyword", new BytesRef("b"))); indexWriter.addDocument(document); document = new Document(); - document.add(new SortedSetDocValuesField("string", new BytesRef(""))); - document.add(new SortedSetDocValuesField("string", new BytesRef("c"))); - document.add(new SortedSetDocValuesField("string", new BytesRef("a"))); + document.add(new SortedSetDocValuesField("keyword", new BytesRef(""))); + document.add(new SortedSetDocValuesField("keyword", new BytesRef("c"))); + document.add(new SortedSetDocValuesField("keyword", new BytesRef("a"))); indexWriter.addDocument(document); document = new Document(); - document.add(new SortedSetDocValuesField("string", new BytesRef("b"))); - document.add(new SortedSetDocValuesField("string", new BytesRef("d"))); + document.add(new SortedSetDocValuesField("keyword", new BytesRef("b"))); + document.add(new SortedSetDocValuesField("keyword", new BytesRef("d"))); indexWriter.addDocument(document); document = new Document(); - document.add(new SortedSetDocValuesField("string", new BytesRef(""))); + document.add(new SortedSetDocValuesField("keyword", new BytesRef(""))); indexWriter.addDocument(document); try (IndexReader indexReader = maybeWrapReaderEs(indexWriter.getReader())) { IndexSearcher indexSearcher = newIndexSearcher(indexReader); for (TermsAggregatorFactory.ExecutionMode executionMode : TermsAggregatorFactory.ExecutionMode.values()) { TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name", ValueType.STRING) .executionHint(executionMode.toString()) - .field("string") + .field("keyword") .order(BucketOrder.key(true)); MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType(); - fieldType.setName("string"); - fieldType.setHasDocValues(true ); + fieldType.setName("keyword"); + fieldType.setHasDocValues(true); TermsAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType); aggregator.preCollection(); @@ -682,6 +683,7 @@ private void termsAggregator(ValueType valueType, MappedFieldType fieldType, .collectMode(randomFrom(Aggregator.SubAggCollectionMode.values())) .field("field")); aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType); + fieldType.fielddataBuilder() aggregator.preCollection(); indexSearcher.search(new MatchAllDocsQuery(), aggregator); aggregator.postCollection(); From be373dbd7e0a112c06bf13d9d6917451d46518fe Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Thu, 24 Jan 2019 21:16:26 +0100 Subject: [PATCH 2/2] remove unrelated change --- .../bucket/terms/TermsAggregatorTests.java | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java index 909477d980070..8acf78d301af5 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java @@ -90,7 +90,6 @@ import java.util.function.Function; import static org.elasticsearch.index.mapper.SeqNoFieldMapper.PRIMARY_TERM_NAME; -import static org.elasticsearch.mock.orig.Mockito.when; import static org.elasticsearch.search.aggregations.AggregationBuilders.terms; import static org.elasticsearch.search.aggregations.PipelineAggregatorBuilders.bucketScript; import static org.hamcrest.Matchers.equalTo; @@ -155,35 +154,35 @@ public void testGlobalOrdinalsExecutionHint() throws Exception { directory.close(); } - public void testSimpleKeyword() throws Exception { + public void testSimple() throws Exception { try (Directory directory = newDirectory()) { try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) { Document document = new Document(); - document.add(new SortedSetDocValuesField("keyword", new BytesRef("a"))); - document.add(new SortedSetDocValuesField("keyword", new BytesRef("b"))); + document.add(new SortedSetDocValuesField("string", new BytesRef("a"))); + document.add(new SortedSetDocValuesField("string", new BytesRef("b"))); indexWriter.addDocument(document); document = new Document(); - document.add(new SortedSetDocValuesField("keyword", new BytesRef(""))); - document.add(new SortedSetDocValuesField("keyword", new BytesRef("c"))); - document.add(new SortedSetDocValuesField("keyword", new BytesRef("a"))); + document.add(new SortedSetDocValuesField("string", new BytesRef(""))); + document.add(new SortedSetDocValuesField("string", new BytesRef("c"))); + document.add(new SortedSetDocValuesField("string", new BytesRef("a"))); indexWriter.addDocument(document); document = new Document(); - document.add(new SortedSetDocValuesField("keyword", new BytesRef("b"))); - document.add(new SortedSetDocValuesField("keyword", new BytesRef("d"))); + document.add(new SortedSetDocValuesField("string", new BytesRef("b"))); + document.add(new SortedSetDocValuesField("string", new BytesRef("d"))); indexWriter.addDocument(document); document = new Document(); - document.add(new SortedSetDocValuesField("keyword", new BytesRef(""))); + document.add(new SortedSetDocValuesField("string", new BytesRef(""))); indexWriter.addDocument(document); try (IndexReader indexReader = maybeWrapReaderEs(indexWriter.getReader())) { IndexSearcher indexSearcher = newIndexSearcher(indexReader); for (TermsAggregatorFactory.ExecutionMode executionMode : TermsAggregatorFactory.ExecutionMode.values()) { TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name", ValueType.STRING) .executionHint(executionMode.toString()) - .field("keyword") + .field("string") .order(BucketOrder.key(true)); MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType(); - fieldType.setName("keyword"); - fieldType.setHasDocValues(true); + fieldType.setName("string"); + fieldType.setHasDocValues(true ); TermsAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType); aggregator.preCollection(); @@ -683,7 +682,6 @@ private void termsAggregator(ValueType valueType, MappedFieldType fieldType, .collectMode(randomFrom(Aggregator.SubAggCollectionMode.values())) .field("field")); aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType); - fieldType.fielddataBuilder() aggregator.preCollection(); indexSearcher.search(new MatchAllDocsQuery(), aggregator); aggregator.postCollection();