diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml
index 5f6060da7b695..645be9418258f 100644
--- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml
+++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml
@@ -567,6 +567,58 @@ setup:
           date:
             type: date
 
+  - do:
+      bulk:
+        index: test_2
+        refresh: true
+        body:
+          - '{"index": {}}'
+          - '{"date": "2000-01-01"}' # This date is intentionally very far in the past so we end up not being able to use the date_histo -> range -> filters optimization
+          - '{"index": {}}'
+          - '{"date": "2000-01-02"}'
+          - '{"index": {}}'
+          - '{"date": "2016-02-01"}'
+          - '{"index": {}}'
+          - '{"date": "2016-03-01"}'
+
+  - do:
+      search:
+        index: test_2
+        body:
+          size: 0
+          profile: true
+          aggs:
+            histo:
+              date_histogram:
+                field: date
+                calendar_interval: month
+  - match: { hits.total.value: 4 }
+  - length: { aggregations.histo.buckets: 195 }
+  - match: { aggregations.histo.buckets.0.key_as_string: "2000-01-01T00:00:00.000Z" }
+  - match: { aggregations.histo.buckets.0.doc_count: 2 }
+  - match: { profile.shards.0.aggregations.0.type: DateHistogramAggregator }
+  - match: { profile.shards.0.aggregations.0.description: histo }
+  - match: { profile.shards.0.aggregations.0.breakdown.collect_count: 4 }
+  - match: { profile.shards.0.aggregations.0.debug.total_buckets: 3 }
+
+---
+"date_histogram run as filters profiler":
+  - skip:
+      version: " - 7.99.99"
+      reason: optimization added in 7.11.0, backport pending
+
+  - do:
+      indices.create:
+        index: test_2
+        body:
+          settings:
+            number_of_replicas: 0
+            number_of_shards: 1
+          mappings:
+            properties:
+              date:
+                type: date
+
   - do:
       bulk:
         index: test_2
@@ -596,10 +648,13 @@ setup:
   - length: { aggregations.histo.buckets: 3 }
   - match: { aggregations.histo.buckets.0.key_as_string: "2016-01-01T00:00:00.000Z" }
   - match: { aggregations.histo.buckets.0.doc_count: 2 }
-  - match: { profile.shards.0.aggregations.0.type: DateHistogramAggregator }
+  - match: { profile.shards.0.aggregations.0.type: DateHistogramAggregator.FromDateRange }
   - match: { profile.shards.0.aggregations.0.description: histo }
-  - match: { profile.shards.0.aggregations.0.breakdown.collect_count: 4 }
-  - match: { profile.shards.0.aggregations.0.debug.total_buckets: 3 }
+  # ultimately this ends up as a filters agg that uses filter by filter collection which is tracked in build_leaf_collector
+  - match: { profile.shards.0.aggregations.0.breakdown.collect_count: 0 }
+  - match: { profile.shards.0.aggregations.0.debug.delegate: RangeAggregator.FromFilters }
+  - match: { profile.shards.0.aggregations.0.debug.delegate_debug.delegate: FiltersAggregator.FilterByFilter }
+  - match: { profile.shards.0.aggregations.0.debug.delegate_debug.delegate_debug.segments_with_deleted_docs: 0 }
 
 ---
 "histogram with hard bounds":
diff --git a/server/src/internalClusterTest/java/org/elasticsearch/action/search/TransportSearchIT.java b/server/src/internalClusterTest/java/org/elasticsearch/action/search/TransportSearchIT.java
index 8dc72a17c82ed..6fa8b7cd58d73 100644
--- a/server/src/internalClusterTest/java/org/elasticsearch/action/search/TransportSearchIT.java
+++ b/server/src/internalClusterTest/java/org/elasticsearch/action/search/TransportSearchIT.java
@@ -593,5 +593,10 @@ public ScoreMode scoreMode() {
 
         @Override
         public void preCollection() throws IOException {}
+
+        @Override
+        public Aggregator[] subAggregators() {
+            throw new UnsupportedOperationException();
+        }
     }
 }
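For readers following the YAML above from Java, the profiled request in the second test boils down to the snippet below — a sketch against the integration-test client, where `client()` and the index name are assumptions from the test context, not part of this change:

    SearchResponse resp = client().prepareSearch("test_2")
        .setSize(0)
        .setProfile(true)
        .addAggregation(
            AggregationBuilders.dateHistogram("histo").field("date").calendarInterval(DateHistogramInterval.MONTH))
        .get();
    // The per-shard profile then reports DateHistogramAggregator.FromDateRange and,
    // nested under debug.delegate / delegate_debug, the range and filters delegates.
    Map<String, ProfileShardResult> profile = resp.getProfileResults();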
diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/DateHistogramIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/DateHistogramIT.java
index ffa7000d567d7..4f24018a59b16 100644
--- a/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/DateHistogramIT.java
+++ b/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/DateHistogramIT.java
@@ -39,10 +39,10 @@
 import org.elasticsearch.search.aggregations.BucketOrder;
 import org.elasticsearch.search.aggregations.InternalAggregation;
 import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval;
-import org.elasticsearch.search.aggregations.bucket.histogram.LongBounds;
 import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
 import org.elasticsearch.search.aggregations.bucket.histogram.Histogram.Bucket;
 import org.elasticsearch.search.aggregations.bucket.histogram.InternalDateHistogram;
+import org.elasticsearch.search.aggregations.bucket.histogram.LongBounds;
 import org.elasticsearch.search.aggregations.metrics.Avg;
 import org.elasticsearch.search.aggregations.metrics.Sum;
 import org.elasticsearch.test.ESIntegTestCase;
diff --git a/server/src/main/java/org/elasticsearch/common/Rounding.java b/server/src/main/java/org/elasticsearch/common/Rounding.java
index 1cf10cf346388..5a33ca29698af 100644
--- a/server/src/main/java/org/elasticsearch/common/Rounding.java
+++ b/server/src/main/java/org/elasticsearch/common/Rounding.java
@@ -292,6 +292,13 @@ public interface Prepared {
          * next rounded value in specified units if possible.
          */
         double roundingSize(long utcMillis, DateTimeUnit timeUnit);
+        /**
+         * If this rounding mechanism precalculates rounding points then
+         * this array stores dates such that each date between two adjacent
+         * entries rounds down to the earlier entry. If the rounding
+         * mechanism doesn't precalculate points then this is {@code null}.
+         */
+        long[] fixedRoundingPoints();
     }
     /**
      * Prepare to round many times.
@@ -436,6 +443,11 @@ protected Prepared maybeUseArray(long minUtcMillis, long maxUtcMillis, int max)
             }
             return new ArrayRounding(values, i, this);
         }
+
+        @Override
+        public long[] fixedRoundingPoints() {
+            return null;
+        }
     }
 
     static class TimeUnitRounding extends Rounding {
@@ -1267,6 +1279,12 @@ public long nextRoundingValue(long utcMillis) {
             public double roundingSize(long utcMillis, DateTimeUnit timeUnit) {
                 return delegatePrepared.roundingSize(utcMillis, timeUnit);
             }
+
+            @Override
+            public long[] fixedRoundingPoints() {
+                // TODO we can likely translate here
+                return null;
+            }
         };
     }
@@ -1349,5 +1367,10 @@ public long nextRoundingValue(long utcMillis) {
         public double roundingSize(long utcMillis, DateTimeUnit timeUnit) {
             return delegate.roundingSize(utcMillis, timeUnit);
         }
+
+        @Override
+        public long[] fixedRoundingPoints() {
+            return Arrays.copyOf(values, max);
+        }
     }
 }
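To make the new contract concrete, here is a minimal sketch of how a caller might consume `fixedRoundingPoints()`; the `rounding`, `minUtcMillis`, and `maxUtcMillis` names are hypothetical:

    Rounding.Prepared prepared = rounding.prepare(minUtcMillis, maxUtcMillis);
    long[] points = prepared.fixedRoundingPoints();
    if (points == null) {
        // This rounding can't precalculate its points; round every value instead.
    } else {
        for (int i = 0; i < points.length - 1; i++) {
            // Every date in [points[i], points[i + 1]) rounds down to points[i],
            // so each pair of adjacent points describes one histogram bucket.
        }
    }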
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java
new file mode 100644
index 0000000000000..832ecce4ed7eb
--- /dev/null
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.aggregations;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.ScoreMode;
+import org.elasticsearch.common.CheckedFunction;
+import org.elasticsearch.search.profile.aggregation.InternalAggregationProfileTree;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.function.BiConsumer;
+
+/**
+ * An {@linkplain Aggregator} that delegates collection to another
+ * {@linkplain Aggregator} and then translates its results into the results
+ * you'd expect from another aggregation.
+ */
+public abstract class AdaptingAggregator extends Aggregator {
+    private final Aggregator parent;
+    private final Aggregator delegate;
+
+    public AdaptingAggregator(
+        Aggregator parent,
+        AggregatorFactories subAggregators,
+        CheckedFunction<AggregatorFactories, Aggregator, IOException> delegate
+    ) throws IOException {
+        // It's important we set parent first or else when we build the sub-aggregators they can fail because they'll call this.parent.
+        this.parent = parent;
+        /*
+         * Lock the parent of the sub-aggregators to *this* instead of to
+         * the delegate. This keeps the parent link shaped like the requested
+         * agg tree. This is how it has always been and some aggs rely on it.
+         */
+        this.delegate = delegate.apply(subAggregators.fixParent(this));
+        assert this.delegate.parent() == parent : "invalid parent set on delegate";
+    }
+
+    /**
+     * Adapt the result from the collecting {@linkplain Aggregator} into the
+     * result expected by this {@linkplain Aggregator}.
+     */
+    protected abstract InternalAggregation adapt(InternalAggregation delegateResult);
+
+    @Override
+    public final void close() {
+        delegate.close();
+    }
+
+    @Override
+    public final ScoreMode scoreMode() {
+        return delegate.scoreMode();
+    }
+
+    @Override
+    public final String name() {
+        return delegate.name();
+    }
+
+    @Override
+    public final Aggregator parent() {
+        return parent;
+    }
+
+    @Override
+    public final Aggregator subAggregator(String name) {
+        return delegate.subAggregator(name);
+    }
+
+    @Override
+    public final LeafBucketCollector getLeafCollector(LeafReaderContext ctx) throws IOException {
+        return delegate.getLeafCollector(ctx);
+    }
+
+    @Override
+    public final void preCollection() throws IOException {
+        delegate.preCollection();
+    }
+
+    @Override
+    public final InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException {
+        InternalAggregation[] delegateResults = delegate.buildAggregations(owningBucketOrds);
+        InternalAggregation[] result = new InternalAggregation[owningBucketOrds.length];
+        for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) {
+            result[ordIdx] = adapt(delegateResults[ordIdx]);
+        }
+        return result;
+    }
+
+    @Override
+    public final InternalAggregation buildEmptyAggregation() {
+        return adapt(delegate.buildEmptyAggregation());
+    }
+
+    @Override
+    public final Aggregator[] subAggregators() {
+        return delegate.subAggregators();
+    }
+
+    @Override
+    public void collectDebugInfo(BiConsumer<String, Object> add) {
+        super.collectDebugInfo(add);
+        add.accept("delegate", InternalAggregationProfileTree.typeFromAggregator(delegate));
+        Map<String, Object> delegateDebug = new HashMap<>();
+        delegate.collectDebugInfo(delegateDebug::put);
+        add.accept("delegate_debug", delegateDebug);
+    }
+
+    public Aggregator delegate() {
+        return delegate;
+    }
+}
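A hypothetical subclass makes the shape of this API clearer — collection happens entirely in the delegate and only `adapt` runs afterwards. This is a sketch; `IdentityAdapter` is illustrative and not part of this change:

    class IdentityAdapter extends AdaptingAggregator {
        IdentityAdapter(
            Aggregator parent,
            AggregatorFactories subAggregators,
            CheckedFunction<AggregatorFactories, Aggregator, IOException> delegate
        ) throws IOException {
            super(parent, subAggregators, delegate);
        }

        @Override
        protected InternalAggregation adapt(InternalAggregation delegateResult) {
            // A real adapter rebuilds the delegate's result into the requested
            // aggregation type here; FromDateRange below turns ranges into
            // date_histogram buckets, for example.
            return delegateResult;
        }
    }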
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/Aggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/Aggregator.java
index 5e232829aaa7e..5c58b3ac0414b 100644
--- a/server/src/main/java/org/elasticsearch/search/aggregations/Aggregator.java
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/Aggregator.java
@@ -172,6 +172,11 @@ public final InternalAggregation buildTopLevel() throws IOException {
      */
     public void collectDebugInfo(BiConsumer<String, Object> add) {}
 
+    /**
+     * Get the aggregators running under this one.
+     */
+    public abstract Aggregator[] subAggregators();
+
     /** Aggregation mode for sub aggregations. */
     public enum SubAggCollectionMode implements Writeable {
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorBase.java b/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorBase.java
index be9c81b5759b9..5cbb3225c17a4 100644
--- a/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorBase.java
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorBase.java
@@ -224,6 +224,7 @@ public Aggregator parent() {
         return parent;
     }
 
+    @Override
     public Aggregator[] subAggregators() {
         return subAggregators;
     }
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorFactories.java b/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorFactories.java
index ffba5ca28fea5..9f9d090515a8a 100644
--- a/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorFactories.java
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorFactories.java
@@ -227,6 +227,27 @@ public int countAggregators() {
         return factories.length;
     }
 
+    /**
+     * This returns a copy of {@link AggregatorFactories} modified so that
+     * calls to {@link #createSubAggregators} will ignore the provided parent
+     * aggregator and always use the {@code fixedParent} passed to this
+     * method.
+     * <p>
+     * {@link AdaptingAggregator} uses this to make sure that sub-aggregators
+     * get the {@link AdaptingAggregator} aggregator itself as the parent.
+     */
+    public AggregatorFactories fixParent(Aggregator fixedParent) {
+        AggregatorFactories previous = this;
+        return new AggregatorFactories(factories) {
+            @Override
+            public Aggregator[] createSubAggregators(SearchContext searchContext, Aggregator parent, CardinalityUpperBound cardinality)
+                throws IOException {
+                // Note that we're throwing out the "parent" passed in to this method and using the parent passed to fixParent
+                return previous.createSubAggregators(searchContext, fixedParent, cardinality);
+            }
+        };
+    }
+
     /**
      * A mutable collection of {@link AggregationBuilder}s and
      * {@link PipelineAggregationBuilder}s.
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DeferringBucketCollector.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DeferringBucketCollector.java
index e75b7f0c308f0..3d0bb2bec1e23 100644
--- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DeferringBucketCollector.java
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DeferringBucketCollector.java
@@ -119,6 +119,11 @@ public Aggregator resolveSortPath(PathElement next, Iterator<PathElement> path)
         public BucketComparator bucketComparator(String key, SortOrder order) {
             throw new UnsupportedOperationException("Can't sort on deferred aggregations");
         }
+
+        @Override
+        public Aggregator[] subAggregators() {
+            return in.subAggregators();
+        }
     }
 }
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java
index 4687ccd323d0b..ff710b99f6f23 100644
--- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java
@@ -20,6 +20,17 @@
 package org.elasticsearch.search.aggregations.bucket.filter;
 
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.BulkScorer;
+import org.apache.lucene.search.CollectionTerminatedException;
+import org.apache.lucene.search.IndexOrDocValuesQuery;
+import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.PointRangeQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreMode;
+import org.apache.lucene.search.TotalHitCountCollector;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.util.Bits;
 import org.elasticsearch.common.ParseField;
@@ -45,9 +56,16 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
-import java.util.function.Supplier;
+import java.util.function.BiConsumer;
 
-public class FiltersAggregator extends BucketsAggregator {
+/**
+ * Aggregator for {@code filters}. There are two known subclasses,
+ * {@link FilterByFilter} which is fast but only works in some cases and
+ * {@link Compatible} which works in all cases.
+ * {@link FiltersAggregator#build} will build the fastest version that
+ * works with the configuration.
+ */
+public abstract class FiltersAggregator extends BucketsAggregator {
 
     public static final ParseField FILTERS_FIELD = new ParseField("filters");
     public static final ParseField OTHER_BUCKET_FIELD = new ParseField("other_bucket");
@@ -115,58 +133,110 @@ public boolean equals(Object obj) {
         }
     }
 
+    /**
+     * Build an {@link Aggregator} for a {@code filters} aggregation. If there
+     * isn't a parent, there aren't children, and we don't collect "other"
+     * buckets then this will be a faster {@link FilterByFilter} aggregator.
+     * Otherwise it'll fall back to a slower aggregator that is
+     * {@link Compatible} with parent, children, and "other" buckets.
+     */
+    public static FiltersAggregator build(
+        String name,
+        AggregatorFactories factories,
+        String[] keys,
+        Query[] filters,
+        boolean keyed,
+        String otherBucketKey,
+        SearchContext context,
+        Aggregator parent,
+        CardinalityUpperBound cardinality,
+        Map<String, Object> metadata
+    ) throws IOException {
+        FiltersAggregator filterOrder = buildFilterOrderOrNull(
+            name,
+            factories,
+            keys,
+            filters,
+            keyed,
+            otherBucketKey,
+            context,
+            parent,
+            cardinality,
+            metadata
+        );
+        if (filterOrder != null) {
+            return filterOrder;
+        }
+        return new FiltersAggregator.Compatible(
+            name,
+            factories,
+            keys,
+            filters,
+            keyed,
+            otherBucketKey,
+            context,
+            parent,
+            cardinality,
+            metadata
+        );
+    }
+
+    /**
+     * Build an {@link Aggregator} for a {@code filters} aggregation if we
+     * can collect {@link FilterByFilter}, otherwise return {@code null}. We can
+     * collect filter by filter if there isn't a parent, there aren't children,
+     * and we don't collect "other" buckets. Collecting {@link FilterByFilter}
+     * is generally going to be much faster than the {@link Compatible} aggregator.
+     */
+    public static FiltersAggregator buildFilterOrderOrNull(
+        String name,
+        AggregatorFactories factories,
+        String[] keys,
+        Query[] filters,
+        boolean keyed,
+        String otherBucketKey,
+        SearchContext context,
+        Aggregator parent,
+        CardinalityUpperBound cardinality,
+        Map<String, Object> metadata
+    ) throws IOException {
+        if (parent != null) {
+            return null;
+        }
+        if (factories.countAggregators() != 0) {
+            return null;
+        }
+        if (otherBucketKey != null) {
+            return null;
+        }
+        return new FiltersAggregator.FilterByFilter(
+            name,
+            keys,
+            filters,
+            keyed,
+            context,
+            parent,
+            cardinality,
+            metadata
+        );
+    }
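The three early returns above are the entire eligibility test, so whether a request gets the fast path is visible from its shape alone — a sketch using the client-side builders (names and query are illustrative):

    // Eligible for FilterByFilter: top level, no children, no other bucket.
    AggregationBuilder fast = AggregationBuilders.filters("f",
        new FiltersAggregator.KeyedFilter("a", QueryBuilders.rangeQuery("date").gte("2016-01-01")));

    // Forced onto the Compatible path: attaching a child aggregation.
    AggregationBuilder slow = AggregationBuilders.filters("f",
        new FiltersAggregator.KeyedFilter("a", QueryBuilders.rangeQuery("date").gte("2016-01-01")))
        .subAggregation(AggregationBuilders.max("m").field("date"));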
+
     private final String[] keys;
-    private Supplier<Weight[]> filters;
     private final boolean keyed;
-    private final boolean showOtherBucket;
-    private final String otherBucketKey;
-    private final int totalNumKeys;
+    protected final String otherBucketKey;
 
-    public FiltersAggregator(String name, AggregatorFactories factories, String[] keys, Supplier<Weight[]> filters, boolean keyed,
+    private FiltersAggregator(String name, AggregatorFactories factories, String[] keys, boolean keyed,
             String otherBucketKey, SearchContext context, Aggregator parent, CardinalityUpperBound cardinality,
             Map<String, Object> metadata) throws IOException {
         super(name, factories, context, parent, cardinality.multiply(keys.length + (otherBucketKey == null ? 0 : 1)), metadata);
         this.keyed = keyed;
         this.keys = keys;
-        this.filters = filters;
-        this.showOtherBucket = otherBucketKey != null;
         this.otherBucketKey = otherBucketKey;
-        if (showOtherBucket) {
-            this.totalNumKeys = keys.length + 1;
-        } else {
-            this.totalNumKeys = keys.length;
-        }
-    }
-
-    @Override
-    public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
-            final LeafBucketCollector sub) throws IOException {
-        // no need to provide deleted docs to the filter
-        Weight[] filters = this.filters.get();
-        final Bits[] bits = new Bits[filters.length];
-        for (int i = 0; i < filters.length; ++i) {
-            bits[i] = Lucene.asSequentialAccessBits(ctx.reader().maxDoc(), filters[i].scorerSupplier(ctx));
-        }
-        return new LeafBucketCollectorBase(sub, null) {
-            @Override
-            public void collect(int doc, long bucket) throws IOException {
-                boolean matched = false;
-                for (int i = 0; i < bits.length; i++) {
-                    if (bits[i].get(doc)) {
-                        collectBucket(sub, doc, bucketOrd(bucket, i));
-                        matched = true;
-                    }
-                }
-                if (showOtherBucket && !matched) {
-                    collectBucket(sub, doc, bucketOrd(bucket, bits.length));
-                }
-            }
-        };
     }
 
     @Override
     public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException {
-        return buildAggregationsForFixedBucketCount(owningBucketOrds, keys.length + (showOtherBucket ? 1 : 0),
+        return buildAggregationsForFixedBucketCount(owningBucketOrds, keys.length + (otherBucketKey == null ? 0 : 1),
             (offsetInOwningOrd, docCount, subAggregationResults) -> {
                 if (offsetInOwningOrd < keys.length) {
                     return new InternalFilters.InternalBucket(keys[offsetInOwningOrd], docCount,
@@ -185,7 +255,7 @@ public InternalAggregation buildEmptyAggregation() {
             buckets.add(bucket);
         }
 
-        if (showOtherBucket) {
+        if (otherBucketKey != null) {
             InternalFilters.InternalBucket bucket = new InternalFilters.InternalBucket(otherBucketKey, 0, subAggs, keyed);
             buckets.add(bucket);
         }
@@ -193,8 +263,174 @@ public InternalAggregation buildEmptyAggregation() {
         return new InternalFilters(name, buckets, keyed, metadata());
     }
 
-    final long bucketOrd(long owningBucketOrdinal, int filterOrd) {
-        return owningBucketOrdinal * totalNumKeys + filterOrd;
+    /**
+     * Collects results by running each filter against the searcher and doesn't
+     * build any {@link LeafBucketCollector}s which is generally faster than
+     * {@link Compatible} but doesn't work when there is a parent aggregator
+     * or any child aggregators.
+     */
+    private static class FilterByFilter extends FiltersAggregator {
+        private final Query[] filters;
+        private Weight[] filterWeights;
+        private int segmentsWithDeletedDocs;
+
+        FilterByFilter(
+            String name,
+            String[] keys,
+            Query[] filters,
+            boolean keyed,
+            SearchContext context,
+            Aggregator parent,
+            CardinalityUpperBound cardinality,
+            Map<String, Object> metadata
+        ) throws IOException {
+            super(name, AggregatorFactories.EMPTY, keys, keyed, null, context, parent, cardinality, metadata);
+            this.filters = filters;
+        }
+
+        /**
+         * Instead of returning a {@link LeafBucketCollector} we do the
+         * collection ourselves by running the filters directly. This is safe
+         * because we only use this aggregator if there isn't a {@code parent}
+         * which would change how we collect buckets and because we take the
+         * top level query into account when building the filters.
+         */
+        @Override
+        protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException {
+            if (filterWeights == null) {
+                filterWeights = buildWeights(topLevelQuery(), filters);
+            }
+            Bits live = ctx.reader().getLiveDocs();
+            if (live != null) {
+                // Count the segment so the segments_with_deleted_docs debug entry is accurate.
+                segmentsWithDeletedDocs++;
+            }
+            for (int filterOrd = 0; filterOrd < filters.length; filterOrd++) {
+                BulkScorer scorer = filterWeights[filterOrd].bulkScorer(ctx);
+                if (scorer == null) {
+                    // the filter doesn't match any docs
+                    continue;
+                }
+                TotalHitCountCollector collector = new TotalHitCountCollector();
+                scorer.score(collector, live);
+                incrementBucketDocCount(filterOrd, collector.getTotalHits());
+            }
+            // Throwing this exception is how we communicate to the collection mechanism that we don't need the segment.
+            throw new CollectionTerminatedException();
+        }
+
+        @Override
+        public void collectDebugInfo(BiConsumer<String, Object> add) {
+            super.collectDebugInfo(add);
+            add.accept("segments_with_deleted_docs", segmentsWithDeletedDocs);
+        }
+    }
+
+    /**
+     * Collects results by building a {@link Bits} per filter and testing if
+     * each doc sent to its {@link LeafBucketCollector} is in each filter,
+     * which is generally slower than {@link FilterByFilter} but is compatible
+     * with parent and child aggregations.
+     */
+    private static class Compatible extends FiltersAggregator {
+        private final Query[] filters;
+        private Weight[] filterWeights;
+
+        private final int totalNumKeys;
+
+        Compatible(
+            String name,
+            AggregatorFactories factories,
+            String[] keys,
+            Query[] filters,
+            boolean keyed,
+            String otherBucketKey,
+            SearchContext context,
+            Aggregator parent,
+            CardinalityUpperBound cardinality,
+            Map<String, Object> metadata
+        ) throws IOException {
+            super(name, factories, keys, keyed, otherBucketKey, context, parent, cardinality, metadata);
+            this.filters = filters;
+            if (otherBucketKey == null) {
+                this.totalNumKeys = keys.length;
+            } else {
+                this.totalNumKeys = keys.length + 1;
+            }
+        }
+
+        @Override
+        protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException {
+            if (filterWeights == null) {
+                filterWeights = buildWeights(new MatchAllDocsQuery(), filters);
+            }
+            final Bits[] bits = new Bits[filters.length];
+            for (int i = 0; i < filters.length; ++i) {
+                bits[i] = Lucene.asSequentialAccessBits(ctx.reader().maxDoc(), filterWeights[i].scorerSupplier(ctx));
+            }
+            return new LeafBucketCollectorBase(sub, null) {
+                @Override
+                public void collect(int doc, long bucket) throws IOException {
+                    boolean matched = false;
+                    for (int i = 0; i < bits.length; i++) {
+                        if (bits[i].get(doc)) {
+                            collectBucket(sub, doc, bucketOrd(bucket, i));
+                            matched = true;
+                        }
+                    }
+                    if (otherBucketKey != null && false == matched) {
+                        collectBucket(sub, doc, bucketOrd(bucket, bits.length));
+                    }
+                }
+            };
+        }
+
+        final long bucketOrd(long owningBucketOrdinal, int filterOrd) {
+            return owningBucketOrdinal * totalNumKeys + filterOrd;
+        }
+    }
+
+    protected Weight[] buildWeights(Query topLevelQuery, Query[] filters) throws IOException {
+        Weight[] weights = new Weight[filters.length];
+        for (int i = 0; i < filters.length; ++i) {
+            Query filter = filterMatchingBoth(topLevelQuery, filters[i]);
+            weights[i] = searcher().createWeight(searcher().rewrite(filter), ScoreMode.COMPLETE_NO_SCORES, 1);
+        }
+        return weights;
+    }
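Stripped of the aggregator plumbing, the fast path is just "count each filter's hits per segment" with stock Lucene — a standalone sketch where `searcher`, `filter`, and `leafCtx` are assumed to come from the surrounding context:

    Weight weight = searcher.createWeight(searcher.rewrite(filter), ScoreMode.COMPLETE_NO_SCORES, 1f);
    BulkScorer scorer = weight.bulkScorer(leafCtx);
    int count = 0;
    if (scorer != null) {
        // TotalHitCountCollector is a SimpleCollector, so it doubles as a LeafCollector.
        TotalHitCountCollector collector = new TotalHitCountCollector();
        scorer.score(collector, leafCtx.reader().getLiveDocs());
        count = collector.getTotalHits();
    }
    // `count` is exactly the bucket's doc_count; no per-document callbacks ran.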
The "merging together" + * part is provides a fairly substantial speed boost then executing a + * top level query on a date and a filter on a date. This kind of thing + * is very common when visualizing logs and metrics. + */ + private Query filterMatchingBoth(Query lhs, Query rhs) { + if (lhs instanceof MatchAllDocsQuery) { + return rhs; + } + if (rhs instanceof MatchAllDocsQuery) { + return lhs; + } + Query unwrappedLhs = unwrap(lhs); + Query unwrappedRhs = unwrap(rhs); + if (unwrappedLhs instanceof PointRangeQuery && unwrappedRhs instanceof PointRangeQuery) { + Query merged = MergedPointRangeQuery.merge((PointRangeQuery) unwrappedLhs, (PointRangeQuery) unwrappedRhs); + if (merged != null) { + // Should we rewrap here? + return merged; + } + } + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + builder.add(lhs, BooleanClause.Occur.MUST); + builder.add(rhs, BooleanClause.Occur.MUST); + return builder.build(); + } + + private Query unwrap(Query query) { + if (query instanceof IndexSortSortedNumericDocValuesRangeQuery) { + query = ((IndexSortSortedNumericDocValuesRangeQuery) query).getFallbackQuery(); + } + if (query instanceof IndexOrDocValuesQuery) { + query = ((IndexOrDocValuesQuery) query).getIndexQuery(); + } + return query; + } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorFactory.java index 83d49dd12d626..0594286bd77f3 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorFactory.java @@ -19,11 +19,8 @@ package org.elasticsearch.search.aggregations.bucket.filter; -import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.Weight; -import org.elasticsearch.search.aggregations.AggregationInitializationException; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; import org.elasticsearch.search.aggregations.AggregatorFactory; @@ -66,31 +63,18 @@ public FiltersAggregatorFactory(String name, List filters, boolean * necessary. This is done lazily so that the {@link Weight}s are only * created if the aggregation collects documents reducing the overhead of * the aggregation in the case where no documents are collected. - * - * Note that as aggregations are initialsed and executed in a serial manner, + *

+ * Note that as aggregations are initialized and executed in a serial manner, * no concurrency considerations are necessary here. */ - public Weight[] getWeights(SearchContext searchContext) { - if (weights == null) { - try { - IndexSearcher contextSearcher = searchContext.searcher(); - weights = new Weight[filters.length]; - for (int i = 0; i < filters.length; ++i) { - this.weights[i] = contextSearcher.createWeight(contextSearcher.rewrite(filters[i]), ScoreMode.COMPLETE_NO_SCORES, 1); - } - } catch (IOException e) { - throw new AggregationInitializationException("Failed to initialse filters for aggregation [" + name() + "]", e); - } - } - return weights; - } + @Override public Aggregator createInternal(SearchContext searchContext, Aggregator parent, CardinalityUpperBound cardinality, Map metadata) throws IOException { - return new FiltersAggregator(name, factories, keys, () -> getWeights(searchContext), keyed, + return FiltersAggregator.build(name, factories, keys, filters, keyed, otherBucket ? otherBucketKey : null, searchContext, parent, cardinality, metadata); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java new file mode 100644 index 0000000000000..4114727d91b36 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java @@ -0,0 +1,209 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.aggregations.bucket.filter; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PointValues; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.BulkScorer; +import org.apache.lucene.search.ConstantScoreWeight; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.PointRangeQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.ScorerSupplier; +import org.apache.lucene.search.Weight; + +import java.io.IOException; +import java.util.Objects; + +import static org.apache.lucene.util.FutureArrays.compareUnsigned; + +/** + * Query merging two point in range queries. + */ +public class MergedPointRangeQuery extends Query { + /** + * Merge two {@linkplain PointRangeQuery}s into a {@linkplain MergedPointRangeQuery} + * that matches points that match both filters. + */ + public static Query merge(PointRangeQuery lhs, PointRangeQuery rhs) { + if (lhs.equals(rhs)) { + // Lucky case! 
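The point of the merge introduced in the next file is that a top-level range and a bucket range on the same field collapse into a single tree walk — a sketch with hypothetical epoch-milli bounds (the cast is fine here because `LongPoint` range queries are `PointRangeQuery`s):

    Query topLevel = LongPoint.newRangeQuery("date", jan1, dec31);   // from the search's query
    Query bucket = LongPoint.newRangeQuery("date", feb1, mar1);      // one histogram bucket
    Query merged = MergedPointRangeQuery.merge((PointRangeQuery) topLevel, (PointRangeQuery) bucket);
    // On single-valued segments `merged` visits only [max(jan1, feb1), min(dec31, mar1)];
    // multi-valued segments fall back to a BooleanQuery over both ranges.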
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java
new file mode 100644
index 0000000000000..4114727d91b36
--- /dev/null
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java
@@ -0,0 +1,209 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.aggregations.bucket.filter;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PointValues;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.BulkScorer;
+import org.apache.lucene.search.ConstantScoreWeight;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchNoDocsQuery;
+import org.apache.lucene.search.PointRangeQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreMode;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.ScorerSupplier;
+import org.apache.lucene.search.Weight;
+
+import java.io.IOException;
+import java.util.Objects;
+
+import static org.apache.lucene.util.FutureArrays.compareUnsigned;
+
+/**
+ * Query merging two point range queries.
+ */
+public class MergedPointRangeQuery extends Query {
+    /**
+     * Merge two {@linkplain PointRangeQuery}s into a {@linkplain MergedPointRangeQuery}
+     * that matches points that match both filters.
+     */
+    public static Query merge(PointRangeQuery lhs, PointRangeQuery rhs) {
+        if (lhs.equals(rhs)) {
+            // Lucky case! The queries were the same so their intersection is just the query itself.
+            return lhs;
+        }
+        if (lhs.getField() != rhs.getField() || lhs.getNumDims() != rhs.getNumDims() || lhs.getBytesPerDim() != rhs.getBytesPerDim()) {
+            return null;
+        }
+        return new MergedPointRangeQuery(lhs, rhs);
+    }
+
+    private final String field;
+    private final Query delegateForMultiValuedSegments;
+    private final Query delegateForSingleValuedSegments;
+
+    private MergedPointRangeQuery(PointRangeQuery lhs, PointRangeQuery rhs) {
+        field = lhs.getField();
+        delegateForMultiValuedSegments = new BooleanQuery.Builder().add(lhs, Occur.MUST).add(rhs, Occur.MUST).build();
+        int numDims = lhs.getNumDims();
+        int bytesPerDim = lhs.getBytesPerDim();
+        this.delegateForSingleValuedSegments = pickDelegateForSingleValuedSegments(
+            mergeBound(lhs.getLowerPoint(), rhs.getLowerPoint(), numDims, bytesPerDim, true),
+            mergeBound(lhs.getUpperPoint(), rhs.getUpperPoint(), numDims, bytesPerDim, false),
+            numDims,
+            bytesPerDim
+        );
+    }
+
+    private Query pickDelegateForSingleValuedSegments(byte[] lower, byte[] upper, int numDims, int bytesPerDim) {
+        // If we ended up with disjoint ranges in any dimension then on single valued segments we can't match any docs.
+        for (int dim = 0; dim < numDims; dim++) {
+            int offset = dim * bytesPerDim;
+            if (compareUnsigned(lower, offset, offset + bytesPerDim, upper, offset, offset + bytesPerDim) > 0) {
+                return new MatchNoDocsQuery("disjoint ranges");
+            }
+        }
+        // Otherwise on single valued segments we can only match docs that match the intersection of the two ranges.
+        return new PointRangeQuery(field, lower, upper, numDims) {
+            @Override
+            protected String toString(int dimension, byte[] value) {
+                // Stolen from Lucene's Binary range query. It'd be best to delegate, but the method isn't visible.
+                StringBuilder sb = new StringBuilder();
+                sb.append("(");
+                for (int i = 0; i < value.length; i++) {
+                    if (i > 0) {
+                        sb.append(' ');
+                    }
+                    sb.append(Integer.toHexString(value[i] & 0xFF));
+                }
+                sb.append(')');
+                return sb.toString();
+            }
+        };
+    }
+
+    @Override
+    public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
+        return new ConstantScoreWeight(this, boost) {
+            Weight multiValuedSegmentWeight;
+            Weight singleValuedSegmentWeight;
+
+            @Override
+            public boolean isCacheable(LeafReaderContext ctx) {
+                return true;
+            }
+
+            @Override
+            public Scorer scorer(LeafReaderContext context) throws IOException {
+                ScorerSupplier scorerSupplier = scorerSupplier(context);
+                if (scorerSupplier == null) {
+                    return null;
+                }
+                return scorerSupplier.get(Long.MAX_VALUE);
+            }
+
+            @Override
+            public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
+                /*
+                 * If we're sure docs only have a single value for the field
+                 * we can pick the most compact bounds. If there are multiple values
+                 * for the field we have to run the boolean query.
+                 */
+                PointValues points = context.reader().getPointValues(field);
+                if (points == null) {
+                    return null;
+                }
+                if (points.size() == points.getDocCount()) {
+                    // Each doc that has points has exactly one point.
+ return singleValuedSegmentWeight().scorerSupplier(context); + } + return multiValuedSegmentWeight().scorerSupplier(context); + } + + @Override + public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { + PointValues points = context.reader().getPointValues(field); + if (points == null) { + return null; + } + if (points.size() == points.getDocCount()) { + // Each doc that has points has exactly one point. + return singleValuedSegmentWeight().bulkScorer(context); + } + return multiValuedSegmentWeight().bulkScorer(context); + } + + private Weight singleValuedSegmentWeight() throws IOException { + if (singleValuedSegmentWeight == null) { + singleValuedSegmentWeight = delegateForSingleValuedSegments.createWeight(searcher, scoreMode, boost); + } + return singleValuedSegmentWeight; + } + + private Weight multiValuedSegmentWeight() throws IOException { + if (multiValuedSegmentWeight == null) { + multiValuedSegmentWeight = delegateForMultiValuedSegments.createWeight(searcher, scoreMode, boost); + } + return multiValuedSegmentWeight; + } + }; + } + + /** + * The query used when we have single valued segments. + */ + Query delegateForSingleValuedSegments() { + return delegateForSingleValuedSegments; + } + + @Override + public String toString(String field) { + return "MergedPointRange[" + delegateForMultiValuedSegments.toString(field) + "]"; + } + + @Override + public boolean equals(Object obj) { + if (obj == null || obj.getClass() != getClass()) { + return false; + } + MergedPointRangeQuery other = (MergedPointRangeQuery) obj; + return delegateForMultiValuedSegments.equals(other.delegateForMultiValuedSegments) + && delegateForSingleValuedSegments.equals(other.delegateForSingleValuedSegments); + } + + @Override + public int hashCode() { + return Objects.hash(classHash(), delegateForMultiValuedSegments, delegateForSingleValuedSegments); + } + + private static byte[] mergeBound(byte[] lhs, byte[] rhs, int numDims, int bytesPerDim, boolean lower) { + byte[] merged = new byte[lhs.length]; + for (int dim = 0; dim < numDims; dim++) { + int offset = dim * bytesPerDim; + boolean cmp = compareUnsigned(lhs, offset, offset + bytesPerDim, rhs, offset, offset + bytesPerDim) <= 0; + byte[] from = (cmp ^ lower) ? 
lhs : rhs; + System.arraycopy(from, offset, merged, offset, bytesPerDim); + } + return merged; + } +} diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index 0ce24385f72b7..125ca3d2c50c6 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -22,35 +22,184 @@ import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.util.CollectionUtil; +import org.elasticsearch.common.CheckedFunction; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Rounding; +import org.elasticsearch.common.Rounding.DateTimeUnit; import org.elasticsearch.common.lease.Releasables; import org.elasticsearch.search.DocValueFormat; +import org.elasticsearch.search.aggregations.AdaptingAggregator; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; import org.elasticsearch.search.aggregations.BucketOrder; import org.elasticsearch.search.aggregations.CardinalityUpperBound; import org.elasticsearch.search.aggregations.InternalAggregation; +import org.elasticsearch.search.aggregations.InternalAggregations; import org.elasticsearch.search.aggregations.LeafBucketCollector; import org.elasticsearch.search.aggregations.LeafBucketCollectorBase; import org.elasticsearch.search.aggregations.bucket.BucketsAggregator; +import org.elasticsearch.search.aggregations.bucket.filter.FiltersAggregator; +import org.elasticsearch.search.aggregations.bucket.range.InternalDateRange; +import org.elasticsearch.search.aggregations.bucket.range.RangeAggregationBuilder; +import org.elasticsearch.search.aggregations.bucket.range.RangeAggregator; +import org.elasticsearch.search.aggregations.bucket.range.RangeAggregatorSupplier; import org.elasticsearch.search.aggregations.bucket.terms.LongKeyedBucketOrds; import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; +import java.util.ArrayList; import java.util.Collections; +import java.util.List; import java.util.Map; import java.util.function.BiConsumer; /** - * An aggregator for date values. Every date is rounded down using a configured - * {@link Rounding}. - * - * @see Rounding + * Aggregator for {@code date_histogram} that rounds values using + * {@link Rounding}. See {@link FromDateRange} which also aggregates for + * {@code date_histogram} but does so by running a {@code range} aggregation + * over the date and transforming the results. In general + * {@link FromDateRange} is faster than {@link DateHistogramAggregator} + * but {@linkplain DateHistogramAggregator} works when we can't precalculate + * all of the {@link Rounding.Prepared#fixedRoundingPoints() fixed rounding points}. */ class DateHistogramAggregator extends BucketsAggregator implements SizedBucketAggregator { + /** + * Build an {@link Aggregator} for a {@code date_histogram} aggregation. 
+ * If we can determine the bucket boundaries from
+ * {@link Rounding.Prepared#fixedRoundingPoints()} we use
+ * {@link RangeAggregator} to do the actual collecting, otherwise we use
+ * a specialized {@link DateHistogramAggregator Aggregator} specifically
+ * for the {@code date_histogram}s. We prefer to delegate to the
+ * {@linkplain RangeAggregator} because it can sometimes be further
+ * optimized into a {@link FiltersAggregator}. Even when it can't be
+ * optimized, it is going to be marginally faster and consume less memory
+ * than the {@linkplain DateHistogramAggregator} because it doesn't need
+ * to round points and because it can pass precise cardinality
+ * estimates to its child aggregations.
+ */
+public static Aggregator build(
+    String name,
+    AggregatorFactories factories,
+    Rounding rounding,
+    BucketOrder order,
+    boolean keyed,
+    long minDocCount,
+    @Nullable LongBounds extendedBounds,
+    @Nullable LongBounds hardBounds,
+    ValuesSourceConfig valuesSourceConfig,
+    SearchContext context,
+    Aggregator parent,
+    CardinalityUpperBound cardinality,
+    Map<String, Object> metadata
+) throws IOException {
+    Rounding.Prepared preparedRounding = valuesSourceConfig.roundingPreparer().apply(rounding);
+    Aggregator asRange = adaptIntoRangeOrNull(
+        name,
+        factories,
+        rounding,
+        preparedRounding,
+        order,
+        keyed,
+        minDocCount,
+        extendedBounds,
+        hardBounds,
+        valuesSourceConfig,
+        context,
+        parent,
+        cardinality,
+        metadata
+    );
+    if (asRange != null) {
+        return asRange;
+    }
+    return new DateHistogramAggregator(
+        name,
+        factories,
+        rounding,
+        preparedRounding,
+        order,
+        keyed,
+        minDocCount,
+        extendedBounds,
+        hardBounds,
+        valuesSourceConfig,
+        context,
+        parent,
+        cardinality,
+        metadata
+    );
+}
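The `MAX_ACCURATE_BOUND` guard in the adapter just below exists because the range agg works in `double`s, and a `double` stops being able to represent every `long` above 2^53 — a quick demonstration:

    long bound = 1L << 53;               // RangeAggregator.MAX_ACCURATE_BOUND
    long above = bound + 1;              // 9007199254740993
    double asDouble = (double) above;    // rounds to 9007199254740992.0
    assert (long) asDouble == bound;     // the +1 was lost, so bucket edges could shift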
+
+    private static FromDateRange adaptIntoRangeOrNull(
+        String name,
+        AggregatorFactories factories,
+        Rounding rounding,
+        Rounding.Prepared preparedRounding,
+        BucketOrder order,
+        boolean keyed,
+        long minDocCount,
+        @Nullable LongBounds extendedBounds,
+        @Nullable LongBounds hardBounds,
+        ValuesSourceConfig valuesSourceConfig,
+        SearchContext context,
+        Aggregator parent,
+        CardinalityUpperBound cardinality,
+        Map<String, Object> metadata
+    ) throws IOException {
+        if (hardBounds != null || extendedBounds != null) {
+            return null;
+        }
+        long[] fixedRoundingPoints = preparedRounding.fixedRoundingPoints();
+        if (fixedRoundingPoints == null) {
+            return null;
+        }
+        // Range aggs use a double to aggregate and we don't want to lose precision.
+        long min = fixedRoundingPoints[0];
+        long max = fixedRoundingPoints[fixedRoundingPoints.length - 1];
+        if (min < -RangeAggregator.MAX_ACCURATE_BOUND || min > RangeAggregator.MAX_ACCURATE_BOUND) {
+            return null;
+        }
+        if (max < -RangeAggregator.MAX_ACCURATE_BOUND || max > RangeAggregator.MAX_ACCURATE_BOUND) {
+            return null;
+        }
+        RangeAggregatorSupplier rangeSupplier = context.getQueryShardContext()
+            .getValuesSourceRegistry()
+            .getAggregator(RangeAggregationBuilder.REGISTRY_KEY, valuesSourceConfig);
+        if (rangeSupplier == null) {
+            return null;
+        }
+        RangeAggregator.Range[] ranges = new RangeAggregator.Range[fixedRoundingPoints.length];
+        for (int i = 0; i < fixedRoundingPoints.length - 1; i++) {
+            ranges[i] = new RangeAggregator.Range(null, (double) fixedRoundingPoints[i], (double) fixedRoundingPoints[i + 1]);
+        }
+        ranges[ranges.length - 1] = new RangeAggregator.Range(null, (double) fixedRoundingPoints[fixedRoundingPoints.length - 1], null);
+        return new DateHistogramAggregator.FromDateRange(
+            parent,
+            factories,
+            subAggregators -> rangeSupplier.build(
+                name,
+                subAggregators,
+                valuesSourceConfig,
+                InternalDateRange.FACTORY,
+                ranges,
+                false,
+                context,
+                parent,
+                cardinality,
+                metadata
+            ),
+            valuesSourceConfig.format(),
+            rounding,
+            preparedRounding,
+            order,
+            minDocCount,
+            extendedBounds,
+            keyed,
+            fixedRoundingPoints
+        );
+    }
 
     private final ValuesSource.Numeric valuesSource;
     private final DocValueFormat formatter;
@@ -72,6 +221,7 @@ class DateHistogramAggregator extends BucketsAggregator implements SizedBucketAg
         String name,
         AggregatorFactories factories,
         Rounding rounding,
+        Rounding.Prepared preparedRounding,
         BucketOrder order,
         boolean keyed,
         long minDocCount,
@@ -86,7 +236,7 @@ class DateHistogramAggregator extends BucketsAggregator implements SizedBucketAg
         super(name, factories, aggregationContext, parent, CardinalityUpperBound.MANY, metadata);
 
         this.rounding = rounding;
-        this.preparedRounding = valuesSourceConfig.roundingPreparer().apply(rounding);
+        this.preparedRounding = preparedRounding;
         this.order = order;
         order.validate(this);
         this.keyed = keyed;
@@ -153,8 +303,6 @@ public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws I
         // the contract of the histogram aggregation is that shards must return buckets ordered by key in ascending order
         CollectionUtil.introSort(buckets, BucketOrder.key(true).comparator());
 
-        // value source will be null for unmapped fields
-        // Important: use `rounding` here, not `shardRounding`
         InternalDateHistogram.EmptyBucketInfo emptyBucketInfo = minDocCount == 0 ?
             new InternalDateHistogram.EmptyBucketInfo(rounding.withoutOffset(), buildEmptySubAggregations(), extendedBounds) : null;
@@ -195,4 +343,93 @@ public double bucketSize(long bucket, Rounding.DateTimeUnit unitSize) {
             return 1.0;
         }
     }
+
+    static class FromDateRange extends AdaptingAggregator implements SizedBucketAggregator {
+        private final DocValueFormat format;
+        private final Rounding rounding;
+        private final Rounding.Prepared preparedRounding;
+        private final BucketOrder order;
+        private final long minDocCount;
+        private final LongBounds extendedBounds;
+        private final boolean keyed;
+        private final long[] fixedRoundingPoints;
+
+        FromDateRange(
+            Aggregator parent,
+            AggregatorFactories subAggregators,
+            CheckedFunction<AggregatorFactories, Aggregator, IOException> delegate,
+            DocValueFormat format,
+            Rounding rounding,
+            Rounding.Prepared preparedRounding,
+            BucketOrder order,
+            long minDocCount,
+            LongBounds extendedBounds,
+            boolean keyed,
+            long[] fixedRoundingPoints
+        ) throws IOException {
+            super(parent, subAggregators, delegate);
+            this.format = format;
+            this.rounding = rounding;
+            this.preparedRounding = preparedRounding;
+            this.order = order;
+            order.validate(this);
+            this.minDocCount = minDocCount;
+            this.extendedBounds = extendedBounds;
+            this.keyed = keyed;
+            this.fixedRoundingPoints = fixedRoundingPoints;
+        }
+
+        @Override
+        protected InternalAggregation adapt(InternalAggregation delegateResult) {
+            InternalDateRange range = (InternalDateRange) delegateResult;
+            List<InternalDateHistogram.Bucket> buckets = new ArrayList<>(range.getBuckets().size());
+            for (InternalDateRange.Bucket rangeBucket : range.getBuckets()) {
+                if (rangeBucket.getDocCount() > 0) {
+                    buckets.add(
+                        new InternalDateHistogram.Bucket(
+                            rangeBucket.getFrom().toInstant().toEpochMilli(),
+                            rangeBucket.getDocCount(),
+                            keyed,
+                            format,
+                            rangeBucket.getAggregations()
+                        )
+                    );
+                }
+            }
+            CollectionUtil.introSort(buckets, BucketOrder.key(true).comparator());
+
+            InternalDateHistogram.EmptyBucketInfo emptyBucketInfo = minDocCount == 0
+                ? new InternalDateHistogram.EmptyBucketInfo(rounding.withoutOffset(), buildEmptySubAggregations(), extendedBounds)
+                : null;
+            return new InternalDateHistogram(
+                range.getName(),
+                buckets,
+                order,
+                minDocCount,
+                rounding.offset(),
+                emptyBucketInfo,
+                format,
+                keyed,
+                range.getMetadata()
+            );
+        }
+
+        public final InternalAggregations buildEmptySubAggregations() {
+            List<InternalAggregation> aggs = new ArrayList<>();
+            for (Aggregator aggregator : subAggregators()) {
+                aggs.add(aggregator.buildEmptyAggregation());
+            }
+            return InternalAggregations.from(aggs);
+        }
+
+        @Override
+        public double bucketSize(long bucket, DateTimeUnit unitSize) {
+            if (unitSize != null) {
+                long startPoint = bucket < fixedRoundingPoints.length ? fixedRoundingPoints[(int) bucket] : Long.MIN_VALUE;
+                return preparedRounding.roundingSize(startPoint, unitSize);
+            } else {
+                return 1.0;
+            }
+        }
+    }
 }
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java
index 2ace765317f52..36d0dd3a6334b 100644
--- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java
@@ -42,7 +42,7 @@ public static void registerAggregators(ValuesSourceRegistry.Builder builder) {
         builder.register(
             DateHistogramAggregationBuilder.REGISTRY_KEY,
             List.of(CoreValuesSourceType.DATE, CoreValuesSourceType.NUMERIC, CoreValuesSourceType.BOOLEAN),
-            DateHistogramAggregator::new,
+            DateHistogramAggregator::build,
             true);
 
         builder.register(DateHistogramAggregationBuilder.REGISTRY_KEY, CoreValuesSourceType.RANGE, DateRangeHistogramAggregator::new, true);
@@ -111,7 +111,7 @@ protected Aggregator doCreateInternal(
     protected Aggregator createUnmapped(SearchContext searchContext, Aggregator parent, Map<String, Object> metadata) throws IOException {
-        return new DateHistogramAggregator(name, factories, rounding, order, keyed, minDocCount, extendedBounds, hardBounds,
+        return new DateHistogramAggregator(name, factories, rounding, null, order, keyed, minDocCount, extendedBounds, hardBounds,
             config, searchContext, parent, CardinalityUpperBound.NONE, metadata);
     }
 }
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/AbstractRangeAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/AbstractRangeAggregatorFactory.java
index 16f6ecba9c0bc..6087346295744 100644
--- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/AbstractRangeAggregatorFactory.java
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/AbstractRangeAggregatorFactory.java
@@ -27,7 +27,6 @@
 import org.elasticsearch.search.aggregations.bucket.range.RangeAggregator.Unmapped;
 import org.elasticsearch.search.aggregations.support.AggregationContext;
 import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
-import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric;
 import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory;
 import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;
 import org.elasticsearch.search.aggregations.support.ValuesSourceRegistry;
@@ -50,7 +49,7 @@ public static void registerAggregators(
         builder.register(
             registryKey,
             org.elasticsearch.common.collect.List.of(CoreValuesSourceType.NUMERIC, CoreValuesSourceType.DATE, CoreValuesSourceType.BOOLEAN),
-            RangeAggregator::new,
+            RangeAggregator::build,
             true);
     }
 
@@ -91,8 +90,7 @@ protected Aggregator doCreateInternal(
             .build(
                 name,
                 factories,
-                (Numeric) config.getValuesSource(),
-                config.format(),
+                config,
                 rangeFactory,
                 ranges,
                 keyed,
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/GeoDistanceRangeAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/GeoDistanceRangeAggregatorFactory.java
index 790ae59c66b31..8e37af016987e 100644
--- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/GeoDistanceRangeAggregatorFactory.java
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/GeoDistanceRangeAggregatorFactory.java
@@ -66,7 +66,7 @@ public static void registerAggregators(ValuesSourceRegistry.Builder builder) {
                 cardinality,
                 metadata) -> {
                 DistanceSource distanceSource = new DistanceSource((ValuesSource.GeoPoint) valuesSource, distanceType, origin, units);
-                return new RangeAggregator(
+                return RangeAggregator.buildWithoutAttemptedToAdaptToFilters(
                     name,
                     factories,
                     distanceSource,
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalDateRange.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalDateRange.java
index 2c937ab104c54..fd68e3e1ac4b5 100644
--- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalDateRange.java
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalDateRange.java
@@ -27,6 +27,7 @@
 import java.io.IOException;
 import java.time.Instant;
 import java.time.ZoneOffset;
+import java.time.ZonedDateTime;
 import java.util.List;
 import java.util.Map;
 
@@ -46,13 +47,13 @@ public Bucket(String key, double from, double to, long docCount, InternalAggrega
         }
 
         @Override
-        public Object getFrom() {
+        public ZonedDateTime getFrom() {
             return Double.isInfinite(((Number) from).doubleValue()) ? null :
                 Instant.ofEpochMilli(((Number) from).longValue()).atZone(ZoneOffset.UTC);
         }
 
         @Override
-        public Object getTo() {
+        public ZonedDateTime getTo() {
             return Double.isInfinite(((Number) to).doubleValue()) ? null :
                 Instant.ofEpochMilli(((Number) to).longValue()).atZone(ZoneOffset.UTC);
         }
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalRange.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalRange.java
index a8c61c0ad368b..af166a01b0ece 100644
--- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalRange.java
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalRange.java
@@ -23,7 +23,6 @@
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.search.DocValueFormat;
-import org.elasticsearch.search.aggregations.Aggregations;
 import org.elasticsearch.search.aggregations.InternalAggregation;
 import org.elasticsearch.search.aggregations.InternalAggregations;
 import org.elasticsearch.search.aggregations.InternalMultiBucketAggregation;
@@ -114,7 +113,7 @@ public long getDocCount() {
         }
 
         @Override
-        public Aggregations getAggregations() {
+        public InternalAggregations getAggregations() {
             return aggregations;
         }
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java
index 82005aaaf40d4..d28107318e788 100644
--- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java
@@ -19,8 +19,11 @@
 package org.elasticsearch.search.aggregations.bucket.range;
 
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreMode;
+import org.elasticsearch.common.CheckedFunction;
 import org.elasticsearch.common.ParseField;
+import org.elasticsearch.common.geo.ShapeRelation;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.io.stream.Writeable;
@@ -31,7 +34,10 @@
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
+import org.elasticsearch.index.mapper.DateFieldMapper.DateFieldType;
+import org.elasticsearch.index.mapper.DateFieldMapper.Resolution;
 import org.elasticsearch.search.DocValueFormat;
+import org.elasticsearch.search.aggregations.AdaptingAggregator;
 import org.elasticsearch.search.aggregations.Aggregator;
 import org.elasticsearch.search.aggregations.AggregatorFactories;
 import org.elasticsearch.search.aggregations.CardinalityUpperBound;
@@ -41,7 +47,12 @@
 import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
 import org.elasticsearch.search.aggregations.NonCollectingAggregator;
 import org.elasticsearch.search.aggregations.bucket.BucketsAggregator;
+import org.elasticsearch.search.aggregations.bucket.filter.FiltersAggregator;
+import org.elasticsearch.search.aggregations.bucket.filter.InternalFilters;
+import org.elasticsearch.search.aggregations.bucket.range.InternalRange.Factory;
 import org.elasticsearch.search.aggregations.support.ValuesSource;
+import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric;
+import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;
 import org.elasticsearch.search.internal.SearchContext;
 
 import java.io.IOException;
@@ -52,7 +63,21 @@
 
 import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg;
 
-public class RangeAggregator extends BucketsAggregator {
+/**
+ * Aggregator for {@code range}. There are two known subclasses,
+ * {@link NoOverlap} which is fast but only compatible with ranges that
+ * don't have overlaps and {@link Overlap} which handles overlapping
+ * ranges. There is also {@link FromFilters} which isn't a subclass
+ * but is also a functional aggregator for {@code range}.
+ * {@link RangeAggregator#build} will build the fastest of the three
+ * that is compatible with the requested configuration.
+ */
+public abstract class RangeAggregator extends BucketsAggregator {
+    /**
+     * The maximum {@code long} that can accurately fit into the
+     * {@code double} precision floating point bounds.
+     */
+    public static final long MAX_ACCURATE_BOUND = 1L << 53;
 
     public static final ParseField RANGES_FIELD = new ParseField("ranges");
     public static final ParseField KEYED_FIELD = new ParseField("keyed");
@@ -215,15 +240,198 @@ public boolean equals(Object obj) {
         }
     }
 
-    final ValuesSource.Numeric valuesSource;
-    final DocValueFormat format;
-    final Range[] ranges;
-    final boolean keyed;
-    final InternalRange.Factory rangeFactory;
+    /**
+     * Build an {@link Aggregator} for a {@code range} aggregation. If the
+     * {@code ranges} can be converted into filters then it builds a
+     * {@link FiltersAggregator} and uses that to collect the results
+     * if that aggregator can run in "filter by filter"
+     * collection mode. If it can't then we'll collect the ranges using
+     * a native {@link RangeAggregator} which is significantly faster
+     * than the "compatible" collection mechanism for the filters agg.
+ */ + public static Aggregator build( + String name, + AggregatorFactories factories, + ValuesSourceConfig valuesSourceConfig, + InternalRange.Factory rangeFactory, + Range[] ranges, + boolean keyed, + SearchContext context, + Aggregator parent, + CardinalityUpperBound cardinality, + Map metadata + ) throws IOException { + Aggregator adapted = adaptIntoFiltersOrNull( + name, + factories, + valuesSourceConfig, + rangeFactory, + ranges, + keyed, + context, + parent, + cardinality, + metadata + ); + if (adapted != null) { + return adapted; + } + return buildWithoutAttemptedToAdaptToFilters( + name, + factories, + (ValuesSource.Numeric) valuesSourceConfig.getValuesSource(), + valuesSourceConfig.format(), + rangeFactory, + ranges, + keyed, + context, + parent, + cardinality, + metadata + ); + } + + public static Aggregator adaptIntoFiltersOrNull( + String name, + AggregatorFactories factories, + ValuesSourceConfig valuesSourceConfig, + InternalRange.Factory rangeFactory, + Range[] ranges, + boolean keyed, + SearchContext context, + Aggregator parent, + CardinalityUpperBound cardinality, + Map metadata + ) throws IOException { + if (false == valuesSourceConfig.alignesWithSearchIndex()) { + return null; + } + // TODO bail here for runtime fields. They'll be slower this way. Maybe we can somehow look at the Query? + if (valuesSourceConfig.fieldType() instanceof DateFieldType + && ((DateFieldType) valuesSourceConfig.fieldType()).resolution() == Resolution.NANOSECONDS) { + // We don't generate sensible Queries for nanoseconds. + return null; + } + boolean wholeNumbersOnly = false == ((ValuesSource.Numeric) valuesSourceConfig.getValuesSource()).isFloatingPoint(); + String[] keys = new String[ranges.length]; + Query[] filters = new Query[ranges.length]; + for (int i = 0; i < ranges.length; i++) { + /* + * If a range bound is beyond MAX_ACCURATE_BOUND then the `double`s + * that the native range aggregator works with will round + * differently than the `long`s that the filters work with, so we + * can't use the filters. This only matters when the input data is + * a `long` in the first place; if it is floating point then both + * implementations work on `double`s and round the same way. + */ + if (wholeNumbersOnly && ranges[i].from != Double.NEGATIVE_INFINITY && Math.abs(ranges[i].from) > MAX_ACCURATE_BOUND) { + return null; + } + if (wholeNumbersOnly && ranges[i].to != Double.POSITIVE_INFINITY && Math.abs(ranges[i].to) > MAX_ACCURATE_BOUND) { + return null; + } + keys[i] = Integer.toString(i); + /* + * Use the native format on the field rather than the one provided + * on the valuesSourceConfig because the format on the field is what + * we parse. With https://github.com/elastic/elasticsearch/pull/63692 + * we can just cast to a long here and it'll be taken as millis. + */ + DocValueFormat format = valuesSourceConfig.fieldType().docValueFormat(null, null); + // TODO correct the loss of precision from the range somehow.....? + filters[i] = valuesSourceConfig.fieldType() + .rangeQuery( + ranges[i].from == Double.NEGATIVE_INFINITY ? null : format.format(ranges[i].from), + ranges[i].to == Double.POSITIVE_INFINITY ? 
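/*
 * An illustrative sketch (not part of the change) of the precision
 * problem MAX_ACCURATE_BOUND guards against: above 2^53 a double can
 * no longer represent every long, so the double based ranges and the
 * long based filters could disagree about edge values.
 *
 *   long bound = (1L << 53) + 1;    // 9007199254740993
 *   double d = (double) bound;      // rounds to 9007199254740992.0
 *   assert (long) d == (1L << 53);  // the + 1 is silently lost
 */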
null : format.format(ranges[i].to), + true, + false, + ShapeRelation.CONTAINS, + null, + null, + context.getQueryShardContext() + ); + } + FiltersAggregator delegate = FiltersAggregator.buildFilterOrderOrNull( + name, + factories, + keys, + filters, + false, + null, + context, + parent, + cardinality, + metadata + ); + if (delegate == null) { + return null; + } + RangeAggregator.FromFilters fromFilters = new RangeAggregator.FromFilters<>( + parent, + factories, + subAggregators -> { + if (subAggregators.countAggregators() > 0) { + throw new IllegalStateException("didn't expect to have a delegate if there are child aggs"); + } + return delegate; + }, + valuesSourceConfig.format(), + ranges, + keyed, + rangeFactory + ); + return fromFilters; + } + + public static Aggregator buildWithoutAttemptedToAdaptToFilters( + String name, + AggregatorFactories factories, + ValuesSource.Numeric valuesSource, + DocValueFormat format, + InternalRange.Factory rangeFactory, + Range[] ranges, + boolean keyed, + SearchContext context, + Aggregator parent, + CardinalityUpperBound cardinality, + Map metadata + ) throws IOException { + if (hasOverlap(ranges)) { + return new RangeAggregator.Overlap( + name, + factories, + valuesSource, + format, + rangeFactory, + ranges, + keyed, + context, + parent, + cardinality, + metadata + ); + } + return new RangeAggregator.NoOverlap( + name, + factories, + valuesSource, + format, + rangeFactory, + ranges, + keyed, + context, + parent, + cardinality, + metadata + ); + } - final double[] maxTo; + private final ValuesSource.Numeric valuesSource; + private final DocValueFormat format; + protected final Range[] ranges; + private final boolean keyed; + private final InternalRange.Factory rangeFactory; - public RangeAggregator(String name, AggregatorFactories factories, ValuesSource.Numeric valuesSource, DocValueFormat format, + private RangeAggregator(String name, AggregatorFactories factories, ValuesSource.Numeric valuesSource, DocValueFormat format, InternalRange.Factory rangeFactory, Range[] ranges, boolean keyed, SearchContext context, Aggregator parent, CardinalityUpperBound cardinality, Map metadata) throws IOException { @@ -233,15 +441,7 @@ public RangeAggregator(String name, AggregatorFactories factories, ValuesSource. 
this.format = format; this.keyed = keyed; this.rangeFactory = rangeFactory; - this.ranges = ranges; - - maxTo = new double[this.ranges.length]; - maxTo[0] = this.ranges[0].to; - for (int i = 1; i < this.ranges.length; ++i) { - maxTo[i] = Math.max(this.ranges[i].to,maxTo[i-1]); - } - } @Override @@ -253,8 +453,7 @@ public ScoreMode scoreMode() { } @Override - public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, - final LeafBucketCollector sub) throws IOException { + public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { final SortedNumericDoubleValues values = valuesSource.doubleValues(ctx); return new LeafBucketCollectorBase(sub, values) { @Override @@ -263,63 +462,14 @@ public void collect(int doc, long bucket) throws IOException { final int valuesCount = values.docValueCount(); for (int i = 0, lo = 0; i < valuesCount; ++i) { final double value = values.nextValue(); - lo = collect(doc, value, bucket, lo); + lo = RangeAggregator.this.collect(sub, doc, value, bucket, lo); } } } - - private int collect(int doc, double value, long owningBucketOrdinal, int lowBound) throws IOException { - int lo = lowBound, hi = ranges.length - 1; // all candidates are between these indexes - int mid = (lo + hi) >>> 1; - while (lo <= hi) { - if (value < ranges[mid].from) { - hi = mid - 1; - } else if (value >= maxTo[mid]) { - lo = mid + 1; - } else { - break; - } - mid = (lo + hi) >>> 1; - } - if (lo > hi) return lo; // no potential candidate - - // binary search the lower bound - int startLo = lo, startHi = mid; - while (startLo <= startHi) { - final int startMid = (startLo + startHi) >>> 1; - if (value >= maxTo[startMid]) { - startLo = startMid + 1; - } else { - startHi = startMid - 1; - } - } - - // binary search the upper bound - int endLo = mid, endHi = hi; - while (endLo <= endHi) { - final int endMid = (endLo + endHi) >>> 1; - if (value < ranges[endMid].from) { - endHi = endMid - 1; - } else { - endLo = endMid + 1; - } - } - - assert startLo == lowBound || value >= maxTo[startLo - 1]; - assert endHi == ranges.length - 1 || value < ranges[endHi + 1].from; - - for (int i = startLo; i <= endHi; ++i) { - if (ranges[i].matches(value)) { - collectBucket(sub, doc, subBucketOrdinal(owningBucketOrdinal, i)); - } - } - - return endHi + 1; - } }; } - private long subBucketOrdinal(long owningBucketOrdinal, int rangeOrd) { + protected long subBucketOrdinal(long owningBucketOrdinal, int rangeOrd) { return owningBucketOrdinal * ranges.length + rangeOrd; } @@ -383,4 +533,179 @@ public InternalAggregation buildEmptyAggregation() { } } + protected abstract int collect(LeafBucketCollector sub, int doc, double value, long owningBucketOrdinal, int lowBound) + throws IOException; + + private static class NoOverlap extends RangeAggregator { + NoOverlap( + String name, + AggregatorFactories factories, + Numeric valuesSource, + DocValueFormat format, + Factory rangeFactory, + Range[] ranges, + boolean keyed, + SearchContext context, + Aggregator parent, + CardinalityUpperBound cardinality, + Map metadata + ) throws IOException { + super(name, factories, valuesSource, format, rangeFactory, ranges, keyed, context, parent, cardinality, metadata); + } + + @Override + protected int collect(LeafBucketCollector sub, int doc, double value, long owningBucketOrdinal, int lowBound) throws IOException { + int lo = lowBound, hi = ranges.length - 1; + while (lo <= hi) { + final int mid = (lo + hi) >>> 1; + if (value < ranges[mid].from) { + hi = mid - 1; + } else if 
(value >= ranges[mid].to) { + lo = mid + 1; + } else { + collectBucket(sub, doc, subBucketOrdinal(owningBucketOrdinal, mid)); + // The next value must fall in the next bucket to be collected. + return mid + 1; + } + } + return lo; + } + } + + private static class Overlap extends RangeAggregator { + Overlap( + String name, + AggregatorFactories factories, + Numeric valuesSource, + DocValueFormat format, + Factory rangeFactory, + Range[] ranges, + boolean keyed, + SearchContext context, + Aggregator parent, + CardinalityUpperBound cardinality, + Map metadata + ) throws IOException { + super(name, factories, valuesSource, format, rangeFactory, ranges, keyed, context, parent, cardinality, metadata); + maxTo = new double[ranges.length]; + maxTo[0] = ranges[0].to; + for (int i = 1; i < ranges.length; ++i) { + maxTo[i] = Math.max(ranges[i].to, maxTo[i - 1]); + } + } + + private final double[] maxTo; + + @Override + protected int collect(LeafBucketCollector sub, int doc, double value, long owningBucketOrdinal, int lowBound) throws IOException { + int lo = lowBound, hi = ranges.length - 1; // all candidates are between these indexes + int mid = (lo + hi) >>> 1; + while (lo <= hi) { + if (value < ranges[mid].from) { + hi = mid - 1; + } else if (value >= maxTo[mid]) { + lo = mid + 1; + } else { + break; + } + mid = (lo + hi) >>> 1; + } + if (lo > hi) return lo; // no potential candidate + + // binary search the lower bound + int startLo = lo, startHi = mid; + while (startLo <= startHi) { + final int startMid = (startLo + startHi) >>> 1; + if (value >= maxTo[startMid]) { + startLo = startMid + 1; + } else { + startHi = startMid - 1; + } + } + + // binary search the upper bound + int endLo = mid, endHi = hi; + while (endLo <= endHi) { + final int endMid = (endLo + endHi) >>> 1; + if (value < ranges[endMid].from) { + endHi = endMid - 1; + } else { + endLo = endMid + 1; + } + } + + assert startLo == lowBound || value >= maxTo[startLo - 1]; + assert endHi == ranges.length - 1 || value < ranges[endHi + 1].from; + + for (int i = startLo; i <= endHi; ++i) { + if (ranges[i].matches(value)) { + collectBucket(sub, doc, subBucketOrdinal(owningBucketOrdinal, i)); + } + } + + // The next value must fall in the next bucket to be collected. 
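/*
 * A worked example of the maxTo trick above (illustrative, assuming
 * ranges sorted by from): for ranges [0, 10), [5, 50), [20, 30) the
 * running maximum of the upper bounds is maxTo = {10, 50, 50}. A value
 * v = 15 can only match indices with from <= 15 (0 and 1) and with
 * maxTo > 15 (1 and 2), so the two binary searches narrow the linear
 * matches() scan down to the single candidate index 1.
 */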
+ return endHi + 1; + } + } + + private static class FromFilters extends AdaptingAggregator { + private final DocValueFormat format; + private final Range[] ranges; + private final boolean keyed; + private final InternalRange.Factory rangeFactory; + + FromFilters( + Aggregator parent, + AggregatorFactories subAggregators, + CheckedFunction delegate, + DocValueFormat format, + Range[] ranges, + boolean keyed, + InternalRange.Factory rangeFactory + ) throws IOException { + super(parent, subAggregators, delegate); + this.format = format; + this.ranges = ranges; + this.keyed = keyed; + this.rangeFactory = rangeFactory; + } + + @Override + protected InternalAggregation adapt(InternalAggregation delegateResult) { + InternalFilters filters = (InternalFilters) delegateResult; + if (filters.getBuckets().size() != ranges.length) { + throw new IllegalStateException( + "bad number of filters [" + filters.getBuckets().size() + "] expecting [" + ranges.length + "]" + ); + } + List buckets = new ArrayList<>(filters.getBuckets().size()); + for (int i = 0; i < ranges.length; i++) { + Range r = ranges[i]; + InternalFilters.InternalBucket b = filters.getBuckets().get(i); + buckets.add( + rangeFactory.createBucket( + r.getKey(), + r.getFrom(), + r.getTo(), + b.getDocCount(), + (InternalAggregations) b.getAggregations(), + keyed, + format + ) + ); + } + return rangeFactory.create(name(), buckets, format, keyed, filters.getMetadata()); + } + } + + private static boolean hasOverlap(Range[] ranges) { + double lastEnd = ranges[0].to; + for (int i = 1; i < ranges.length; ++i) { + if (ranges[i].from < lastEnd) { + return true; + } + lastEnd = ranges[i].to; + } + return false; + } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorSupplier.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorSupplier.java index 4bbfd3050106d..2e6714ee83b3e 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorSupplier.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorSupplier.java @@ -18,11 +18,10 @@ */ package org.elasticsearch.search.aggregations.bucket.range; -import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; import org.elasticsearch.search.aggregations.CardinalityUpperBound; -import org.elasticsearch.search.aggregations.support.ValuesSource; +import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; @@ -31,9 +30,8 @@ public interface RangeAggregatorSupplier { Aggregator build(String name, AggregatorFactories factories, - ValuesSource.Numeric valuesSource, - DocValueFormat format, - InternalRange.Factory rangeFactory, + ValuesSourceConfig valuesSourceConfig, + InternalRange.Factory rangeFactory, RangeAggregator.Range[] ranges, boolean keyed, SearchContext context, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfig.java b/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfig.java index 5afcd394d645a..bb4386360fc20 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfig.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfig.java @@ -385,11 +385,17 @@ public boolean 
hasGlobalOrdinals() { */ @Nullable public Function getPointReaderOrNull() { - MappedFieldType fieldType = fieldType(); - if (fieldType != null && script() == null && missing() == null) { - return fieldType.pointReaderIfPossible(); - } - return null; + return alignesWithSearchIndex() ? fieldType().pointReaderIfPossible() : null; + } + + /** + * Do {@link ValuesSource}s built by this config line up with the search + * index of the underlying field? This'll only return true if the field + * is searchable and there isn't a missing value or a script to confuse + * the ordering. + */ + public boolean alignesWithSearchIndex() { + return script() == null && missing() == null && fieldType() != null && fieldType().isSearchable(); + } /** diff --git a/server/src/main/java/org/elasticsearch/search/profile/aggregation/InternalAggregationProfileTree.java b/server/src/main/java/org/elasticsearch/search/profile/aggregation/InternalAggregationProfileTree.java index 44d47ef12245b..5f435c7bc3990 100644 --- a/server/src/main/java/org/elasticsearch/search/profile/aggregation/InternalAggregationProfileTree.java +++ b/server/src/main/java/org/elasticsearch/search/profile/aggregation/InternalAggregationProfileTree.java @@ -31,17 +31,7 @@ protected AggregationProfileBreakdown createProfileBreakdown() { @Override protected String getTypeFromElement(Aggregator element) { - - // Anonymous classes (such as NonCollectingAggregator in TermsAgg) won't have a name, - // we need to get the super class - if (element.getClass().getSimpleName().isEmpty()) { - return element.getClass().getSuperclass().getSimpleName(); - } - Class enclosing = element.getClass().getEnclosingClass(); - if (enclosing != null) { - return enclosing.getSimpleName() + "." + element.getClass().getSimpleName(); - } - return element.getClass().getSimpleName(); + return typeFromAggregator(element); } @Override @@ -49,4 +39,16 @@ protected String getDescriptionFromElement(Aggregator element) { return element.name(); } + public static String typeFromAggregator(Aggregator aggregator) { + // Anonymous classes (such as NonCollectingAggregator in TermsAgg) won't have a name, + // we need to get the super class + if (aggregator.getClass().getSimpleName().isEmpty()) { + return aggregator.getClass().getSuperclass().getSimpleName(); + } + Class enclosing = aggregator.getClass().getEnclosingClass(); + if (enclosing != null) { + return enclosing.getSimpleName() + "." 
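/*
 * This naming is why profiles report the new nested implementations as,
 * for example, "RangeAggregator.FromFilters" or
 * "DateHistogramAggregator.FromDateRange": the enclosing class's simple
 * name is prefixed onto the nested class's simple name.
 */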
+ aggregator.getClass().getSimpleName(); + } + return aggregator.getClass().getSimpleName(); + } } diff --git a/server/src/main/java/org/elasticsearch/search/profile/aggregation/ProfilingAggregator.java b/server/src/main/java/org/elasticsearch/search/profile/aggregation/ProfilingAggregator.java index 83023325ac95f..9c88d387c5628 100644 --- a/server/src/main/java/org/elasticsearch/search/profile/aggregation/ProfilingAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/profile/aggregation/ProfilingAggregator.java @@ -123,6 +123,11 @@ public String toString() { return delegate.toString(); } + @Override + public Aggregator[] subAggregators() { + return delegate.subAggregators(); + } + public static Aggregator unwrap(Aggregator agg) { if (agg instanceof ProfilingAggregator) { return ((ProfilingAggregator) agg).delegate; diff --git a/server/src/test/java/org/elasticsearch/common/RoundingTests.java b/server/src/test/java/org/elasticsearch/common/RoundingTests.java index fa94cacbbe77c..5e7392caf9fdc 100644 --- a/server/src/test/java/org/elasticsearch/common/RoundingTests.java +++ b/server/src/test/java/org/elasticsearch/common/RoundingTests.java @@ -39,9 +39,11 @@ import java.time.zone.ZoneOffsetTransitionRule; import java.time.zone.ZoneRules; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.concurrent.TimeUnit; +import static java.util.stream.Collectors.toList; import static org.hamcrest.Matchers.closeTo; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; @@ -1017,6 +1019,35 @@ public void testNonMillisecondsBasedUnitCalendarRoundingSize() { assertThat(prepared.roundingSize(thirdQuarter, Rounding.DateTimeUnit.HOUR_OF_DAY), closeTo(2208.0, 0.000001)); } + public void testFixedRoundingPoints() { + Rounding rounding = Rounding.builder(Rounding.DateTimeUnit.QUARTER_OF_YEAR).build(); + assertFixedRoundingPoints( + rounding.prepare(time("2020-01-01T00:00:00"), time("2021-01-01T00:00:00")), + "2020-01-01T00:00:00", + "2020-04-01T00:00:00", + "2020-07-01T00:00:00", + "2020-10-01T00:00:00", + "2021-01-01T00:00:00" + ); + rounding = Rounding.builder(Rounding.DateTimeUnit.DAY_OF_MONTH).build(); + assertFixedRoundingPoints( + rounding.prepare(time("2020-01-01T00:00:00"), time("2020-01-06T00:00:00")), + "2020-01-01T00:00:00", + "2020-01-02T00:00:00", + "2020-01-03T00:00:00", + "2020-01-04T00:00:00", + "2020-01-05T00:00:00", + "2020-01-06T00:00:00" + ); + } + + private void assertFixedRoundingPoints(Rounding.Prepared prepared, String... expected) { + assertThat( + Arrays.stream(prepared.fixedRoundingPoints()).mapToObj(Instant::ofEpochMilli).collect(toList()), + equalTo(Arrays.stream(expected).map(RoundingTests::time).map(Instant::ofEpochMilli).collect(toList())) + ); + } + private void assertInterval(long rounded, long nextRoundingValue, Rounding rounding, int minutes, ZoneId tz) { assertInterval(rounded, dateBetween(rounded, nextRoundingValue), nextRoundingValue, rounding, tz); diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/AdaptingAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/AdaptingAggregatorTests.java new file mode 100644 index 0000000000000..ac383934e8294 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/search/aggregations/AdaptingAggregatorTests.java @@ -0,0 +1,143 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.aggregations; + +import org.apache.lucene.index.LeafReaderContext; +import org.elasticsearch.common.CheckedFunction; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.mapper.MapperServiceTestCase; +import org.elasticsearch.search.aggregations.bucket.histogram.SizedBucketAggregator; +import org.elasticsearch.search.aggregations.metrics.MaxAggregationBuilder; +import org.elasticsearch.search.aggregations.support.AggregationContext; +import org.elasticsearch.search.aggregations.support.ValuesSourceRegistry; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; +import java.util.Map; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.sameInstance; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class AdaptingAggregatorTests extends MapperServiceTestCase { + /** + * It's important that sub-aggregations of the {@linkplain AdaptingAggregator} + * receive a reference to the {@linkplain AdaptingAggregator} as the parent. + * Without it we can't do things like implement {@link SizedBucketAggregator}. 
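 * <p>A sketch of why (illustrative, not taken from the change itself):
 * aggregators that need an enclosing {@link SizedBucketAggregator} find
 * it by walking {@code parent()} references, roughly
 * <pre>{@code
 * Aggregator a = sub.parent();
 * while (a != null && false == (a instanceof SizedBucketAggregator)) {
 *     a = a.parent();
 * }
 * }</pre>
 * so the walk has to see the adapter rather than the hidden delegate.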
+ */ + public void testParent() throws IOException { + MapperService mapperService = createMapperService(mapping(b -> {})); + ValuesSourceRegistry.Builder registry = new ValuesSourceRegistry.Builder(); + MaxAggregationBuilder.registerAggregators(registry); + withAggregationContext(registry.build(), mapperService, org.elasticsearch.common.collect.List.of(), null, context -> { + SearchContext searchContext = mock(SearchContext.class); + when(searchContext.bigArrays()).thenReturn(context.bigArrays()); + AggregatorFactories.Builder sub = AggregatorFactories.builder(); + sub.addAggregator(new MaxAggregationBuilder("test").field("foo")); + AggregatorFactory factory = new DummyAdaptingAggregatorFactory("test", context, null, sub, null); + Aggregator adapting = factory.create(searchContext, null, CardinalityUpperBound.ONE); + assertThat(adapting.subAggregators()[0].parent(), sameInstance(adapting)); + }); + } + + public void testBuildCallsAdapt() throws IOException { + MapperService mapperService = createMapperService(mapping(b -> {})); + withAggregationContext(mapperService, org.elasticsearch.common.collect.List.of(), context -> { + SearchContext searchContext = mock(SearchContext.class); + when(searchContext.bigArrays()).thenReturn(context.bigArrays()); + AggregatorFactory factory = new DummyAdaptingAggregatorFactory("test", context, null, AggregatorFactories.builder(), null); + Aggregator adapting = factory.create(searchContext, null, CardinalityUpperBound.ONE); + assertThat(adapting.buildEmptyAggregation().getMetadata(), equalTo(org.elasticsearch.common.collect.Map.of("dog", "woof"))); + assertThat(adapting.buildTopLevel().getMetadata(), equalTo(org.elasticsearch.common.collect.Map.of("dog", "woof"))); + }); + } + + private static class DummyAdaptingAggregatorFactory extends AggregatorFactory { + DummyAdaptingAggregatorFactory( + String name, + AggregationContext context, + AggregatorFactory parent, + AggregatorFactories.Builder subFactoriesBuilder, + Map metadata + ) throws IOException { + super(name, context, parent, subFactoriesBuilder, metadata); + } + + @Override + protected Aggregator createInternal( + SearchContext context, + Aggregator parent, + CardinalityUpperBound cardinality, + Map metadata + ) throws IOException { + return new DummyAdaptingAggregator( + parent, + factories, + subAggs -> new DummyAggregator(name, subAggs, context, parent, CardinalityUpperBound.ONE, metadata) + ); + } + } + + private static class DummyAdaptingAggregator extends AdaptingAggregator { + DummyAdaptingAggregator( + Aggregator parent, + AggregatorFactories subAggregators, + CheckedFunction delegate + ) throws IOException { + super(parent, subAggregators, delegate); + } + + @Override + protected InternalAggregation adapt(InternalAggregation delegateResult) { + InternalAggregation result = mock(InternalAggregation.class); + when(result.getMetadata()).thenReturn(org.elasticsearch.common.collect.Map.of("dog", "woof")); + return result; + } + } + + private static class DummyAggregator extends AggregatorBase { + protected DummyAggregator( + String name, + AggregatorFactories factories, + SearchContext context, + Aggregator parent, + CardinalityUpperBound subAggregatorCardinality, + Map metadata + ) throws IOException { + super(name, factories, context, parent, subAggregatorCardinality, metadata); + } + + @Override + protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { + return null; + } + + @Override + public InternalAggregation[] 
buildAggregations(long[] owningBucketOrds) throws IOException { + return new InternalAggregation[] {null}; + } + + @Override + public InternalAggregation buildEmptyAggregation() { + return null; + } + } +} diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java index 617eec9799a4d..0de98043af90b 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java @@ -20,24 +20,35 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.LongPoint; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; +import org.elasticsearch.index.mapper.DateFieldMapper; +import org.elasticsearch.index.mapper.DateFieldMapper.Resolution; import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.index.query.RangeQueryBuilder; +import org.elasticsearch.search.aggregations.AggregationBuilder; import org.elasticsearch.search.aggregations.AggregatorTestCase; +import org.elasticsearch.search.aggregations.bucket.filter.FiltersAggregator.KeyedFilter; import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper; import org.junit.Before; +import java.io.IOException; import java.util.HashSet; import java.util.List; import java.util.Set; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; + public class FiltersAggregatorTests extends AggregatorTestCase { private MappedFieldType fieldType; @@ -139,7 +150,7 @@ public void testRandom() throws Exception { // make sure we have more than one segment to test the merge indexWriter.commit(); } - int value = randomInt(maxTerm-1); + int value = randomInt(maxTerm - 1); expectedBucketCount[value] += 1; document.add(new Field("field", Integer.toString(value), KeywordFieldMapper.Defaults.FIELD_TYPE)); indexWriter.addDocument(document); @@ -188,4 +199,33 @@ public void testRandom() throws Exception { directory.close(); } } + + public void testMergePointRangeQueries() throws IOException { + MappedFieldType ft = new DateFieldMapper.DateFieldType("test", Resolution.MILLISECONDS); + AggregationBuilder builder = new FiltersAggregationBuilder( + "test", + new KeyedFilter("q1", new RangeQueryBuilder("test").from("2020-01-01").to("2020-03-01").includeUpper(false)) + ); + Query query = LongPoint.newRangeQuery( + "test", + DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-01"), + DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-02-01") + ); + testCase(builder, query, iw -> { + iw.addDocument( + org.elasticsearch.common.collect.List.of( + new LongPoint("test", DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2010-01-02")) + ) + ); + iw.addDocument( + org.elasticsearch.common.collect.List.of( + new LongPoint("test", DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-02")) + 
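/*
 * Expectation (sketched): the aggregation's filter covering
 * [2020-01-01, 2020-03-01) and the top level query covering
 * [2020-01-01, 2020-02-01] are merged into a single point range query
 * over their intersection, so only this 2020-01-02 document is counted
 * and the 2010-01-02 one above is not.
 */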
) + ); + }, result -> { + InternalFilters filters = (InternalFilters) result; + assertThat(filters.getBuckets(), hasSize(1)); + assertThat(filters.getBucketByKey("q1").getDocCount(), equalTo(1L)); + }, ft); + } } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQueryTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQueryTests.java new file mode 100644 index 0000000000000..7a329ce447e26 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQueryTests.java @@ -0,0 +1,249 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.aggregations.bucket.filter; + +import org.apache.lucene.document.DoublePoint; +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.PointRangeQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.store.Directory; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.EqualsHashCodeTestUtils; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.function.Supplier; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.nullValue; + +public class MergedPointRangeQueryTests extends ESTestCase { + public void testDifferentField() { + assertThat(merge(LongPoint.newExactQuery("a", 0), LongPoint.newExactQuery("b", 0)), nullValue()); + } + + public void testDifferentDimensionCount() { + assertThat( + merge(LongPoint.newExactQuery("a", 0), LongPoint.newRangeQuery("a", new long[] { 1, 2 }, new long[] { 1, 2 })), + nullValue() + ); + } + + public void testDifferentDimensionSize() { + assertThat(merge(LongPoint.newExactQuery("a", 0), IntPoint.newExactQuery("a", 0)), nullValue()); + } + + public void testSame() { + Query lhs = LongPoint.newRangeQuery("a", 0, 100); + assertThat(merge(lhs, LongPoint.newRangeQuery("a", 0, 100)), equalTo(lhs)); + } + + public void testOverlap() throws IOException { + MergedPointRangeQuery overlapping = mergeToMergedQuery( + LongPoint.newRangeQuery("a", -100, 100), + LongPoint.newRangeQuery("a", 0, 100) + ); + assertDelegateForSingleValuedSegmentsEqualPointRange(overlapping, LongPoint.newRangeQuery("a", 0, 100)); + assertFalse(matches1d(overlapping, -50)); // Point not in range + assertTrue(matches1d(overlapping, 50)); // Point in range + 
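/*
 * The multi-value cases below are why the merged query can't simply
 * delegate everywhere: a doc matches when some value satisfies the lhs
 * range and some value satisfies the rhs range, and those can be
 * different values (see the disjoint {-50, 50} case in testNonOverlap
 * below), so delegateForSingleValuedSegments() is only safe on segments
 * where the field has one value per doc.
 */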
assertTrue(matches1d(overlapping, -50, 10)); // Both points in range matches the doc + assertTrue(matches1d(overlapping, -200, 50)); // One point in range matches + assertFalse(matches1d(overlapping, -50, 200)); // No points in range doesn't match + } + + public void testNonOverlap() throws IOException { + MergedPointRangeQuery disjoint = mergeToMergedQuery(LongPoint.newRangeQuery("a", -100, -10), LongPoint.newRangeQuery("a", 10, 100)); + assertThat(disjoint.delegateForSingleValuedSegments(), instanceOf(MatchNoDocsQuery.class)); + assertFalse(matches1d(disjoint, randomLong())); // No single point can match + assertFalse(matches1d(disjoint, -50, -20)); // Both points in lower + assertFalse(matches1d(disjoint, 20, 50)); // Both points in upper + assertTrue(matches1d(disjoint, -50, 50)); // One in lower, one in upper + assertFalse(matches1d(disjoint, -50, 200)); // No point in lower + assertFalse(matches1d(disjoint, -200, 50)); // No point in upper + } + + public void test2dSimpleOverlap() throws IOException { + MergedPointRangeQuery overlapping = mergeToMergedQuery( + LongPoint.newRangeQuery("a", new long[] { -100, -100 }, new long[] { 100, 100 }), + LongPoint.newRangeQuery("a", new long[] { 0, 0 }, new long[] { 100, 100 }) + ); + assertDelegateForSingleValuedSegmentsEqualPointRange( + overlapping, + LongPoint.newRangeQuery("a", new long[] { 0, 0 }, new long[] { 100, 100 }) + ); + assertFalse(matches2d(overlapping, -50, -50)); + assertTrue(matches2d(overlapping, 10, 10)); + assertTrue(matches2d(overlapping, -50, -50, 10, 10)); + } + + public void test2dComplexOverlap() throws IOException { + MergedPointRangeQuery overlapping = mergeToMergedQuery( + LongPoint.newRangeQuery("a", new long[] { -100, 0 }, new long[] { 100, 100 }), + LongPoint.newRangeQuery("a", new long[] { 0, -100 }, new long[] { 100, 100 }) + ); + assertDelegateForSingleValuedSegmentsEqualPointRange( + overlapping, + LongPoint.newRangeQuery("a", new long[] { 0, 0 }, new long[] { 100, 100 }) + ); + assertFalse(matches2d(overlapping, -50, -50)); + assertTrue(matches2d(overlapping, 10, 10)); + assertTrue(matches2d(overlapping, -50, -50, 10, 10)); + } + + public void test2dNoOverlap() throws IOException { + MergedPointRangeQuery disjoint = mergeToMergedQuery( + LongPoint.newRangeQuery("a", new long[] { -100, -100 }, new long[] { -10, -10 }), + LongPoint.newRangeQuery("a", new long[] { 10, 10 }, new long[] { 100, 100 }) + ); + assertThat(disjoint.delegateForSingleValuedSegments(), instanceOf(MatchNoDocsQuery.class)); + assertFalse(matches2d(disjoint, randomLong(), randomLong())); + assertFalse(matches2d(disjoint, -50, -50)); + assertFalse(matches2d(disjoint, 50, 50)); + assertTrue(matches2d(disjoint, -50, -50, 50, 50)); + } + + public void test2dNoOverlapInOneDimension() throws IOException { + MergedPointRangeQuery disjoint = mergeToMergedQuery( + LongPoint.newRangeQuery("a", new long[] { -100, -100 }, new long[] { 100, -10 }), + LongPoint.newRangeQuery("a", new long[] { 0, 10 }, new long[] { 100, 100 }) + ); + assertThat(disjoint.delegateForSingleValuedSegments(), instanceOf(MatchNoDocsQuery.class)); + assertFalse(matches2d(disjoint, randomLong(), randomLong())); + assertFalse(matches2d(disjoint, -50, -50)); + assertFalse(matches2d(disjoint, 50, 50)); + assertTrue(matches2d(disjoint, 50, -50, 50, 50)); + } + + public void testEqualsAndHashCode() { + String field = randomAlphaOfLength(5); + int dims = randomBoolean() ? 
1 : between(2, 16); + Supplier supplier = randomFrom( + org.elasticsearch.common.collect.List.of( + () -> randomIntPointRangeQuery(field, dims), + () -> randomLongPointRangeQuery(field, dims), + () -> randomDoublePointRangeQuery(field, dims) + ) + ); + Query lhs = supplier.get(); + Query rhs = randomValueOtherThan(lhs, supplier); + MergedPointRangeQuery query = mergeToMergedQuery(lhs, rhs); + EqualsHashCodeTestUtils.checkEqualsAndHashCode( + query, + ignored -> mergeToMergedQuery(lhs, rhs), + ignored -> mergeToMergedQuery(lhs, randomValueOtherThan(lhs, () -> randomValueOtherThan(rhs, supplier))) + ); + } + + private Query randomIntPointRangeQuery(String field, int dims) { + int[] lower = new int[dims]; + int[] upper = new int[dims]; + for (int i = 0; i < dims; i++) { + lower[i] = randomIntBetween(Integer.MIN_VALUE, Integer.MAX_VALUE - 1); + upper[i] = randomIntBetween(lower[i], Integer.MAX_VALUE); + } + return IntPoint.newRangeQuery(field, lower, upper); + } + + private Query randomLongPointRangeQuery(String field, int dims) { + long[] lower = new long[dims]; + long[] upper = new long[dims]; + for (int i = 0; i < dims; i++) { + lower[i] = randomLongBetween(Long.MIN_VALUE, Long.MAX_VALUE - 1); + upper[i] = randomLongBetween(lower[i], Long.MAX_VALUE); + } + return LongPoint.newRangeQuery(field, lower, upper); + } + + private Query randomDoublePointRangeQuery(String field, int dims) { + double[] lower = new double[dims]; + double[] upper = new double[dims]; + for (int i = 0; i < dims; i++) { + lower[i] = randomDoubleBetween(Double.MIN_VALUE, 0, true); + upper[i] = randomDoubleBetween(lower[i], Double.MAX_VALUE, true); + } + return DoublePoint.newRangeQuery(field, lower, upper); + } + + private Query merge(Query lhs, Query rhs) { + assertThat("error in test assumptions", lhs, instanceOf(PointRangeQuery.class)); + assertThat("error in test assumptions", rhs, instanceOf(PointRangeQuery.class)); + return MergedPointRangeQuery.merge((PointRangeQuery) lhs, (PointRangeQuery) rhs); + } + + private MergedPointRangeQuery mergeToMergedQuery(Query lhs, Query rhs) { + Query merged = merge(lhs, rhs); + assertThat(merged, instanceOf(MergedPointRangeQuery.class)); + return (MergedPointRangeQuery) merged; + } + + private boolean matches1d(Query query, long... values) throws IOException { + try (Directory dir = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), dir)) { + List doc = new ArrayList<>(); + for (long v : values) { + doc.add(new LongPoint("a", v)); + } + iw.addDocument(doc); + try (IndexReader r = iw.getReader()) { + IndexSearcher searcher = new IndexSearcher(r); + return searcher.count(query) > 0; + } + } + } + + private boolean matches2d(Query query, long... values) throws IOException { + try (Directory dir = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), dir)) { + List doc = new ArrayList<>(); + assertEquals(0, values.length % 2); + for (int i = 0; i < values.length; i += 2) { + doc.add(new LongPoint("a", values[i], values[i + 1])); + } + iw.addDocument(doc); + try (IndexReader r = iw.getReader()) { + IndexSearcher searcher = new IndexSearcher(r); + return searcher.count(query) > 0; + } + } + } + + private void assertDelegateForSingleValuedSegmentsEqualPointRange(MergedPointRangeQuery actual, Query expected) { + /* + * This is a lot like assertThat(actual.delegateForSingleValuedSegments(), equalTo(expected)); but + * that doesn't work because the subclasses aren't the same. 
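 * (A plausible reason, stated as an assumption: point range queries
 * built by LongPoint.newRangeQuery and friends are anonymous subclasses
 * of PointRangeQuery, and Query equality requires the exact same class,
 * so two queries with identical bounds still compare unequal. Hence the
 * point-by-point comparison below.)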
+ */ + assertThat(expected, instanceOf(PointRangeQuery.class)); + assertThat(actual.delegateForSingleValuedSegments(), instanceOf(PointRangeQuery.class)); + assertThat( + ((PointRangeQuery) actual.delegateForSingleValuedSegments()).getLowerPoint(), + equalTo(((PointRangeQuery) expected).getLowerPoint()) + ); + assertThat( + ((PointRangeQuery) actual.delegateForSingleValuedSegments()).getUpperPoint(), + equalTo(((PointRangeQuery) expected).getUpperPoint()) + ); + } +} diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTestCase.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTestCase.java index 90185b50bfe4c..027e0e7547644 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTestCase.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTestCase.java @@ -25,6 +25,7 @@ import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.CheckedBiConsumer; +import org.elasticsearch.common.time.DateFormatter; import org.elasticsearch.index.mapper.DateFieldMapper; import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.NumberFieldMapper; @@ -96,9 +97,23 @@ protected final void asSubAggTestCase( } protected final DateFieldMapper.DateFieldType aggregableDateFieldType(boolean useNanosecondResolution, boolean isSearchable) { - return new DateFieldMapper.DateFieldType(AGGREGABLE_DATE, isSearchable, false, true, - DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER, + return aggregableDateFieldType(useNanosecondResolution, isSearchable, DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER); + } + + protected final DateFieldMapper.DateFieldType aggregableDateFieldType( + boolean useNanosecondResolution, + boolean isSearchable, + DateFormatter formatter + ) { + return new DateFieldMapper.DateFieldType( + AGGREGABLE_DATE, + isSearchable, + randomBoolean(), + true, + formatter, useNanosecondResolution ? 
DateFieldMapper.Resolution.NANOSECONDS : DateFieldMapper.Resolution.MILLISECONDS, - null, Collections.emptyMap()); + null, + Collections.emptyMap() + ); + } } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java index f8ec2eb54cb58..db5301166b666 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java @@ -30,13 +30,17 @@ import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; +import org.elasticsearch.common.time.DateFormatter; import org.elasticsearch.common.time.DateFormatters; import org.elasticsearch.index.mapper.DateFieldMapper; import org.elasticsearch.search.aggregations.AggregationBuilder; +import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.BucketOrder; import org.elasticsearch.search.aggregations.bucket.terms.StringTerms; import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper; +import org.elasticsearch.search.internal.SearchContext; +import org.hamcrest.Matcher; import java.io.IOException; import java.util.ArrayList; @@ -44,9 +48,12 @@ import java.util.Collections; import java.util.List; import java.util.function.Consumer; +import java.util.stream.IntStream; import static java.util.stream.Collectors.toList; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.not; public class DateHistogramAggregatorTests extends DateHistogramAggregatorTestCase { /** @@ -1144,6 +1151,87 @@ public void testOverlappingBounds() { "hard bounds: [2010-01-01--2020-01-01], extended bounds: [2009-01-01--2021-01-01]")); } + public void testFewRoundingPointsUsesFromRange() throws IOException { + aggregationImplementationChoiceTestCase( + aggregableDateFieldType(false, true, DateFormatter.forPattern("yyyy")), + IntStream.range(2000, 2010).mapToObj(Integer::toString).collect(toList()), + new DateHistogramAggregationBuilder("test").field(AGGREGABLE_DATE).calendarInterval(DateHistogramInterval.YEAR), + true + ); + } + + public void testManyRoundingPointsDoesNotUseFromRange() throws IOException { + aggregationImplementationChoiceTestCase( + aggregableDateFieldType(false, true, DateFormatter.forPattern("yyyy")), + IntStream.range(2000, 3000).mapToObj(Integer::toString).collect(toList()), + new DateHistogramAggregationBuilder("test").field(AGGREGABLE_DATE).calendarInterval(DateHistogramInterval.YEAR), + false + ); + } + + /** + * Nanos does use from range, but we don't get the fancy compile into + * filters because of potential loss of precision. 
+ */ + public void testNanosDoesUseFromRange() throws IOException { + aggregationImplementationChoiceTestCase( + aggregableDateFieldType(true, true, DateFormatter.forPattern("yyyy")), + org.elasticsearch.common.collect.List.of("2017", "2018"), + new DateHistogramAggregationBuilder("test").field(AGGREGABLE_DATE).calendarInterval(DateHistogramInterval.YEAR), + true + ); + } + + public void testFarFutureDoesNotUseFromRange() throws IOException { + aggregationImplementationChoiceTestCase( + aggregableDateFieldType(false, true, DateFormatter.forPattern("yyyyyy")), + org.elasticsearch.common.collect.List.of("402017", "402018"), + new DateHistogramAggregationBuilder("test").field(AGGREGABLE_DATE).calendarInterval(DateHistogramInterval.YEAR), + false + ); + } + + public void testMissingValueDoesNotUseFromRange() throws IOException { + aggregationImplementationChoiceTestCase( + aggregableDateFieldType(false, true, DateFormatter.forPattern("yyyy")), + org.elasticsearch.common.collect.List.of("2017", "2018"), + new DateHistogramAggregationBuilder("test").field(AGGREGABLE_DATE).calendarInterval(DateHistogramInterval.YEAR).missing("2020"), + false + ); + } + + private void aggregationImplementationChoiceTestCase( + DateFieldMapper.DateFieldType ft, + List data, + DateHistogramAggregationBuilder builder, + boolean usesFromRange + ) throws IOException { + try (Directory directory = newDirectory(); RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) { + for (String d : data) { + long instant = asLong(d, ft); + indexWriter.addDocument( + org.elasticsearch.common.collect.List.of( + new SortedNumericDocValuesField(AGGREGABLE_DATE, instant), + new LongPoint(AGGREGABLE_DATE, instant) + ) + ); + } + try (IndexReader reader = indexWriter.getReader()) { + SearchContext context = createSearchContext(new IndexSearcher(reader), new MatchAllDocsQuery(), ft); + Aggregator agg = createAggregator(builder, context); + Matcher matcher = instanceOf(DateHistogramAggregator.FromDateRange.class); + if (usesFromRange == false) { + matcher = not(matcher); + } + assertThat(agg, matcher); + agg.preCollection(); + context.searcher().search(context.query(), agg); + InternalDateHistogram result = (InternalDateHistogram) agg.buildTopLevel(); + assertThat(result.getBuckets().stream().map(InternalDateHistogram.Bucket::getKeyAsString).collect(toList()), equalTo(data)); + } + } + } + public void testIllegalInterval() throws IOException { IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> testSearchCase(new MatchAllDocsQuery(), Collections.emptyList(), diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/DateRangeAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/DateRangeAggregatorTests.java index 00def8a016ef8..0c088b96b307c 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/DateRangeAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/DateRangeAggregatorTests.java @@ -19,6 +19,8 @@ package org.elasticsearch.search.aggregations.bucket.range; +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; @@ -34,6 +36,7 @@ import org.elasticsearch.common.CheckedBiConsumer; import org.elasticsearch.common.CheckedConsumer; import 
org.elasticsearch.index.mapper.DateFieldMapper; +import org.elasticsearch.index.mapper.DateFieldMapper.Resolution; import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.NumberFieldMapper; @@ -51,14 +54,16 @@ import java.util.function.Consumer; import static java.util.Collections.singleton; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; public class DateRangeAggregatorTests extends AggregatorTestCase { private static final String NUMBER_FIELD_NAME = "number"; private static final String DATE_FIELD_NAME = "date"; - private Instant t1 = ZonedDateTime.of(2015, 11, 13, 16, 14, 34, 0, ZoneOffset.UTC).toInstant(); - private Instant t2 = ZonedDateTime.of(2016, 11, 13, 16, 14, 34, 0, ZoneOffset.UTC).toInstant(); + private static final Instant T1 = ZonedDateTime.of(2015, 11, 13, 16, 14, 34, 0, ZoneOffset.UTC).toInstant(); + private static final Instant T2 = ZonedDateTime.of(2016, 11, 13, 16, 14, 34, 0, ZoneOffset.UTC).toInstant(); public void testNoMatchingField() throws IOException { testBothResolutions(new MatchAllDocsQuery(), (iw, resolution) -> { @@ -76,8 +81,18 @@ public void testNoMatchingField() throws IOException { public void testMatchesSortedNumericDocValues() throws IOException { testBothResolutions(new MatchAllDocsQuery(), (iw, resolution) -> { - iw.addDocument(singleton(new SortedNumericDocValuesField(DATE_FIELD_NAME, resolution.convert(t1)))); - iw.addDocument(singleton(new SortedNumericDocValuesField(DATE_FIELD_NAME, resolution.convert(t2)))); + iw.addDocument( + org.elasticsearch.common.collect.List.of( + new SortedNumericDocValuesField(DATE_FIELD_NAME, resolution.convert(T1)), + new LongPoint(DATE_FIELD_NAME, resolution.convert(T1)) + ) + ); + iw.addDocument( + org.elasticsearch.common.collect.List.of( + new SortedNumericDocValuesField(DATE_FIELD_NAME, resolution.convert(T2)), + new LongPoint(DATE_FIELD_NAME, resolution.convert(T2)) + ) + ); }, range -> { List ranges = range.getBuckets(); assertEquals(2, ranges.size()); @@ -89,8 +104,18 @@ public void testMatchesSortedNumericDocValues() throws IOException { public void testMatchesNumericDocValues() throws IOException { testBothResolutions(new MatchAllDocsQuery(), (iw, resolution) -> { - iw.addDocument(singleton(new NumericDocValuesField(DATE_FIELD_NAME, resolution.convert(t1)))); - iw.addDocument(singleton(new NumericDocValuesField(DATE_FIELD_NAME, resolution.convert(t2)))); + iw.addDocument( + org.elasticsearch.common.collect.List.of( + new NumericDocValuesField(DATE_FIELD_NAME, resolution.convert(T1)), + new LongPoint(DATE_FIELD_NAME, resolution.convert(T1)) + ) + ); + iw.addDocument( + org.elasticsearch.common.collect.List.of( + new NumericDocValuesField(DATE_FIELD_NAME, resolution.convert(T2)), + new LongPoint(DATE_FIELD_NAME, resolution.convert(T2)) + ) + ); }, range -> { List ranges = range.getBuckets(); assertEquals(2, ranges.size()); @@ -109,8 +134,8 @@ public void testMissingDateStringWithDateField() throws IOException { .addRange("2015-11-13", "2015-11-14"); testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> { - iw.addDocument(singleton(new SortedNumericDocValuesField(DATE_FIELD_NAME, t1.toEpochMilli()))); - iw.addDocument(singleton(new SortedNumericDocValuesField(DATE_FIELD_NAME, t2.toEpochMilli()))); + iw.addDocument(singleton(new SortedNumericDocValuesField(DATE_FIELD_NAME, T1.toEpochMilli()))); + iw.addDocument(singleton(new SortedNumericDocValuesField(DATE_FIELD_NAME, 
T2.toEpochMilli()))); // Missing will apply to this document iw.addDocument(singleton(new SortedNumericDocValuesField(NUMBER_FIELD_NAME, 7))); }, range -> { @@ -121,6 +146,67 @@ public void testMissingDateStringWithDateField() throws IOException { }, fieldType); } + public void testUnboundedRanges() throws IOException { + testCase( + new RangeAggregationBuilder("name").field(DATE_FIELD_NAME).addUnboundedTo(5).addUnboundedFrom(5), + new MatchAllDocsQuery(), + iw -> { + iw.addDocument( + org.elasticsearch.common.collect.List.of( + new NumericDocValuesField(DATE_FIELD_NAME, Long.MIN_VALUE), + new LongPoint(DATE_FIELD_NAME, Long.MIN_VALUE) + ) + ); + iw.addDocument( + org.elasticsearch.common.collect.List.of( + new NumericDocValuesField(DATE_FIELD_NAME, 7), + new LongPoint(DATE_FIELD_NAME, 7) + ) + ); + iw.addDocument( + org.elasticsearch.common.collect.List.of( + new NumericDocValuesField(DATE_FIELD_NAME, 2), + new LongPoint(DATE_FIELD_NAME, 2) + ) + ); + iw.addDocument( + org.elasticsearch.common.collect.List.of( + new NumericDocValuesField(DATE_FIELD_NAME, 3), + new LongPoint(DATE_FIELD_NAME, 3) + ) + ); + iw.addDocument( + org.elasticsearch.common.collect.List.of( + new NumericDocValuesField(DATE_FIELD_NAME, Long.MAX_VALUE), + new LongPoint(DATE_FIELD_NAME, Long.MAX_VALUE) + ) + ); + }, + result -> { + InternalRange range = (InternalRange) result; + List ranges = range.getBuckets(); + assertThat(ranges, hasSize(2)); + assertThat(ranges.get(0).getFrom(), equalTo(Double.NEGATIVE_INFINITY)); + assertThat(ranges.get(0).getTo(), equalTo(5d)); + assertThat(ranges.get(0).getDocCount(), equalTo(3L)); + assertThat(ranges.get(1).getFrom(), equalTo(5d)); + assertThat(ranges.get(1).getTo(), equalTo(Double.POSITIVE_INFINITY)); + assertThat(ranges.get(1).getDocCount(), equalTo(2L)); + assertTrue(AggregationInspectionHelper.hasValue(range)); + }, + new DateFieldMapper.DateFieldType( + DATE_FIELD_NAME, + randomBoolean(), + randomBoolean(), + true, + DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER, + Resolution.MILLISECONDS, + null, + null + ) + ); + } + public void testNumberFieldDateRanges() throws IOException { DateRangeAggregationBuilder aggregationBuilder = new DateRangeAggregationBuilder("date_range") .field(NUMBER_FIELD_NAME) @@ -145,8 +231,18 @@ public void testNumberFieldNumberRanges() throws IOException { = new NumberFieldMapper.NumberFieldType(NUMBER_FIELD_NAME, NumberFieldMapper.NumberType.INTEGER); testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> { - iw.addDocument(singleton(new NumericDocValuesField(NUMBER_FIELD_NAME, 7))); - iw.addDocument(singleton(new NumericDocValuesField(NUMBER_FIELD_NAME, 1))); + iw.addDocument( + org.elasticsearch.common.collect.List.of( + new NumericDocValuesField(NUMBER_FIELD_NAME, 7), + new IntPoint(NUMBER_FIELD_NAME, 7) + ) + ); + iw.addDocument( + org.elasticsearch.common.collect.List.of( + new NumericDocValuesField(NUMBER_FIELD_NAME, 1), + new IntPoint(NUMBER_FIELD_NAME, 1) + ) + ); }, range -> { List ranges = range.getBuckets(); assertEquals(1, ranges.size()); diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java index dda30646a6ca5..fe1956b451ab4 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java @@ -19,6 +19,8 @@ package 
org.elasticsearch.search.aggregations.bucket.range; +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; @@ -32,9 +34,11 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.CheckedConsumer; import org.elasticsearch.index.mapper.DateFieldMapper; +import org.elasticsearch.index.mapper.DateFieldMapper.Resolution; import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.NumberFieldMapper; +import org.elasticsearch.index.mapper.NumberFieldMapper.NumberType; import org.elasticsearch.search.aggregations.AggregatorTestCase; import org.elasticsearch.search.aggregations.CardinalityUpperBound; import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper; @@ -48,6 +52,7 @@ import static java.util.Collections.singleton; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; public class RangeAggregatorTests extends AggregatorTestCase { @@ -70,9 +75,24 @@ public void testNoMatchingField() throws IOException { public void testMatchesSortedNumericDocValues() throws IOException { testCase(new MatchAllDocsQuery(), iw -> { - iw.addDocument(singleton(new SortedNumericDocValuesField(NUMBER_FIELD_NAME, 7))); - iw.addDocument(singleton(new SortedNumericDocValuesField(NUMBER_FIELD_NAME, 2))); - iw.addDocument(singleton(new SortedNumericDocValuesField(NUMBER_FIELD_NAME, 3))); + iw.addDocument( + org.elasticsearch.common.collect.List.of( + new SortedNumericDocValuesField(NUMBER_FIELD_NAME, 7), + new IntPoint(NUMBER_FIELD_NAME, 7) + ) + ); + iw.addDocument( + org.elasticsearch.common.collect.List.of( + new SortedNumericDocValuesField(NUMBER_FIELD_NAME, 2), + new IntPoint(NUMBER_FIELD_NAME, 2) + ) + ); + iw.addDocument( + org.elasticsearch.common.collect.List.of( + new SortedNumericDocValuesField(NUMBER_FIELD_NAME, 3), + new IntPoint(NUMBER_FIELD_NAME, 3) + ) + ); }, range -> { List ranges = range.getBuckets(); assertEquals(2, ranges.size()); @@ -84,9 +104,24 @@ public void testMatchesSortedNumericDocValues() throws IOException { public void testMatchesNumericDocValues() throws IOException { testCase(new MatchAllDocsQuery(), iw -> { - iw.addDocument(singleton(new NumericDocValuesField(NUMBER_FIELD_NAME, 7))); - iw.addDocument(singleton(new NumericDocValuesField(NUMBER_FIELD_NAME, 2))); - iw.addDocument(singleton(new NumericDocValuesField(NUMBER_FIELD_NAME, 3))); + iw.addDocument( + org.elasticsearch.common.collect.List.of( + new NumericDocValuesField(NUMBER_FIELD_NAME, 7), + new IntPoint(NUMBER_FIELD_NAME, 7) + ) + ); + iw.addDocument( + org.elasticsearch.common.collect.List.of( + new NumericDocValuesField(NUMBER_FIELD_NAME, 2), + new IntPoint(NUMBER_FIELD_NAME, 2) + ) + ); + iw.addDocument( + org.elasticsearch.common.collect.List.of( + new NumericDocValuesField(NUMBER_FIELD_NAME, 3), + new IntPoint(NUMBER_FIELD_NAME, 3) + ) + ); }, range -> { List ranges = range.getBuckets(); assertEquals(2, ranges.size()); @@ -96,8 +131,78 @@ public void testMatchesNumericDocValues() throws IOException { }); } + public void testUnboundedRanges() throws IOException { + testCase( + new RangeAggregationBuilder("name").field(NUMBER_FIELD_NAME).addUnboundedTo(5).addUnboundedFrom(5), + new MatchAllDocsQuery(), + iw -> { + iw.addDocument( + 
org.elasticsearch.common.collect.List.of( + new NumericDocValuesField(NUMBER_FIELD_NAME, Integer.MIN_VALUE), + new IntPoint(NUMBER_FIELD_NAME, Integer.MIN_VALUE) + ) + ); + iw.addDocument( + org.elasticsearch.common.collect.List.of( + new NumericDocValuesField(NUMBER_FIELD_NAME, 7), + new IntPoint(NUMBER_FIELD_NAME, 7) + ) + ); + iw.addDocument( + org.elasticsearch.common.collect.List.of( + new NumericDocValuesField(NUMBER_FIELD_NAME, 2), + new IntPoint(NUMBER_FIELD_NAME, 2) + ) + ); + iw.addDocument( + org.elasticsearch.common.collect.List.of( + new NumericDocValuesField(NUMBER_FIELD_NAME, 3), + new IntPoint(NUMBER_FIELD_NAME, 3) + ) + ); + iw.addDocument( + org.elasticsearch.common.collect.List.of( + new NumericDocValuesField(NUMBER_FIELD_NAME, Integer.MAX_VALUE), + new IntPoint(NUMBER_FIELD_NAME, Integer.MAX_VALUE) + ) + ); + }, + result -> { + InternalRange range = (InternalRange) result; + List ranges = range.getBuckets(); + assertThat(ranges, hasSize(2)); + assertThat(ranges.get(0).getFrom(), equalTo(Double.NEGATIVE_INFINITY)); + assertThat(ranges.get(0).getTo(), equalTo(5d)); + assertThat(ranges.get(0).getDocCount(), equalTo(3L)); + assertThat(ranges.get(1).getFrom(), equalTo(5d)); + assertThat(ranges.get(1).getTo(), equalTo(Double.POSITIVE_INFINITY)); + assertThat(ranges.get(1).getDocCount(), equalTo(2L)); + assertTrue(AggregationInspectionHelper.hasValue(range)); + }, + new NumberFieldMapper.NumberFieldType( + NUMBER_FIELD_NAME, + NumberFieldMapper.NumberType.INTEGER, + randomBoolean(), + randomBoolean(), + true, + false, + null, + null + ) + ); + } + public void testDateFieldMillisecondResolution() throws IOException { - DateFieldMapper.DateFieldType fieldType = new DateFieldMapper.DateFieldType(DATE_FIELD_NAME); + DateFieldMapper.DateFieldType fieldType = new DateFieldMapper.DateFieldType( + DATE_FIELD_NAME, + randomBoolean(), + randomBoolean(), + true, + DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER, + Resolution.MILLISECONDS, + null, + null + ); long milli1 = ZonedDateTime.of(2015, 11, 13, 16, 14, 34, 0, ZoneOffset.UTC).toInstant().toEpochMilli(); long milli2 = ZonedDateTime.of(2016, 11, 13, 16, 14, 34, 0, ZoneOffset.UTC).toInstant().toEpochMilli(); @@ -107,8 +212,18 @@ public void testDateFieldMillisecondResolution() throws IOException { .addRange(milli1 - 1, milli1 + 1); testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> { - iw.addDocument(singleton(new SortedNumericDocValuesField(DATE_FIELD_NAME, milli1))); - iw.addDocument(singleton(new SortedNumericDocValuesField(DATE_FIELD_NAME, milli2))); + iw.addDocument( + org.elasticsearch.common.collect.List.of( + new SortedNumericDocValuesField(DATE_FIELD_NAME, milli1), + new LongPoint(DATE_FIELD_NAME, milli1) + ) + ); + iw.addDocument( + org.elasticsearch.common.collect.List.of( + new SortedNumericDocValuesField(DATE_FIELD_NAME, milli2), + new LongPoint(DATE_FIELD_NAME, milli2) + ) + ); }, range -> { List ranges = range.getBuckets(); assertEquals(1, ranges.size()); @@ -168,6 +283,44 @@ public void testMissingDateWithDateField() throws IOException { }, fieldType); } + public void testNotFitIntoDouble() throws IOException { + MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType( + NUMBER_FIELD_NAME, + NumberType.LONG, + true, + false, + true, + false, + null, + null + ); + + long start = 2L << 54; // Double stores 53 bits of mantissa, so we aggregate a bunch of bigger values + + RangeAggregationBuilder aggregationBuilder = new RangeAggregationBuilder("range") + .field(NUMBER_FIELD_NAME) + .addRange(start, start + 
50) + .addRange(start + 50, start + 100) + .addUnboundedFrom(start + 100); + + testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> { + for (long l = start; l < start + 150; l++) { + iw.addDocument( + org.elasticsearch.common.collect.List.of( + new SortedNumericDocValuesField(NUMBER_FIELD_NAME, l), + new LongPoint(NUMBER_FIELD_NAME, l) + ) + ); + } + }, range -> { + List ranges = range.getBuckets(); + assertThat(ranges, hasSize(3)); + // If we had a native `double` range aggregator we'd get 50, 50, 50 + assertThat(ranges.stream().mapToLong(InternalRange.Bucket::getDocCount).toArray(), equalTo(new long[] {44, 48, 58})); + assertTrue(AggregationInspectionHelper.hasValue(range)); + }, fieldType); + } + public void testMissingDateWithNumberField() throws IOException { RangeAggregationBuilder aggregationBuilder = new RangeAggregationBuilder("range") .field(NUMBER_FIELD_NAME) @@ -295,11 +448,48 @@ public void testSubAggCollectsFromManyBucketsIfManyRanges() throws IOException { }); } + public void testOverlappingRanges() throws IOException { + RangeAggregationBuilder aggregationBuilder = new RangeAggregationBuilder("test_range_agg"); + aggregationBuilder.field(NUMBER_FIELD_NAME); + aggregationBuilder.addRange(0d, 5d); + aggregationBuilder.addRange(10d, 20d); + aggregationBuilder.addRange(0d, 20d); + aggregationBuilder.missing(100); // Set a missing value to force the "normal" range collection instead of filter-based + testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> { + iw.addDocument(singleton(new NumericDocValuesField(NUMBER_FIELD_NAME, 11))); + iw.addDocument(singleton(new NumericDocValuesField(NUMBER_FIELD_NAME, 7))); + iw.addDocument(singleton(new NumericDocValuesField(NUMBER_FIELD_NAME, 2))); + iw.addDocument(singleton(new NumericDocValuesField(NUMBER_FIELD_NAME, 3))); + }, result -> { + InternalRange range = (InternalRange) result; + List ranges = range.getBuckets(); + assertThat(ranges, hasSize(3)); + assertThat(ranges.get(0).getFrom(), equalTo(0d)); + assertThat(ranges.get(0).getTo(), equalTo(5d)); + assertThat(ranges.get(0).getDocCount(), equalTo(2L)); + assertThat(ranges.get(1).getFrom(), equalTo(0d)); + assertThat(ranges.get(1).getTo(), equalTo(20d)); + assertThat(ranges.get(1).getDocCount(), equalTo(4L)); + assertThat(ranges.get(2).getFrom(), equalTo(10d)); + assertThat(ranges.get(2).getTo(), equalTo(20d)); + assertThat(ranges.get(2).getDocCount(), equalTo(1L)); + assertTrue(AggregationInspectionHelper.hasValue(range)); + }, new NumberFieldMapper.NumberFieldType(NUMBER_FIELD_NAME, NumberFieldMapper.NumberType.INTEGER)); + } + private void testCase(Query query, CheckedConsumer buildIndex, Consumer> verify) throws IOException { - MappedFieldType fieldType - = new NumberFieldMapper.NumberFieldType(NUMBER_FIELD_NAME, NumberFieldMapper.NumberType.INTEGER); + MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType( + NUMBER_FIELD_NAME, + NumberFieldMapper.NumberType.INTEGER, + randomBoolean(), + randomBoolean(), + true, + false, + null, + null + ); RangeAggregationBuilder aggregationBuilder = new RangeAggregationBuilder("test_range_agg"); aggregationBuilder.field(NUMBER_FIELD_NAME); aggregationBuilder.addRange(0d, 5d); diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/support/CoreValuesSourceTypeTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/support/CoreValuesSourceTypeTests.java index e060855aad0fc..0e4b810ccc258 100644 ---
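// --- Aside, not part of the patch: a self-contained sketch of the precision loss that
// testNotFitIntoDouble above exercises. The range aggregator reads values as doubles, and a
// double's 53-bit significand cannot represent every long at the magnitude of 2L << 54,
// which is why the bucket counts land on 44/48/58 instead of a clean 50/50/50. The class
// below is illustrative only and is not part of the Elasticsearch codebase.
class DoublePrecisionAside {
    public static void main(String[] args) {
        long start = 2L << 54; // 2^55, the same starting point as the test
        int moved = 0;
        for (long l = start; l < start + 150; l++) {
            // Round-tripping through double is effectively what happens when the
            // aggregator compares a long value against its double-typed bucket bounds.
            if ((long) (double) l != l) {
                moved++;
            }
        }
        // In [2^55, 2^56) doubles are spaced 8 apart, so only every 8th of the 150 longs
        // survives the round-trip unchanged; the rest are rounded to a neighbouring
        // representable value, shifting documents across bucket boundaries.
        System.out.println(moved + " of 150 values changed by the double round-trip");
    }
}
// --- End of aside.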
a/server/src/test/java/org/elasticsearch/search/aggregations/support/CoreValuesSourceTypeTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/support/CoreValuesSourceTypeTests.java @@ -73,7 +73,7 @@ public void testDatePrepareRoundingWithQuery() throws IOException { MapperService mapperService = dateMapperService(); Query query = mapperService.fieldType("field") .rangeQuery(min, max, true, true, ShapeRelation.CONTAINS, null, null, createQueryShardContext(mapperService)); - withAggregationContext(mapperService, org.elasticsearch.common.collect.List.of(), query, context -> { + withAggregationContext(null, mapperService, org.elasticsearch.common.collect.List.of(), query, context -> { Rounding rounding = mock(Rounding.class); CoreValuesSourceType.DATE.getField(context.buildFieldContext("field"), null, context).roundingPreparer().apply(rounding); verify(rounding).prepare(min, max); @@ -102,7 +102,7 @@ public void testDatePrepareRoundingWithDocAndQuery() throws IOException { MapperService mapperService = dateMapperService(); Query query = mapperService.fieldType("field") .rangeQuery(minQuery, maxQuery, true, true, ShapeRelation.CONTAINS, null, null, createQueryShardContext(mapperService)); - withAggregationContext(mapperService, docsWithDatesBetween(minDocs, maxDocs), query, context -> { + withAggregationContext(null, mapperService, docsWithDatesBetween(minDocs, maxDocs), query, context -> { Rounding rounding = mock(Rounding.class); CoreValuesSourceType.DATE.getField(context.buildFieldContext("field"), null, context).roundingPreparer().apply(rounding); verify(rounding).prepare(min, max); diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfigTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfigTests.java index efd3bbc5db05b..215a5a353b12d 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfigTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfigTests.java @@ -54,12 +54,15 @@ public void testEmptyKeyword() throws Exception { LeafReaderContext ctx = context.searcher().getIndexReader().leaves().get(0); SortedBinaryDocValues values = valuesSource.bytesValues(ctx); assertFalse(values.advanceExact(0)); + assertTrue(config.alignesWithSearchIndex()); + config = ValuesSourceConfig.resolve(context, null, "field", null, "abc", null, null, CoreValuesSourceType.BYTES); valuesSource = (ValuesSource.Bytes) config.getValuesSource(); values = valuesSource.bytesValues(ctx); assertTrue(values.advanceExact(0)); assertEquals(1, values.docValueCount()); assertEquals(new BytesRef("abc"), values.nextValue()); + assertFalse(config.alignesWithSearchIndex()); }); } @@ -71,6 +74,7 @@ public void testUnmappedKeyword() throws Exception { ValuesSource.Bytes valuesSource = (ValuesSource.Bytes) config.getValuesSource(); assertNotNull(valuesSource); assertFalse(config.hasValues()); + assertFalse(config.alignesWithSearchIndex()); config = ValuesSourceConfig.resolve(context, ValueType.STRING, "field", null, "abc", null, null, CoreValuesSourceType.BYTES); valuesSource = (ValuesSource.Bytes) config.getValuesSource(); @@ -79,6 +83,7 @@ public void testUnmappedKeyword() throws Exception { assertTrue(values.advanceExact(0)); assertEquals(1, values.docValueCount()); assertEquals(new BytesRef("abc"), values.nextValue()); + assertFalse(config.alignesWithSearchIndex()); }); } @@ -93,6 +98,7 @@ public void testLong() throws Exception { 
assertTrue(values.advanceExact(0)); assertEquals(1, values.docValueCount()); assertEquals(42, values.nextValue()); + assertTrue(config.alignesWithSearchIndex()); }); } @@ -105,6 +111,7 @@ public void testEmptyLong() throws Exception { LeafReaderContext ctx = context.searcher().getIndexReader().leaves().get(0); SortedNumericDocValues values = valuesSource.longValues(ctx); assertFalse(values.advanceExact(0)); + assertTrue(config.alignesWithSearchIndex()); config = ValuesSourceConfig.resolve(context, null, "field", null, 42, null, null, CoreValuesSourceType.BYTES); valuesSource = (ValuesSource.Numeric) config.getValuesSource(); @@ -112,6 +119,7 @@ public void testEmptyLong() throws Exception { assertTrue(values.advanceExact(0)); assertEquals(1, values.docValueCount()); assertEquals(42, values.nextValue()); + assertFalse(config.alignesWithSearchIndex()); }); } @@ -123,6 +131,7 @@ public void testUnmappedLong() throws Exception { ValuesSource.Numeric valuesSource = (ValuesSource.Numeric) config.getValuesSource(); assertNotNull(valuesSource); assertFalse(config.hasValues()); + assertFalse(config.alignesWithSearchIndex()); config = ValuesSourceConfig.resolve(context, ValueType.NUMBER, "field", null, 42, null, null, CoreValuesSourceType.BYTES); valuesSource = (ValuesSource.Numeric) config.getValuesSource(); @@ -131,6 +140,7 @@ public void testUnmappedLong() throws Exception { assertTrue(values.advanceExact(0)); assertEquals(1, values.docValueCount()); assertEquals(42, values.nextValue()); + assertFalse(config.alignesWithSearchIndex()); }); } @@ -145,6 +155,7 @@ public void testBoolean() throws Exception { assertTrue(values.advanceExact(0)); assertEquals(1, values.docValueCount()); assertEquals(1, values.nextValue()); + assertTrue(config.alignesWithSearchIndex()); }); } @@ -157,6 +168,7 @@ public void testEmptyBoolean() throws Exception { LeafReaderContext ctx = context.searcher().getIndexReader().leaves().get(0); SortedNumericDocValues values = valuesSource.longValues(ctx); assertFalse(values.advanceExact(0)); + assertTrue(config.alignesWithSearchIndex()); config = ValuesSourceConfig.resolve(context, null, "field", null, true, null, null, CoreValuesSourceType.BYTES); valuesSource = (ValuesSource.Numeric) config.getValuesSource(); @@ -164,6 +176,7 @@ public void testEmptyBoolean() throws Exception { assertTrue(values.advanceExact(0)); assertEquals(1, values.docValueCount()); assertEquals(1, values.nextValue()); + assertFalse(config.alignesWithSearchIndex()); }); } @@ -175,6 +188,7 @@ public void testUnmappedBoolean() throws Exception { ValuesSource.Numeric valuesSource = (ValuesSource.Numeric) config.getValuesSource(); assertNotNull(valuesSource); assertFalse(config.hasValues()); + assertFalse(config.alignesWithSearchIndex()); config = ValuesSourceConfig.resolve(context, ValueType.BOOLEAN, "field", null, true, null, null, CoreValuesSourceType.BYTES); valuesSource = (ValuesSource.Numeric) config.getValuesSource(); @@ -183,6 +197,7 @@ public void testUnmappedBoolean() throws Exception { assertTrue(values.advanceExact(0)); assertEquals(1, values.docValueCount()); assertEquals(1, values.nextValue()); + assertFalse(config.alignesWithSearchIndex()); }); } @@ -213,6 +228,7 @@ public void testFieldAlias() throws Exception { assertTrue(values.advanceExact(0)); assertEquals(1, values.docValueCount()); assertEquals(new BytesRef("value"), values.nextValue()); + assertTrue(config.alignesWithSearchIndex()); }); } } diff --git 
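// --- Aside, not part of the patch: the alignesWithSearchIndex assertions above pin down
// when a values source can claim its values line up with what was actually indexed. A
// filter built from the points index can only see indexed values, so a `missing`
// replacement, a script, or an unmapped field must disable the filter-based optimization.
// The predicate below is a hypothetical restatement of that rule for illustration only;
// the real logic lives in ValuesSourceConfig and may differ in detail.
class AlignmentAside {
    static boolean alignesWithSearchIndex(boolean mapped, boolean indexed, boolean scripted, Object missing) {
        // Any source of values the index never saw (a missing replacement, script output)
        // or a field that is not mapped and indexed breaks the alignment.
        return mapped && indexed && scripted == false && missing == null;
    }

    public static void main(String[] args) {
        // Mirrors testEmptyLong: a plain mapped long field aligns; adding missing=42 does not.
        System.out.println(alignesWithSearchIndex(true, true, false, null)); // true
        System.out.println(alignesWithSearchIndex(true, true, false, 42));   // false
    }
}
// --- End of aside.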
a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java index 5233a66188dbf..b8ac57de07d90 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java @@ -290,7 +290,12 @@ protected final XContentBuilder fieldMapping(CheckedConsumer docs, CheckedConsumer test ) throws IOException { - withAggregationContext(mapperService, docs, null, test); + withAggregationContext(null, mapperService, docs, null, test); } protected final void withAggregationContext( + ValuesSourceRegistry valuesSourceRegistry, MapperService mapperService, List docs, Query query, @@ -390,7 +396,7 @@ protected final void withAggregationContext( writer.addDocuments(mapperService.documentMapper().parse(doc).docs()); } }, - reader -> test.accept(aggregationContext(mapperService, new IndexSearcher(reader), query)) + reader -> test.accept(aggregationContext(valuesSourceRegistry, mapperService, new IndexSearcher(reader), query)) ); } diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/rate/RateAggregatorTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/rate/RateAggregatorTests.java index 6a271375a4bc3..5d17f12f78443 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/rate/RateAggregatorTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/rate/RateAggregatorTests.java @@ -6,23 +6,9 @@ package org.elasticsearch.xpack.analytics.rate; -import static org.elasticsearch.xpack.analytics.AnalyticsTestsUtils.histogramFieldDocValues; -import static org.hamcrest.Matchers.closeTo; -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.hasSize; -import static org.hamcrest.Matchers.instanceOf; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.function.Consumer; -import java.util.function.Function; - import org.apache.lucene.document.Field; +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; @@ -58,6 +44,22 @@ import org.elasticsearch.xpack.analytics.AnalyticsPlugin; import org.elasticsearch.xpack.analytics.mapper.HistogramFieldMapper; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.function.Consumer; +import java.util.function.Function; + +import static org.elasticsearch.xpack.analytics.AnalyticsTestsUtils.histogramFieldDocValues; +import static org.hamcrest.Matchers.closeTo; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.instanceOf; + public class RateAggregatorTests extends AggregatorTestCase { /** @@ -330,16 +332,36 @@ public void testKeywordSandwich() throws IOException { testCase(dateHistogramAggregationBuilder, new MatchAllDocsQuery(), iw -> { iw.addDocument( - doc("2010-03-11T01:07:45", new NumericDocValuesField("val", 
1), new SortedSetDocValuesField("term", new BytesRef("a"))) + doc( + "2010-03-11T01:07:45", + new NumericDocValuesField("val", 1), + new IntPoint("val", 1), + new SortedSetDocValuesField("term", new BytesRef("a")) + ) ); iw.addDocument( - doc("2010-03-12T01:07:45", new NumericDocValuesField("val", 2), new SortedSetDocValuesField("term", new BytesRef("a"))) + doc( + "2010-03-12T01:07:45", + new NumericDocValuesField("val", 2), + new IntPoint("val", 2), + new SortedSetDocValuesField("term", new BytesRef("a")) + ) ); iw.addDocument( - doc("2010-04-01T03:43:34", new NumericDocValuesField("val", 3), new SortedSetDocValuesField("term", new BytesRef("a"))) + doc( + "2010-04-01T03:43:34", + new NumericDocValuesField("val", 3), + new IntPoint("val", 3), + new SortedSetDocValuesField("term", new BytesRef("a")) + ) ); iw.addDocument( - doc("2010-04-27T03:43:34", new NumericDocValuesField("val", 4), new SortedSetDocValuesField("term", new BytesRef("b"))) + doc( + "2010-04-27T03:43:34", + new NumericDocValuesField("val", 4), + new IntPoint("val", 4), + new SortedSetDocValuesField("term", new BytesRef("b")) + ) ); }, (Consumer) dh -> { assertThat(dh.getBuckets(), hasSize(2)); @@ -606,6 +628,7 @@ private Iterable doc(String date, IndexableField... fields) { List indexableFields = new ArrayList<>(); long instant = dateFieldType(DATE_FIELD).parse(date); indexableFields.add(new SortedNumericDocValuesField(DATE_FIELD, instant)); + indexableFields.add(new LongPoint(DATE_FIELD, instant)); indexableFields.addAll(Arrays.asList(fields)); return indexableFields; } diff --git a/x-pack/plugin/mapper-unsigned-long/src/test/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongTests.java b/x-pack/plugin/mapper-unsigned-long/src/test/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongTests.java index e19b2d322bef5..d1f3bbaf09237 100644 --- a/x-pack/plugin/mapper-unsigned-long/src/test/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongTests.java +++ b/x-pack/plugin/mapper-unsigned-long/src/test/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongTests.java @@ -42,6 +42,7 @@ import static org.elasticsearch.search.aggregations.AggregationBuilders.min; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; import static org.elasticsearch.search.aggregations.AggregationBuilders.terms; import static org.hamcrest.Matchers.equalTo;
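// --- Aside, not part of the patch: why these tests now index an IntPoint or LongPoint
// next to each doc-values field. The optimization this PR profiles rewrites buckets as
// queries against the points index (filter-by-filter collection), so test documents
// written with doc values alone would never match the optimized path. A standalone
// Lucene sketch of the pairing; class name and field name are illustrative, not
// Elasticsearch test infrastructure.
import org.apache.lucene.document.Document;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class PointsAsideDemo {
    public static void main(String[] args) throws Exception {
        try (Directory dir = new ByteBuffersDirectory(); IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig())) {
            for (long v : new long[] { 2, 3, 7 }) {
                Document doc = new Document();
                doc.add(new NumericDocValuesField("val", v)); // what the aggregator's collector reads
                doc.add(new LongPoint("val", v));             // what a rewritten filter queries
                iw.addDocument(doc);
            }
            iw.commit();
            try (DirectoryReader reader = DirectoryReader.open(dir)) {
                IndexSearcher searcher = new IndexSearcher(reader);
                // A [0, 5) bucket expressed as an inclusive point range query, the way the
                // filters-based path asks the index to count matching documents directly.
                int hits = searcher.count(LongPoint.newRangeQuery("val", 0, 4));
                System.out.println(hits); // 2 -> the documents holding 2 and 3
            }
        }
    }
}
// --- End of aside.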