Skip to content

Commit a428f17

Browse files
committed
Add histogram field type support to boxplot aggs
Add support for the histogram field type to boxplot aggs. Closes elastic#52233 Relates to elastic#33112
1 parent 6b62ec5 commit a428f17

File tree

6 files changed

+73
-25
lines changed

6 files changed

+73
-25
lines changed

docs/reference/aggregations/metrics/boxplot-aggregation.asciidoc

+2-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
=== Boxplot Aggregation
55

66
A `boxplot` metrics aggregation that computes a boxplot of numeric values extracted from the aggregated documents.
7-
These values can be extracted either from specific numeric fields in the documents, or be generated by a provided script.
7+
These values can be generated by a provided script or extracted from specific numeric or
8+
<<histogram,histogram fields>> in the documents.
89

910
The `boxplot` aggregation returns essential information for making a https://en.wikipedia.org/wiki/Box_plot[box plot]: minimum, maximum,
1011
median, first quartile (25th percentile) and third quartile (75th percentile) values.

docs/reference/mapping/types/histogram.asciidoc

+1
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ following aggregations and queries:
3737

3838
* <<search-aggregations-metrics-percentile-aggregation,percentiles>> aggregation
3939
* <<search-aggregations-metrics-percentile-rank-aggregation,percentile ranks>> aggregation
40+
* <<search-aggregations-metrics-boxplot-aggregation,boxplot>> aggregation
4041
* <<query-dsl-exists-query,exists>> query
4142

4243
[[mapping-types-histogram-building-histogram]]

x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/boxplot/BoxplotAggregationBuilder.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,15 @@
2929

3030
import static org.elasticsearch.search.aggregations.metrics.PercentilesMethod.COMPRESSION_FIELD;
3131

32-
public class BoxplotAggregationBuilder extends ValuesSourceAggregationBuilder.LeafOnly<ValuesSource.Numeric,
32+
public class BoxplotAggregationBuilder extends ValuesSourceAggregationBuilder.LeafOnly<ValuesSource,
3333
BoxplotAggregationBuilder> {
3434
public static final String NAME = "boxplot";
3535

3636
private static final ObjectParser<BoxplotAggregationBuilder, Void> PARSER;
3737

3838
static {
3939
PARSER = new ObjectParser<>(BoxplotAggregationBuilder.NAME);
40-
ValuesSourceParserHelper.declareNumericFields(PARSER, true, true, false);
40+
ValuesSourceParserHelper.declareAnyFields(PARSER, true, true);
4141
PARSER.declareDouble(BoxplotAggregationBuilder::compression, COMPRESSION_FIELD);
4242
}
4343

@@ -98,7 +98,7 @@ public double compression() {
9898

9999
@Override
100100
protected BoxplotAggregatorFactory innerBuild(QueryShardContext queryShardContext,
101-
ValuesSourceConfig<ValuesSource.Numeric> config,
101+
ValuesSourceConfig<ValuesSource> config,
102102
AggregatorFactory parent,
103103
AggregatorFactories.Builder subFactoriesBuilder) throws IOException {
104104
return new BoxplotAggregatorFactory(name, config, compression, queryShardContext, parent, subFactoriesBuilder, metaData);

x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/boxplot/BoxplotAggregator.java

+31-14
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
import org.elasticsearch.common.lease.Releasables;
1212
import org.elasticsearch.common.util.BigArrays;
1313
import org.elasticsearch.common.util.ObjectArray;
14+
import org.elasticsearch.index.fielddata.HistogramValue;
15+
import org.elasticsearch.index.fielddata.HistogramValues;
1416
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
1517
import org.elasticsearch.search.DocValueFormat;
1618
import org.elasticsearch.search.aggregations.Aggregator;
@@ -29,12 +31,12 @@
2931

3032
public class BoxplotAggregator extends NumericMetricsAggregator.MultiValue {
3133

32-
private final ValuesSource.Numeric valuesSource;
34+
private final ValuesSource valuesSource;
3335
private final DocValueFormat format;
3436
protected ObjectArray<TDigestState> states;
3537
protected final double compression;
3638

37-
BoxplotAggregator(String name, ValuesSource.Numeric valuesSource, DocValueFormat formatter, double compression,
39+
BoxplotAggregator(String name, ValuesSource valuesSource, DocValueFormat formatter, double compression,
3840
SearchContext context, Aggregator parent, List<PipelineAggregator> pipelineAggregators,
3941
Map<String, Object> metaData) throws IOException {
4042
super(name, context, parent, pipelineAggregators, metaData);
@@ -58,23 +60,38 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
5860
return LeafBucketCollector.NO_OP_COLLECTOR;
5961
}
6062
final BigArrays bigArrays = context.bigArrays();
61-
final SortedNumericDoubleValues values = valuesSource.doubleValues(ctx);
62-
return new LeafBucketCollectorBase(sub, values) {
63-
@Override
64-
public void collect(int doc, long bucket) throws IOException {
65-
states = bigArrays.grow(states, bucket + 1);
66-
67-
if (values.advanceExact(doc)) {
63+
if (valuesSource instanceof ValuesSource.Histogram) {
64+
final HistogramValues values = ((ValuesSource.Histogram)valuesSource).getHistogramValues(ctx);
65+
return new LeafBucketCollectorBase(sub, values) {
66+
@Override
67+
public void collect(int doc, long bucket) throws IOException {
6868
TDigestState state = getExistingOrNewHistogram(bigArrays, bucket);
6969
if (values.advanceExact(doc)) {
70-
final int valueCount = values.docValueCount();
71-
for (int i = 0; i < valueCount; i++) {
72-
state.add(values.nextValue());
70+
final HistogramValue sketch = values.histogram();
71+
while(sketch.next()) {
72+
state.add(sketch.value(), sketch.count());
7373
}
7474
}
7575
}
76-
}
77-
};
76+
};
77+
} else {
78+
final SortedNumericDoubleValues values = ((ValuesSource.Numeric)valuesSource).doubleValues(ctx);
79+
return new LeafBucketCollectorBase(sub, values) {
80+
@Override
81+
public void collect(int doc, long bucket) throws IOException {
82+
states = bigArrays.grow(states, bucket + 1);
83+
if (values.advanceExact(doc)) {
84+
TDigestState state = getExistingOrNewHistogram(bigArrays, bucket);
85+
if (values.advanceExact(doc)) {
86+
final int valueCount = values.docValueCount();
87+
for (int i = 0; i < valueCount; i++) {
88+
state.add(values.nextValue());
89+
}
90+
}
91+
}
92+
}
93+
};
94+
}
7895
}
7996

8097
private TDigestState getExistingOrNewHistogram(final BigArrays bigArrays, long bucket) {

x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/boxplot/BoxplotAggregatorFactory.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,12 @@
2020
import java.util.List;
2121
import java.util.Map;
2222

23-
public class BoxplotAggregatorFactory extends ValuesSourceAggregatorFactory<ValuesSource.Numeric> {
23+
public class BoxplotAggregatorFactory extends ValuesSourceAggregatorFactory<ValuesSource> {
2424

2525
private final double compression;
2626

2727
BoxplotAggregatorFactory(String name,
28-
ValuesSourceConfig<ValuesSource.Numeric> config,
28+
ValuesSourceConfig<ValuesSource> config,
2929
double compression,
3030
QueryShardContext queryShardContext,
3131
AggregatorFactory parent,
@@ -46,7 +46,7 @@ protected Aggregator createUnmapped(SearchContext searchContext,
4646
}
4747

4848
@Override
49-
protected Aggregator doCreateInternal(ValuesSource.Numeric valuesSource,
49+
protected Aggregator doCreateInternal(ValuesSource valuesSource,
5050
SearchContext searchContext,
5151
Aggregator parent,
5252
boolean collectsFromSingleBucket,

x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramPercentileAggregationTests.java

+33-4
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
import org.elasticsearch.search.aggregations.metrics.TDigestState;
2828
import org.elasticsearch.test.ESSingleNodeTestCase;
2929
import org.elasticsearch.xpack.analytics.AnalyticsPlugin;
30+
import org.elasticsearch.xpack.analytics.boxplot.Boxplot;
31+
import org.elasticsearch.xpack.analytics.boxplot.BoxplotAggregationBuilder;
3032
import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin;
3133

3234
import java.util.ArrayList;
@@ -131,8 +133,7 @@ public void testHDRHistogram() throws Exception {
131133
}
132134
}
133135

134-
public void testTDigestHistogram() throws Exception {
135-
136+
private void setupTDigestHistogram(int compression) throws Exception {
136137
XContentBuilder xContentBuilder = XContentFactory.jsonBuilder()
137138
.startObject()
138139
.startObject("_doc")
@@ -170,8 +171,6 @@ public void testTDigestHistogram() throws Exception {
170171
PutMappingRequest request2 = new PutMappingRequest("pre_agg").source(xContentBuilder2);
171172
client().admin().indices().putMapping(request2).actionGet();
172173

173-
174-
int compression = TestUtil.nextInt(random(), 200, 300);
175174
TDigestState histogram = new TDigestState(compression);
176175
BulkRequest bulkRequest = new BulkRequest();
177176

@@ -218,6 +217,11 @@ public void testTDigestHistogram() throws Exception {
218217

219218
response = client().prepareSearch("pre_agg").get();
220219
assertEquals(numDocs / frq, response.getHits().getTotalHits().value);
220+
}
221+
222+
public void testTDigestHistogram() throws Exception {
223+
int compression = TestUtil.nextInt(random(), 200, 300);
224+
setupTDigestHistogram(compression);
221225

222226
PercentilesAggregationBuilder builder =
223227
AggregationBuilders.percentiles("agg").field("inner.data").method(PercentilesMethod.TDIGEST)
@@ -236,6 +240,31 @@ public void testTDigestHistogram() throws Exception {
236240
}
237241
}
238242

243+
public void testBoxplotHistogram() throws Exception {
244+
int compression = TestUtil.nextInt(random(), 200, 300);
245+
setupTDigestHistogram(compression);
246+
BoxplotAggregationBuilder bpBuilder = new BoxplotAggregationBuilder("agg").field("inner.data").compression(compression);
247+
248+
SearchResponse bpResponseRaw = client().prepareSearch("raw").addAggregation(bpBuilder).get();
249+
SearchResponse bpResponsePreAgg = client().prepareSearch("pre_agg").addAggregation(bpBuilder).get();
250+
SearchResponse bpResponseBoth = client().prepareSearch("raw", "pre_agg").addAggregation(bpBuilder).get();
251+
252+
Boxplot bpRaw = bpResponseRaw.getAggregations().get("agg");
253+
Boxplot bpPreAgg = bpResponsePreAgg.getAggregations().get("agg");
254+
Boxplot bpBoth = bpResponseBoth.getAggregations().get("agg");
255+
assertEquals(bpRaw.getMax(), bpPreAgg.getMax(), 0.0);
256+
assertEquals(bpRaw.getMax(), bpBoth.getMax(), 0.0);
257+
assertEquals(bpRaw.getMin(), bpPreAgg.getMin(), 0.0);
258+
assertEquals(bpRaw.getMin(), bpBoth.getMin(), 0.0);
259+
260+
assertEquals(bpRaw.getQ1(), bpPreAgg.getQ1(), 1.0);
261+
assertEquals(bpRaw.getQ1(), bpBoth.getQ1(), 1.0);
262+
assertEquals(bpRaw.getQ2(), bpPreAgg.getQ2(), 1.0);
263+
assertEquals(bpRaw.getQ2(), bpBoth.getQ2(), 1.0);
264+
assertEquals(bpRaw.getQ3(), bpPreAgg.getQ3(), 1.0);
265+
assertEquals(bpRaw.getQ3(), bpBoth.getQ3(), 1.0);
266+
}
267+
239268
@Override
240269
protected Collection<Class<? extends Plugin>> getPlugins() {
241270
List<Class<? extends Plugin>> plugins = new ArrayList<>(super.getPlugins());

0 commit comments

Comments
 (0)