Commit 556f5fa

SQL: Use calendar_interval of 1d for HISTOGRAMs with 1 DAY intervals (#52749)
1 parent 0a4c5e0 commit 556f5fa

File tree: 5 files changed, +81 −4 lines changed

docs/reference/sql/functions/grouping.asciidoc

+2 −2
@@ -87,8 +87,8 @@ actually used will be `INTERVAL '2' DAY`. If the interval specified is less than
 
 [IMPORTANT]
 All intervals specified for a date/time HISTOGRAM will use a <<search-aggregations-bucket-datehistogram-aggregation,fixed interval>>
-in their `date_histogram` aggregation definition, with the notable exceptions of `INTERVAL '1' YEAR` AND `INTERVAL '1' MONTH` where a calendar interval is used.
-The choice for a calendar interval was made for having a more intuitive result for YEAR and MONTH groupings. In the case of YEAR, for example, the calendar intervals consider a one year
+in their `date_histogram` aggregation definition, with the notable exceptions of `INTERVAL '1' YEAR`, `INTERVAL '1' MONTH` and `INTERVAL '1' DAY` where a calendar interval is used.
+The choice for a calendar interval was made for having a more intuitive result for YEAR, MONTH and DAY groupings. In the case of YEAR, for example, the calendar intervals consider a one year
 bucket as the one starting on January 1st that specific year, whereas a fixed interval one-year-bucket considers one year as a number
 of milliseconds (for example, `31536000000ms` corresponding to 365 days, 24 hours per day, 60 minutes per hour etc.). With fixed intervals,
 the day of February 5th, 2019 for example, belongs to a bucket that starts on December 20th, 2018 and {es} (and implicitly {es-sql}) would
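
A minimal, self-contained java.time sketch (not part of this commit) that reproduces the documentation's fixed-interval example, assuming fixed intervals are anchored at the Unix epoch as the example implies:

import java.time.Instant;
import java.time.ZoneOffset;

public class FixedVsCalendarBucket {
    public static void main(String[] args) {
        long fixedIntervalMillis = 31_536_000_000L; // 365 days expressed in milliseconds
        long timestamp = Instant.parse("2019-02-05T00:00:00Z").toEpochMilli();

        // A fixed-interval bucket key is the largest multiple of the interval <= the timestamp.
        long bucketStart = Math.floorDiv(timestamp, fixedIntervalMillis) * fixedIntervalMillis;
        System.out.println(Instant.ofEpochMilli(bucketStart).atZone(ZoneOffset.UTC));
        // prints 2018-12-20T00:00Z - not an intuitive "one year" bucket, which is why
        // 1 YEAR, 1 MONTH and (with this commit) 1 DAY groupings use calendar intervals.
    }
}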

x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec

+24
@@ -531,6 +531,30 @@ null |10 |null
 1953-11-01T00:00:00.000Z|1              |1953-11-07T00:00:00.000Z
 ;
 
+histogramOneDay
+schema::h:ts|c:l|birth_date:ts
+SELECT HISTOGRAM(birth_date, INTERVAL 1 DAY) AS h, COUNT(*) as c, birth_date FROM test_emp WHERE YEAR(birth_date) BETWEEN 1959 AND 1960 GROUP BY h, birth_date ORDER BY h ASC;
+
+           h            |       c       |       birth_date
+------------------------+---------------+------------------------
+1959-01-27T00:00:00.000Z|1              |1959-01-27T00:00:00.000Z
+1959-04-07T00:00:00.000Z|1              |1959-04-07T00:00:00.000Z
+1959-07-23T00:00:00.000Z|2              |1959-07-23T00:00:00.000Z
+1959-08-10T00:00:00.000Z|1              |1959-08-10T00:00:00.000Z
+1959-08-19T00:00:00.000Z|1              |1959-08-19T00:00:00.000Z
+1959-10-01T00:00:00.000Z|1              |1959-10-01T00:00:00.000Z
+1959-12-03T00:00:00.000Z|1              |1959-12-03T00:00:00.000Z
+1959-12-25T00:00:00.000Z|1              |1959-12-25T00:00:00.000Z
+1960-02-20T00:00:00.000Z|1              |1960-02-20T00:00:00.000Z
+1960-03-09T00:00:00.000Z|1              |1960-03-09T00:00:00.000Z
+1960-05-25T00:00:00.000Z|1              |1960-05-25T00:00:00.000Z
+1960-07-20T00:00:00.000Z|1              |1960-07-20T00:00:00.000Z
+1960-08-09T00:00:00.000Z|1              |1960-08-09T00:00:00.000Z
+1960-09-06T00:00:00.000Z|1              |1960-09-06T00:00:00.000Z
+1960-10-04T00:00:00.000Z|1              |1960-10-04T00:00:00.000Z
+1960-12-17T00:00:00.000Z|1              |1960-12-17T00:00:00.000Z
+;
+
 histogramDateTimeWithMonthOnTop
 schema::h:i|c:l
 SELECT HISTOGRAM(MONTH(birth_date), 2) AS h, COUNT(*) as c FROM test_emp GROUP BY h ORDER BY h DESC;

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/grouping/Histogram.java

+1
@@ -31,6 +31,7 @@ public class Histogram extends GroupingFunction {
     private final ZoneId zoneId;
     public static String YEAR_INTERVAL = DateHistogramInterval.YEAR.toString();
     public static String MONTH_INTERVAL = DateHistogramInterval.MONTH.toString();
+    public static String DAY_INTERVAL = DateHistogramInterval.DAY.toString();
 
     public Histogram(Source source, Expression field, Expression interval, ZoneId zoneId) {
         super(source, field, Collections.singletonList(interval));
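
Side note (not from the commit): DateHistogramInterval.DAY is the built-in calendar interval whose string form is "1d", so the new constant mirrors the existing YEAR and MONTH ones. A minimal check, assuming the Elasticsearch aggregation classes are on the classpath:

import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval;

public class DayIntervalCheck {
    public static void main(String[] args) {
        // The planner reuses these strings as the calendar_interval of the generated date_histogram.
        System.out.println(DateHistogramInterval.DAY);   // 1d
        System.out.println(DateHistogramInterval.MONTH); // 1M
        System.out.println(DateHistogramInterval.YEAR);  // 1y
    }
}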

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryFolder.java

+15 −2
@@ -41,6 +41,7 @@
 import org.elasticsearch.xpack.sql.expression.function.aggregate.TopHits;
 import org.elasticsearch.xpack.sql.expression.function.grouping.Histogram;
 import org.elasticsearch.xpack.sql.expression.function.scalar.datetime.DateTimeHistogramFunction;
+import org.elasticsearch.xpack.sql.expression.literal.interval.IntervalDayTime;
 import org.elasticsearch.xpack.sql.expression.literal.interval.IntervalYearMonth;
 import org.elasticsearch.xpack.sql.expression.literal.interval.Intervals;
 import org.elasticsearch.xpack.sql.plan.logical.Pivot;
@@ -80,6 +81,7 @@
 import org.elasticsearch.xpack.sql.util.Check;
 import org.elasticsearch.xpack.sql.util.DateUtils;
 
+import java.time.Duration;
 import java.time.Period;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -90,6 +92,7 @@
 import java.util.concurrent.atomic.AtomicReference;
 
 import static org.elasticsearch.xpack.ql.util.CollectionUtils.combine;
+import static org.elasticsearch.xpack.sql.expression.function.grouping.Histogram.DAY_INTERVAL;
 import static org.elasticsearch.xpack.sql.expression.function.grouping.Histogram.MONTH_INTERVAL;
 import static org.elasticsearch.xpack.sql.expression.function.grouping.Histogram.YEAR_INTERVAL;
 import static org.elasticsearch.xpack.sql.planner.QueryTranslator.toAgg;
@@ -332,14 +335,24 @@ else if (exp instanceof GroupingFunction) {
 
                         // When the histogram is `INTERVAL '1' YEAR` or `INTERVAL '1' MONTH`, the interval used in
                         // the ES date_histogram will be a calendar_interval with value "1y" or "1M" respectively.
-                        // All other intervals will be fixed_intervals expressed in ms.
                         if (field instanceof FieldAttribute) {
                             key = new GroupByDateHistogram(aggId, QueryTranslator.nameOf(field), calendarInterval, h.zoneId());
                         } else if (field instanceof Function) {
                             key = new GroupByDateHistogram(aggId, ((Function) field).asScript(), calendarInterval, h.zoneId());
                         }
                     }
-                    // typical interval
+                    // interval of exactly 1 day
+                    else if (value instanceof IntervalDayTime
+                            && ((IntervalDayTime) value).interval().equals(Duration.ofDays(1))) {
+                        // When the histogram is `INTERVAL '1' DAY` the interval used in
+                        // the ES date_histogram will be a calendar_interval with value "1d"
+                        if (field instanceof FieldAttribute) {
+                            key = new GroupByDateHistogram(aggId, QueryTranslator.nameOf(field), DAY_INTERVAL, h.zoneId());
+                        } else if (field instanceof Function) {
+                            key = new GroupByDateHistogram(aggId, ((Function) field).asScript(), DAY_INTERVAL, h.zoneId());
+                        }
+                    }
+                    // All other intervals will be fixed_intervals expressed in ms.
                     else {
                         long intervalAsMillis = Intervals.inMillis(h.interval());
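
To summarise the branch added above without the planner plumbing, here is a hypothetical, simplified stand-in (its helper name and its use of plain Period/Duration instead of the SQL interval wrappers are assumptions, not the commit's code): intervals of exactly one year, one month or one day become the calendar intervals "1y", "1M" and "1d", everything else becomes a fixed interval in milliseconds.

import java.time.Duration;
import java.time.Period;

public class HistogramIntervalChoice {

    // Returns either a calendar interval ("1y", "1M", "1d") or a fixed interval in ms.
    static String chooseInterval(Object value, long intervalInMillis) {
        if (Period.ofYears(1).equals(value)) {
            return "1y";                    // calendar_interval
        }
        if (Period.ofMonths(1).equals(value)) {
            return "1M";                    // calendar_interval
        }
        if (Duration.ofDays(1).equals(value)) {
            return "1d";                    // calendar_interval (this commit's addition)
        }
        return intervalInMillis + "ms";     // fixed_interval for everything else
    }

    public static void main(String[] args) {
        System.out.println(chooseInterval(Duration.ofDays(1), Duration.ofDays(1).toMillis()));     // 1d
        System.out.println(chooseInterval(Duration.ofHours(29), Duration.ofHours(29).toMillis())); // 104400000ms
    }
}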

x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java

+39
@@ -1078,6 +1078,45 @@ public void testGroupByMoreMonthsHistogramQueryTranslator() {
                 + "\"fixed_interval\":\"12960000000ms\",\"time_zone\":\"Z\"}}}]}}}"));
     }
 
+    public void testGroupByOneDayHistogramQueryTranslator() {
+        PhysicalPlan p = optimizeAndPlan("SELECT HISTOGRAM(date, INTERVAL 1 DAY) AS h FROM test GROUP BY h");
+        assertEquals(EsQueryExec.class, p.getClass());
+        EsQueryExec eqe = (EsQueryExec) p;
+        assertEquals(1, eqe.output().size());
+        assertEquals("h", eqe.output().get(0).qualifiedName());
+        assertEquals(DATETIME, eqe.output().get(0).dataType());
+        assertThat(eqe.queryContainer().aggs().asAggBuilder().toString().replaceAll("\\s+", ""),
+            endsWith("\"date_histogram\":{\"field\":\"date\",\"missing_bucket\":true,\"value_type\":\"date\",\"order\":\"asc\","
+                + "\"calendar_interval\":\"1d\",\"time_zone\":\"Z\"}}}]}}}"));
+    }
+
+    public void testGroupByMoreDaysHistogramQueryTranslator() {
+        PhysicalPlan p = optimizeAndPlan("SELECT HISTOGRAM(date, INTERVAL '1 5' DAY TO HOUR) AS h FROM test GROUP BY h");
+        assertEquals(EsQueryExec.class, p.getClass());
+        EsQueryExec eqe = (EsQueryExec) p;
+        assertEquals(1, eqe.output().size());
+        assertEquals("h", eqe.output().get(0).qualifiedName());
+        assertEquals(DATETIME, eqe.output().get(0).dataType());
+        assertThat(eqe.queryContainer().aggs().asAggBuilder().toString().replaceAll("\\s+", ""),
+            endsWith("\"date_histogram\":{\"field\":\"date\",\"missing_bucket\":true,\"value_type\":\"date\",\"order\":\"asc\","
+                + "\"fixed_interval\":\"104400000ms\",\"time_zone\":\"Z\"}}}]}}}"));
+    }
+
+    public void testGroupByMoreDaysHistogram_WithFunction_QueryTranslator() {
+        PhysicalPlan p = optimizeAndPlan("SELECT HISTOGRAM(date + INTERVAL 5 DAYS, INTERVAL 1 DAY) AS h FROM test GROUP BY h");
+        assertEquals(EsQueryExec.class, p.getClass());
+        EsQueryExec eqe = (EsQueryExec) p;
+        assertEquals(1, eqe.output().size());
+        assertEquals("h", eqe.output().get(0).qualifiedName());
+        assertEquals(DATETIME, eqe.output().get(0).dataType());
+        assertThat(eqe.queryContainer().aggs().asAggBuilder().toString().replaceAll("\\s+", ""),
+            endsWith("\"date_histogram\":{\"script\":{\"source\":\"InternalSqlScriptUtils.add(" +
+                "InternalSqlScriptUtils.docValue(doc,params.v0),InternalSqlScriptUtils.intervalDayTime(params.v1,params.v2))\"," +
+                "\"lang\":\"painless\",\"params\":{\"v0\":\"date\",\"v1\":\"PT120H\",\"v2\":\"INTERVAL_DAY\"}}," +
+                "\"missing_bucket\":true,\"value_type\":\"long\",\"order\":\"asc\"," +
+                "\"calendar_interval\":\"1d\",\"time_zone\":\"Z\"}}}]}}}"));
+    }
+
     public void testGroupByYearAndScalarsQueryTranslator() {
         PhysicalPlan p = optimizeAndPlan("SELECT YEAR(CAST(date + INTERVAL 5 months AS DATE)) FROM test GROUP BY 1");
         assertEquals(EsQueryExec.class, p.getClass());
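
The fixed_interval value asserted in testGroupByMoreDaysHistogramQueryTranslator is just the interval literal converted to milliseconds; a quick sanity check with java.time (not part of the commit):

import java.time.Duration;

public class IntervalMillisCheck {
    public static void main(String[] args) {
        // INTERVAL '1 5' DAY TO HOUR = 1 day + 5 hours = 29 hours
        System.out.println(Duration.ofDays(1).plusHours(5).toMillis()); // 104400000
        // A plain 1-day interval would be 86400000 ms as a fixed interval; after this
        // commit it is emitted as the calendar interval "1d" instead.
        System.out.println(Duration.ofDays(1).toMillis());              // 86400000
    }
}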
