Skip to content

Commit d9acac2

Browse files
authored
Speed up date_histogram by precomputing ranges (#61467)
A few of us were talking about ways to speed up the `date_histogram` using the index for the timestamp rather than the doc values. To do that we'd have to pre-compute all of the "round down" points in the index. It turns out that *just* precomputing those values speeds up rounding fairly significantly: ``` Benchmark (count) (interval) (range) (zone) Mode Cnt Score Error Units before 10000000 calendar month 2000-10-28 to 2000-10-31 UTC avgt 10 96461080.982 ± 616373.011 ns/op before 10000000 calendar month 2000-10-28 to 2000-10-31 America/New_York avgt 10 130598950.850 ± 1249189.867 ns/op after 10000000 calendar month 2000-10-28 to 2000-10-31 UTC avgt 10 52311775.080 ± 107171.092 ns/op after 10000000 calendar month 2000-10-28 to 2000-10-31 America/New_York avgt 10 54800134.968 ± 373844.796 ns/op ``` That's a 46% speed up when there isn't a time zone and a 58% speed up when there is. This doesn't work for every time zone: specifically, those that have two midnights in a single day due to daylight saving time will produce wonky results. So they don't get the optimization. Second, this requires a few expensive computations up front to make the transition array. And if the transition array is too large then we give up and use the original mechanism, throwing away all of the work we did to build the array. This seems appropriate for most usages of `round`, but this change uses it for *all* usages of `round`. That seems ok for now, but it might be worth investigating in a follow up. I ran a macrobenchmark as well which showed an 11% performance improvement. *BUT* the benchmark wasn't tuned for my desktop so it overwhelmed it and might have produced "funny" results. 
I think it is pretty clear that this is an improvement, but know the measurement is weird: ``` Benchmark (count) (interval) (range) (zone) Mode Cnt Score Error Units before 10000000 calendar month 2000-10-28 to 2000-10-31 UTC avgt 10 96461080.982 ± 616373.011 ns/op before 10000000 calendar month 2000-10-28 to 2000-10-31 America/New_York avgt 10 130598950.850 ± 1249189.867 ns/op after 10000000 calendar month 2000-10-28 to 2000-10-31 UTC avgt 10 52311775.080 ± 107171.092 ns/op after 10000000 calendar month 2000-10-28 to 2000-10-31 America/New_York avgt 10 54800134.968 ± 373844.796 ns/op Before: | Min Throughput | hourly_agg | 0.11 | ops/s | | Median Throughput | hourly_agg | 0.11 | ops/s | | Max Throughput | hourly_agg | 0.11 | ops/s | | 50th percentile latency | hourly_agg | 650623 | ms | | 90th percentile latency | hourly_agg | 821478 | ms | | 99th percentile latency | hourly_agg | 859780 | ms | | 100th percentile latency | hourly_agg | 864030 | ms | | 50th percentile service time | hourly_agg | 9268.71 | ms | | 90th percentile service time | hourly_agg | 9380 | ms | | 99th percentile service time | hourly_agg | 9626.88 | ms | |100th percentile service time | hourly_agg | 9884.27 | ms | | error rate | hourly_agg | 0 | % | After: | Min Throughput | hourly_agg | 0.12 | ops/s | | Median Throughput | hourly_agg | 0.12 | ops/s | | Max Throughput | hourly_agg | 0.12 | ops/s | | 50th percentile latency | hourly_agg | 519254 | ms | | 90th percentile latency | hourly_agg | 653099 | ms | | 99th percentile latency | hourly_agg | 683276 | ms | | 100th percentile latency | hourly_agg | 686611 | ms | | 50th percentile service time | hourly_agg | 8371.41 | ms | | 90th percentile service time | hourly_agg | 8407.02 | ms | | 99th percentile service time | hourly_agg | 8536.64 | ms | |100th percentile service time | hourly_agg | 8538.54 | ms | | error rate | hourly_agg | 0 | % | ```
1 parent 952c13c commit d9acac2

File tree

4 files changed

+327
-55
lines changed

4 files changed

+327
-55
lines changed

server/src/main/java/org/elasticsearch/common/LocalTimeOffset.java

+76-4
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import java.time.Instant;
2323
import java.time.LocalDate;
2424
import java.time.ZoneId;
25+
import java.time.temporal.ChronoField;
2526
import java.time.zone.ZoneOffsetTransition;
2627
import java.time.zone.ZoneOffsetTransitionRule;
2728
import java.time.zone.ZoneRules;
@@ -174,6 +175,12 @@ public interface Strategy {
174175
*/
175176
protected abstract LocalTimeOffset offsetContaining(long utcMillis);
176177

178+
/**
179+
* Does this transition or any previous transitions move back to the
180+
* previous day? See {@link Lookup#anyMoveBackToPreviousDay()} for rules.
181+
*/
182+
protected abstract boolean anyMoveBackToPreviousDay();
183+
177184
@Override
178185
public String toString() {
179186
return toString(millis);
@@ -195,6 +202,15 @@ public abstract static class Lookup {
195202
*/
196203
public abstract LocalTimeOffset fixedInRange(long minUtcMillis, long maxUtcMillis);
197204

205+
/**
206+
* Do any of the transitions move back to the previous day?
207+
* <p>
208+
* Note: If an overlap occurs at, say, 1 am and jumps back to
209+
* <strong>exactly</strong> midnight then it doesn't count because
210+
* midnight is still counted as being in the "next" day.
211+
*/
212+
public abstract boolean anyMoveBackToPreviousDay();
213+
198214
/**
199215
* The number of offsets in the lookup. Package private for testing.
200216
*/
@@ -225,6 +241,11 @@ protected LocalTimeOffset offsetContaining(long utcMillis) {
225241
return this;
226242
}
227243

244+
@Override
245+
protected boolean anyMoveBackToPreviousDay() {
246+
return false;
247+
}
248+
228249
@Override
229250
protected String toString(long millis) {
230251
return Long.toString(millis);
@@ -298,6 +319,11 @@ public long firstMissingLocalTime() {
298319
return firstMissingLocalTime;
299320
}
300321

322+
@Override
323+
protected boolean anyMoveBackToPreviousDay() {
324+
return previous().anyMoveBackToPreviousDay();
325+
}
326+
301327
@Override
302328
protected String toString(long millis) {
303329
return "Gap of " + millis + "@" + Instant.ofEpochMilli(startUtcMillis());
@@ -307,13 +333,21 @@ protected String toString(long millis) {
307333
public static class Overlap extends Transition {
308334
private final long firstOverlappingLocalTime;
309335
private final long firstNonOverlappingLocalTime;
310-
311-
private Overlap(long millis, LocalTimeOffset previous, long startUtcMillis,
312-
long firstOverlappingLocalTime, long firstNonOverlappingLocalTime) {
336+
private final boolean movesBackToPreviousDay;
337+
338+
private Overlap(
339+
long millis,
340+
LocalTimeOffset previous,
341+
long startUtcMillis,
342+
long firstOverlappingLocalTime,
343+
long firstNonOverlappingLocalTime,
344+
boolean movesBackToPreviousDay
345+
) {
313346
super(millis, previous, startUtcMillis);
314347
this.firstOverlappingLocalTime = firstOverlappingLocalTime;
315348
this.firstNonOverlappingLocalTime = firstNonOverlappingLocalTime;
316349
assert firstOverlappingLocalTime < firstNonOverlappingLocalTime;
350+
this.movesBackToPreviousDay = movesBackToPreviousDay;
317351
}
318352

319353
@Override
@@ -341,6 +375,11 @@ public long firstOverlappingLocalTime() {
341375
return firstOverlappingLocalTime;
342376
}
343377

378+
@Override
379+
protected boolean anyMoveBackToPreviousDay() {
380+
return movesBackToPreviousDay || previous().anyMoveBackToPreviousDay();
381+
}
382+
344383
@Override
345384
protected String toString(long millis) {
346385
return "Overlap of " + millis + "@" + Instant.ofEpochMilli(startUtcMillis());
@@ -375,6 +414,11 @@ int size() {
375414
public String toString() {
376415
return String.format(Locale.ROOT, "FixedLookup[for %s at %s]", zone, fixed);
377416
}
417+
418+
@Override
419+
public boolean anyMoveBackToPreviousDay() {
420+
return false;
421+
}
378422
}
379423

380424
/**
@@ -406,6 +450,11 @@ public LocalTimeOffset innerLookup(long utcMillis) {
406450
int size() {
407451
return size;
408452
}
453+
454+
@Override
455+
public boolean anyMoveBackToPreviousDay() {
456+
return lastOffset.anyMoveBackToPreviousDay();
457+
}
409458
}
410459

411460
/**
@@ -453,6 +502,11 @@ int size() {
453502
return offsets.length;
454503
}
455504

505+
@Override
506+
public boolean anyMoveBackToPreviousDay() {
507+
return offsets[offsets.length - 1].anyMoveBackToPreviousDay();
508+
}
509+
456510
@Override
457511
public String toString() {
458512
return String.format(Locale.ROOT, "TransitionArrayLookup[for %s between %s and %s]",
@@ -505,7 +559,25 @@ protected static Transition buildTransition(ZoneOffsetTransition transition, Loc
505559
}
506560
long firstOverlappingLocalTime = utcStart + offsetAfterMillis;
507561
long firstNonOverlappingLocalTime = utcStart + offsetBeforeMillis;
508-
return new Overlap(offsetAfterMillis, previous, utcStart, firstOverlappingLocalTime, firstNonOverlappingLocalTime);
562+
return new Overlap(
563+
offsetAfterMillis,
564+
previous,
565+
utcStart,
566+
firstOverlappingLocalTime,
567+
firstNonOverlappingLocalTime,
568+
movesBackToPreviousDay(transition)
569+
);
570+
}
571+
572+
private static boolean movesBackToPreviousDay(ZoneOffsetTransition transition) {
573+
if (transition.getDateTimeBefore().getDayOfMonth() == transition.getDateTimeAfter().getDayOfMonth()) {
574+
return false;
575+
}
576+
if (transition.getDateTimeBefore().getLong(ChronoField.NANO_OF_DAY) == 0L) {
577+
// If we change *at* midnight this is ok.
578+
return false;
579+
}
580+
return true;
509581
}
510582
}
511583

server/src/main/java/org/elasticsearch/common/Rounding.java

+93-4
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
*/
1919
package org.elasticsearch.common;
2020

21+
import org.apache.lucene.util.ArrayUtil;
2122
import org.elasticsearch.ElasticsearchException;
2223
import org.elasticsearch.common.LocalTimeOffset.Gap;
2324
import org.elasticsearch.common.LocalTimeOffset.Overlap;
@@ -43,6 +44,7 @@
4344
import java.time.temporal.TemporalQueries;
4445
import java.time.zone.ZoneOffsetTransition;
4546
import java.time.zone.ZoneRules;
47+
import java.util.Arrays;
4648
import java.util.List;
4749
import java.util.Locale;
4850
import java.util.Objects;
@@ -404,6 +406,34 @@ public Rounding build() {
404406
}
405407
}
406408

409+
private abstract class PreparedRounding implements Prepared {
410+
/**
411+
* Attempt to build a {@link Prepared} implementation that relies on pre-calculated
412+
* "round down" points. If there would be more than {@code max} points then return
413+
* the original implementation, otherwise return the new, faster implementation.
414+
*/
415+
protected Prepared maybeUseArray(long minUtcMillis, long maxUtcMillis, int max) {
416+
long[] values = new long[1];
417+
long rounded = round(minUtcMillis);
418+
int i = 0;
419+
values[i++] = rounded;
420+
while ((rounded = nextRoundingValue(rounded)) <= maxUtcMillis) {
421+
if (i >= max) {
422+
return this;
423+
}
424+
/*
425+
* We expect a time in the last transition (rounded - 1) to round
426+
* to the last value we calculated. If it doesn't then we're
427+
* probably doing something wrong here....
428+
*/
429+
assert values[i - 1] == round(rounded - 1);
430+
values = ArrayUtil.grow(values, i + 1);
431+
values[i++]= rounded;
432+
}
433+
return new ArrayRounding(values, i, this);
434+
}
435+
}
436+
407437
static class TimeUnitRounding extends Rounding {
408438
static final byte ID = 1;
409439

@@ -468,6 +498,15 @@ private LocalDateTime truncateLocalDateTime(LocalDateTime localDateTime) {
468498

469499
@Override
470500
public Prepared prepare(long minUtcMillis, long maxUtcMillis) {
501+
/*
502+
* 128 is a power of two that isn't huge. We might be able to do
503+
* better if the limit was based on the actual type of prepared
504+
* rounding but this'll do for now.
505+
*/
506+
return prepareOffsetOrJavaTimeRounding(minUtcMillis, maxUtcMillis).maybeUseArray(minUtcMillis, maxUtcMillis, 128);
507+
}
508+
509+
private TimeUnitPreparedRounding prepareOffsetOrJavaTimeRounding(long minUtcMillis, long maxUtcMillis) {
471510
long minLookup = minUtcMillis - unit.extraLocalOffsetLookup();
472511
long maxLookup = maxUtcMillis;
473512

@@ -486,7 +525,6 @@ public Prepared prepare(long minUtcMillis, long maxUtcMillis) {
486525
// Range too long, just use java.time
487526
return prepareJavaTime();
488527
}
489-
490528
LocalTimeOffset fixedOffset = lookup.fixedInRange(minLookup, maxLookup);
491529
if (fixedOffset != null) {
492530
// The time zone is effectively fixed
@@ -515,7 +553,7 @@ public Prepared prepareForUnknown() {
515553
}
516554

517555
@Override
518-
Prepared prepareJavaTime() {
556+
TimeUnitPreparedRounding prepareJavaTime() {
519557
if (unitRoundsToMidnight) {
520558
return new JavaTimeToMidnightRounding();
521559
}
@@ -554,7 +592,7 @@ public String toString() {
554592
return "Rounding[" + unit + " in " + timeZone + "]";
555593
}
556594

557-
private abstract class TimeUnitPreparedRounding implements Prepared {
595+
private abstract class TimeUnitPreparedRounding extends PreparedRounding {
558596
@Override
559597
public double roundingSize(long utcMillis, DateTimeUnit timeUnit) {
560598
if (timeUnit.isMillisBased == unit.isMillisBased) {
@@ -648,6 +686,14 @@ public long inOverlap(long localMillis, Overlap overlap) {
648686
public long beforeOverlap(long localMillis, Overlap overlap) {
649687
return overlap.previous().localToUtc(localMillis, this);
650688
}
689+
690+
@Override
691+
protected Prepared maybeUseArray(long minUtcMillis, long maxUtcMillis, int max) {
692+
if (lookup.anyMoveBackToPreviousDay()) {
693+
return this;
694+
}
695+
return super.maybeUseArray(minUtcMillis, maxUtcMillis, max);
696+
}
651697
}
652698

653699
private class NotToMidnightRounding extends AbstractNotToMidnightRounding implements LocalTimeOffset.Strategy {
@@ -707,6 +753,12 @@ public long nextRoundingValue(long utcMillis) {
707753
return firstTimeOnDay(localMidnight);
708754
}
709755

756+
@Override
757+
protected Prepared maybeUseArray(long minUtcMillis, long maxUtcMillis, int max) {
758+
// We don't have the right information needed to know if this is safe for this time zone so we always use java rounding
759+
return this;
760+
}
761+
710762
private long firstTimeOnDay(LocalDateTime localMidnight) {
711763
assert localMidnight.toLocalTime().equals(LocalTime.of(0, 0, 0)) : "firstTimeOnDay should only be called at midnight";
712764

@@ -1107,7 +1159,7 @@ public byte id() {
11071159

11081160
@Override
11091161
public Prepared prepare(long minUtcMillis, long maxUtcMillis) {
1110-
return wrapPreparedRounding(delegate.prepare(minUtcMillis, maxUtcMillis));
1162+
return wrapPreparedRounding(delegate.prepare(minUtcMillis - offset, maxUtcMillis - offset));
11111163
}
11121164

11131165
@Override
@@ -1182,4 +1234,41 @@ public static Rounding read(StreamInput in) throws IOException {
11821234
throw new ElasticsearchException("unknown rounding id [" + id + "]");
11831235
}
11841236
}
1237+
1238+
/**
1239+
* Implementation of {@link Prepared} using pre-calculated "round down" points.
1240+
*/
1241+
private static class ArrayRounding implements Prepared {
1242+
private final long[] values;
1243+
private final int max;
1244+
private final Prepared delegate;
1245+
1246+
private ArrayRounding(long[] values, int max, Prepared delegate) {
1247+
this.values = values;
1248+
this.max = max;
1249+
this.delegate = delegate;
1250+
}
1251+
1252+
@Override
1253+
public long round(long utcMillis) {
1254+
assert values[0] <= utcMillis : "utcMillis must be after " + values[0];
1255+
int idx = Arrays.binarySearch(values, 0, max, utcMillis);
1256+
assert idx != -1 : "The insertion point is before the array! This should have tripped the assertion above.";
1257+
assert -1 - idx <= values.length : "This insertion point is after the end of the array.";
1258+
if (idx < 0) {
1259+
idx = -2 - idx;
1260+
}
1261+
return values[idx];
1262+
}
1263+
1264+
@Override
1265+
public long nextRoundingValue(long utcMillis) {
1266+
return delegate.nextRoundingValue(utcMillis);
1267+
}
1268+
1269+
@Override
1270+
public double roundingSize(long utcMillis, DateTimeUnit timeUnit) {
1271+
return delegate.roundingSize(utcMillis, timeUnit);
1272+
}
1273+
}
11851274
}

0 commit comments

Comments
 (0)