Skip to content

Commit ce24115

Browse files
authored
Speed up date_histogram by precomputing ranges (backport of #61467) (#62880)
A few of us were talking about ways to speed up the `date_histogram` using the index for the timestamp rather than the doc values. To do that we'd have to pre-compute all of the "round down" points in the index. It turns out that *just* precomputing those values speeds up rounding fairly significantly: ``` Benchmark (count) (interval) (range) (zone) Mode Cnt Score Error Units before 10000000 calendar month 2000-10-28 to 2000-10-31 UTC avgt 10 96461080.982 ± 616373.011 ns/op before 10000000 calendar month 2000-10-28 to 2000-10-31 America/New_York avgt 10 130598950.850 ± 1249189.867 ns/op after 10000000 calendar month 2000-10-28 to 2000-10-31 UTC avgt 10 52311775.080 ± 107171.092 ns/op after 10000000 calendar month 2000-10-28 to 2000-10-31 America/New_York avgt 10 54800134.968 ± 373844.796 ns/op ``` That's a 46% speed up when there isn't a time zone and a 58% speed up when there is. This doesn't work for every time zone: specifically, those that have two midnights in a single day due to daylight saving time will produce wonky results. So they don't get the optimization. Second, this requires a few expensive computations up front to make the transition array. And if the transition array is too large then we give up and use the original mechanism, throwing away all of the work we did to build the array. This seems appropriate for most usages of `round`, but this change uses it for *all* usages of `round`. That seems ok for now, but it might be worth investigating in a follow up. I ran a macrobenchmark as well which showed an 11% performance improvement. *BUT* the benchmark wasn't tuned for my desktop so it overwhelmed it and might have produced "funny" results. 
I think it is pretty clear that this is an improvement, but know that the measurement is weird: ``` Benchmark (count) (interval) (range) (zone) Mode Cnt Score Error Units before 10000000 calendar month 2000-10-28 to 2000-10-31 UTC avgt 10 96461080.982 ± 616373.011 ns/op before 10000000 calendar month 2000-10-28 to 2000-10-31 America/New_York avgt 10 130598950.850 ± 1249189.867 ns/op after 10000000 calendar month 2000-10-28 to 2000-10-31 UTC avgt 10 52311775.080 ± 107171.092 ns/op after 10000000 calendar month 2000-10-28 to 2000-10-31 America/New_York avgt 10 54800134.968 ± 373844.796 ns/op Before: | Min Throughput | hourly_agg | 0.11 | ops/s | | Median Throughput | hourly_agg | 0.11 | ops/s | | Max Throughput | hourly_agg | 0.11 | ops/s | | 50th percentile latency | hourly_agg | 650623 | ms | | 90th percentile latency | hourly_agg | 821478 | ms | | 99th percentile latency | hourly_agg | 859780 | ms | | 100th percentile latency | hourly_agg | 864030 | ms | | 50th percentile service time | hourly_agg | 9268.71 | ms | | 90th percentile service time | hourly_agg | 9380 | ms | | 99th percentile service time | hourly_agg | 9626.88 | ms | |100th percentile service time | hourly_agg | 9884.27 | ms | | error rate | hourly_agg | 0 | % | After: | Min Throughput | hourly_agg | 0.12 | ops/s | | Median Throughput | hourly_agg | 0.12 | ops/s | | Max Throughput | hourly_agg | 0.12 | ops/s | | 50th percentile latency | hourly_agg | 519254 | ms | | 90th percentile latency | hourly_agg | 653099 | ms | | 99th percentile latency | hourly_agg | 683276 | ms | | 100th percentile latency | hourly_agg | 686611 | ms | | 50th percentile service time | hourly_agg | 8371.41 | ms | | 90th percentile service time | hourly_agg | 8407.02 | ms | | 99th percentile service time | hourly_agg | 8536.64 | ms | |100th percentile service time | hourly_agg | 8538.54 | ms | | error rate | hourly_agg | 0 | % | ```
1 parent 83ec8dd commit ce24115

File tree

4 files changed

+327
-55
lines changed

4 files changed

+327
-55
lines changed

server/src/main/java/org/elasticsearch/common/LocalTimeOffset.java

+76-4
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
import java.time.Instant;
2323
import java.time.ZoneId;
24+
import java.time.temporal.ChronoField;
2425
import java.time.zone.ZoneOffsetTransition;
2526
import java.time.zone.ZoneOffsetTransitionRule;
2627
import java.time.zone.ZoneRules;
@@ -173,6 +174,12 @@ public interface Strategy {
173174
*/
174175
protected abstract LocalTimeOffset offsetContaining(long utcMillis);
175176

177+
/**
178+
* Does this transition or any previous transitions move back to the
179+
* previous day? See {@link Lookup#anyMoveBackToPreviousDay()} for rules.
180+
*/
181+
protected abstract boolean anyMoveBackToPreviousDay();
182+
176183
@Override
177184
public String toString() {
178185
return toString(millis);
@@ -194,6 +201,15 @@ public abstract static class Lookup {
194201
*/
195202
public abstract LocalTimeOffset fixedInRange(long minUtcMillis, long maxUtcMillis);
196203

204+
/**
205+
* Do any of the transitions move back to the previous day?
206+
* <p>
207+
* Note: If an overlap occurs at, say, 1 am and jumps back to
208+
* <strong>exactly</strong> midnight then it doesn't count because
209+
* midnight is still counted as being in the "next" day.
210+
*/
211+
public abstract boolean anyMoveBackToPreviousDay();
212+
197213
/**
198214
* The number of offsets in the lookup. Package private for testing.
199215
*/
@@ -224,6 +240,11 @@ protected LocalTimeOffset offsetContaining(long utcMillis) {
224240
return this;
225241
}
226242

243+
@Override
244+
protected boolean anyMoveBackToPreviousDay() {
245+
return false;
246+
}
247+
227248
@Override
228249
protected String toString(long millis) {
229250
return Long.toString(millis);
@@ -297,6 +318,11 @@ public long firstMissingLocalTime() {
297318
return firstMissingLocalTime;
298319
}
299320

321+
@Override
322+
protected boolean anyMoveBackToPreviousDay() {
323+
return previous().anyMoveBackToPreviousDay();
324+
}
325+
300326
@Override
301327
protected String toString(long millis) {
302328
return "Gap of " + millis + "@" + Instant.ofEpochMilli(startUtcMillis());
@@ -306,13 +332,21 @@ protected String toString(long millis) {
306332
public static class Overlap extends Transition {
307333
private final long firstOverlappingLocalTime;
308334
private final long firstNonOverlappingLocalTime;
309-
310-
private Overlap(long millis, LocalTimeOffset previous, long startUtcMillis,
311-
long firstOverlappingLocalTime, long firstNonOverlappingLocalTime) {
335+
private final boolean movesBackToPreviousDay;
336+
337+
private Overlap(
338+
long millis,
339+
LocalTimeOffset previous,
340+
long startUtcMillis,
341+
long firstOverlappingLocalTime,
342+
long firstNonOverlappingLocalTime,
343+
boolean movesBackToPreviousDay
344+
) {
312345
super(millis, previous, startUtcMillis);
313346
this.firstOverlappingLocalTime = firstOverlappingLocalTime;
314347
this.firstNonOverlappingLocalTime = firstNonOverlappingLocalTime;
315348
assert firstOverlappingLocalTime < firstNonOverlappingLocalTime;
349+
this.movesBackToPreviousDay = movesBackToPreviousDay;
316350
}
317351

318352
@Override
@@ -340,6 +374,11 @@ public long firstOverlappingLocalTime() {
340374
return firstOverlappingLocalTime;
341375
}
342376

377+
@Override
378+
protected boolean anyMoveBackToPreviousDay() {
379+
return movesBackToPreviousDay || previous().anyMoveBackToPreviousDay();
380+
}
381+
343382
@Override
344383
protected String toString(long millis) {
345384
return "Overlap of " + millis + "@" + Instant.ofEpochMilli(startUtcMillis());
@@ -374,6 +413,11 @@ int size() {
374413
public String toString() {
375414
return String.format(Locale.ROOT, "FixedLookup[for %s at %s]", zone, fixed);
376415
}
416+
417+
@Override
418+
public boolean anyMoveBackToPreviousDay() {
419+
return false;
420+
}
377421
}
378422

379423
/**
@@ -405,6 +449,11 @@ public LocalTimeOffset innerLookup(long utcMillis) {
405449
int size() {
406450
return size;
407451
}
452+
453+
@Override
454+
public boolean anyMoveBackToPreviousDay() {
455+
return lastOffset.anyMoveBackToPreviousDay();
456+
}
408457
}
409458

410459
/**
@@ -452,6 +501,11 @@ int size() {
452501
return offsets.length;
453502
}
454503

504+
@Override
505+
public boolean anyMoveBackToPreviousDay() {
506+
return offsets[offsets.length - 1].anyMoveBackToPreviousDay();
507+
}
508+
455509
@Override
456510
public String toString() {
457511
return String.format(Locale.ROOT, "TransitionArrayLookup[for %s between %s and %s]",
@@ -504,7 +558,25 @@ protected static Transition buildTransition(ZoneOffsetTransition transition, Loc
504558
}
505559
long firstOverlappingLocalTime = utcStart + offsetAfterMillis;
506560
long firstNonOverlappingLocalTime = utcStart + offsetBeforeMillis;
507-
return new Overlap(offsetAfterMillis, previous, utcStart, firstOverlappingLocalTime, firstNonOverlappingLocalTime);
561+
return new Overlap(
562+
offsetAfterMillis,
563+
previous,
564+
utcStart,
565+
firstOverlappingLocalTime,
566+
firstNonOverlappingLocalTime,
567+
movesBackToPreviousDay(transition)
568+
);
569+
}
570+
571+
private static boolean movesBackToPreviousDay(ZoneOffsetTransition transition) {
572+
if (transition.getDateTimeBefore().getDayOfMonth() == transition.getDateTimeAfter().getDayOfMonth()) {
573+
return false;
574+
}
575+
if (transition.getDateTimeBefore().getLong(ChronoField.NANO_OF_DAY) == 0L) {
576+
// If we change *at* midnight this is ok.
577+
return false;
578+
}
579+
return true;
508580
}
509581
}
510582

server/src/main/java/org/elasticsearch/common/Rounding.java

+93-4
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
*/
1919
package org.elasticsearch.common;
2020

21+
import org.apache.lucene.util.ArrayUtil;
2122
import org.elasticsearch.ElasticsearchException;
2223
import org.elasticsearch.Version;
2324
import org.elasticsearch.common.LocalTimeOffset.Gap;
@@ -44,6 +45,7 @@
4445
import java.time.temporal.TemporalQueries;
4546
import java.time.zone.ZoneOffsetTransition;
4647
import java.time.zone.ZoneRules;
48+
import java.util.Arrays;
4749
import java.util.List;
4850
import java.util.Locale;
4951
import java.util.Objects;
@@ -405,6 +407,34 @@ public Rounding build() {
405407
}
406408
}
407409

410+
private abstract class PreparedRounding implements Prepared {
411+
/**
412+
* Attempt to build a {@link Prepared} implementation that relies on pre-calcuated
413+
* "round down" points. If there would be more than {@code max} points then return
414+
* the original implementation, otherwise return the new, faster implementation.
415+
*/
416+
protected Prepared maybeUseArray(long minUtcMillis, long maxUtcMillis, int max) {
417+
long[] values = new long[1];
418+
long rounded = round(minUtcMillis);
419+
int i = 0;
420+
values[i++] = rounded;
421+
while ((rounded = nextRoundingValue(rounded)) <= maxUtcMillis) {
422+
if (i >= max) {
423+
return this;
424+
}
425+
/*
426+
* We expect a time in the last transition (rounded - 1) to round
427+
* to the last value we calculated. If it doesn't then we're
428+
* probably doing something wrong here....
429+
*/
430+
assert values[i - 1] == round(rounded - 1);
431+
values = ArrayUtil.grow(values, i + 1);
432+
values[i++]= rounded;
433+
}
434+
return new ArrayRounding(values, i, this);
435+
}
436+
}
437+
408438
static class TimeUnitRounding extends Rounding {
409439
static final byte ID = 1;
410440

@@ -474,6 +504,15 @@ private LocalDateTime truncateLocalDateTime(LocalDateTime localDateTime) {
474504

475505
@Override
476506
public Prepared prepare(long minUtcMillis, long maxUtcMillis) {
507+
/*
508+
* 128 is a power of two that isn't huge. We might be able to do
509+
* better if the limit was based on the actual type of prepared
510+
* rounding but this'll do for now.
511+
*/
512+
return prepareOffsetOrJavaTimeRounding(minUtcMillis, maxUtcMillis).maybeUseArray(minUtcMillis, maxUtcMillis, 128);
513+
}
514+
515+
private TimeUnitPreparedRounding prepareOffsetOrJavaTimeRounding(long minUtcMillis, long maxUtcMillis) {
477516
long minLookup = minUtcMillis - unit.extraLocalOffsetLookup();
478517
long maxLookup = maxUtcMillis;
479518

@@ -492,7 +531,6 @@ public Prepared prepare(long minUtcMillis, long maxUtcMillis) {
492531
// Range too long, just use java.time
493532
return prepareJavaTime();
494533
}
495-
496534
LocalTimeOffset fixedOffset = lookup.fixedInRange(minLookup, maxLookup);
497535
if (fixedOffset != null) {
498536
// The time zone is effectively fixed
@@ -521,7 +559,7 @@ public Prepared prepareForUnknown() {
521559
}
522560

523561
@Override
524-
Prepared prepareJavaTime() {
562+
TimeUnitPreparedRounding prepareJavaTime() {
525563
if (unitRoundsToMidnight) {
526564
return new JavaTimeToMidnightRounding();
527565
}
@@ -560,7 +598,7 @@ public String toString() {
560598
return "Rounding[" + unit + " in " + timeZone + "]";
561599
}
562600

563-
private abstract class TimeUnitPreparedRounding implements Prepared {
601+
private abstract class TimeUnitPreparedRounding extends PreparedRounding {
564602
@Override
565603
public double roundingSize(long utcMillis, DateTimeUnit timeUnit) {
566604
if (timeUnit.isMillisBased == unit.isMillisBased) {
@@ -654,6 +692,14 @@ public long inOverlap(long localMillis, Overlap overlap) {
654692
public long beforeOverlap(long localMillis, Overlap overlap) {
655693
return overlap.previous().localToUtc(localMillis, this);
656694
}
695+
696+
@Override
697+
protected Prepared maybeUseArray(long minUtcMillis, long maxUtcMillis, int max) {
698+
if (lookup.anyMoveBackToPreviousDay()) {
699+
return this;
700+
}
701+
return super.maybeUseArray(minUtcMillis, maxUtcMillis, max);
702+
}
657703
}
658704

659705
private class NotToMidnightRounding extends AbstractNotToMidnightRounding implements LocalTimeOffset.Strategy {
@@ -713,6 +759,12 @@ public long nextRoundingValue(long utcMillis) {
713759
return firstTimeOnDay(localMidnight);
714760
}
715761

762+
@Override
763+
protected Prepared maybeUseArray(long minUtcMillis, long maxUtcMillis, int max) {
764+
// We don't have the right information needed to know if this is safe for this time zone so we always use java rounding
765+
return this;
766+
}
767+
716768
private long firstTimeOnDay(LocalDateTime localMidnight) {
717769
assert localMidnight.toLocalTime().equals(LocalTime.of(0, 0, 0)) : "firstTimeOnDay should only be called at midnight";
718770

@@ -1121,7 +1173,7 @@ public byte id() {
11211173

11221174
@Override
11231175
public Prepared prepare(long minUtcMillis, long maxUtcMillis) {
1124-
return wrapPreparedRounding(delegate.prepare(minUtcMillis, maxUtcMillis));
1176+
return wrapPreparedRounding(delegate.prepare(minUtcMillis - offset, maxUtcMillis - offset));
11251177
}
11261178

11271179
@Override
@@ -1196,4 +1248,41 @@ public static Rounding read(StreamInput in) throws IOException {
11961248
throw new ElasticsearchException("unknown rounding id [" + id + "]");
11971249
}
11981250
}
1251+
1252+
/**
1253+
* Implementation of {@link Prepared} using pre-calculated "round down" points.
1254+
*/
1255+
private static class ArrayRounding implements Prepared {
1256+
private final long[] values;
1257+
private final int max;
1258+
private final Prepared delegate;
1259+
1260+
private ArrayRounding(long[] values, int max, Prepared delegate) {
1261+
this.values = values;
1262+
this.max = max;
1263+
this.delegate = delegate;
1264+
}
1265+
1266+
@Override
1267+
public long round(long utcMillis) {
1268+
assert values[0] <= utcMillis : "utcMillis must be after " + values[0];
1269+
int idx = Arrays.binarySearch(values, 0, max, utcMillis);
1270+
assert idx != -1 : "The insertion point is before the array! This should have tripped the assertion above.";
1271+
assert -1 - idx <= values.length : "This insertion point is after the end of the array.";
1272+
if (idx < 0) {
1273+
idx = -2 - idx;
1274+
}
1275+
return values[idx];
1276+
}
1277+
1278+
@Override
1279+
public long nextRoundingValue(long utcMillis) {
1280+
return delegate.nextRoundingValue(utcMillis);
1281+
}
1282+
1283+
@Override
1284+
public double roundingSize(long utcMillis, DateTimeUnit timeUnit) {
1285+
return delegate.roundingSize(utcMillis, timeUnit);
1286+
}
1287+
}
11991288
}

0 commit comments

Comments
 (0)