Skip to content

Commit 7687e30

Browse files
committed
Speed up date_histogram by precomputing ranges
A few of us were talking about ways to speed up the `date_histogram` using the index for the timestamp rather than the doc values. To do that we'd have to pre-compute all of the "round down" points in the index. It turns out that *just* precomputing those values speeds up rounding fairly significantly: ``` Benchmark (count) (interval) (range) (zone) Mode Cnt Score Error Units before 10000000 calendar month 2000-10-28 to 2000-10-31 UTC avgt 10 96461080.982 ± 616373.011 ns/op before 10000000 calendar month 2000-10-28 to 2000-10-31 America/New_York avgt 10 130598950.850 ± 1249189.867 ns/op after 10000000 calendar month 2000-10-28 to 2000-10-31 UTC avgt 10 52311775.080 ± 107171.092 ns/op after 10000000 calendar month 2000-10-28 to 2000-10-31 America/New_York avgt 10 54800134.968 ± 373844.796 ns/op ``` That's a 46% speed up when there isn't a time zone and a 58% speed up when there is. This doesn't work for every time zone: specifically, those that have two midnights in a single day due to daylight saving time will produce wonky results, so they don't get the optimization. Second, this requires a few expensive computations up front to make the transition array. And if the transition array is too large then we give up and use the original mechanism, throwing away all of the work we did to build the array. This seems appropriate for most usages of `round`, but this change uses it for *all* usages of `round`. That seems ok for now, but it might be worth investigating in a follow up. I ran a macrobenchmark as well which showed an 11% performance improvement. *BUT* the benchmark wasn't tuned for my desktop so it overwhelmed it and might have produced "funny" results. 
I think it is pretty clear that this is an improvement, but I know the measurement is weird: ``` Benchmark (count) (interval) (range) (zone) Mode Cnt Score Error Units before 10000000 calendar month 2000-10-28 to 2000-10-31 UTC avgt 10 96461080.982 ± 616373.011 ns/op before 10000000 calendar month 2000-10-28 to 2000-10-31 America/New_York avgt 10 130598950.850 ± 1249189.867 ns/op after 10000000 calendar month 2000-10-28 to 2000-10-31 UTC avgt 10 52311775.080 ± 107171.092 ns/op after 10000000 calendar month 2000-10-28 to 2000-10-31 America/New_York avgt 10 54800134.968 ± 373844.796 ns/op Before: | Min Throughput | hourly_agg | 0.11 | ops/s | | Median Throughput | hourly_agg | 0.11 | ops/s | | Max Throughput | hourly_agg | 0.11 | ops/s | | 50th percentile latency | hourly_agg | 650623 | ms | | 90th percentile latency | hourly_agg | 821478 | ms | | 99th percentile latency | hourly_agg | 859780 | ms | | 100th percentile latency | hourly_agg | 864030 | ms | | 50th percentile service time | hourly_agg | 9268.71 | ms | | 90th percentile service time | hourly_agg | 9380 | ms | | 99th percentile service time | hourly_agg | 9626.88 | ms | |100th percentile service time | hourly_agg | 9884.27 | ms | | error rate | hourly_agg | 0 | % | After: | Min Throughput | hourly_agg | 0.12 | ops/s | | Median Throughput | hourly_agg | 0.12 | ops/s | | Max Throughput | hourly_agg | 0.12 | ops/s | | 50th percentile latency | hourly_agg | 519254 | ms | | 90th percentile latency | hourly_agg | 653099 | ms | | 99th percentile latency | hourly_agg | 683276 | ms | | 100th percentile latency | hourly_agg | 686611 | ms | | 50th percentile service time | hourly_agg | 8371.41 | ms | | 90th percentile service time | hourly_agg | 8407.02 | ms | | 99th percentile service time | hourly_agg | 8536.64 | ms | |100th percentile service time | hourly_agg | 8538.54 | ms | | error rate | hourly_agg | 0 | % | ```
1 parent ab55d67 commit 7687e30

File tree

2 files changed

+77
-4
lines changed

2 files changed

+77
-4
lines changed

server/src/main/java/org/elasticsearch/common/Rounding.java

+68-2
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
*/
1919
package org.elasticsearch.common;
2020

21+
import org.apache.lucene.util.ArrayUtil;
2122
import org.elasticsearch.ElasticsearchException;
2223
import org.elasticsearch.Version;
2324
import org.elasticsearch.common.LocalTimeOffset.Gap;
@@ -44,8 +45,10 @@
4445
import java.time.temporal.TemporalQueries;
4546
import java.time.zone.ZoneOffsetTransition;
4647
import java.time.zone.ZoneRules;
48+
import java.util.Arrays;
4749
import java.util.List;
4850
import java.util.Objects;
51+
import java.util.Set;
4952
import java.util.concurrent.TimeUnit;
5053

5154
/**
@@ -401,8 +404,22 @@ private LocalDateTime truncateLocalDateTime(LocalDateTime localDateTime) {
401404
}
402405
}
403406

407+
/**
408+
* Time zones with two midnights get "funny" non-continuous rounding
409+
* that isn't compatible with the pre-computed array rounding.
410+
*/
411+
private static final Set<String> HAS_TWO_MIDNIGHTS = Set.of("America/Moncton", "America/St_Johns", "Canada/Newfoundland");
412+
404413
@Override
405414
public Prepared prepare(long minUtcMillis, long maxUtcMillis) {
415+
Prepared orig = prepareOffsetRounding(minUtcMillis, maxUtcMillis);
416+
if (unitRoundsToMidnight && HAS_TWO_MIDNIGHTS.contains(timeZone.getId())) {
417+
return orig;
418+
}
419+
return maybeUseArray(orig, minUtcMillis, maxUtcMillis, 128);
420+
}
421+
422+
private Prepared prepareOffsetRounding(long minUtcMillis, long maxUtcMillis) {
406423
long minLookup = minUtcMillis - unit.extraLocalOffsetLookup();
407424
long maxLookup = maxUtcMillis;
408425

@@ -421,7 +438,6 @@ public Prepared prepare(long minUtcMillis, long maxUtcMillis) {
421438
// Range too long, just use java.time
422439
return prepareJavaTime();
423440
}
424-
425441
LocalTimeOffset fixedOffset = lookup.fixedInRange(minLookup, maxLookup);
426442
if (fixedOffset != null) {
427443
// The time zone is effectively fixed
@@ -1015,7 +1031,7 @@ public byte id() {
10151031

10161032
@Override
10171033
public Prepared prepare(long minUtcMillis, long maxUtcMillis) {
1018-
return wrapPreparedRounding(delegate.prepare(minUtcMillis, maxUtcMillis));
1034+
return wrapPreparedRounding(delegate.prepare(minUtcMillis - offset, maxUtcMillis - offset));
10191035
}
10201036

10211037
@Override
@@ -1085,4 +1101,54 @@ public static Rounding read(StreamInput in) throws IOException {
10851101
throw new ElasticsearchException("unknown rounding id [" + id + "]");
10861102
}
10871103
}
1104+
1105+
/**
1106+
* Attempt to build a {@link Prepared} implementation that relies on pre-calcuated
1107+
* "round down" points. If there would be more than {@code max} points then return
1108+
* the original implementation, otherwise return the new, faster implementation.
1109+
*/
1110+
static Prepared maybeUseArray(Prepared orig, long minUtcMillis, long maxUtcMillis, int max) {
1111+
long[] values = new long[1];
1112+
long rounded = orig.round(minUtcMillis);
1113+
int i = 0;
1114+
values[i++] = rounded;
1115+
while ((rounded = orig.nextRoundingValue(rounded)) <= maxUtcMillis) {
1116+
if (i >= max) {
1117+
return orig;
1118+
}
1119+
assert values[i - 1] == orig.round(rounded - 1);
1120+
values = ArrayUtil.grow(values, i + 1);
1121+
values[i++]= rounded;
1122+
}
1123+
return new ArrayRounding(values, i, orig);
1124+
}
1125+
1126+
/**
1127+
* Implementation of {@link Prepared} using pre-calculated "round down" points.
1128+
*/
1129+
private static class ArrayRounding implements Prepared {
1130+
private final long[] values;
1131+
private int max;
1132+
private final Prepared delegate;
1133+
1134+
private ArrayRounding(long[] values, int max, Prepared delegate) {
1135+
this.values = values;
1136+
this.max = max;
1137+
this.delegate = delegate;
1138+
}
1139+
1140+
@Override
1141+
public long round(long utcMillis) {
1142+
int idx = Arrays.binarySearch(values, 0, max, utcMillis);
1143+
if (idx < 0) {
1144+
idx = -2 - idx;
1145+
}
1146+
return values[idx];
1147+
}
1148+
1149+
@Override
1150+
public long nextRoundingValue(long utcMillis) {
1151+
return delegate.nextRoundingValue(utcMillis);
1152+
}
1153+
}
10881154
}

server/src/test/java/org/elasticsearch/common/RoundingTests.java

+9-2
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919

2020
package org.elasticsearch.common;
2121

22+
import com.carrotsearch.randomizedtesting.annotations.Repeat;
23+
2224
import org.elasticsearch.common.collect.Tuple;
2325
import org.elasticsearch.common.rounding.DateTimeUnit;
2426
import org.elasticsearch.common.time.DateFormatter;
@@ -231,7 +233,7 @@ public void testRandomTimeUnitRounding() {
231233
Rounding.DateTimeUnit unit = randomFrom(Rounding.DateTimeUnit.values());
232234
ZoneId tz = randomZone();
233235
Rounding rounding = new Rounding.TimeUnitRounding(unit, tz);
234-
long[] bounds = randomDateBounds();
236+
long[] bounds = randomDateBounds(unit);
235237
Rounding.Prepared prepared = rounding.prepare(bounds[0], bounds[1]);
236238

237239
// Check that rounding is internally consistent and consistent with nextRoundingValue
@@ -894,8 +896,13 @@ private static long randomDate() {
894896
return Math.abs(randomLong() % (2 * (long) 10e11)); // 1970-01-01T00:00:00Z - 2033-05-18T05:33:20.000+02:00
895897
}
896898

897-
private static long[] randomDateBounds() {
899+
private static long[] randomDateBounds(Rounding.DateTimeUnit unit) {
898900
long b1 = randomDate();
901+
if (randomBoolean()) {
902+
// Sometimes use a fairly close date
903+
return new long[] {b1, b1 + unit.extraLocalOffsetLookup() * between(1, 40)};
904+
}
905+
// Otherwise use a totally random date
899906
long b2 = randomValueOtherThan(b1, RoundingTests::randomDate);
900907
if (b1 < b2) {
901908
return new long[] {b1, b2};

0 commit comments

Comments
 (0)