-
Notifications
You must be signed in to change notification settings - Fork 25.2k
Speed up date_histogram by precomputing ranges #61467
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
7687e30
e68463d
c13740b
4162ef1
02f0fe3
4fbcfbf
7a7ef49
4f23de0
178a158
f201a25
0e93729
35ef1c4
f1e286e
d2f1d23
2281d35
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,7 @@ | |
*/ | ||
package org.elasticsearch.common; | ||
|
||
import org.apache.lucene.util.ArrayUtil; | ||
import org.elasticsearch.ElasticsearchException; | ||
import org.elasticsearch.Version; | ||
import org.elasticsearch.common.LocalTimeOffset.Gap; | ||
|
@@ -44,8 +45,10 @@ | |
import java.time.temporal.TemporalQueries; | ||
import java.time.zone.ZoneOffsetTransition; | ||
import java.time.zone.ZoneRules; | ||
import java.util.Arrays; | ||
import java.util.List; | ||
import java.util.Objects; | ||
import java.util.Set; | ||
import java.util.concurrent.TimeUnit; | ||
|
||
/** | ||
|
@@ -401,8 +404,22 @@ private LocalDateTime truncateLocalDateTime(LocalDateTime localDateTime) { | |
} | ||
} | ||
|
||
/** | ||
* Time zones with two midnights get "funny" non-continuous rounding | ||
* that isn't compatible with the pre-computed array rounding. | ||
*/ | ||
private static final Set<String> HAS_TWO_MIDNIGHTS = Set.of("America/Moncton", "America/St_Johns", "Canada/Newfoundland"); | ||
|
||
@Override | ||
public Prepared prepare(long minUtcMillis, long maxUtcMillis) { | ||
Prepared orig = prepareOffsetRounding(minUtcMillis, maxUtcMillis); | ||
if (unitRoundsToMidnight && HAS_TWO_MIDNIGHTS.contains(timeZone.getId())) { | ||
return orig; | ||
} | ||
return maybeUseArray(orig, minUtcMillis, maxUtcMillis, 128); | ||
} | ||
|
||
private Prepared prepareOffsetRounding(long minUtcMillis, long maxUtcMillis) { | ||
long minLookup = minUtcMillis - unit.extraLocalOffsetLookup(); | ||
long maxLookup = maxUtcMillis; | ||
|
||
|
@@ -421,7 +438,6 @@ public Prepared prepare(long minUtcMillis, long maxUtcMillis) { | |
// Range too long, just use java.time | ||
return prepareJavaTime(); | ||
} | ||
|
||
LocalTimeOffset fixedOffset = lookup.fixedInRange(minLookup, maxLookup); | ||
if (fixedOffset != null) { | ||
// The time zone is effectively fixed | ||
|
@@ -1015,7 +1031,7 @@ public byte id() { | |
|
||
@Override | ||
public Prepared prepare(long minUtcMillis, long maxUtcMillis) { | ||
return wrapPreparedRounding(delegate.prepare(minUtcMillis, maxUtcMillis)); | ||
return wrapPreparedRounding(delegate.prepare(minUtcMillis - offset, maxUtcMillis - offset)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this change fixing an existing bug? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, but I think the bug is worse with this change. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The new assertions in the |
||
} | ||
|
||
@Override | ||
|
@@ -1085,4 +1101,57 @@ public static Rounding read(StreamInput in) throws IOException { | |
throw new ElasticsearchException("unknown rounding id [" + id + "]"); | ||
} | ||
} | ||
|
||
/** | ||
* Attempt to build a {@link Prepared} implementation that relies on pre-calcuated | ||
* "round down" points. If there would be more than {@code max} points then return | ||
* the original implementation, otherwise return the new, faster implementation. | ||
*/ | ||
static Prepared maybeUseArray(Prepared orig, long minUtcMillis, long maxUtcMillis, int max) { | ||
long[] values = new long[1]; | ||
long rounded = orig.round(minUtcMillis); | ||
int i = 0; | ||
values[i++] = rounded; | ||
while ((rounded = orig.nextRoundingValue(rounded)) <= maxUtcMillis) { | ||
if (i >= max) { | ||
return orig; | ||
} | ||
assert values[i - 1] == orig.round(rounded - 1); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's not super clear to me what this assert is guarding against. Can you add a comment please? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 |
||
values = ArrayUtil.grow(values, i + 1); | ||
values[i++]= rounded; | ||
} | ||
return new ArrayRounding(values, i, orig); | ||
} | ||
|
||
/** | ||
* Implementation of {@link Prepared} using pre-calculated "round down" points. | ||
*/ | ||
private static class ArrayRounding implements Prepared { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This will need to implement new methods added in #61369. |
||
private final long[] values; | ||
private final int max; | ||
private final Prepared delegate; | ||
|
||
private ArrayRounding(long[] values, int max, Prepared delegate) { | ||
this.values = values; | ||
this.max = max; | ||
this.delegate = delegate; | ||
} | ||
|
||
@Override | ||
public long round(long utcMillis) { | ||
assert values[0] <= utcMillis : "utcMillis must be after " + values[0]; | ||
int idx = Arrays.binarySearch(values, 0, max, utcMillis); | ||
assert idx != -1 : "The insertion point is before the array! This should have tripped the assertion above."; | ||
assert -1 - idx <= values.length : "This insertion point is after the end of the array."; | ||
if (idx < 0) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe assert that idx is neither -1 nor -1 - max? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure! |
||
idx = -2 - idx; | ||
} | ||
return values[idx]; | ||
} | ||
|
||
@Override | ||
public long nextRoundingValue(long utcMillis) { | ||
return delegate.nextRoundingValue(utcMillis); | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
would it be doable to detect timezones that don't work with this optimization at runtime instead of maintaining an allowlist?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can't think of a way to do right now. At least, not a good way.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I thought Canada was changing time at 2 am, on another side, some other known time zones such as
Asia/Gaza
for example seems to indeed have 2 mightnights. I did some quick and dirty test and I have got a slightly different list for timezones in my JVM:There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Very nice!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this really only a problem when there are two midnights, or is it a more general problem when the time goes back due to daylight savings, and you just need special values of
offset
to make the bug occur if the transition is not around midnight?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've played with offset to double check and can't cause issues with it. However do you think we could detect timezones that don't work dynamically instead of relying on a static list? E.g. could we iterate transitions in the considered interval and check whether there's one that brings us to a different day?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We could use something like the test case uses to find candidates, but it'd require loading all of the time zone rules on startup. I'm hoping that the test can prevent us from having to do that by being very careful about this list.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually I wasn't thinking about testing all timezones up-front on startup, I was more thinking of doing the test when building the Rounding object by only looking at transitions of the considered timezone.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmmmm - the
assert
that I have below sort of asserts that. But it isn't nearly as strong. I'll think about it!There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@jpountz I've pushed a change that does this.