Skip to content

Commit 7efce22

Browse files
authored
Fix a DST error in date_histogram (backport #52016) (#52237)
When `date_histogram` attempts to optimize itself for a particular time zone, it checks to see if the entire shard is within the same "transition". Most time zones transition once every six months or thereabouts, so the optimization can usually kick in. *But* it crashes when you attempt to feed it a time zone whose last DST transition was before epoch. The reason for this is a little twisted: before this patch it'd find the next and previous transitions in milliseconds since epoch. Then it'd cast them to `Long`s and pass them into the `DateFieldType` to check if the shard's contents were within the range. The trouble is they are then converted to `String`s which are *then* parsed back to `Instant`s which are then converted to `long`s. And the parser doesn't like most negative numbers. And everything before epoch is negative. This change removes the `long` -> `Long` -> `String` -> `Instant` -> `long` chain in favor of `long` -> `Instant` -> `long`, which avoids the fairly complex parsing code and handles a bunch of interesting edge cases around epoch, and other edge cases around `date_nanos`. Closes #50265
1 parent 12cb6dc commit 7efce22

File tree

7 files changed

+368
-120
lines changed

7 files changed

+368
-120
lines changed

rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml

+86-6
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@ setup:
66
settings:
77
number_of_replicas: 0
88
mappings:
9-
"properties":
10-
"number":
11-
"type" : "integer"
12-
"date":
13-
"type" : "date"
9+
properties:
10+
number:
11+
type: integer
12+
date:
13+
type: date
1414
- do:
1515
cluster.health:
1616
wait_for_status: green
@@ -214,7 +214,10 @@ setup:
214214
mappings:
215215
properties:
216216
date:
217-
type : date
217+
type: date
218+
fields:
219+
nanos:
220+
type: date_nanos
218221

219222
- do:
220223
bulk:
@@ -239,7 +242,24 @@ setup:
239242
date_histogram:
240243
field: date
241244
calendar_interval: month
245+
- match: { hits.total.value: 4 }
246+
- length: { aggregations.histo.buckets: 3 }
247+
- match: { aggregations.histo.buckets.0.key_as_string: "2016-01-01T00:00:00.000Z" }
248+
- match: { aggregations.histo.buckets.0.doc_count: 2 }
249+
- match: { aggregations.histo.buckets.1.key_as_string: "2016-02-01T00:00:00.000Z" }
250+
- match: { aggregations.histo.buckets.1.doc_count: 1 }
251+
- match: { aggregations.histo.buckets.2.key_as_string: "2016-03-01T00:00:00.000Z" }
252+
- match: { aggregations.histo.buckets.2.doc_count: 1 }
242253

254+
- do:
255+
search:
256+
body:
257+
size: 0
258+
aggs:
259+
histo:
260+
date_histogram:
261+
field: date.nanos
262+
calendar_interval: month
243263
- match: { hits.total.value: 4 }
244264
- length: { aggregations.histo.buckets: 3 }
245265
- match: { aggregations.histo.buckets.0.key_as_string: "2016-01-01T00:00:00.000Z" }
@@ -410,3 +430,63 @@ setup:
410430
- match: { aggregations.histo.buckets.1.doc_count: 2 }
411431
- match: { aggregations.histo.buckets.2.key_as_string: "2016-02-02T00:00:00.000Z" }
412432
- match: { aggregations.histo.buckets.2.doc_count: 1 }
433+
434+
---
435+
"date_histogram with pre-epoch daylight savings time transition":
436+
- skip:
437+
version: " - 7.6.99"
438+
reason: bug fixed in 7.7.0. will be backported to 7.6.1
439+
# Add date_nanos to the mapping. We couldn't do it during setup because that
440+
# is run against 6.8 which doesn't have date_nanos
441+
- do:
442+
indices.put_mapping:
443+
index: test_1
444+
body:
445+
properties:
446+
number:
447+
type: integer
448+
date:
449+
type: date
450+
fields:
451+
nanos:
452+
type: date_nanos
453+
454+
- do:
455+
bulk:
456+
index: test_1
457+
refresh: true
458+
body:
459+
- '{"index": {}}'
460+
- '{"date": "2016-01-01"}'
461+
462+
- do:
463+
search:
464+
body:
465+
size: 0
466+
aggs:
467+
histo:
468+
date_histogram:
469+
field: date
470+
fixed_interval: 1ms
471+
time_zone: America/Phoenix
472+
473+
- match: { hits.total.value: 1 }
474+
- length: { aggregations.histo.buckets: 1 }
475+
- match: { aggregations.histo.buckets.0.key_as_string: "2015-12-31T17:00:00.000-07:00" }
476+
- match: { aggregations.histo.buckets.0.doc_count: 1 }
477+
478+
- do:
479+
search:
480+
body:
481+
size: 0
482+
aggs:
483+
histo:
484+
date_histogram:
485+
field: date.nanos
486+
fixed_interval: 1ms
487+
time_zone: America/Phoenix
488+
489+
- match: { hits.total.value: 1 }
490+
- length: { aggregations.histo.buckets: 1 }
491+
- match: { aggregations.histo.buckets.0.key_as_string: "2015-12-31T17:00:00.000-07:00" }
492+
- match: { aggregations.histo.buckets.0.doc_count: 1 }

server/src/main/java/org/elasticsearch/common/time/DateUtils.java

+21-1
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ public static ZoneId of(String zoneId) {
208208
return ZoneId.of(zoneId).normalized();
209209
}
210210

211-
private static final Instant MAX_NANOSECOND_INSTANT = Instant.parse("2262-04-11T23:47:16.854775807Z");
211+
static final Instant MAX_NANOSECOND_INSTANT = Instant.parse("2262-04-11T23:47:16.854775807Z");
212212

213213
static final long MAX_NANOSECOND_IN_MILLIS = MAX_NANOSECOND_INSTANT.toEpochMilli();
214214

@@ -231,6 +231,26 @@ public static long toLong(Instant instant) {
231231
return instant.getEpochSecond() * 1_000_000_000 + instant.getNano();
232232
}
233233

234+
/**
235+
* Returns an instant that is within the valid nanosecond range. If
236+
* the parameter is before the valid nanosecond range then this returns
237+
the minimum {@linkplain Instant} valid for nanosecond resolution. If
238+
* the parameter is after the valid nanosecond range then this returns
239+
* the maximum {@linkplain Instant} valid for nanosecond resolution.
240+
* <p>
241+
* Useful for checking if all values for the field are within some range,
242+
* even if the range's endpoints are not valid nanosecond resolution.
243+
*/
244+
public static Instant clampToNanosRange(Instant instant) {
245+
if (instant.isBefore(Instant.EPOCH)) {
246+
return Instant.EPOCH;
247+
}
248+
if (instant.isAfter(MAX_NANOSECOND_INSTANT)) {
249+
return MAX_NANOSECOND_INSTANT;
250+
}
251+
return instant;
252+
}
253+
234254
/**
235255
* convert a long value to a java time instant
236256
* the long value resembles the nanoseconds since the epoch

server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java

+42-3
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,11 @@ public long convert(Instant instant) {
8888
public Instant toInstant(long value) {
8989
return Instant.ofEpochMilli(value);
9090
}
91+
92+
@Override
93+
public Instant clampToValidRange(Instant instant) {
94+
return instant;
95+
}
9196
},
9297
NANOSECONDS("date_nanos", NumericType.DATE_NANOSECONDS) {
9398
@Override
@@ -99,6 +104,11 @@ public long convert(Instant instant) {
99104
public Instant toInstant(long value) {
100105
return DateUtils.toInstant(value);
101106
}
107+
108+
@Override
109+
public Instant clampToValidRange(Instant instant) {
110+
return DateUtils.clampToNanosRange(instant);
111+
}
102112
};
103113

104114
private final String type;
@@ -117,10 +127,18 @@ NumericType numericType() {
117127
return numericType;
118128
}
119129

130+
/**
131+
* Convert an {@linkplain Instant} into a long value in this resolution.
132+
*/
120133
public abstract long convert(Instant instant);
121134

135+
/**
136+
* Convert a long value in this resolution into an instant.
137+
*/
122138
public abstract Instant toInstant(long value);
123139

140+
public abstract Instant clampToValidRange(Instant instant);
141+
124142
public static Resolution ofOrdinal(int ord) {
125143
for (Resolution resolution : values()) {
126144
if (ord == resolution.ordinal()) {
@@ -440,9 +458,30 @@ public Relation isFieldWithinQuery(IndexReader reader,
440458
}
441459
}
442460

443-
// This check needs to be done after fromInclusive and toInclusive
444-
// are resolved so we can throw an exception if they are invalid
445-
// even if there are no points in the shard
461+
return isFieldWithinRange(reader, fromInclusive, toInclusive);
462+
}
463+
464+
/**
465+
* Return whether all values of the given {@link IndexReader} are within the range,
466+
* outside the range or cross the range. Unlike {@link #isFieldWithinQuery} this
467+
* accepts values that are out of the range of the {@link #resolution} of this field.
468+
* @param fromInclusive start date, inclusive
469+
* @param toInclusive end date, inclusive
470+
*/
471+
public Relation isFieldWithinRange(IndexReader reader, Instant fromInclusive, Instant toInclusive)
472+
throws IOException {
473+
return isFieldWithinRange(reader,
474+
resolution.convert(resolution.clampToValidRange(fromInclusive)),
475+
resolution.convert(resolution.clampToValidRange(toInclusive)));
476+
}
477+
478+
/**
479+
* Return whether all values of the given {@link IndexReader} are within the range,
480+
* outside the range or cross the range.
481+
* @param fromInclusive start date, inclusive, {@link Resolution#convert(Instant) converted} to the appropriate scale
482+
* @param toInclusive end date, inclusive, {@link Resolution#convert(Instant) converted} to the appropriate scale
483+
*/
484+
private Relation isFieldWithinRange(IndexReader reader, long fromInclusive, long toInclusive) throws IOException {
446485
if (PointValues.size(reader, name()) == 0) {
447486
// no points, so nothing matches
448487
return Relation.DISJOINT;

server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java

+4-4
Original file line numberDiff line numberDiff line change
@@ -421,10 +421,10 @@ public enum Relation {
421421
* {@link Relation#INTERSECTS}, which is always fine to return when there is
422422
* no way to check whether values are actually within bounds. */
423423
public Relation isFieldWithinQuery(
424-
IndexReader reader,
425-
Object from, Object to,
426-
boolean includeLower, boolean includeUpper,
427-
ZoneId timeZone, DateMathParser dateMathParser, QueryRewriteContext context) throws IOException {
424+
IndexReader reader,
425+
Object from, Object to,
426+
boolean includeLower, boolean includeUpper,
427+
ZoneId timeZone, DateMathParser dateMathParser, QueryRewriteContext context) throws IOException {
428428
return Relation.INTERSECTS;
429429
}
430430

0 commit comments

Comments
 (0)