@@ -53,6 +53,8 @@ public final class TimestampFormatFinder {
53
53
private static final Logger logger = LogManager .getLogger (TimestampFormatFinder .class );
54
54
private static final String PUNCTUATION_THAT_NEEDS_ESCAPING_IN_REGEX = "\\ |()[]{}^$.*?" ;
55
55
private static final String FRACTIONAL_SECOND_SEPARATORS = ":.," ;
56
+ private static final Pattern FRACTIONAL_SECOND_INTERPRETER =
57
+ Pattern .compile ("([" + FRACTIONAL_SECOND_SEPARATORS + "])(\\ d{3,9})($|[Z+-])" );
56
58
private static final char INDETERMINATE_FIELD_PLACEHOLDER = '?' ;
57
59
// The ? characters in this must match INDETERMINATE_FIELD_PLACEHOLDER
58
60
// above, but they're literals in this regex to aid readability
@@ -702,6 +704,20 @@ public List<String> getJavaTimestampFormats() {
702
704
(matchedFormats .size () > 1 ) ? matchedFormats .get (0 ) : null );
703
705
}
704
706
707
+ /**
708
+ * This is needed to decide between "date" and "date_nanos" as the index mapping type.
709
+ * @return Do the observed timestamps require nanosecond precision to store accurately?
710
+ */
711
+ public boolean needNanosecondPrecision () {
712
+ if (matchedFormats .isEmpty ()) {
713
+ // If errorOnNoTimestamp is set and we get here it means no samples have been added, which is likely a programmer mistake
714
+ assert errorOnNoTimestamp == false ;
715
+ return false ;
716
+ }
717
+ return matches .stream ().filter (match -> matchedFormats .size () < 2 || matchedFormats .get (0 ).canMergeWith (match .timestampFormat ))
718
+ .anyMatch (match -> match .hasNanosecondPrecision );
719
+ }
720
+
705
721
/**
706
722
* Given a list of timestamp formats that might contain indeterminate day/month parts,
707
723
* return the corresponding pattern with the placeholders replaced with concrete
@@ -947,6 +963,14 @@ public boolean hasTimezoneDependentParsing() {
947
963
.anyMatch (match -> match .hasTimezoneDependentParsing );
948
964
}
949
965
966
+ /**
967
+ * The @timestamp field will always have been parsed into epoch format,
968
+ * so we just need to know if it has nanosecond resolution or not.
969
+ */
970
+ public Map <String , String > getEsDateMappingTypeWithoutFormat () {
971
+ return Collections .singletonMap (FileStructureUtils .MAPPING_TYPE_SETTING , needNanosecondPrecision () ? "date_nanos" : "date" );
972
+ }
973
+
950
974
/**
951
975
* Sometimes Elasticsearch mappings for dates need to include the format.
952
976
* This method returns appropriate mappings settings: at minimum "type" : "date",
@@ -959,7 +983,7 @@ public Map<String, String> getEsDateMappingTypeWithFormat() {
959
983
return Collections .singletonMap (FileStructureUtils .MAPPING_TYPE_SETTING , "keyword" );
960
984
}
961
985
Map <String , String > mapping = new LinkedHashMap <>();
962
- mapping .put (FileStructureUtils .MAPPING_TYPE_SETTING , "date" );
986
+ mapping .put (FileStructureUtils .MAPPING_TYPE_SETTING , needNanosecondPrecision () ? "date_nanos" : "date" );
963
987
String formats = javaTimestampFormats .stream ().map (format -> {
964
988
switch (format ) {
965
989
case "ISO8601" :
@@ -1233,6 +1257,7 @@ static final class TimestampMatch {
1233
1257
final int secondIndeterminateDateNumber ;
1234
1258
1235
1259
final boolean hasTimezoneDependentParsing ;
1260
+ final boolean hasNanosecondPrecision ;
1236
1261
1237
1262
/**
1238
1263
* Text that came after the timestamp in the matched field/message.
@@ -1250,6 +1275,8 @@ static final class TimestampMatch {
1250
1275
this .secondIndeterminateDateNumber = indeterminateDateNumbers [1 ];
1251
1276
this .hasTimezoneDependentParsing = requiresTimezoneDependentParsing (timestampFormat .rawJavaTimestampFormats .get (0 ),
1252
1277
matchedDate );
1278
+ this .hasNanosecondPrecision = matchHasNanosecondPrecision (timestampFormat .rawJavaTimestampFormats .get (0 ),
1279
+ matchedDate );
1253
1280
this .epilogue = Objects .requireNonNull (epilogue );
1254
1281
}
1255
1282
@@ -1259,6 +1286,7 @@ static final class TimestampMatch {
1259
1286
this .firstIndeterminateDateNumber = toCopyExceptFormat .firstIndeterminateDateNumber ;
1260
1287
this .secondIndeterminateDateNumber = toCopyExceptFormat .secondIndeterminateDateNumber ;
1261
1288
this .hasTimezoneDependentParsing = toCopyExceptFormat .hasTimezoneDependentParsing ;
1289
+ this .hasNanosecondPrecision = toCopyExceptFormat .hasNanosecondPrecision ;
1262
1290
this .epilogue = toCopyExceptFormat .epilogue ;
1263
1291
}
1264
1292
@@ -1285,6 +1313,38 @@ static boolean requiresTimezoneDependentParsing(String format, String matchedDat
1285
1313
}
1286
1314
}
1287
1315
1316
+ static boolean matchHasNanosecondPrecision (String format , String matchedDate ) {
1317
+ switch (format ) {
1318
+ case "ISO8601" :
1319
+ Matcher matcher = FRACTIONAL_SECOND_INTERPRETER .matcher (matchedDate );
1320
+ return matcher .find () && matcher .group (2 ).length () > 3 ;
1321
+ case "UNIX_MS" :
1322
+ case "UNIX" :
1323
+ return false ;
1324
+ case "TAI64N" :
1325
+ return true ;
1326
+ default :
1327
+ boolean notQuoted = true ;
1328
+ int consecutiveSs = 0 ;
1329
+ for (int pos = 0 ; pos < format .length (); ++pos ) {
1330
+ char curChar = format .charAt (pos );
1331
+ if (curChar == '\'' ) {
1332
+ notQuoted = !notQuoted ;
1333
+ consecutiveSs = 0 ;
1334
+ } else if (notQuoted ) {
1335
+ if (curChar == 'S' ) {
1336
+ if (++consecutiveSs > 3 ) {
1337
+ return true ;
1338
+ }
1339
+ } else {
1340
+ consecutiveSs = 0 ;
1341
+ }
1342
+ }
1343
+ }
1344
+ return false ;
1345
+ }
1346
+ }
1347
+
1288
1348
static int [] parseIndeterminateDateNumbers (String matchedDate , List <String > rawJavaTimestampFormats ) {
1289
1349
int [] indeterminateDateNumbers = { -1 , -1 };
1290
1350
@@ -1368,7 +1428,6 @@ public String toString() {
1368
1428
*/
1369
1429
static final class CandidateTimestampFormat {
1370
1430
1371
- private static final Pattern FRACTIONAL_SECOND_INTERPRETER = Pattern .compile ("([" + FRACTIONAL_SECOND_SEPARATORS + "])(\\ d{3,9})$" );
1372
1431
// This means that in the case of a literal Z, XXX is preferred
1373
1432
private static final Pattern TRAILING_OFFSET_WITHOUT_COLON_FINDER = Pattern .compile ("[+-]\\ d{4}$" );
1374
1433
0 commit comments