Skip to content

Commit 78fafab

Browse files
authored
Fix delete_expired_data/nightly maintenance when many model snapshots need deleting (#57041) (#57145)
The queries performed by the expired data removers pull back entire documents when only a few fields are required. For ModelSnapshots in particular this is a problem as they contain quantiles which may be 100s of KB and the search size is set to 10,000. This change makes the search more efficient by only requesting the fields needed to work out which expired data should be deleted.
1 parent 47e6ee9 commit 78fafab

File tree

9 files changed

+281
-112
lines changed

9 files changed

+281
-112
lines changed

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/time/TimeUtils.java

+30
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,36 @@ public static Instant parseTimeFieldToInstant(XContentParser parser, String fiel
4646
"unexpected token [" + parser.currentToken() + "] for [" + fieldName + "]");
4747
}
4848

49+
/**
50+
* Safely parses a string epoch representation to a Long
51+
*
52+
* Commonly this function is used for parsing Date fields from doc values
53+
* requested with the format "epoch_millis".
54+
*
55+
* Since nanosecond support was added epoch_millis timestamps may have a fractional component.
56+
* We discard this, taking just whole milliseconds. Arguably it would be better to retain the
57+
* precision here and let the downstream component decide whether it wants the accuracy, but
58+
* that makes it hard to pass around the value as a number. The double type doesn't have
59+
* enough digits of accuracy, and obviously long cannot store the fraction. BigDecimal would
60+
* work, but that isn't supported by the JSON parser if the number gets round-tripped through
61+
* JSON. So String is really the only format that could be used, but the consumers of time
62+
* are expecting a number.
63+
*
64+
* @param epoch The epoch value as a string. This may contain a fractional component.
65+
* @return The epoch value.
66+
*/
67+
public static long parseToEpochMs(String epoch) {
68+
int dotPos = epoch.indexOf('.');
69+
if (dotPos == -1) {
70+
return Long.parseLong(epoch);
71+
} else if (dotPos > 0) {
72+
return Long.parseLong(epoch.substring(0, dotPos));
73+
} else {
74+
// The first character is '.' so round down to 0
75+
return 0L;
76+
}
77+
}
78+
4979
/**
5080
* First tries to parse the date first as a Long and convert that to an
5181
* epoch time. If the long number has more than 10 digits it is considered a

x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/common/time/TimeUtilsTests.java

+6
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,12 @@ public void testDateStringToEpoch() {
7272
assertEquals(1477058573500L, TimeUtils.dateStringToEpoch("1477058573500"));
7373
}
7474

75+
public void testParseToEpochMs() {
76+
assertEquals(1462096800000L, TimeUtils.parseToEpochMs("1462096800000"));
77+
assertEquals(1462096800000L, TimeUtils.parseToEpochMs("1462096800000.005"));
78+
assertEquals(0L, TimeUtils.parseToEpochMs(".005"));
79+
}
80+
7581
public void testCheckMultiple_GivenMultiples() {
7682
TimeUtils.checkMultiple(TimeValue.timeValueHours(1), TimeUnit.SECONDS, new ParseField("foo"));
7783
TimeUtils.checkMultiple(TimeValue.timeValueHours(1), TimeUnit.MINUTES, new ParseField("foo"));

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/TimeField.java

+2-17
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import org.elasticsearch.common.util.set.Sets;
99
import org.elasticsearch.search.SearchHit;
10+
import org.elasticsearch.xpack.core.common.time.TimeUtils;
1011

1112
import java.util.Collections;
1213
import java.util.Objects;
@@ -44,23 +45,7 @@ public Object[] value(SearchHit hit) {
4445
return value;
4546
}
4647
if (value[0] instanceof String) { // doc_value field with the epoch_millis format
47-
// Since nanosecond support was added epoch_millis timestamps may have a fractional component.
48-
// We discard this, taking just whole milliseconds. Arguably it would be better to retain the
49-
// precision here and let the downstream component decide whether it wants the accuracy, but
50-
// that makes it hard to pass around the value as a number. The double type doesn't have
51-
// enough digits of accuracy, and obviously long cannot store the fraction. BigDecimal would
52-
// work, but that isn't supported by the JSON parser if the number gets round-tripped through
53-
// JSON. So String is really the only format that could be used, but the ML consumers of time
54-
// are expecting a number.
55-
String strVal0 = (String) value[0];
56-
int dotPos = strVal0.indexOf('.');
57-
if (dotPos == -1) {
58-
value[0] = Long.parseLong(strVal0);
59-
} else if (dotPos > 0) {
60-
value[0] = Long.parseLong(strVal0.substring(0, dotPos));
61-
} else {
62-
value[0] = 0L;
63-
}
48+
value[0] = TimeUtils.parseToEpochMs((String)value[0]);
6449
} else if (value[0] instanceof Long == false) { // pre-6.0 field
6550
throw new IllegalStateException("Unexpected value for a time field: " + value[0].getClass());
6651
}

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/retention/ExpiredForecastsRemover.java

+48-25
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,6 @@
1414
import org.elasticsearch.action.search.SearchResponse;
1515
import org.elasticsearch.action.support.ThreadedActionListener;
1616
import org.elasticsearch.client.OriginSettingClient;
17-
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
18-
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
19-
import org.elasticsearch.common.xcontent.XContentFactory;
20-
import org.elasticsearch.common.xcontent.XContentParser;
21-
import org.elasticsearch.common.xcontent.XContentType;
2217
import org.elasticsearch.index.query.BoolQueryBuilder;
2318
import org.elasticsearch.index.query.QueryBuilder;
2419
import org.elasticsearch.index.query.QueryBuilders;
@@ -30,6 +25,7 @@
3025
import org.elasticsearch.search.SearchHits;
3126
import org.elasticsearch.search.builder.SearchSourceBuilder;
3227
import org.elasticsearch.threadpool.ThreadPool;
28+
import org.elasticsearch.xpack.core.common.time.TimeUtils;
3329
import org.elasticsearch.xpack.core.ml.job.config.Job;
3430
import org.elasticsearch.xpack.core.ml.job.persistence.AnomalyDetectorsIndex;
3531
import org.elasticsearch.xpack.core.ml.job.persistence.ElasticsearchMappings;
@@ -38,8 +34,6 @@
3834
import org.elasticsearch.xpack.core.ml.job.results.Result;
3935
import org.elasticsearch.xpack.ml.MachineLearning;
4036

41-
import java.io.IOException;
42-
import java.io.InputStream;
4337
import java.time.Clock;
4438
import java.time.Instant;
4539
import java.util.ArrayList;
@@ -85,6 +79,11 @@ public void remove(ActionListener<Boolean> listener, Supplier<Boolean> isTimedOu
8579
.filter(QueryBuilders.existsQuery(ForecastRequestStats.EXPIRY_TIME.getPreferredName())));
8680
source.size(MAX_FORECASTS);
8781
source.trackTotalHits(true);
82+
source.fetchSource(false);
83+
source.docValueField(Job.ID.getPreferredName(), null);
84+
source.docValueField(ForecastRequestStats.FORECAST_ID.getPreferredName(), null);
85+
source.docValueField(ForecastRequestStats.EXPIRY_TIME.getPreferredName(), "epoch_millis");
86+
8887

8988
// _doc is the most efficient sort order and will also disable scoring
9089
source.sort(ElasticsearchMappings.ES_DOC);
@@ -96,11 +95,9 @@ public void remove(ActionListener<Boolean> listener, Supplier<Boolean> isTimedOu
9695
}
9796

9897
private void deleteForecasts(SearchResponse searchResponse, ActionListener<Boolean> listener, Supplier<Boolean> isTimedOutSupplier) {
99-
List<ForecastRequestStats> forecastsToDelete;
100-
try {
101-
forecastsToDelete = findForecastsToDelete(searchResponse);
102-
} catch (IOException e) {
103-
listener.onFailure(e);
98+
List<JobForecastId> forecastsToDelete = findForecastsToDelete(searchResponse);
99+
if (forecastsToDelete.isEmpty()) {
100+
listener.onResponse(true);
104101
return;
105102
}
106103

@@ -131,39 +128,51 @@ public void onFailure(Exception e) {
131128
});
132129
}
133130

134-
private List<ForecastRequestStats> findForecastsToDelete(SearchResponse searchResponse) throws IOException {
135-
List<ForecastRequestStats> forecastsToDelete = new ArrayList<>();
131+
private List<JobForecastId> findForecastsToDelete(SearchResponse searchResponse) {
132+
List<JobForecastId> forecastsToDelete = new ArrayList<>();
136133

137134
SearchHits hits = searchResponse.getHits();
138135
if (hits.getTotalHits().value > MAX_FORECASTS) {
139136
LOGGER.info("More than [{}] forecasts were found. This run will only delete [{}] of them", MAX_FORECASTS, MAX_FORECASTS);
140137
}
141138

142139
for (SearchHit hit : hits.getHits()) {
143-
try (InputStream stream = hit.getSourceRef().streamInput();
144-
XContentParser parser = XContentFactory.xContent(XContentType.JSON).createParser(
145-
NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, stream)) {
146-
ForecastRequestStats forecastRequestStats = ForecastRequestStats.LENIENT_PARSER.apply(parser, null);
147-
if (forecastRequestStats.getExpiryTime().toEpochMilli() < cutoffEpochMs) {
148-
forecastsToDelete.add(forecastRequestStats);
140+
String expiryTime = stringFieldValueOrNull(hit, ForecastRequestStats.EXPIRY_TIME.getPreferredName());
141+
if (expiryTime == null) {
142+
LOGGER.warn("Forecast request stats document [{}] has a null [{}] field", hit.getId(),
143+
ForecastRequestStats.EXPIRY_TIME.getPreferredName());
144+
continue;
145+
}
146+
long expiryMs = TimeUtils.parseToEpochMs(expiryTime);
147+
if (expiryMs < cutoffEpochMs) {
148+
JobForecastId idPair = new JobForecastId(
149+
stringFieldValueOrNull(hit, Job.ID.getPreferredName()),
150+
stringFieldValueOrNull(hit, Forecast.FORECAST_ID.getPreferredName()));
151+
152+
if (idPair.hasNullValue() == false) {
153+
forecastsToDelete.add(idPair);
149154
}
155+
150156
}
157+
151158
}
152159
return forecastsToDelete;
153160
}
154161

155-
private DeleteByQueryRequest buildDeleteByQuery(List<ForecastRequestStats> forecastsToDelete) {
162+
private DeleteByQueryRequest buildDeleteByQuery(List<JobForecastId> ids) {
156163
DeleteByQueryRequest request = new DeleteByQueryRequest();
157164
request.setSlices(AbstractBulkByScrollRequest.AUTO_SLICES);
158165

159166
request.indices(RESULTS_INDEX_PATTERN);
160167
BoolQueryBuilder boolQuery = QueryBuilders.boolQuery().minimumShouldMatch(1);
161168
boolQuery.must(QueryBuilders.termsQuery(Result.RESULT_TYPE.getPreferredName(),
162169
ForecastRequestStats.RESULT_TYPE_VALUE, Forecast.RESULT_TYPE_VALUE));
163-
for (ForecastRequestStats forecastToDelete : forecastsToDelete) {
164-
boolQuery.should(QueryBuilders.boolQuery()
165-
.must(QueryBuilders.termQuery(Job.ID.getPreferredName(), forecastToDelete.getJobId()))
166-
.must(QueryBuilders.termQuery(Forecast.FORECAST_ID.getPreferredName(), forecastToDelete.getForecastId())));
170+
for (JobForecastId jobForecastId : ids) {
171+
if (jobForecastId.hasNullValue() == false) {
172+
boolQuery.should(QueryBuilders.boolQuery()
173+
.must(QueryBuilders.termQuery(Job.ID.getPreferredName(), jobForecastId.jobId))
174+
.must(QueryBuilders.termQuery(Forecast.FORECAST_ID.getPreferredName(), jobForecastId.forecastId)));
175+
}
167176
}
168177
QueryBuilder query = QueryBuilders.boolQuery().filter(boolQuery);
169178
request.setQuery(query);
@@ -173,4 +182,18 @@ private DeleteByQueryRequest buildDeleteByQuery(List<ForecastRequestStats> forec
173182

174183
return request;
175184
}
185+
186+
private static class JobForecastId {
187+
private final String jobId;
188+
private final String forecastId;
189+
190+
private JobForecastId(String jobId, String forecastId) {
191+
this.jobId = jobId;
192+
this.forecastId = forecastId;
193+
}
194+
195+
boolean hasNullValue() {
196+
return jobId == null || forecastId == null;
197+
}
198+
}
176199
}

0 commit comments

Comments
 (0)