Skip to content

Commit 040da13

Browse files
committed
[ML] Convert job data remover to work with index configs (elastic#34532)
1 parent cef9f30 commit 040da13

File tree

8 files changed

+315
-123
lines changed

8 files changed

+315
-123
lines changed

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportDeleteExpiredDataAction.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,9 @@ protected void doExecute(Task task, DeleteExpiredDataAction.Request request,
5757
private void deleteExpiredData(ActionListener<DeleteExpiredDataAction.Response> listener) {
5858
Auditor auditor = new Auditor(client, clusterService.getNodeName());
5959
List<MlDataRemover> dataRemovers = Arrays.asList(
60-
new ExpiredResultsRemover(client, clusterService, auditor),
60+
new ExpiredResultsRemover(client, auditor),
6161
new ExpiredForecastsRemover(client, threadPool),
62-
new ExpiredModelSnapshotsRemover(client, threadPool, clusterService),
62+
new ExpiredModelSnapshotsRemover(client, threadPool),
6363
new UnusedStateRemover(client, clusterService)
6464
);
6565
Iterator<MlDataRemover> dataRemoversIterator = new VolatileCursorIterator<>(dataRemovers);
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
package org.elasticsearch.xpack.ml.job.persistence;
7+
8+
import org.elasticsearch.ElasticsearchParseException;
9+
import org.elasticsearch.client.Client;
10+
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
11+
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
12+
import org.elasticsearch.common.xcontent.XContentFactory;
13+
import org.elasticsearch.common.xcontent.XContentParser;
14+
import org.elasticsearch.common.xcontent.XContentType;
15+
import org.elasticsearch.index.query.QueryBuilder;
16+
import org.elasticsearch.index.query.TermQueryBuilder;
17+
import org.elasticsearch.search.SearchHit;
18+
import org.elasticsearch.xpack.core.ml.job.config.Job;
19+
20+
import java.io.IOException;
21+
import java.io.InputStream;
22+
23+
public class BatchedJobsIterator extends BatchedDocumentsIterator<Job.Builder> {
24+
25+
public BatchedJobsIterator(Client client, String index) {
26+
super(client, index);
27+
}
28+
29+
@Override
30+
protected QueryBuilder getQuery() {
31+
return new TermQueryBuilder(Job.JOB_TYPE.getPreferredName(), Job.ANOMALY_DETECTOR_JOB_TYPE);
32+
}
33+
34+
@Override
35+
protected Job.Builder map(SearchHit hit) {
36+
try (InputStream stream = hit.getSourceRef().streamInput();
37+
XContentParser parser = XContentFactory.xContent(XContentType.JSON)
38+
.createParser(NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, stream)) {
39+
return Job.LENIENT_PARSER.apply(parser, null);
40+
} catch (IOException e) {
41+
throw new ElasticsearchParseException("failed to parse job document [" + hit.getId() + "]", e);
42+
}
43+
}
44+
}

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/retention/AbstractExpiredJobDataRemover.java

Lines changed: 63 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,23 +6,23 @@
66
package org.elasticsearch.xpack.ml.job.retention;
77

88
import org.elasticsearch.action.ActionListener;
9-
import org.elasticsearch.cluster.ClusterState;
10-
import org.elasticsearch.cluster.service.ClusterService;
9+
import org.elasticsearch.client.Client;
1110
import org.elasticsearch.common.unit.TimeValue;
1211
import org.elasticsearch.index.query.BoolQueryBuilder;
1312
import org.elasticsearch.index.query.QueryBuilders;
14-
import org.elasticsearch.xpack.core.ml.MlMetadata;
1513
import org.elasticsearch.xpack.core.ml.job.config.Job;
14+
import org.elasticsearch.xpack.core.ml.job.persistence.AnomalyDetectorsIndex;
1615
import org.elasticsearch.xpack.core.ml.job.results.Result;
16+
import org.elasticsearch.xpack.ml.job.persistence.BatchedJobsIterator;
1717
import org.elasticsearch.xpack.ml.utils.VolatileCursorIterator;
1818
import org.joda.time.DateTime;
1919
import org.joda.time.chrono.ISOChronology;
2020

21-
import java.util.ArrayList;
21+
import java.util.Deque;
2222
import java.util.Iterator;
2323
import java.util.List;
24-
import java.util.Objects;
2524
import java.util.concurrent.TimeUnit;
25+
import java.util.stream.Collectors;
2626

2727
/**
2828
* Removes job data that expired with respect to their retention period.
@@ -33,23 +33,29 @@
3333
*/
3434
abstract class AbstractExpiredJobDataRemover implements MlDataRemover {
3535

36-
private final ClusterService clusterService;
36+
private final Client client;
3737

38-
AbstractExpiredJobDataRemover(ClusterService clusterService) {
39-
this.clusterService = Objects.requireNonNull(clusterService);
38+
AbstractExpiredJobDataRemover(Client client) {
39+
this.client = client;
4040
}
4141

4242
@Override
4343
public void remove(ActionListener<Boolean> listener) {
4444
removeData(newJobIterator(), listener);
4545
}
4646

47-
private void removeData(Iterator<Job> jobIterator, ActionListener<Boolean> listener) {
47+
private void removeData(WrappedBatchedJobsIterator jobIterator, ActionListener<Boolean> listener) {
4848
if (jobIterator.hasNext() == false) {
4949
listener.onResponse(true);
5050
return;
5151
}
5252
Job job = jobIterator.next();
53+
if (job == null) {
54+
// may be null if the batched iterator's search returns no results
55+
listener.onResponse(true);
56+
return;
57+
}
58+
5359
Long retentionDays = getRetentionDays(job);
5460
if (retentionDays == null) {
5561
removeData(jobIterator, listener);
@@ -59,14 +65,9 @@ private void removeData(Iterator<Job> jobIterator, ActionListener<Boolean> liste
5965
removeDataBefore(job, cutoffEpochMs, ActionListener.wrap(response -> removeData(jobIterator, listener), listener::onFailure));
6066
}
6167

62-
private Iterator<Job> newJobIterator() {
63-
ClusterState clusterState = clusterService.state();
64-
List<Job> jobs = new ArrayList<>(MlMetadata.getMlMetadata(clusterState).getJobs().values());
65-
return createVolatileCursorIterator(jobs);
66-
}
67-
68-
protected static <T> Iterator<T> createVolatileCursorIterator(List<T> items) {
69-
return new VolatileCursorIterator<T>(items);
68+
private WrappedBatchedJobsIterator newJobIterator() {
69+
BatchedJobsIterator jobsIterator = new BatchedJobsIterator(client, AnomalyDetectorsIndex.configIndexName());
70+
return new WrappedBatchedJobsIterator(jobsIterator);
7071
}
7172

7273
private long calcCutoffEpochMs(long retentionDays) {
@@ -87,4 +88,49 @@ protected static BoolQueryBuilder createQuery(String jobId, long cutoffEpochMs)
8788
.filter(QueryBuilders.termQuery(Job.ID.getPreferredName(), jobId))
8889
.filter(QueryBuilders.rangeQuery(Result.TIMESTAMP.getPreferredName()).lt(cutoffEpochMs).format("epoch_millis"));
8990
}
91+
92+
/**
93+
* BatchedJobsIterator efficiently returns batches of jobs using a scroll
94+
* search but AbstractExpiredJobDataRemover works with one job at a time.
95+
* This class abstracts away the logic of pulling one job at a time from
96+
* multiple batches.
97+
*/
98+
private class WrappedBatchedJobsIterator implements Iterator<Job> {
99+
private final BatchedJobsIterator batchedIterator;
100+
private VolatileCursorIterator<Job> currentBatch;
101+
102+
WrappedBatchedJobsIterator(BatchedJobsIterator batchedIterator) {
103+
this.batchedIterator = batchedIterator;
104+
}
105+
106+
@Override
107+
public boolean hasNext() {
108+
return (currentBatch != null && currentBatch.hasNext()) || batchedIterator.hasNext();
109+
}
110+
111+
/**
112+
* Before BatchedJobsIterator has run a search it reports hasNext == true
113+
* but the first search may return no results. In that case null is returned
114+
* and clients have to handle null.
115+
*/
116+
@Override
117+
public Job next() {
118+
if (currentBatch != null && currentBatch.hasNext()) {
119+
return currentBatch.next();
120+
}
121+
122+
// currentBatch is either null or all its elements have been iterated.
123+
// get the next currentBatch
124+
currentBatch = createBatchIteratorFromBatch(batchedIterator.next());
125+
126+
// BatchedJobsIterator.hasNext may be true if searching the first time
127+
// but no results are returned.
128+
return currentBatch.hasNext() ? currentBatch.next() : null;
129+
}
130+
131+
private VolatileCursorIterator<Job> createBatchIteratorFromBatch(Deque<Job.Builder> builders) {
132+
List<Job> jobs = builders.stream().map(Job.Builder::build).collect(Collectors.toList());
133+
return new VolatileCursorIterator<>(jobs);
134+
}
135+
}
90136
}

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/retention/ExpiredModelSnapshotsRemover.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
import org.elasticsearch.action.support.ThreadedActionListener;
1515
import org.elasticsearch.action.support.master.AcknowledgedResponse;
1616
import org.elasticsearch.client.Client;
17-
import org.elasticsearch.cluster.service.ClusterService;
1817
import org.elasticsearch.common.logging.Loggers;
1918
import org.elasticsearch.index.query.QueryBuilder;
2019
import org.elasticsearch.index.query.QueryBuilders;
@@ -27,6 +26,7 @@
2726
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.ModelSnapshot;
2827
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.ModelSnapshotField;
2928
import org.elasticsearch.xpack.ml.MachineLearning;
29+
import org.elasticsearch.xpack.ml.utils.VolatileCursorIterator;
3030

3131
import java.util.ArrayList;
3232
import java.util.Iterator;
@@ -57,8 +57,8 @@ public class ExpiredModelSnapshotsRemover extends AbstractExpiredJobDataRemover
5757
private final Client client;
5858
private final ThreadPool threadPool;
5959

60-
public ExpiredModelSnapshotsRemover(Client client, ThreadPool threadPool, ClusterService clusterService) {
61-
super(clusterService);
60+
public ExpiredModelSnapshotsRemover(Client client, ThreadPool threadPool) {
61+
super(client);
6262
this.client = Objects.requireNonNull(client);
6363
this.threadPool = Objects.requireNonNull(threadPool);
6464
}
@@ -103,7 +103,7 @@ public void onResponse(SearchResponse searchResponse) {
103103
for (SearchHit hit : searchResponse.getHits()) {
104104
modelSnapshots.add(ModelSnapshot.fromJson(hit.getSourceRef()));
105105
}
106-
deleteModelSnapshots(createVolatileCursorIterator(modelSnapshots), listener);
106+
deleteModelSnapshots(new VolatileCursorIterator<>(modelSnapshots), listener);
107107
} catch (Exception e) {
108108
onFailure(e);
109109
}

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/retention/ExpiredResultsRemover.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
import org.elasticsearch.ElasticsearchException;
1010
import org.elasticsearch.action.ActionListener;
1111
import org.elasticsearch.client.Client;
12-
import org.elasticsearch.cluster.service.ClusterService;
1312
import org.elasticsearch.common.logging.Loggers;
1413
import org.elasticsearch.index.query.QueryBuilder;
1514
import org.elasticsearch.index.query.QueryBuilders;
@@ -48,8 +47,8 @@ public class ExpiredResultsRemover extends AbstractExpiredJobDataRemover {
4847
private final Client client;
4948
private final Auditor auditor;
5049

51-
public ExpiredResultsRemover(Client client, ClusterService clusterService, Auditor auditor) {
52-
super(clusterService);
50+
public ExpiredResultsRemover(Client client, Auditor auditor) {
51+
super(client);
5352
this.client = Objects.requireNonNull(client);
5453
this.auditor = Objects.requireNonNull(auditor);
5554
}

0 commit comments

Comments
 (0)