Commit cef9f30 — [ML] Delete job document (elastic#34595)
1 parent: 4e3d565

File tree: 6 files changed, +153 / −135 lines

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/persistence/AnomalyDetectorsIndex.java

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,6 @@
55
*/
66
package org.elasticsearch.xpack.core.ml.job.persistence;
77

8-
import org.elasticsearch.cluster.ClusterState;
9-
import org.elasticsearch.xpack.core.ml.MlMetadata;
10-
118
/**
129
* Methods for handling index naming related functions
1310
*/
@@ -40,15 +37,6 @@ public static String resultsWriteAlias(String jobId) {
4037
return AnomalyDetectorsIndexFields.RESULTS_INDEX_PREFIX + ".write-" + jobId;
4138
}
4239

43-
/**
44-
* Retrieves the currently defined physical index from the job state
45-
* @param jobId Job Id
46-
* @return The index name
47-
*/
48-
public static String getPhysicalIndexFromState(ClusterState state, String jobId) {
49-
return MlMetadata.getMlMetadata(state).getJobs().get(jobId).getResultsIndexName();
50-
}
51-
5240
/**
5341
* The name of the default index where a job's state is stored
5442
* @return The index name

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportDeleteJobAction.java

Lines changed: 72 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,11 @@
2424
import org.elasticsearch.action.support.master.TransportMasterNodeAction;
2525
import org.elasticsearch.client.Client;
2626
import org.elasticsearch.client.ParentTaskAssigningClient;
27-
import org.elasticsearch.cluster.AckedClusterStateUpdateTask;
2827
import org.elasticsearch.cluster.ClusterState;
29-
import org.elasticsearch.cluster.ClusterStateUpdateTask;
3028
import org.elasticsearch.cluster.block.ClusterBlockException;
3129
import org.elasticsearch.cluster.block.ClusterBlockLevel;
3230
import org.elasticsearch.cluster.metadata.AliasMetaData;
3331
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
34-
import org.elasticsearch.cluster.metadata.MetaData;
3532
import org.elasticsearch.cluster.service.ClusterService;
3633
import org.elasticsearch.common.CheckedConsumer;
3734
import org.elasticsearch.common.Nullable;
@@ -52,30 +49,36 @@
5249
import org.elasticsearch.tasks.TaskId;
5350
import org.elasticsearch.threadpool.ThreadPool;
5451
import org.elasticsearch.transport.TransportService;
55-
import org.elasticsearch.xpack.core.ml.MlMetadata;
5652
import org.elasticsearch.xpack.core.ml.MlTasks;
5753
import org.elasticsearch.xpack.core.ml.action.DeleteJobAction;
5854
import org.elasticsearch.xpack.core.ml.action.GetModelSnapshotsAction;
5955
import org.elasticsearch.xpack.core.ml.action.KillProcessAction;
6056
import org.elasticsearch.xpack.core.ml.action.util.PageParams;
6157
import org.elasticsearch.xpack.core.ml.job.config.Job;
58+
import org.elasticsearch.xpack.core.ml.job.config.JobState;
59+
import org.elasticsearch.xpack.core.ml.job.config.JobTaskState;
6260
import org.elasticsearch.xpack.core.ml.job.messages.Messages;
6361
import org.elasticsearch.xpack.core.ml.job.persistence.AnomalyDetectorsIndex;
6462
import org.elasticsearch.xpack.core.ml.job.persistence.AnomalyDetectorsIndexFields;
6563
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.CategorizerState;
6664
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.ModelSnapshot;
6765
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.Quantiles;
66+
import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
67+
import org.elasticsearch.xpack.ml.datafeed.persistence.DatafeedConfigProvider;
68+
import org.elasticsearch.xpack.ml.job.persistence.JobConfigProvider;
6869
import org.elasticsearch.xpack.ml.job.persistence.JobDataDeleter;
6970
import org.elasticsearch.xpack.ml.job.persistence.JobResultsProvider;
7071
import org.elasticsearch.xpack.ml.notifications.Auditor;
7172
import org.elasticsearch.xpack.ml.utils.MlIndicesUtils;
7273

7374
import java.util.ArrayList;
75+
import java.util.Collections;
7476
import java.util.HashMap;
7577
import java.util.HashSet;
7678
import java.util.List;
7779
import java.util.Map;
7880
import java.util.Set;
81+
import java.util.concurrent.atomic.AtomicReference;
7982
import java.util.function.Consumer;
8083

8184
import static org.elasticsearch.xpack.core.ClientHelper.ML_ORIGIN;
@@ -89,6 +92,8 @@ public class TransportDeleteJobAction extends TransportMasterNodeAction<DeleteJo
8992
private final PersistentTasksService persistentTasksService;
9093
private final Auditor auditor;
9194
private final JobResultsProvider jobResultsProvider;
95+
private final JobConfigProvider jobConfigProvider;
96+
private final DatafeedConfigProvider datafeedConfigProvider;
9297

9398
/**
9499
* A map of task listeners by job_id.
@@ -102,13 +107,16 @@ public class TransportDeleteJobAction extends TransportMasterNodeAction<DeleteJo
102107
public TransportDeleteJobAction(Settings settings, TransportService transportService, ClusterService clusterService,
103108
ThreadPool threadPool, ActionFilters actionFilters,
104109
IndexNameExpressionResolver indexNameExpressionResolver, PersistentTasksService persistentTasksService,
105-
Client client, Auditor auditor, JobResultsProvider jobResultsProvider) {
110+
Client client, Auditor auditor, JobResultsProvider jobResultsProvider,
111+
JobConfigProvider jobConfigProvider, DatafeedConfigProvider datafeedConfigProvider) {
106112
super(settings, DeleteJobAction.NAME, transportService, clusterService, threadPool, actionFilters,
107113
indexNameExpressionResolver, DeleteJobAction.Request::new);
108114
this.client = client;
109115
this.persistentTasksService = persistentTasksService;
110116
this.auditor = auditor;
111117
this.jobResultsProvider = jobResultsProvider;
118+
this.jobConfigProvider = jobConfigProvider;
119+
this.datafeedConfigProvider = datafeedConfigProvider;
112120
this.listenersByJobId = new HashMap<>();
113121
}
114122

@@ -137,6 +145,10 @@ protected void masterOperation(Task task, DeleteJobAction.Request request, Clust
137145
ActionListener<AcknowledgedResponse> listener) {
138146
logger.debug("Deleting job '{}'", request.getJobId());
139147

148+
if (request.isForce() == false) {
149+
checkJobIsNotOpen(request.getJobId(), state);
150+
}
151+
140152
TaskId taskId = new TaskId(clusterService.localNode().getId(), task.getId());
141153
ParentTaskAssigningClient parentTaskClient = new ParentTaskAssigningClient(client, taskId);
142154

@@ -175,7 +187,7 @@ protected void masterOperation(Task task, DeleteJobAction.Request request, Clust
175187
finalListener.onFailure(e);
176188
});
177189

178-
markJobAsDeleting(request.getJobId(), markAsDeletingListener, request.isForce());
190+
markJobAsDeletingIfNotUsed(request.getJobId(), markAsDeletingListener);
179191
}
180192

181193
private void notifyListeners(String jobId, @Nullable AcknowledgedResponse ack, @Nullable Exception error) {
@@ -211,33 +223,15 @@ private void normalDeleteJob(ParentTaskAssigningClient parentTaskClient, DeleteJ
211223
}
212224
};
213225

214-
// Step 3. When the physical storage has been deleted, remove from Cluster State
226+
// Step 3. When the physical storage has been deleted, delete the job config document
215227
// -------
216-
CheckedConsumer<Boolean, Exception> deleteJobStateHandler = response -> clusterService.submitStateUpdateTask(
217-
"delete-job-" + jobId,
218-
new AckedClusterStateUpdateTask<Boolean>(request, ActionListener.wrap(apiResponseHandler, listener::onFailure)) {
219-
220-
@Override
221-
protected Boolean newResponse(boolean acknowledged) {
222-
return acknowledged && response;
223-
}
224-
225-
@Override
226-
public ClusterState execute(ClusterState currentState) {
227-
MlMetadata currentMlMetadata = MlMetadata.getMlMetadata(currentState);
228-
if (currentMlMetadata.getJobs().containsKey(jobId) == false) {
229-
// We wouldn't have got here if the job never existed so
230-
// the Job must have been deleted by another action.
231-
// Don't error in this case
232-
return currentState;
233-
}
234-
235-
MlMetadata.Builder builder = new MlMetadata.Builder(currentMlMetadata);
236-
builder.deleteJob(jobId, currentState.getMetaData().custom(PersistentTasksCustomMetaData.TYPE));
237-
return buildNewClusterState(currentState, builder);
238-
}
239-
});
240-
228+
// Don't report an error if the document has already been deleted
229+
CheckedConsumer<Boolean, Exception> deleteJobStateHandler = response -> jobConfigProvider.deleteJob(jobId, false,
230+
ActionListener.wrap(
231+
deleteResponse -> apiResponseHandler.accept(Boolean.TRUE),
232+
listener::onFailure
233+
)
234+
);
241235

242236
// Step 2. Remove the job from any calendars
243237
CheckedConsumer<Boolean, Exception> removeFromCalendarsHandler = response -> jobResultsProvider.removeJobFromCalendars(jobId,
@@ -251,26 +245,26 @@ public ClusterState execute(ClusterState currentState) {
251245
private void deleteJobDocuments(ParentTaskAssigningClient parentTaskClient, String jobId,
252246
CheckedConsumer<Boolean, Exception> finishedHandler, Consumer<Exception> failureHandler) {
253247

254-
final String indexName = AnomalyDetectorsIndex.getPhysicalIndexFromState(clusterService.state(), jobId);
255-
final String indexPattern = indexName + "-*";
248+
AtomicReference<String> indexName = new AtomicReference<>();
256249

257250
final ActionListener<AcknowledgedResponse> completionHandler = ActionListener.wrap(
258251
response -> finishedHandler.accept(response.isAcknowledged()),
259252
failureHandler);
260253

261-
// Step 7. If we did not drop the index and after DBQ state done, we delete the aliases
254+
// Step 8. If we did not drop the index and after DBQ state done, we delete the aliases
262255
ActionListener<BulkByScrollResponse> dbqHandler = ActionListener.wrap(
263256
bulkByScrollResponse -> {
264257
if (bulkByScrollResponse == null) { // no action was taken by DBQ, assume Index was deleted
265258
completionHandler.onResponse(new AcknowledgedResponse(true));
266259
} else {
267260
if (bulkByScrollResponse.isTimedOut()) {
268-
logger.warn("[{}] DeleteByQuery for indices [{}, {}] timed out.", jobId, indexName, indexPattern);
261+
logger.warn("[{}] DeleteByQuery for indices [{}, {}] timed out.", jobId, indexName.get(),
262+
indexName.get() + "-*");
269263
}
270264
if (!bulkByScrollResponse.getBulkFailures().isEmpty()) {
271265
logger.warn("[{}] {} failures and {} conflicts encountered while running DeleteByQuery on indices [{}, {}].",
272266
jobId, bulkByScrollResponse.getBulkFailures().size(), bulkByScrollResponse.getVersionConflicts(),
273-
indexName, indexPattern);
267+
indexName.get(), indexName.get() + "-*");
274268
for (BulkItemResponse.Failure failure : bulkByScrollResponse.getBulkFailures()) {
275269
logger.warn("DBQ failure: " + failure);
276270
}
@@ -280,12 +274,13 @@ private void deleteJobDocuments(ParentTaskAssigningClient parentTaskClient, Stri
280274
},
281275
failureHandler);
282276

283-
// Step 6. If we did not delete the index, we run a delete by query
277+
// Step 7. If we did not delete the index, we run a delete by query
284278
ActionListener<Boolean> deleteByQueryExecutor = ActionListener.wrap(
285279
response -> {
286280
if (response) {
287-
logger.info("Running DBQ on [" + indexName + "," + indexPattern + "] for job [" + jobId + "]");
288-
DeleteByQueryRequest request = new DeleteByQueryRequest(indexName, indexPattern);
281+
String indexPattern = indexName.get() + "-*";
282+
logger.info("Running DBQ on [" + indexName.get() + "," + indexPattern + "] for job [" + jobId + "]");
283+
DeleteByQueryRequest request = new DeleteByQueryRequest(indexName.get(), indexPattern);
289284
ConstantScoreQueryBuilder query =
290285
new ConstantScoreQueryBuilder(new TermQueryBuilder(Job.ID.getPreferredName(), jobId));
291286
request.setQuery(query);
@@ -301,15 +296,15 @@ private void deleteJobDocuments(ParentTaskAssigningClient parentTaskClient, Stri
301296
},
302297
failureHandler);
303298

304-
// Step 5. If we have any hits, that means we are NOT the only job on this index, and should not delete it
299+
// Step 6. If we have any hits, that means we are NOT the only job on this index, and should not delete it
305300
// if we do not have any hits, we can drop the index and then skip the DBQ and alias deletion
306301
ActionListener<SearchResponse> customIndexSearchHandler = ActionListener.wrap(
307302
searchResponse -> {
308303
if (searchResponse == null || searchResponse.getHits().totalHits > 0) {
309304
deleteByQueryExecutor.onResponse(true); // We need to run DBQ and alias deletion
310305
} else {
311-
logger.info("Running DELETE Index on [" + indexName + "] for job [" + jobId + "]");
312-
DeleteIndexRequest request = new DeleteIndexRequest(indexName);
306+
logger.info("Running DELETE Index on [" + indexName.get() + "] for job [" + jobId + "]");
307+
DeleteIndexRequest request = new DeleteIndexRequest(indexName.get());
313308
request.indicesOptions(IndicesOptions.lenientExpandOpen());
314309
// If we have deleted the index, then we don't need to delete the aliases or run the DBQ
315310
executeAsyncWithOrigin(
@@ -331,9 +326,11 @@ private void deleteJobDocuments(ParentTaskAssigningClient parentTaskClient, Stri
331326
}
332327
);
333328

334-
// Step 4. Determine if we are on a shared index by looking at `.ml-anomalies-shared` or the custom index's aliases
335-
ActionListener<Boolean> deleteCategorizerStateHandler = ActionListener.wrap(
336-
response -> {
329+
// Step 5. Determine if we are on a shared index by looking at `.ml-anomalies-shared` or the custom index's aliases
330+
ActionListener<Job.Builder> getJobHandler = ActionListener.wrap(
331+
builder -> {
332+
Job job = builder.build();
333+
indexName.set(job.getResultsIndexName());
337334
if (indexName.equals(AnomalyDetectorsIndexFields.RESULTS_INDEX_PREFIX +
338335
AnomalyDetectorsIndexFields.RESULTS_INDEX_DEFAULT)) {
339336
//don't bother searching the index any further, we are on the default shared
@@ -344,14 +341,22 @@ private void deleteJobDocuments(ParentTaskAssigningClient parentTaskClient, Stri
344341
.query(QueryBuilders.boolQuery().filter(
345342
QueryBuilders.boolQuery().mustNot(QueryBuilders.termQuery(Job.ID.getPreferredName(), jobId))));
346343

347-
SearchRequest searchRequest = new SearchRequest(indexName);
344+
SearchRequest searchRequest = new SearchRequest(indexName.get());
348345
searchRequest.source(source);
349346
executeAsyncWithOrigin(parentTaskClient, ML_ORIGIN, SearchAction.INSTANCE, searchRequest, customIndexSearchHandler);
350347
}
351348
},
352349
failureHandler
353350
);
354351

352+
// Step 4. Get the job as the result index name is required
353+
ActionListener<Boolean> deleteCategorizerStateHandler = ActionListener.wrap(
354+
response -> {
355+
jobConfigProvider.getJob(jobId, getJobHandler);
356+
},
357+
failureHandler
358+
);
359+
355360
// Step 3. Delete quantiles done, delete the categorizer state
356361
ActionListener<Boolean> deleteQuantilesHandler = ActionListener.wrap(
357362
response -> deleteCategorizerState(parentTaskClient, jobId, 1, deleteCategorizerStateHandler),
@@ -554,36 +559,28 @@ public void onFailure(Exception e) {
554559
}
555560
}
556561

557-
private void markJobAsDeleting(String jobId, ActionListener<Boolean> listener, boolean force) {
558-
clusterService.submitStateUpdateTask("mark-job-as-deleted", new ClusterStateUpdateTask() {
559-
@Override
560-
public ClusterState execute(ClusterState currentState) {
561-
PersistentTasksCustomMetaData tasks = currentState.metaData().custom(PersistentTasksCustomMetaData.TYPE);
562-
MlMetadata.Builder builder = new MlMetadata.Builder(MlMetadata.getMlMetadata(currentState));
563-
builder.markJobAsDeleting(jobId, tasks, force);
564-
return buildNewClusterState(currentState, builder);
565-
}
566-
567-
@Override
568-
public void onFailure(String source, Exception e) {
569-
listener.onFailure(e);
570-
}
571-
572-
@Override
573-
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
574-
logger.debug("Job [" + jobId + "] is successfully marked as deleted");
575-
listener.onResponse(true);
576-
}
577-
});
562+
private void checkJobIsNotOpen(String jobId, ClusterState state) {
563+
PersistentTasksCustomMetaData tasks = state.metaData().custom(PersistentTasksCustomMetaData.TYPE);
564+
PersistentTasksCustomMetaData.PersistentTask<?> jobTask = MlTasks.getJobTask(jobId, tasks);
565+
if (jobTask != null) {
566+
JobTaskState jobTaskState = (JobTaskState) jobTask.getState();
567+
throw ExceptionsHelper.conflictStatusException("Cannot delete job [" + jobId + "] because the job is "
568+
+ ((jobTaskState == null) ? JobState.OPENING : jobTaskState.getState()));
569+
}
578570
}
579571

580-
static boolean jobIsDeletedFromState(String jobId, ClusterState clusterState) {
581-
return !MlMetadata.getMlMetadata(clusterState).getJobs().containsKey(jobId);
582-
}
572+
private void markJobAsDeletingIfNotUsed(String jobId, ActionListener<Boolean> listener) {
583573

584-
private static ClusterState buildNewClusterState(ClusterState currentState, MlMetadata.Builder builder) {
585-
ClusterState.Builder newState = ClusterState.builder(currentState);
586-
newState.metaData(MetaData.builder(currentState.getMetaData()).putCustom(MlMetadata.TYPE, builder.build()).build());
587-
return newState.build();
574+
datafeedConfigProvider.findDatafeedsForJobIds(Collections.singletonList(jobId), ActionListener.wrap(
575+
datafeedIds -> {
576+
if (datafeedIds.isEmpty() == false) {
577+
listener.onFailure(ExceptionsHelper.conflictStatusException("Cannot delete job [" + jobId + "] because datafeed ["
578+
+ datafeedIds.iterator().next() + "] refers to it"));
579+
return;
580+
}
581+
jobConfigProvider.markJobAsDeleting(jobId, listener);
582+
},
583+
listener::onFailure
584+
));
588585
}
589586
}

Comments (0)