Skip to content

Commit 0cd088b

Browse files
authored
[ML] ensure the ml-config index (#36792)
1 parent c1ed462 commit 0cd088b

File tree

11 files changed

+303
-83
lines changed

11 files changed

+303
-83
lines changed

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/persistence/AnomalyDetectorsIndex.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
*/
1111
public final class AnomalyDetectorsIndex {
1212

13+
public static final int CONFIG_INDEX_MAX_RESULTS_WINDOW = 10_000;
14+
1315
private AnomalyDetectorsIndex() {
1416
}
1517

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -675,7 +675,9 @@ public UnaryOperator<Map<String, IndexTemplateMetaData>> getIndexTemplateMetaDat
675675
// least possible burden on Elasticsearch
676676
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
677677
.put(IndexMetaData.SETTING_AUTO_EXPAND_REPLICAS, "0-1")
678-
.put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), delayedNodeTimeOutSetting))
678+
.put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), delayedNodeTimeOutSetting)
679+
.put(IndexSettings.MAX_RESULT_WINDOW_SETTING.getKey(),
680+
AnomalyDetectorsIndex.CONFIG_INDEX_MAX_RESULTS_WINDOW))
679681
.version(Version.CURRENT.id)
680682
.putMapping(ElasticsearchMappings.DOC_TYPE, Strings.toString(configMapping))
681683
.build();

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MlAssignmentNotifier.java

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
import org.elasticsearch.action.ActionListener;
1111
import org.elasticsearch.client.Client;
1212
import org.elasticsearch.cluster.ClusterChangedEvent;
13-
import org.elasticsearch.cluster.ClusterState;
1413
import org.elasticsearch.cluster.ClusterStateListener;
1514
import org.elasticsearch.cluster.node.DiscoveryNode;
1615
import org.elasticsearch.cluster.service.ClusterService;
@@ -58,23 +57,23 @@ public void clusterChanged(ClusterChangedEvent event) {
5857
return;
5958
}
6059

61-
if (event.metaDataChanged() == false) {
62-
return;
63-
}
64-
PersistentTasksCustomMetaData previous = event.previousState().getMetaData().custom(PersistentTasksCustomMetaData.TYPE);
65-
PersistentTasksCustomMetaData current = event.state().getMetaData().custom(PersistentTasksCustomMetaData.TYPE);
66-
6760
mlConfigMigrator.migrateConfigsWithoutTasks(event.state(), ActionListener.wrap(
68-
response -> threadPool.executor(executorName()).execute(() -> auditChangesToMlTasks(current, previous, event.state())),
61+
response -> threadPool.executor(executorName()).execute(() -> auditChangesToMlTasks(event)),
6962
e -> {
7063
logger.error("error migrating ml configurations", e);
71-
threadPool.executor(executorName()).execute(() -> auditChangesToMlTasks(current, previous, event.state()));
64+
threadPool.executor(executorName()).execute(() -> auditChangesToMlTasks(event));
7265
}
7366
));
7467
}
7568

76-
private void auditChangesToMlTasks(PersistentTasksCustomMetaData current, PersistentTasksCustomMetaData previous,
77-
ClusterState state) {
69+
private void auditChangesToMlTasks(ClusterChangedEvent event) {
70+
71+
if (event.metaDataChanged() == false) {
72+
return;
73+
}
74+
75+
PersistentTasksCustomMetaData previous = event.previousState().getMetaData().custom(PersistentTasksCustomMetaData.TYPE);
76+
PersistentTasksCustomMetaData current = event.state().getMetaData().custom(PersistentTasksCustomMetaData.TYPE);
7877

7978
if (Objects.equals(previous, current)) {
8079
return;
@@ -92,7 +91,7 @@ private void auditChangesToMlTasks(PersistentTasksCustomMetaData current, Persis
9291
if (currentAssignment.getExecutorNode() == null) {
9392
auditor.warning(jobId, "No node found to open job. Reasons [" + currentAssignment.getExplanation() + "]");
9493
} else {
95-
DiscoveryNode node = state.nodes().get(currentAssignment.getExecutorNode());
94+
DiscoveryNode node = event.state().nodes().get(currentAssignment.getExecutorNode());
9695
auditor.info(jobId, "Opening job on node [" + node.toString() + "]");
9796
}
9897
} else if (MlTasks.DATAFEED_TASK_NAME.equals(currentTask.getTaskName())) {
@@ -106,7 +105,7 @@ private void auditChangesToMlTasks(PersistentTasksCustomMetaData current, Persis
106105
auditor.warning(jobId, msg);
107106
}
108107
} else {
109-
DiscoveryNode node = state.nodes().get(currentAssignment.getExecutorNode());
108+
DiscoveryNode node = event.state().nodes().get(currentAssignment.getExecutorNode());
110109
if (jobId != null) {
111110
auditor.info(jobId, "Starting datafeed [" + datafeedParams.getDatafeedId() + "] on node [" + node + "]");
112111
}

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MlConfigMigrationEligibilityCheck.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,15 @@
77

88
import org.elasticsearch.Version;
99
import org.elasticsearch.cluster.ClusterState;
10+
import org.elasticsearch.cluster.routing.IndexRoutingTable;
1011
import org.elasticsearch.cluster.service.ClusterService;
1112
import org.elasticsearch.common.settings.Setting;
1213
import org.elasticsearch.common.settings.Settings;
1314
import org.elasticsearch.persistent.PersistentTasksCustomMetaData;
1415
import org.elasticsearch.xpack.core.ml.MlMetadata;
1516
import org.elasticsearch.xpack.core.ml.MlTasks;
1617
import org.elasticsearch.xpack.core.ml.job.config.Job;
18+
import org.elasticsearch.xpack.core.ml.job.persistence.AnomalyDetectorsIndex;
1719

1820
/**
1921
* Checks whether migration can start and whether ML resources (e.g. jobs, datafeeds)
@@ -37,10 +39,12 @@ private void setConfigMigrationEnabled(boolean configMigrationEnabled) {
3739
this.isConfigMigrationEnabled = configMigrationEnabled;
3840
}
3941

42+
4043
/**
4144
* Can migration start? Returns:
4245
* False if config migration is disabled via the setting {@link #ENABLE_CONFIG_MIGRATION}
4346
* False if the min node version of the cluster is before {@link #MIN_NODE_VERSION}
47+
* False if the .ml-config index shards are not active
4448
* True otherwise
4549
* @param clusterState The cluster state
4650
* @return A boolean that dictates if config migration can start
@@ -54,12 +58,26 @@ public boolean canStartMigration(ClusterState clusterState) {
5458
if (minNodeVersion.before(MIN_NODE_VERSION)) {
5559
return false;
5660
}
61+
62+
return mlConfigIndexIsAllocated(clusterState);
63+
}
64+
65+
static boolean mlConfigIndexIsAllocated(ClusterState clusterState) {
66+
if (clusterState.metaData().hasIndex(AnomalyDetectorsIndex.configIndexName()) == false) {
67+
return false;
68+
}
69+
70+
IndexRoutingTable routingTable = clusterState.getRoutingTable().index(AnomalyDetectorsIndex.configIndexName());
71+
if (routingTable == null || routingTable.allPrimaryShardsActive() == false) {
72+
return false;
73+
}
5774
return true;
5875
}
5976

6077
/**
6178
* Is the job eligible for migration? Returns:
6279
* False if {@link #canStartMigration(ClusterState)} returns {@code false}
80+
* False if the job is not in the cluster state
6381
* False if the job is being deleted, i.e. {@link Job#isDeleting()} returns {@code true}
6482
* False if the job has a persistent task
6583
* True otherwise i.e. the job is present, not deleting

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MlConfigMigrator.java

Lines changed: 57 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
import org.elasticsearch.action.ActionListener;
1212
import org.elasticsearch.action.DocWriteRequest;
1313
import org.elasticsearch.action.DocWriteResponse;
14+
import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
15+
import org.elasticsearch.action.admin.indices.create.CreateIndexResponse;
1416
import org.elasticsearch.action.bulk.BulkItemResponse;
1517
import org.elasticsearch.action.bulk.BulkRequestBuilder;
1618
import org.elasticsearch.action.bulk.BulkResponse;
@@ -21,6 +23,7 @@
2123
import org.elasticsearch.client.Client;
2224
import org.elasticsearch.cluster.ClusterState;
2325
import org.elasticsearch.cluster.ClusterStateUpdateTask;
26+
import org.elasticsearch.cluster.metadata.IndexMetaData;
2427
import org.elasticsearch.cluster.metadata.MetaData;
2528
import org.elasticsearch.cluster.service.ClusterService;
2629
import org.elasticsearch.common.settings.Settings;
@@ -29,6 +32,7 @@
2932
import org.elasticsearch.common.xcontent.ToXContentObject;
3033
import org.elasticsearch.common.xcontent.XContentBuilder;
3134
import org.elasticsearch.common.xcontent.XContentFactory;
35+
import org.elasticsearch.index.IndexSettings;
3236
import org.elasticsearch.persistent.PersistentTasksCustomMetaData;
3337
import org.elasticsearch.xpack.core.ml.MlMetadata;
3438
import org.elasticsearch.xpack.core.ml.MlTasks;
@@ -126,19 +130,11 @@ public MlConfigMigrator(Settings settings, Client client, ClusterService cluster
126130
* @param listener The success listener
127131
*/
128132
public void migrateConfigsWithoutTasks(ClusterState clusterState, ActionListener<Boolean> listener) {
129-
130-
if (migrationEligibilityCheck.canStartMigration(clusterState) == false) {
131-
listener.onResponse(false);
132-
return;
133-
}
134-
135133
if (migrationInProgress.compareAndSet(false, true) == false) {
136134
listener.onResponse(Boolean.FALSE);
137135
return;
138136
}
139137

140-
logger.debug("migrating ml configurations");
141-
142138
ActionListener<Boolean> unMarkMigrationInProgress = ActionListener.wrap(
143139
response -> {
144140
migrationInProgress.set(false);
@@ -150,19 +146,34 @@ public void migrateConfigsWithoutTasks(ClusterState clusterState, ActionListener
150146
}
151147
);
152148

149+
List<JobsAndDatafeeds> batches = splitInBatches(clusterState);
150+
if (batches.isEmpty()) {
151+
unMarkMigrationInProgress.onResponse(Boolean.FALSE);
152+
return;
153+
}
154+
155+
if (clusterState.metaData().hasIndex(AnomalyDetectorsIndex.configIndexName()) == false) {
156+
createConfigIndex(ActionListener.wrap(
157+
response -> {
158+
unMarkMigrationInProgress.onResponse(Boolean.FALSE);
159+
},
160+
unMarkMigrationInProgress::onFailure
161+
));
162+
return;
163+
}
164+
165+
if (migrationEligibilityCheck.canStartMigration(clusterState) == false) {
166+
unMarkMigrationInProgress.onResponse(Boolean.FALSE);
167+
return;
168+
}
169+
153170
snapshotMlMeta(MlMetadata.getMlMetadata(clusterState), ActionListener.wrap(
154-
response -> {
155-
// We have successfully snapshotted the ML configs so we don't need to try again
156-
tookConfigSnapshot.set(true);
157-
158-
List<JobsAndDatafeeds> batches = splitInBatches(clusterState);
159-
if (batches.isEmpty()) {
160-
unMarkMigrationInProgress.onResponse(Boolean.FALSE);
161-
return;
162-
}
163-
migrateBatches(batches, unMarkMigrationInProgress);
164-
},
165-
unMarkMigrationInProgress::onFailure
171+
response -> {
172+
// We have successfully snapshotted the ML configs so we don't need to try again
173+
tookConfigSnapshot.set(true);
174+
migrateBatches(batches, unMarkMigrationInProgress);
175+
},
176+
unMarkMigrationInProgress::onFailure
166177
));
167178
}
168179

@@ -296,13 +307,15 @@ static RemovalResult removeJobsAndDatafeeds(List<String> jobsToRemove, List<Stri
296307
private void addJobIndexRequests(Collection<Job> jobs, BulkRequestBuilder bulkRequestBuilder) {
297308
ToXContent.Params params = new ToXContent.MapParams(JobConfigProvider.TO_XCONTENT_PARAMS);
298309
for (Job job : jobs) {
310+
logger.debug("adding job to migrate: " + job.getId());
299311
bulkRequestBuilder.add(indexRequest(job, Job.documentId(job.getId()), params));
300312
}
301313
}
302314

303315
private void addDatafeedIndexRequests(Collection<DatafeedConfig> datafeedConfigs, BulkRequestBuilder bulkRequestBuilder) {
304316
ToXContent.Params params = new ToXContent.MapParams(DatafeedConfigProvider.TO_XCONTENT_PARAMS);
305317
for (DatafeedConfig datafeedConfig : datafeedConfigs) {
318+
logger.debug("adding datafeed to migrate: " + datafeedConfig.getId());
306319
bulkRequestBuilder.add(indexRequest(datafeedConfig, DatafeedConfig.documentId(datafeedConfig.getId()), params));
307320
}
308321
}
@@ -318,7 +331,6 @@ private IndexRequest indexRequest(ToXContentObject source, String documentId, To
318331
return indexRequest;
319332
}
320333

321-
322334
// public for testing
323335
public void snapshotMlMeta(MlMetadata mlMetadata, ActionListener<Boolean> listener) {
324336

@@ -361,6 +373,30 @@ public void snapshotMlMeta(MlMetadata mlMetadata, ActionListener<Boolean> listen
361373
);
362374
}
363375

376+
private void createConfigIndex(ActionListener<Boolean> listener) {
377+
logger.info("creating the .ml-config index");
378+
CreateIndexRequest createIndexRequest = new CreateIndexRequest(AnomalyDetectorsIndex.configIndexName());
379+
try
380+
{
381+
createIndexRequest.settings(
382+
Settings.builder()
383+
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
384+
.put(IndexMetaData.SETTING_AUTO_EXPAND_REPLICAS, "0-1")
385+
.put(IndexSettings.MAX_RESULT_WINDOW_SETTING.getKey(), AnomalyDetectorsIndex.CONFIG_INDEX_MAX_RESULTS_WINDOW)
386+
);
387+
createIndexRequest.mapping(ElasticsearchMappings.DOC_TYPE, ElasticsearchMappings.configMapping());
388+
} catch (Exception e) {
389+
logger.error("error writing the .ml-config mappings", e);
390+
listener.onFailure(e);
391+
return;
392+
}
393+
394+
executeAsyncWithOrigin(client.threadPool().getThreadContext(), ML_ORIGIN, createIndexRequest,
395+
ActionListener.<CreateIndexResponse>wrap(
396+
r -> listener.onResponse(r.isAcknowledged()),
397+
listener::onFailure
398+
), client.admin().indices()::create);
399+
}
364400

365401
public static Job updateJobForMigration(Job job) {
366402
Job.Builder builder = new Job.Builder(job);

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/persistence/DatafeedConfigProvider.java

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,15 @@
7373
import static org.elasticsearch.xpack.core.ClientHelper.ML_ORIGIN;
7474
import static org.elasticsearch.xpack.core.ClientHelper.executeAsyncWithOrigin;
7575

76+
/**
77+
* This class implements CRUD operations for the
78+
* datafeed configuration document
79+
*
80+
* The number of datafeeds returned in a search is limited to
81+
* {@link AnomalyDetectorsIndex#CONFIG_INDEX_MAX_RESULTS_WINDOW}.
82+
* In most cases we expect 10s or 100s of datafeeds to be defined and
83+
* a search for all datafeeds should return all.
84+
*/
7685
public class DatafeedConfigProvider {
7786

7887
private static final Logger logger = LogManager.getLogger(DatafeedConfigProvider.class);
@@ -88,13 +97,6 @@ public class DatafeedConfigProvider {
8897
TO_XCONTENT_PARAMS = Collections.unmodifiableMap(modifiable);
8998
}
9099

91-
/**
92-
* In most cases we expect 10s or 100s of datafeeds to be defined and
93-
* a search for all datafeeds should return all.
94-
* TODO this is a temporary fix
95-
*/
96-
public int searchSize = 1000;
97-
98100
public DatafeedConfigProvider(Client client, NamedXContentRegistry xContentRegistry) {
99101
this.client = client;
100102
this.xContentRegistry = xContentRegistry;
@@ -433,7 +435,7 @@ private SearchRequest buildExpandDatafeedIdsSearch(String expression) {
433435
return client.prepareSearch(AnomalyDetectorsIndex.configIndexName())
434436
.setIndicesOptions(IndicesOptions.lenientExpandOpen())
435437
.setSource(sourceBuilder)
436-
.setSize(searchSize)
438+
.setSize(AnomalyDetectorsIndex.CONFIG_INDEX_MAX_RESULTS_WINDOW)
437439
.request();
438440
}
439441

@@ -458,7 +460,7 @@ public void expandDatafeedConfigs(String expression, boolean allowNoDatafeeds, A
458460
SearchRequest searchRequest = client.prepareSearch(AnomalyDetectorsIndex.configIndexName())
459461
.setIndicesOptions(IndicesOptions.lenientExpandOpen())
460462
.setSource(sourceBuilder)
461-
.setSize(searchSize)
463+
.setSize(AnomalyDetectorsIndex.CONFIG_INDEX_MAX_RESULTS_WINDOW)
462464
.request();
463465

464466
ExpandedIdsMatcher requiredMatches = new ExpandedIdsMatcher(tokens, allowNoDatafeeds);
@@ -514,7 +516,7 @@ public void expandDatafeedConfigsWithoutMissingCheck(String expression, ActionLi
514516
SearchRequest searchRequest = client.prepareSearch(AnomalyDetectorsIndex.configIndexName())
515517
.setIndicesOptions(IndicesOptions.lenientExpandOpen())
516518
.setSource(sourceBuilder)
517-
.setSize(searchSize)
519+
.setSize(AnomalyDetectorsIndex.CONFIG_INDEX_MAX_RESULTS_WINDOW)
518520
.request();
519521

520522
executeAsyncWithOrigin(client.threadPool().getThreadContext(), ML_ORIGIN, searchRequest,

0 commit comments

Comments
 (0)