 import org.elasticsearch.cluster.metadata.MetaData;
 import org.elasticsearch.cluster.service.ClusterService;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.util.concurrent.EsExecutors;
 import org.elasticsearch.common.xcontent.ToXContent;
 import org.elasticsearch.common.xcontent.ToXContentObject;
 import org.elasticsearch.common.xcontent.XContentBuilder;
@@ -38,6 +39,7 @@
 import org.elasticsearch.xpack.core.ml.utils.ToXContentParams;
 import org.elasticsearch.xpack.ml.datafeed.persistence.DatafeedConfigProvider;
 import org.elasticsearch.xpack.ml.job.persistence.JobConfigProvider;
+import org.elasticsearch.xpack.ml.utils.ChainTaskExecutor;

 import java.io.IOException;
 import java.util.ArrayList;
@@ -96,14 +98,14 @@ public class MlConfigMigrator {
     private final MlConfigMigrationEligibilityCheck migrationEligibilityCheck;

     private final AtomicBoolean migrationInProgress;
-    private final AtomicBoolean firstTime;
+    private final AtomicBoolean tookConfigSnapshot;

     public MlConfigMigrator(Settings settings, Client client, ClusterService clusterService) {
         this.client = Objects.requireNonNull(client);
         this.clusterService = Objects.requireNonNull(clusterService);
         this.migrationEligibilityCheck = new MlConfigMigrationEligibilityCheck(settings, clusterService);
         this.migrationInProgress = new AtomicBoolean(false);
-        this.firstTime = new AtomicBoolean(true);
+        this.tookConfigSnapshot = new AtomicBoolean(false);
     }

     /**
@@ -135,12 +137,7 @@ public void migrateConfigsWithoutTasks(ClusterState clusterState, ActionListener<Boolean> listener) {
             return;
         }

-        Collection<DatafeedConfig> stoppedDatafeeds = stoppedDatafeedConfigs(clusterState);
-        Map<String, Job> eligibleJobs = nonDeletingJobs(closedJobConfigs(clusterState)).stream()
-                .map(MlConfigMigrator::updateJobForMigration)
-                .collect(Collectors.toMap(Job::getId, Function.identity(), (a, b) -> a));
-
-        JobsAndDatafeeds jobsAndDatafeedsToMigrate = limitWrites(stoppedDatafeeds, eligibleJobs);
+        logger.debug("migrating ml configurations");

         ActionListener<Boolean> unMarkMigrationInProgress = ActionListener.wrap(
             response -> {
@@ -153,37 +150,36 @@ public void migrateConfigsWithoutTasks(ClusterState clusterState, ActionListener<Boolean> listener) {
             }
         );

-        if (firstTime.get()) {
-            snapshotMlMeta(MlMetadata.getMlMetadata(clusterState), ActionListener.wrap(
-                    response -> {
-                        firstTime.set(false);
-                        migrate(jobsAndDatafeedsToMigrate, unMarkMigrationInProgress);
-                    },
-                    unMarkMigrationInProgress::onFailure
-            ));
-            return;
-        }
+        snapshotMlMeta(MlMetadata.getMlMetadata(clusterState), ActionListener.wrap(
+                response -> {
+                    // We have successfully snapshotted the ML configs so we don't need to try again
+                    tookConfigSnapshot.set(true);

-        migrate(jobsAndDatafeedsToMigrate, unMarkMigrationInProgress);
+                    List<JobsAndDatafeeds> batches = splitInBatches(clusterState);
+                    if (batches.isEmpty()) {
+                        unMarkMigrationInProgress.onResponse(Boolean.FALSE);
+                        return;
+                    }
+                    migrateBatches(batches, unMarkMigrationInProgress);
+                },
+                unMarkMigrationInProgress::onFailure
+        ));
     }

-    private void migrate(JobsAndDatafeeds jobsAndDatafeedsToMigrate, ActionListener<Boolean> listener) {
-        if (jobsAndDatafeedsToMigrate.totalCount() == 0) {
-            listener.onResponse(Boolean.FALSE);
-            return;
-        }
-
-        logger.debug("migrating ml configurations");
-
-        writeConfigToIndex(jobsAndDatafeedsToMigrate.datafeedConfigs, jobsAndDatafeedsToMigrate.jobs, ActionListener.wrap(
+    private void migrateBatches(List<JobsAndDatafeeds> batches, ActionListener<Boolean> listener) {
+        ChainTaskExecutor chainTaskExecutor = new ChainTaskExecutor(EsExecutors.newDirectExecutorService(), true);
+        for (JobsAndDatafeeds batch : batches) {
+            chainTaskExecutor.add(chainedListener -> writeConfigToIndex(batch.datafeedConfigs, batch.jobs, ActionListener.wrap(
                 failedDocumentIds -> {
-                    List<String> successfulJobWrites = filterFailedJobConfigWrites(failedDocumentIds, jobsAndDatafeedsToMigrate.jobs);
+                    List<String> successfulJobWrites = filterFailedJobConfigWrites(failedDocumentIds, batch.jobs);
                     List<String> successfulDatafeedWrites =
-                            filterFailedDatafeedConfigWrites(failedDocumentIds, jobsAndDatafeedsToMigrate.datafeedConfigs);
-                    removeFromClusterState(successfulJobWrites, successfulDatafeedWrites, listener);
+                            filterFailedDatafeedConfigWrites(failedDocumentIds, batch.datafeedConfigs);
+                    removeFromClusterState(successfulJobWrites, successfulDatafeedWrites, chainedListener);
                 },
-                listener::onFailure
-            ));
+                chainedListener::onFailure
+            )));
+        }
+        chainTaskExecutor.execute(ActionListener.wrap(aVoid -> listener.onResponse(true), listener::onFailure));
     }

     // Exposed for testing
@@ -208,9 +204,9 @@ public void writeConfigToIndex(Collection<DatafeedConfig> datafeedsToMigrate,
     }

     private void removeFromClusterState(List<String> jobsToRemoveIds, List<String> datafeedsToRemoveIds,
-                                        ActionListener<Boolean> listener) {
+                                        ActionListener<Void> listener) {
         if (jobsToRemoveIds.isEmpty() && datafeedsToRemoveIds.isEmpty()) {
-            listener.onResponse(Boolean.FALSE);
+            listener.onResponse(null);
             return;
         }

@@ -244,7 +240,7 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
                         logger.info("ml datafeed configurations migrated: {}", removedConfigs.get().removedDatafeedIds);
                     }
                 }
-                listener.onResponse(Boolean.TRUE);
+                listener.onResponse(null);
             }
         });
     }
@@ -326,12 +322,17 @@ private IndexRequest indexRequest(ToXContentObject source, String documentId, ToXContent.Params params) {
     // public for testing
     public void snapshotMlMeta(MlMetadata mlMetadata, ActionListener<Boolean> listener) {

+        if (tookConfigSnapshot.get()) {
+            listener.onResponse(true);
+            return;
+        }
+
         if (mlMetadata.getJobs().isEmpty() && mlMetadata.getDatafeeds().isEmpty()) {
-            listener.onResponse(Boolean.TRUE);
+            listener.onResponse(true);
             return;
         }

-        logger.debug("taking a snapshot of mlmetadata");
+        logger.debug("taking a snapshot of ml_metadata");
         String documentId = "ml-config";
         IndexRequestBuilder indexRequest = client.prepareIndex(AnomalyDetectorsIndex.jobStateIndexName(),
                 ElasticsearchMappings.DOC_TYPE, documentId)
@@ -345,7 +346,7 @@ public void snapshotMlMeta(MlMetadata mlMetadata, ActionListener<Boolean> listener) {

             indexRequest.setSource(builder);
         } catch (IOException e) {
-            logger.error("failed to serialise mlmetadata", e);
+            logger.error("failed to serialise ml_metadata", e);
             listener.onFailure(e);
             return;
         }
@@ -437,6 +438,22 @@ public int totalCount() {
         }
     }

+    public static List<JobsAndDatafeeds> splitInBatches(ClusterState clusterState) {
+        Collection<DatafeedConfig> stoppedDatafeeds = stoppedDatafeedConfigs(clusterState);
+        Map<String, Job> eligibleJobs = nonDeletingJobs(closedJobConfigs(clusterState)).stream()
+                .map(MlConfigMigrator::updateJobForMigration)
+                .collect(Collectors.toMap(Job::getId, Function.identity(), (a, b) -> a));
+
+        List<JobsAndDatafeeds> batches = new ArrayList<>();
+        while (stoppedDatafeeds.isEmpty() == false || eligibleJobs.isEmpty() == false) {
+            JobsAndDatafeeds batch = limitWrites(stoppedDatafeeds, eligibleJobs);
+            batches.add(batch);
+            stoppedDatafeeds.removeAll(batch.datafeedConfigs);
+            batch.jobs.forEach(job -> eligibleJobs.remove(job.getId()));
+        }
+        return batches;
+    }
+
     /**
      * Return at most {@link #MAX_BULK_WRITE_SIZE} configs favouring
      * datafeed and job pairs so if a datafeed is chosen so is its job.