64
64
import org .elasticsearch .xpack .ml .dataframe .MappingsMerger ;
65
65
import org .elasticsearch .xpack .ml .dataframe .SourceDestValidator ;
66
66
import org .elasticsearch .xpack .ml .dataframe .extractor .DataFrameDataExtractorFactory ;
67
+ import org .elasticsearch .xpack .ml .dataframe .extractor .ExtractedFieldsDetectorFactory ;
67
68
import org .elasticsearch .xpack .ml .dataframe .persistence .DataFrameAnalyticsConfigProvider ;
68
69
import org .elasticsearch .xpack .ml .job .JobNodeSelector ;
69
70
import org .elasticsearch .xpack .ml .notifications .DataFrameAnalyticsAuditor ;
@@ -228,33 +229,7 @@ private void getStartContext(String id, ActionListener<StartContext> finalListen
228
229
229
230
// Step 6. Validate that there are analyzable data in the source index
230
231
ActionListener <StartContext > validateMappingsMergeListener = ActionListener .wrap (
231
- startContext -> DataFrameDataExtractorFactory .createForSourceIndices (client ,
232
- "validate_source_index_has_rows-" + id ,
233
- startContext .config ,
234
- ActionListener .wrap (
235
- dataFrameDataExtractorFactory ->
236
- dataFrameDataExtractorFactory
237
- .newExtractor (false )
238
- .collectDataSummaryAsync (ActionListener .wrap (
239
- dataSummary -> {
240
- if (dataSummary .rows == 0 ) {
241
- finalListener .onFailure (ExceptionsHelper .badRequestException (
242
- "Unable to start {} as no documents in the source indices [{}] contained all the fields "
243
- + "selected for analysis. If you are relying on automatic field selection then there are "
244
- + "currently mapped fields that do not exist in any indexed documents, and you will have "
245
- + "to switch to explicit field selection and include only fields that exist in indexed "
246
- + "documents." ,
247
- id , Strings .arrayToCommaDelimitedString (startContext .config .getSource ().getIndex ())
248
- ));
249
- } else {
250
- finalListener .onResponse (startContext );
251
- }
252
- },
253
- finalListener ::onFailure
254
- )),
255
- finalListener ::onFailure
256
- ))
257
- ,
232
+ startContext -> validateSourceIndexHasRows (startContext , finalListener ),
258
233
finalListener ::onFailure
259
234
);
260
235
@@ -269,9 +244,7 @@ private void getStartContext(String id, ActionListener<StartContext> finalListen
269
244
// Step 4. Validate dest index is empty if task is starting for first time
270
245
ActionListener <StartContext > toValidateDestEmptyListener = ActionListener .wrap (
271
246
startContext -> {
272
- DataFrameAnalyticsTask .StartingState startingState = DataFrameAnalyticsTask .determineStartingState (
273
- startContext .config .getId (), startContext .progressOnStart );
274
- switch (startingState ) {
247
+ switch (startContext .startingState ) {
275
248
case FIRST_TIME :
276
249
checkDestIndexIsEmptyIfExists (startContext , toValidateMappingsListener );
277
250
break ;
@@ -285,7 +258,7 @@ private void getStartContext(String id, ActionListener<StartContext> finalListen
285
258
"Cannot start because the job has already finished" ));
286
259
break ;
287
260
default :
288
- finalListener .onFailure (ExceptionsHelper .serverError ("Unexpected starting state " + startingState ));
261
+ finalListener .onFailure (ExceptionsHelper .serverError ("Unexpected starting state " + startContext . startingState ));
289
262
break ;
290
263
}
291
264
},
@@ -295,9 +268,16 @@ private void getStartContext(String id, ActionListener<StartContext> finalListen
295
268
// Step 3. Validate source and dest; check data extraction is possible
296
269
ActionListener <StartContext > startContextListener = ActionListener .wrap (
297
270
startContext -> {
271
+ // Validate the query parses
272
+ startContext .config .getSource ().getParsedQuery ();
273
+
274
+ // Validate source/dest are valid
298
275
new SourceDestValidator (clusterService .state (), indexNameExpressionResolver ).check (startContext .config );
299
- DataFrameDataExtractorFactory .validateConfigAndSourceIndex (client , startContext .config , ActionListener .wrap (
300
- config -> toValidateDestEmptyListener .onResponse (startContext ), finalListener ::onFailure ));
276
+
277
+ // Validate extraction is possible
278
+ boolean isTaskRestarting = startContext .startingState != DataFrameAnalyticsTask .StartingState .FIRST_TIME ;
279
+ new ExtractedFieldsDetectorFactory (client ).createFromSource (startContext .config , isTaskRestarting , ActionListener .wrap (
280
+ extractedFieldsDetector -> toValidateDestEmptyListener .onResponse (startContext ), finalListener ::onFailure ));
301
281
},
302
282
finalListener ::onFailure
303
283
);
@@ -313,6 +293,38 @@ private void getStartContext(String id, ActionListener<StartContext> finalListen
313
293
configProvider .get (id , getConfigListener );
314
294
}
315
295
296
+ private void validateSourceIndexHasRows (StartContext startContext , ActionListener <StartContext > listener ) {
297
+ boolean isTaskRestarting = startContext .startingState != DataFrameAnalyticsTask .StartingState .FIRST_TIME ;
298
+ DataFrameDataExtractorFactory .createForSourceIndices (client ,
299
+ "validate_source_index_has_rows-" + startContext .config .getId (),
300
+ isTaskRestarting ,
301
+ startContext .config ,
302
+ ActionListener .wrap (
303
+ dataFrameDataExtractorFactory ->
304
+ dataFrameDataExtractorFactory
305
+ .newExtractor (false )
306
+ .collectDataSummaryAsync (ActionListener .wrap (
307
+ dataSummary -> {
308
+ if (dataSummary .rows == 0 ) {
309
+ listener .onFailure (ExceptionsHelper .badRequestException (
310
+ "Unable to start {} as no documents in the source indices [{}] contained all the fields "
311
+ + "selected for analysis. If you are relying on automatic field selection then there are "
312
+ + "currently mapped fields that do not exist in any indexed documents, and you will have "
313
+ + "to switch to explicit field selection and include only fields that exist in indexed "
314
+ + "documents." ,
315
+ startContext .config .getId (),
316
+ Strings .arrayToCommaDelimitedString (startContext .config .getSource ().getIndex ())
317
+ ));
318
+ } else {
319
+ listener .onResponse (startContext );
320
+ }
321
+ },
322
+ listener ::onFailure
323
+ )),
324
+ listener ::onFailure
325
+ ));
326
+ }
327
+
316
328
private void getProgress (DataFrameAnalyticsConfig config , ActionListener <List <PhaseProgress >> listener ) {
317
329
GetDataFrameAnalyticsStatsAction .Request getStatsRequest = new GetDataFrameAnalyticsStatsAction .Request (config .getId ());
318
330
executeAsyncWithOrigin (client , ML_ORIGIN , GetDataFrameAnalyticsStatsAction .INSTANCE , getStatsRequest , ActionListener .wrap (
@@ -389,10 +401,12 @@ public void onTimeout(TimeValue timeout) {
389
401
private static class StartContext {
390
402
private final DataFrameAnalyticsConfig config ;
391
403
private final List <PhaseProgress > progressOnStart ;
404
+ private final DataFrameAnalyticsTask .StartingState startingState ;
392
405
393
406
private StartContext (DataFrameAnalyticsConfig config , List <PhaseProgress > progressOnStart ) {
394
407
this .config = config ;
395
408
this .progressOnStart = progressOnStart ;
409
+ this .startingState = DataFrameAnalyticsTask .determineStartingState (config .getId (), progressOnStart );
396
410
}
397
411
}
398
412
0 commit comments