[7.x][ML] DFA _explain API should not fail when no field is included (#66281) (#66288)

dimitris-athanasiou · web-flow · commit 9cf8949d2169 · 2020-12-14T22:06:46.000+02:00
This commit fixes an issue with the DFA _explain API where, if it is called and no field is included, it results in an error message coming from the C++ process because the data frame has no columns. We want the _explain API not to error when no fields are included, precisely so that it can explain to the user why no fields are included. Thus, we can simply fix this by not running the memory estimation process and returning zero estimates instead. Note that the _start API will still fail with a user-friendly error message informing the user that there are no included fields. Backport of #66281
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportExplainDataFrameAnalyticsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportExplainDataFrameAnalyticsAction.java
@@ -17,6 +17,7 @@
 import org.elasticsearch.common.collect.Tuple;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.license.LicenseUtils;
 import org.elasticsearch.license.XPackLicenseState;
 import org.elasticsearch.tasks.Task;
@@ -106,7 +107,7 @@ private void explain(Task task, PutDataFrameAnalyticsAction.Request request,
         );
         if (licenseState.isSecurityEnabled()) {
             useSecondaryAuthIfAvailable(this.securityContext, () -> {
-                // Set the auth headers (preferring the secondary headers) to the caller's. 
+                // Set the auth headers (preferring the secondary headers) to the caller's.
                 // Regardless if the config was previously stored or not.
                 DataFrameAnalyticsConfig config = new DataFrameAnalyticsConfig.Builder(request.getConfig())
                     .setHeaders(filterSecurityHeaders(threadPool.getThreadContext().getHeaders()))
@@ -152,6 +153,11 @@ private void estimateMemoryUsage(Task task,
                                      DataFrameAnalyticsConfig config,
                                      ExtractedFields extractedFields,
                                      ActionListener<MemoryEstimation> listener) {
+        if (extractedFields.getAllFields().isEmpty()) {
+            listener.onResponse(new MemoryEstimation(ByteSizeValue.ZERO, ByteSizeValue.ZERO));
+            return;
+        }
+
         final String estimateMemoryTaskId = "memory_usage_estimation_" + task.getId();
         DataFrameDataExtractorFactory extractorFactory = DataFrameDataExtractorFactory.createForSourceIndices(
             new ParentTaskAssigningClient(client, task.getParentTaskId()), estimateMemoryTaskId, config, extractedFields);
diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/explain_data_frame_analytics.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/explain_data_frame_analytics.yml
@@ -312,3 +312,35 @@
   - match: { field_selection.4.is_required: false }
   - match: { field_selection.4.feature_type: "categorical" }
   - is_false: field_selection.4.reason
+
+---
+"Test given no included field":
+
+  - do:
+      indices.create:
+        index: index-source
+        body:
+          mappings:
+            properties:
+              x:
+                type: keyword
+
+  - do:
+      index:
+        index: index-source
+        refresh: true
+        body: { x: "hello!" }
+  - match: { result: "created" }
+
+  - do:
+      ml.explain_data_frame_analytics:
+        body:
+          source: { index: "index-source" }
+          analysis: { outlier_detection: {} }
+  - match:
+      memory_estimation.expected_memory_without_disk: "0"
+  - match:
+      memory_estimation.expected_memory_with_disk: "0"
+  - length: { field_selection: 1 }
+  - match: { field_selection.0.name: "x" }
+  - match: { field_selection.0.is_included: false }