Skip to content

Commit 1017fe3

Browse files
committed
[ML] Make warnings from inference errors (elastic#81735)
Consistently using exceptions for errors instead of WarningInferenceResults, to simplify debugging/triaging.

Conflicts:
- x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/PyTorchModelIT.java
- x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/FillMaskProcessor.java
- x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/NerProcessor.java
- x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/FillMaskProcessorTests.java
- x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/NerProcessorTests.java
- x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/TextClassificationProcessorTests.java
1 parent 01e9e8c commit 1017fe3

File tree

9 files changed

+45
-70
lines changed

9 files changed

+45
-70
lines changed

x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/PyTorchModelIT.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -605,7 +605,8 @@ public void testPipelineWithBadProcessor() throws IOException {
605605
""";
606606

607607
response = EntityUtils.toString(client().performRequest(simulateRequest(source)).getEntity());
608-
assertThat(response, containsString("warning"));
608+
assertThat(response, containsString("no value could be found for input field [input]"));
609+
assertThat(response, containsString("status_exception"));
609610
}
610611

611612
private int sumInferenceCountOnNodes(List<Map<String, Object>> nodes) {

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/deployment/DeploymentManager.java

Lines changed: 3 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
import org.apache.logging.log4j.Logger;
1212
import org.apache.logging.log4j.message.ParameterizedMessage;
1313
import org.apache.lucene.util.SetOnce;
14-
import org.elasticsearch.ElasticsearchException;
1514
import org.elasticsearch.ElasticsearchStatusException;
1615
import org.elasticsearch.ResourceNotFoundException;
1716
import org.elasticsearch.action.ActionListener;
@@ -34,7 +33,6 @@
3433
import org.elasticsearch.xpack.core.ml.inference.TrainedModelConfig;
3534
import org.elasticsearch.xpack.core.ml.inference.TrainedModelInput;
3635
import org.elasticsearch.xpack.core.ml.inference.results.InferenceResults;
37-
import org.elasticsearch.xpack.core.ml.inference.results.WarningInferenceResults;
3836
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.IndexLocation;
3937
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.InferenceConfig;
4038
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.NlpConfig;
@@ -369,32 +367,17 @@ protected void doRun() throws Exception {
369367
processContext,
370368
request.tokenization,
371369
processor.getResultProcessor((NlpConfig) config),
372-
ActionListener.wrap(this::onSuccess, f -> handleFailure(f, this))
370+
this
373371
),
374372
this::onFailure
375373
)
376374
);
377375
processContext.process.get().writeInferenceRequest(request.processInput);
378376
} catch (IOException e) {
379377
logger.error(new ParameterizedMessage("[{}] error writing to process", processContext.task.getModelId()), e);
380-
handleFailure(ExceptionsHelper.serverError("error writing to process", e), this);
378+
onFailure(ExceptionsHelper.serverError("error writing to process", e));
381379
} catch (Exception e) {
382-
handleFailure(e, this);
383-
}
384-
}
385-
386-
private static void handleFailure(Exception e, ActionListener<InferenceResults> listener) {
387-
Throwable unwrapped = org.elasticsearch.ExceptionsHelper.unwrapCause(e);
388-
if (unwrapped instanceof ElasticsearchException ex) {
389-
if (ex.status() == RestStatus.BAD_REQUEST) {
390-
listener.onResponse(new WarningInferenceResults(ex.getMessage()));
391-
} else {
392-
listener.onFailure(ex);
393-
}
394-
} else if (unwrapped instanceof IllegalArgumentException) {
395-
listener.onResponse(new WarningInferenceResults(e.getMessage()));
396-
} else {
397-
listener.onFailure(e);
380+
onFailure(e);
398381
}
399382
}
400383

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/FillMaskProcessor.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,11 @@
77

88
package org.elasticsearch.xpack.ml.inference.nlp;
99

10+
import org.elasticsearch.ElasticsearchStatusException;
11+
import org.elasticsearch.rest.RestStatus;
1012
import org.elasticsearch.xpack.core.ml.inference.results.FillMaskResults;
1113
import org.elasticsearch.xpack.core.ml.inference.results.InferenceResults;
1214
import org.elasticsearch.xpack.core.ml.inference.results.TopClassEntry;
13-
import org.elasticsearch.xpack.core.ml.inference.results.WarningInferenceResults;
1415
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.FillMaskConfig;
1516
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.NlpConfig;
1617
import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
@@ -79,10 +80,11 @@ static InferenceResults processResult(
7980
String resultsField
8081
) {
8182
if (tokenization.getTokenizations().isEmpty() || tokenization.getTokenizations().get(0).getTokens().length == 0) {
82-
return new WarningInferenceResults("No valid tokens for inference");
83+
throw new ElasticsearchStatusException("tokenization is empty", RestStatus.INTERNAL_SERVER_ERROR);
8384
}
8485

8586
int maskTokenIndex = Arrays.asList(tokenization.getTokenizations().get(0).getTokens()).indexOf(BertTokenizer.MASK_TOKEN);
87+
8688
// TODO - process all results in the batch
8789
double[] normalizedScores = NlpHelpers.convertToProbabilitiesBySoftMax(pyTorchResult.getInferenceResult()[0][maskTokenIndex]);
8890

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/NerProcessor.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,13 @@
77

88
package org.elasticsearch.xpack.ml.inference.nlp;
99

10+
import org.elasticsearch.ElasticsearchStatusException;
1011
import org.elasticsearch.common.ValidationException;
1112
import org.elasticsearch.common.io.stream.StreamOutput;
1213
import org.elasticsearch.common.io.stream.Writeable;
14+
import org.elasticsearch.rest.RestStatus;
1315
import org.elasticsearch.xpack.core.ml.inference.results.InferenceResults;
1416
import org.elasticsearch.xpack.core.ml.inference.results.NerResults;
15-
import org.elasticsearch.xpack.core.ml.inference.results.WarningInferenceResults;
1617
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.NerConfig;
1718
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.NlpConfig;
1819
import org.elasticsearch.xpack.ml.inference.deployment.PyTorchResult;
@@ -194,7 +195,7 @@ static class NerResultProcessor implements NlpTask.ResultProcessor {
194195
@Override
195196
public InferenceResults processResult(TokenizationResult tokenization, PyTorchResult pyTorchResult) {
196197
if (tokenization.getTokenizations().isEmpty() || tokenization.getTokenizations().get(0).getTokens().length == 0) {
197-
return new WarningInferenceResults("no valid tokens to build result");
198+
throw new ElasticsearchStatusException("no valid tokenization to build result", RestStatus.INTERNAL_SERVER_ERROR);
198199
}
199200
// TODO - process all results in the batch
200201

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/TextClassificationProcessor.java

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,11 @@
77

88
package org.elasticsearch.xpack.ml.inference.nlp;
99

10+
import org.elasticsearch.ElasticsearchStatusException;
11+
import org.elasticsearch.rest.RestStatus;
1012
import org.elasticsearch.xpack.core.ml.inference.results.InferenceResults;
1113
import org.elasticsearch.xpack.core.ml.inference.results.NlpClassificationInferenceResults;
1214
import org.elasticsearch.xpack.core.ml.inference.results.TopClassEntry;
13-
import org.elasticsearch.xpack.core.ml.inference.results.WarningInferenceResults;
1415
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.NlpConfig;
1516
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextClassificationConfig;
1617
import org.elasticsearch.xpack.ml.inference.deployment.PyTorchResult;
@@ -39,11 +40,6 @@ public class TextClassificationProcessor implements NlpTask.Processor {
3940
// negative values are a special case of asking for ALL classes. Since we require the output size to equal the classLabel size
4041
// This is a nice way of setting the value
4142
this.numTopClasses = config.getNumTopClasses() < 0 ? this.classLabels.length : config.getNumTopClasses();
42-
validate();
43-
}
44-
45-
private void validate() {
46-
// validation occurs in TextClassificationConfig
4743
}
4844

4945
@Override
@@ -87,14 +83,15 @@ static InferenceResults processResult(
8783
String resultsField
8884
) {
8985
if (pyTorchResult.getInferenceResult().length < 1) {
90-
return new WarningInferenceResults("Text classification result has no data");
86+
throw new ElasticsearchStatusException("Text classification result has no data", RestStatus.INTERNAL_SERVER_ERROR);
9187
}
9288

9389
// TODO only the first entry in the batch result is verified and
9490
// checked. Implement for all in batch
9591
if (pyTorchResult.getInferenceResult()[0][0].length != labels.size()) {
96-
return new WarningInferenceResults(
92+
throw new ElasticsearchStatusException(
9793
"Expected exactly [{}] values in text classification result; got [{}]",
94+
RestStatus.INTERNAL_SERVER_ERROR,
9895
labels.size(),
9996
pyTorchResult.getInferenceResult()[0][0].length
10097
);

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/ZeroShotClassificationProcessor.java

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,12 @@
77

88
package org.elasticsearch.xpack.ml.inference.nlp;
99

10+
import org.elasticsearch.ElasticsearchStatusException;
1011
import org.elasticsearch.common.logging.LoggerMessageFormat;
12+
import org.elasticsearch.rest.RestStatus;
1113
import org.elasticsearch.xpack.core.ml.inference.results.InferenceResults;
1214
import org.elasticsearch.xpack.core.ml.inference.results.NlpClassificationInferenceResults;
1315
import org.elasticsearch.xpack.core.ml.inference.results.TopClassEntry;
14-
import org.elasticsearch.xpack.core.ml.inference.results.WarningInferenceResults;
1516
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.NlpConfig;
1617
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.ZeroShotClassificationConfig;
1718
import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
@@ -113,7 +114,7 @@ static class RequestBuilder implements NlpTask.RequestBuilder {
113114
@Override
114115
public NlpTask.Request buildRequest(List<String> inputs, String requestId) throws IOException {
115116
if (inputs.size() > 1) {
116-
throw new IllegalArgumentException("Unable to do zero-shot classification on more than one text input at a time");
117+
throw ExceptionsHelper.badRequestException("Unable to do zero-shot classification on more than one text input at a time");
117118
}
118119
List<TokenizationResult.Tokenization> tokenizations = new ArrayList<>(labels.length);
119120
for (String label : labels) {
@@ -147,13 +148,14 @@ static class ResultProcessor implements NlpTask.ResultProcessor {
147148
@Override
148149
public InferenceResults processResult(TokenizationResult tokenization, PyTorchResult pyTorchResult) {
149150
if (pyTorchResult.getInferenceResult().length < 1) {
150-
return new WarningInferenceResults("Zero shot classification result has no data");
151+
throw new ElasticsearchStatusException("Zero shot classification result has no data", RestStatus.INTERNAL_SERVER_ERROR);
151152
}
152153
// TODO only the first entry in the batch result is verified and
153154
// checked. Implement for all in batch
154155
if (pyTorchResult.getInferenceResult()[0].length != labels.length) {
155-
return new WarningInferenceResults(
156+
throw new ElasticsearchStatusException(
156157
"Expected exactly [{}] values in zero shot classification result; got [{}]",
158+
RestStatus.INTERNAL_SERVER_ERROR,
157159
labels.length,
158160
pyTorchResult.getInferenceResult().length
159161
);
@@ -164,8 +166,9 @@ public InferenceResults processResult(TokenizationResult tokenization, PyTorchRe
164166
int v = 0;
165167
for (double[] vals : pyTorchResult.getInferenceResult()[0]) {
166168
if (vals.length != 3) {
167-
return new WarningInferenceResults(
169+
throw new ElasticsearchStatusException(
168170
"Expected exactly [{}] values in inner zero shot classification result; got [{}]",
171+
RestStatus.INTERNAL_SERVER_ERROR,
169172
3,
170173
vals.length
171174
);
@@ -180,8 +183,9 @@ public InferenceResults processResult(TokenizationResult tokenization, PyTorchRe
180183
int v = 0;
181184
for (double[] vals : pyTorchResult.getInferenceResult()[0]) {
182185
if (vals.length != 3) {
183-
return new WarningInferenceResults(
186+
throw new ElasticsearchStatusException(
184187
"Expected exactly [{}] values in inner zero shot classification result; got [{}]",
188+
RestStatus.INTERNAL_SERVER_ERROR,
185189
3,
186190
vals.length
187191
);

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/FillMaskProcessorTests.java

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
import org.elasticsearch.test.ESTestCase;
1212
import org.elasticsearch.xpack.core.ml.inference.results.FillMaskResults;
1313
import org.elasticsearch.xpack.core.ml.inference.results.TopClassEntry;
14-
import org.elasticsearch.xpack.core.ml.inference.results.WarningInferenceResults;
1514
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.FillMaskConfig;
1615
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.VocabularyConfig;
1716
import org.elasticsearch.xpack.ml.inference.deployment.PyTorchResult;
@@ -25,7 +24,6 @@
2524
import static org.hamcrest.Matchers.containsString;
2625
import static org.hamcrest.Matchers.equalTo;
2726
import static org.hamcrest.Matchers.hasSize;
28-
import static org.hamcrest.Matchers.instanceOf;
2927
import static org.mockito.Mockito.mock;
3028

3129
public class FillMaskProcessorTests extends ESTestCase {
@@ -77,9 +75,9 @@ public void testProcessResults_GivenMissingTokens() {
7775
tokenization.addTokenization("", false, new String[] {}, new int[] {}, new int[] {});
7876

7977
PyTorchResult pyTorchResult = new PyTorchResult("1", new double[][][] { { {} } }, 0L, null);
80-
assertThat(
81-
FillMaskProcessor.processResult(tokenization, pyTorchResult, 5, randomAlphaOfLength(10)),
82-
instanceOf(WarningInferenceResults.class)
78+
expectThrows(
79+
ElasticsearchStatusException.class,
80+
() -> FillMaskProcessor.processResult(tokenization, pyTorchResult, 5, randomAlphaOfLength(10))
8381
);
8482
}
8583

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/NerProcessorTests.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@
77

88
package org.elasticsearch.xpack.ml.inference.nlp;
99

10+
import org.elasticsearch.ElasticsearchStatusException;
1011
import org.elasticsearch.common.ValidationException;
1112
import org.elasticsearch.test.ESTestCase;
1213
import org.elasticsearch.xpack.core.ml.inference.results.NerResults;
13-
import org.elasticsearch.xpack.core.ml.inference.results.WarningInferenceResults;
1414
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.BertTokenization;
1515
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.NerConfig;
1616
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.Tokenization;
@@ -89,10 +89,11 @@ public void testValidate_NotAEntityLabel() {
8989
public void testProcessResults_GivenNoTokens() {
9090
NerProcessor.NerResultProcessor processor = new NerProcessor.NerResultProcessor(NerProcessor.IobTag.values(), null, false);
9191
TokenizationResult tokenization = tokenize(Collections.emptyList(), "");
92-
assertThat(
93-
processor.processResult(tokenization, new PyTorchResult("test", null, 0L, null)),
94-
instanceOf(WarningInferenceResults.class)
92+
var e = expectThrows(
93+
ElasticsearchStatusException.class,
94+
() -> processor.processResult(tokenization, new PyTorchResult("test", null, 0L, null))
9595
);
96+
assertThat(e, instanceOf(ElasticsearchStatusException.class));
9697
}
9798

9899
public void testProcessResults() {

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/TextClassificationProcessorTests.java

Lines changed: 10 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,10 @@
77

88
package org.elasticsearch.xpack.ml.inference.nlp;
99

10+
import org.elasticsearch.ElasticsearchStatusException;
1011
import org.elasticsearch.common.xcontent.XContentHelper;
1112
import org.elasticsearch.test.ESTestCase;
1213
import org.elasticsearch.xcontent.XContentType;
13-
import org.elasticsearch.xpack.core.ml.inference.results.InferenceResults;
14-
import org.elasticsearch.xpack.core.ml.inference.results.WarningInferenceResults;
1514
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.BertTokenization;
1615
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextClassificationConfig;
1716
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.Tokenization;
@@ -25,38 +24,27 @@
2524
import java.util.List;
2625
import java.util.Map;
2726

27+
import static org.hamcrest.Matchers.containsString;
2828
import static org.hamcrest.Matchers.hasSize;
29-
import static org.hamcrest.Matchers.instanceOf;
3029

3130
public class TextClassificationProcessorTests extends ESTestCase {
3231

3332
public void testInvalidResult() {
3433
{
3534
PyTorchResult torchResult = new PyTorchResult("foo", new double[][][] {}, 0L, null);
36-
InferenceResults inferenceResults = TextClassificationProcessor.processResult(
37-
null,
38-
torchResult,
39-
randomInt(),
40-
List.of("a", "b"),
41-
randomAlphaOfLength(10)
35+
var e = expectThrows(
36+
ElasticsearchStatusException.class,
37+
() -> TextClassificationProcessor.processResult(null, torchResult, randomInt(), List.of("a", "b"), randomAlphaOfLength(10))
4238
);
43-
assertThat(inferenceResults, instanceOf(WarningInferenceResults.class));
44-
assertEquals("Text classification result has no data", ((WarningInferenceResults) inferenceResults).getWarning());
39+
assertThat(e.getMessage(), containsString("Text classification result has no data"));
4540
}
4641
{
4742
PyTorchResult torchResult = new PyTorchResult("foo", new double[][][] { { { 1.0 } } }, 0L, null);
48-
InferenceResults inferenceResults = TextClassificationProcessor.processResult(
49-
null,
50-
torchResult,
51-
randomInt(),
52-
List.of("a", "b"),
53-
randomAlphaOfLength(10)
54-
);
55-
assertThat(inferenceResults, instanceOf(WarningInferenceResults.class));
56-
assertEquals(
57-
"Expected exactly [2] values in text classification result; got [1]",
58-
((WarningInferenceResults) inferenceResults).getWarning()
43+
var e = expectThrows(
44+
ElasticsearchStatusException.class,
45+
() -> TextClassificationProcessor.processResult(null, torchResult, randomInt(), List.of("a", "b"), randomAlphaOfLength(10))
5946
);
47+
assertThat(e.getMessage(), containsString("Expected exactly [2] values in text classification result; got [1]"));
6048
}
6149
}
6250

0 commit comments

Comments (0)