[ML] mute inference_rescorer yaml tests and add single node rest tests (#97179)

benwtrent · web-flow · commit 4eb5dff59f20 · 2023-06-28T09:00:59.000-04:00
There is a common issue with the ML test cleanup code: we fetch model stats before taking any cleanup action, but fetching those stats can fail because the underlying search runs against an index that was only just created. This commit moves the YAML tests, as they existed (pretty much line for line), into a single-node REST test. I also mute the YAML tests instead of simply deleting them, as I would prefer to keep them. Related to: #80703
diff --git a/x-pack/plugin/ml/qa/ml-with-security/build.gradle b/x-pack/plugin/ml/qa/ml-with-security/build.gradle
@@ -181,6 +181,7 @@ tasks.named("yamlRestTest").configure {
     'ml/inference_crud/Test put nlp model config with vocabulary set',
     'ml/inference_crud/Test put model model aliases with nlp model',
     'ml/inference_processor/Test create processor with missing mandatory fields',
+    'ml/inference_rescore/Test rescore with missing model',
     'ml/inference_stats_crud/Test get stats given missing trained model',
     'ml/inference_stats_crud/Test get stats given expression without matches and allow_no_match is false',
     'ml/jobs_crud/Test cannot create job with model snapshot id set',
diff --git a/x-pack/plugin/ml/qa/single-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/InferenceRescorerIT.java b/x-pack/plugin/ml/qa/single-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/InferenceRescorerIT.java
@@ -0,0 +1,194 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.ml.integration;
+
+import org.elasticsearch.client.Request;
+import org.elasticsearch.client.Response;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.xcontent.support.XContentMapValues;
+import org.junit.Before;
+
+import java.io.IOException;
+import java.util.List;
+
+import static org.hamcrest.Matchers.equalTo;
+
+public class InferenceRescorerIT extends InferenceTestCase {
+
+    private static final String MODEL_ID = "ltr-model";
+    private static final String INDEX_NAME = "store";
+
+    @Before
+    public void setupModelAndData() throws IOException { // Per-test fixture: stores the regression model, creates the index, indexes nine documents, then refreshes.
+        putRegressionModel(MODEL_ID, """
+            {
+                        "description": "super complex model for tests",
+                        "input": {"field_names": ["cost", "product"]},
+                        "inference_config": {
+                          "regression": {
+                          }
+                        },
+                        "definition": {
+                          "preprocessors" : [{
+                            "one_hot_encoding": {
+                              "field": "product",
+                              "hot_map": {
+                                "TV": "type_tv",
+                                "VCR": "type_vcr",
+                                "Laptop": "type_laptop"
+                              }
+                            }
+                          }],
+                          "trained_model": {
+                            "ensemble": {
+                              "feature_names": ["cost", "type_tv", "type_vcr", "type_laptop"],
+                              "target_type": "regression",
+                              "trained_models": [
+                                {
+                                  "tree": {
+                                    "feature_names": [
+                                      "cost"
+                                    ],
+                                    "tree_structure": [
+                                      {
+                                        "node_index": 0,
+                                        "split_feature": 0,
+                                        "split_gain": 12,
+                                        "threshold": 400,
+                                        "decision_type": "lte",
+                                        "default_left": true,
+                                        "left_child": 1,
+                                        "right_child": 2
+                                      },
+                                      {
+                                        "node_index": 1,
+                                        "leaf_value": 5.0
+                                      },
+                                      {
+                                        "node_index": 2,
+                                        "leaf_value": 2.0
+                                      }
+                                    ],
+                                    "target_type": "regression"
+                                  }
+                                },
+                                {
+                                  "tree": {
+                                    "feature_names": [
+                                      "type_tv"
+                                    ],
+                                    "tree_structure": [
+                                      {
+                                        "node_index": 0,
+                                        "split_feature": 0,
+                                        "split_gain": 12,
+                                        "threshold": 1,
+                                        "decision_type": "lt",
+                                        "default_left": true,
+                                        "left_child": 1,
+                                        "right_child": 2
+                                      },
+                                      {
+                                        "node_index": 1,
+                                        "leaf_value": 1.0
+                                      },
+                                      {
+                                        "node_index": 2,
+                                        "leaf_value": 12.0
+                                      }
+                                    ],
+                                    "target_type": "regression"
+                                  }
+                                }
+                              ]
+                            }
+                          }
+                        }
+                      }"""); // Two single-split trees: cost at 400 ("lte", leaves 5.0 / 2.0) and type_tv at 1 ("lt", leaves 1.0 / 12.0). NOTE(review): no aggregation is set; the tests' expected scores (17.0, 14.0, 6.0, 3.0) are consistent with summing one leaf per tree - confirm the default.
+        createIndex(INDEX_NAME, Settings.EMPTY, """
+            "properties":{
+             "product":{"type": "keyword"},
+             "cost":{"type": "integer"}}"""); // Mapping: "product" is a keyword, "cost" is an integer.
+        indexData("{ \"product\": \"TV\", \"cost\": 300}"); // Three documents per product (TV, VCR, Laptop) with varying cost.
+        indexData("{ \"product\": \"TV\", \"cost\": 400}");
+        indexData("{ \"product\": \"TV\", \"cost\": 600}");
+        indexData("{ \"product\": \"VCR\", \"cost\": 15}");
+        indexData("{ \"product\": \"VCR\", \"cost\": 350}");
+        indexData("{ \"product\": \"VCR\", \"cost\": 580}");
+        indexData("{ \"product\": \"Laptop\", \"cost\": 100}");
+        indexData("{ \"product\": \"Laptop\", \"cost\": 300}");
+        indexData("{ \"product\": \"Laptop\", \"cost\": 500}");
+        adminClient().performRequest(new Request("POST", INDEX_NAME + "/_refresh")); // Refresh the index before the tests search it; issued via adminClient(), unlike the indexing calls, which use client().
+    }
+
+    public void testInferenceRescore() throws Exception { // Rescores with the stored model over a window of 10 and checks the top 3 scores, first without a query, then with a term query.
+        Request request = new Request("GET", "store/_search?size=3"); // NOTE(review): "store" and "ltr-model" below repeat INDEX_NAME / MODEL_ID as literals.
+        request.setJsonEntity("""
+            {
+              "rescore": {
+                "window_size": 10,
+                "inference": { "model_id": "ltr-model" }
+              }
+            }""");
+        assertHitScores(client().performRequest(request), List.of(17.0, 17.0, 14.0)); // No query in the body: expected top-three scores after rescoring.
+        request.setJsonEntity("""
+            {
+              "query": {"term": {"product": "Laptop"}},
+              "rescore": {
+                "window_size": 10,
+                "inference": { "model_id": "ltr-model" }
+              }
+            }""");
+        assertHitScores(client().performRequest(request), List.of(6.0, 6.0, 3.0)); // Same Request object re-sent with a new body restricted to product "Laptop".
+    }
+
+    public void testInferenceRescoreSmallWindow() throws Exception { // The rescore window (2) is smaller than the requested size (5).
+        Request request = new Request("GET", "store/_search?size=5");
+        request.setJsonEntity("""
+            {
+              "rescore": {
+                "window_size": 2,
+                "inference": { "model_id": "ltr-model" }
+              }
+            }""");
+        assertHitScores(client().performRequest(request), List.of(17.0, 17.0, 1.0, 1.0, 1.0)); // Two hits are expected at 17.0 and three at 1.0 - presumably the un-rescored score outside the window; TODO confirm.
+    }
+
+    public void testInferenceRescorerWithChainedRescorers() throws IOException { // Chains three rescorers with shrinking windows: script_score "return 4" (4), inference (3), script_score "return 20" (2).
+        Request request = new Request("GET", "store/_search?size=5");
+        request.setJsonEntity("""
+            {
+               "rescore": [
+               {
+                 "window_size": 4,
+                 "query": { "rescore_query":{ "script_score": {"query": {"match_all": {}}, "script": {"source": "return 4"}}}}
+               },
+               {
+                 "window_size": 3,
+                 "inference": { "model_id": "ltr-model" }
+               },
+               {
+                 "window_size": 2,
+                 "query": { "rescore_query": { "script_score": {"query": {"match_all": {}}, "script": {"source": "return 20"}}}}
+               }
+              ]
+             }""");
+        assertHitScores(client().performRequest(request), List.of(37.0, 37.0, 14.0, 5.0, 1.0)); // Expected scores for the five returned hits, in response order.
+    }
+
+    private void indexData(String data) throws IOException { // Indexes one JSON document (the given string) into INDEX_NAME.
+        Request request = new Request("POST", INDEX_NAME + "/_doc"); // POST to <index>/_doc; no document id is supplied in the path.
+        request.setJsonEntity(data);
+        client().performRequest(request); // The response is not inspected here.
+    }
+
+    @SuppressWarnings("unchecked") // The cast of the extracted value to List<Double> below is unchecked.
+    private static void assertHitScores(Response response, List<Double> expectedScores) throws IOException { // Asserts that the response's hit scores equal expectedScores, compared as lists.
+        assertThat((List<Double>) XContentMapValues.extractValue("hits.hits._score", responseAsMap(response)), equalTo(expectedScores)); // Extracts the value at path "hits.hits._score" from the response parsed as a map.
+    }
+}
diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/inference_rescore.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/inference_rescore.yml
@@ -134,6 +134,10 @@ setup:
 
 ---
 "Test rescore with stored model":
+  - skip:
+      version: all
+      reason: "@AwaitsFix https://github.com/elastic/elasticsearch/issues/80703"
+
   - do:
       search:
         index: store
@@ -166,6 +170,10 @@ setup:
   - match: { hits.hits.2._score: 3.0 }
 ---
 "Test rescore with stored model and smaller window_size":
+  - skip:
+      version: all
+      reason: "@AwaitsFix https://github.com/elastic/elasticsearch/issues/80703"
+
   - do:
       search:
         index: store
@@ -184,6 +192,10 @@ setup:
   - match: { hits.hits.4._score: 1.0 }
 ---
 "Test rescore with stored model and chained rescorers":
+  - skip:
+      version: all
+      reason: "@AwaitsFix https://github.com/elastic/elasticsearch/issues/80703"
+
   - do:
       search:
         index: store