Skip to content

Commit 24d41eb

Browse files
authored
[ML] partitions model definitions into chunks (#55260) (#55484)
This paves the way in the data layer so that exceptionally large models are partitioned across multiple documents. This change means that nodes before 7.8.0 will not be able to use trained inference models created on nodes on or after 7.8.0. I chose the definition document limit to be 100. This *SHOULD* be plenty for any large model. One of the largest models that I have created so far had the following stats: ~314MB of inflated JSON, ~66MB when compressed, ~177MB of heap. With the chunking sizes of `16 * 1024 * 1024` its compressed string could be partitioned into 5 documents. Supporting models 20 times this size (compressed) seems adequate for now.
1 parent fa0373a commit 24d41eb

File tree

3 files changed

+106
-33
lines changed

3 files changed

+106
-33
lines changed

docs/reference/ml/df-analytics/apis/put-inference.asciidoc

+7-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,13 @@
88
++++
99

1010
Creates an {infer} trained model.
11-
11+
+
12+
--
13+
WARNING: Models created in version 7.8.0 are not backwards compatible
14+
with older node versions. If in a mixed cluster environment,
15+
all nodes must be at least 7.8.0 to use a model stored by
16+
a 7.8.0 node.
17+
--
1218
experimental[]
1319

1420

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportPutTrainedModelAction.java

+12-1
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,18 @@ protected Response read(StreamInput in) throws IOException {
7878
}
7979

8080
@Override
81-
protected void masterOperation(Request request, ClusterState state, ActionListener<Response> listener) {
81+
protected void masterOperation(Request request,
82+
ClusterState state,
83+
ActionListener<Response> listener) {
84+
// 7.8.0 introduced splitting the model definition across multiple documents.
85+
// This means that new models will not be usable on nodes that cannot handle multiple definition documents
86+
if (state.nodes().getMinNodeVersion().before(Version.V_7_8_0)) {
87+
listener.onFailure(ExceptionsHelper.badRequestException(
88+
"Creating a new model requires that all nodes are at least version [{}]",
89+
request.getTrainedModelConfig().getModelId(),
90+
Version.V_7_8_0.toString()));
91+
return;
92+
}
8293
try {
8394
request.getTrainedModelConfig().ensureParsedDefinition(xContentRegistry);
8495
request.getTrainedModelConfig().getModelDefinition().getTrainedModel().validate();

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/persistence/TrainedModelProvider.java

+87-31
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515
import org.elasticsearch.action.ActionListener;
1616
import org.elasticsearch.action.DocWriteRequest;
1717
import org.elasticsearch.action.bulk.BulkAction;
18-
import org.elasticsearch.action.bulk.BulkRequest;
18+
import org.elasticsearch.action.bulk.BulkItemResponse;
19+
import org.elasticsearch.action.bulk.BulkRequestBuilder;
1920
import org.elasticsearch.action.bulk.BulkResponse;
2021
import org.elasticsearch.action.index.IndexRequest;
2122
import org.elasticsearch.action.search.MultiSearchAction;
@@ -86,6 +87,7 @@
8687
import java.util.Map;
8788
import java.util.Set;
8889
import java.util.TreeSet;
90+
import java.util.stream.Collectors;
8991

9092
import static org.elasticsearch.xpack.core.ClientHelper.ML_ORIGIN;
9193
import static org.elasticsearch.xpack.core.ClientHelper.executeAsyncWithOrigin;
@@ -96,6 +98,9 @@ public class TrainedModelProvider {
9698
public static final Set<String> MODELS_STORED_AS_RESOURCE = Collections.singleton("lang_ident_model_1");
9799
private static final String MODEL_RESOURCE_PATH = "/org/elasticsearch/xpack/ml/inference/persistence/";
98100
private static final String MODEL_RESOURCE_FILE_EXT = ".json";
101+
private static final int COMPRESSED_STRING_CHUNK_SIZE = 16 * 1024 * 1024;
102+
private static final int MAX_NUM_DEFINITION_DOCS = 100;
103+
private static final int MAX_COMPRESSED_STRING_SIZE = COMPRESSED_STRING_CHUNK_SIZE * MAX_NUM_DEFINITION_DOCS;
99104

100105
private static final Logger logger = LogManager.getLogger(TrainedModelProvider.class);
101106
private final Client client;
@@ -139,30 +144,41 @@ public void storeTrainedModel(TrainedModelConfig trainedModelConfig,
139144
private void storeTrainedModelAndDefinition(TrainedModelConfig trainedModelConfig,
140145
ActionListener<Boolean> listener) {
141146

142-
TrainedModelDefinitionDoc trainedModelDefinitionDoc;
147+
List<TrainedModelDefinitionDoc> trainedModelDefinitionDocs = new ArrayList<>();
143148
try {
144-
// TODO should we check length against allowed stream size???
145149
String compressedString = trainedModelConfig.getCompressedDefinition();
146-
trainedModelDefinitionDoc = new TrainedModelDefinitionDoc.Builder()
147-
.setDocNum(0)
148-
.setModelId(trainedModelConfig.getModelId())
149-
.setCompressedString(compressedString)
150-
.setCompressionVersion(TrainedModelConfig.CURRENT_DEFINITION_COMPRESSION_VERSION)
151-
.setDefinitionLength(compressedString.length())
152-
.setTotalDefinitionLength(compressedString.length())
153-
.build();
150+
if (compressedString.length() > MAX_COMPRESSED_STRING_SIZE) {
151+
listener.onFailure(
152+
ExceptionsHelper.badRequestException(
153+
"Unable to store model as compressed definition has length [{}] the limit is [{}]",
154+
compressedString.length(),
155+
MAX_COMPRESSED_STRING_SIZE));
156+
return;
157+
}
158+
List<String> chunkedStrings = chunkStringWithSize(compressedString, COMPRESSED_STRING_CHUNK_SIZE);
159+
for(int i = 0; i < chunkedStrings.size(); ++i) {
160+
trainedModelDefinitionDocs.add(new TrainedModelDefinitionDoc.Builder()
161+
.setDocNum(i)
162+
.setModelId(trainedModelConfig.getModelId())
163+
.setCompressedString(chunkedStrings.get(i))
164+
.setCompressionVersion(TrainedModelConfig.CURRENT_DEFINITION_COMPRESSION_VERSION)
165+
.setDefinitionLength(chunkedStrings.get(i).length())
166+
.setTotalDefinitionLength(compressedString.length())
167+
.build());
168+
}
154169
} catch (IOException ex) {
155170
listener.onFailure(ExceptionsHelper.serverError(
156-
"Unexpected IOException while serializing definition for storage for model [" + trainedModelConfig.getModelId() + "]",
157-
ex));
171+
"Unexpected IOException while serializing definition for storage for model [{}]",
172+
ex,
173+
trainedModelConfig.getModelId()));
158174
return;
159175
}
160176

161-
BulkRequest bulkRequest = client.prepareBulk(InferenceIndexConstants.LATEST_INDEX_NAME, MapperService.SINGLE_MAPPING_NAME)
177+
BulkRequestBuilder bulkRequest = client.prepareBulk(InferenceIndexConstants.LATEST_INDEX_NAME, MapperService.SINGLE_MAPPING_NAME)
162178
.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE)
163-
.add(createRequest(trainedModelConfig.getModelId(), trainedModelConfig))
164-
.add(createRequest(TrainedModelDefinitionDoc.docId(trainedModelConfig.getModelId(), 0), trainedModelDefinitionDoc))
165-
.request();
179+
.add(createRequest(trainedModelConfig.getModelId(), trainedModelConfig));
180+
trainedModelDefinitionDocs.forEach(defDoc ->
181+
bulkRequest.add(createRequest(TrainedModelDefinitionDoc.docId(trainedModelConfig.getModelId(), defDoc.getDocNum()), defDoc)));
166182

167183
ActionListener<Boolean> wrappedListener = ActionListener.wrap(
168184
listener::onResponse,
@@ -182,9 +198,8 @@ private void storeTrainedModelAndDefinition(TrainedModelConfig trainedModelConfi
182198

183199
ActionListener<BulkResponse> bulkResponseActionListener = ActionListener.wrap(
184200
r -> {
185-
assert r.getItems().length == 2;
201+
assert r.getItems().length == trainedModelDefinitionDocs.size() + 1;
186202
if (r.getItems()[0].isFailed()) {
187-
188203
logger.error(new ParameterizedMessage(
189204
"[{}] failed to store trained model config for inference",
190205
trainedModelConfig.getModelId()),
@@ -193,20 +208,26 @@ private void storeTrainedModelAndDefinition(TrainedModelConfig trainedModelConfi
193208
wrappedListener.onFailure(r.getItems()[0].getFailure().getCause());
194209
return;
195210
}
196-
if (r.getItems()[1].isFailed()) {
211+
if (r.hasFailures()) {
212+
Exception firstFailure = Arrays.stream(r.getItems())
213+
.filter(BulkItemResponse::isFailed)
214+
.map(BulkItemResponse::getFailure)
215+
.map(BulkItemResponse.Failure::getCause)
216+
.findFirst()
217+
.orElse(new Exception("unknown failure"));
197218
logger.error(new ParameterizedMessage(
198219
"[{}] failed to store trained model definition for inference",
199220
trainedModelConfig.getModelId()),
200-
r.getItems()[1].getFailure().getCause());
201-
wrappedListener.onFailure(r.getItems()[1].getFailure().getCause());
221+
firstFailure);
222+
wrappedListener.onFailure(firstFailure);
202223
return;
203224
}
204225
wrappedListener.onResponse(true);
205226
},
206227
wrappedListener::onFailure
207228
);
208229

209-
executeAsyncWithOrigin(client, ML_ORIGIN, BulkAction.INSTANCE, bulkRequest, bulkResponseActionListener);
230+
executeAsyncWithOrigin(client, ML_ORIGIN, BulkAction.INSTANCE, bulkRequest.request(), bulkResponseActionListener);
210231
}
211232

212233
public void getTrainedModel(final String modelId, final boolean includeDefinition, final ActionListener<TrainedModelConfig> listener) {
@@ -235,11 +256,20 @@ public void getTrainedModel(final String modelId, final boolean includeDefinitio
235256
if (includeDefinition) {
236257
multiSearchRequestBuilder.add(client.prepareSearch(InferenceIndexConstants.INDEX_PATTERN)
237258
.setQuery(QueryBuilders.constantScoreQuery(QueryBuilders
238-
.idsQuery()
239-
.addIds(TrainedModelDefinitionDoc.docId(modelId, 0))))
240-
// use sort to get the last
259+
.boolQuery()
260+
.filter(QueryBuilders.termQuery(TrainedModelConfig.MODEL_ID.getPreferredName(), modelId))
261+
.filter(QueryBuilders.termQuery(InferenceIndexConstants.DOC_TYPE.getPreferredName(), TrainedModelDefinitionDoc.NAME))))
262+
// There should be AT MOST this many docs. There might be more if definitions have been reindexed to newer indices
263+
// If this ends up getting duplicate groups of definition documents, the parsing logic will throw away any doc that
264+
// is in a different index than the first index seen.
265+
.setSize(MAX_NUM_DEFINITION_DOCS)
266+
// First find the latest index
241267
.addSort("_index", SortOrder.DESC)
242-
.setSize(1)
268+
// Then, sort by doc_num
269+
.addSort(SortBuilders.fieldSort(TrainedModelDefinitionDoc.DOC_NUM.getPreferredName())
270+
.order(SortOrder.ASC)
271+
// We need this for the search not to fail when there are no mappings yet in the index
272+
.unmappedType("long"))
243273
.request());
244274
}
245275

@@ -259,15 +289,18 @@ public void getTrainedModel(final String modelId, final boolean includeDefinitio
259289

260290
if (includeDefinition) {
261291
try {
262-
TrainedModelDefinitionDoc doc = handleSearchItem(multiSearchResponse.getResponses()[1],
292+
List<TrainedModelDefinitionDoc> docs = handleSearchItems(multiSearchResponse.getResponses()[1],
263293
modelId,
264294
this::parseModelDefinitionDocLenientlyFromSource);
265-
if (doc.getCompressedString().length() != doc.getTotalDefinitionLength()) {
295+
String compressedString = docs.stream()
296+
.map(TrainedModelDefinitionDoc::getCompressedString)
297+
.collect(Collectors.joining());
298+
if (compressedString.length() != docs.get(0).getTotalDefinitionLength()) {
266299
listener.onFailure(ExceptionsHelper.serverError(
267300
Messages.getMessage(Messages.MODEL_DEFINITION_TRUNCATED, modelId)));
268301
return;
269302
}
270-
builder.setDefinitionFromString(doc.getCompressedString());
303+
builder.setDefinitionFromString(compressedString);
271304
} catch (ResourceNotFoundException ex) {
272305
listener.onFailure(new ResourceNotFoundException(
273306
Messages.getMessage(Messages.MODEL_DEFINITION_NOT_FOUND, modelId)));
@@ -678,13 +711,36 @@ private Set<String> matchedResourceIds(String[] tokens) {
678711
private static <T> T handleSearchItem(MultiSearchResponse.Item item,
679712
String resourceId,
680713
CheckedBiFunction<BytesReference, String, T, Exception> parseLeniently) throws Exception {
714+
return handleSearchItems(item, resourceId, parseLeniently).get(0);
715+
}
716+
717+
// NOTE: This ignores any results that are in a different index than the first one seen in the search response.
718+
private static <T> List<T> handleSearchItems(MultiSearchResponse.Item item,
719+
String resourceId,
720+
CheckedBiFunction<BytesReference, String, T, Exception> parseLeniently) throws Exception {
681721
if (item.isFailure()) {
682722
throw item.getFailure();
683723
}
684724
if (item.getResponse().getHits().getHits().length == 0) {
685725
throw new ResourceNotFoundException(resourceId);
686726
}
687-
return parseLeniently.apply(item.getResponse().getHits().getHits()[0].getSourceRef(), resourceId);
727+
List<T> results = new ArrayList<>(item.getResponse().getHits().getHits().length);
728+
String initialIndex = item.getResponse().getHits().getHits()[0].getIndex();
729+
for (SearchHit hit : item.getResponse().getHits().getHits()) {
730+
// We don't want to spread across multiple backing indices
731+
if (hit.getIndex().equals(initialIndex)) {
732+
results.add(parseLeniently.apply(hit.getSourceRef(), resourceId));
733+
}
734+
}
735+
return results;
736+
}
737+
738+
static List<String> chunkStringWithSize(String str, int chunkSize) {
739+
List<String> subStrings = new ArrayList<>((int)Math.ceil(str.length()/(double)chunkSize));
740+
for (int i = 0; i < str.length();i += chunkSize) {
741+
subStrings.add(str.substring(i, Math.min(i + chunkSize, str.length())));
742+
}
743+
return subStrings;
688744
}
689745

690746
private TrainedModelConfig.Builder parseInferenceDocLenientlyFromSource(BytesReference source, String modelId) throws IOException {

0 commit comments

Comments
 (0)