Skip to content

Commit 8eaee7c

Browse files
[7.x][ML] Explain data frame analytics API (#49455) (#49504)
This commit replaces the _estimate_memory_usage API with a new API, the _explain API. The API consolidates information that is useful before creating a data frame analytics job. It includes: (1) memory estimation and (2) field selection explanation. Memory estimation is moved here from what was previously calculated in the _estimate_memory_usage API. Field selection is a new feature that explains to the user whether or not each available field was selected to be included in the analysis. If a field was not included, it also explains the reason why. Backport of #49455
1 parent 69f570e commit 8eaee7c

File tree

46 files changed

+2312
-851
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+2312
-851
lines changed

client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java

+12-6
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import org.elasticsearch.client.RequestConverters.EndpointBuilder;
3030
import org.elasticsearch.client.core.PageParams;
3131
import org.elasticsearch.client.ml.CloseJobRequest;
32+
import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest;
3233
import org.elasticsearch.client.ml.DeleteCalendarEventRequest;
3334
import org.elasticsearch.client.ml.DeleteCalendarJobRequest;
3435
import org.elasticsearch.client.ml.DeleteCalendarRequest;
@@ -701,12 +702,17 @@ static Request evaluateDataFrame(EvaluateDataFrameRequest evaluateRequest) throw
701702
return request;
702703
}
703704

704-
static Request estimateMemoryUsage(PutDataFrameAnalyticsRequest estimateRequest) throws IOException {
705-
String endpoint = new EndpointBuilder()
706-
.addPathPartAsIs("_ml", "data_frame", "analytics", "_estimate_memory_usage")
707-
.build();
708-
Request request = new Request(HttpPost.METHOD_NAME, endpoint);
709-
request.setEntity(createEntity(estimateRequest, REQUEST_BODY_CONTENT_TYPE));
705+
static Request explainDataFrameAnalytics(ExplainDataFrameAnalyticsRequest explainRequest) throws IOException {
706+
EndpointBuilder endpoint = new EndpointBuilder().addPathPartAsIs("_ml", "data_frame", "analytics");
707+
if (explainRequest.getId() != null) {
708+
endpoint.addPathPart(explainRequest.getId());
709+
}
710+
endpoint.addPathPartAsIs("_explain");
711+
712+
Request request = new Request(HttpPost.METHOD_NAME, endpoint.build());
713+
if (explainRequest.getConfig() != null) {
714+
request.setEntity(createEntity(explainRequest.getConfig(), REQUEST_BODY_CONTENT_TYPE));
715+
}
710716
return request;
711717
}
712718

client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java

+19-18
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
import org.elasticsearch.action.support.master.AcknowledgedResponse;
2323
import org.elasticsearch.client.ml.CloseJobRequest;
2424
import org.elasticsearch.client.ml.CloseJobResponse;
25+
import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest;
26+
import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsResponse;
2527
import org.elasticsearch.client.ml.DeleteCalendarEventRequest;
2628
import org.elasticsearch.client.ml.DeleteCalendarJobRequest;
2729
import org.elasticsearch.client.ml.DeleteCalendarRequest;
@@ -34,7 +36,6 @@
3436
import org.elasticsearch.client.ml.DeleteJobRequest;
3537
import org.elasticsearch.client.ml.DeleteJobResponse;
3638
import org.elasticsearch.client.ml.DeleteModelSnapshotRequest;
37-
import org.elasticsearch.client.ml.EstimateMemoryUsageResponse;
3839
import org.elasticsearch.client.ml.EvaluateDataFrameRequest;
3940
import org.elasticsearch.client.ml.EvaluateDataFrameResponse;
4041
import org.elasticsearch.client.ml.FindFileStructureRequest;
@@ -2249,46 +2250,46 @@ public Cancellable evaluateDataFrameAsync(EvaluateDataFrameRequest request, Requ
22492250
}
22502251

22512252
/**
2252-
* Estimates memory usage for the given Data Frame Analytics
2253+
* Explains the given Data Frame Analytics
22532254
* <p>
22542255
* For additional info
2255-
* see <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/estimate-memory-usage-dfanalytics.html">
2256-
* Estimate Memory Usage for Data Frame Analytics documentation</a>
2256+
* see <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/explain-dfanalytics.html">
2257+
* Explain Data Frame Analytics documentation</a>
22572258
*
2258-
* @param request The {@link PutDataFrameAnalyticsRequest}
2259+
* @param request The {@link ExplainDataFrameAnalyticsRequest}
22592260
* @param options Additional request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized
2260-
* @return {@link EstimateMemoryUsageResponse} response object
2261+
* @return {@link ExplainDataFrameAnalyticsResponse} response object
22612262
* @throws IOException when there is a serialization issue sending the request or receiving the response
22622263
*/
2263-
public EstimateMemoryUsageResponse estimateMemoryUsage(PutDataFrameAnalyticsRequest request,
2264-
RequestOptions options) throws IOException {
2264+
public ExplainDataFrameAnalyticsResponse explainDataFrameAnalytics(ExplainDataFrameAnalyticsRequest request,
2265+
RequestOptions options) throws IOException {
22652266
return restHighLevelClient.performRequestAndParseEntity(
22662267
request,
2267-
MLRequestConverters::estimateMemoryUsage,
2268+
MLRequestConverters::explainDataFrameAnalytics,
22682269
options,
2269-
EstimateMemoryUsageResponse::fromXContent,
2270+
ExplainDataFrameAnalyticsResponse::fromXContent,
22702271
Collections.emptySet());
22712272
}
22722273

22732274
/**
2274-
* Estimates memory usage for the given Data Frame Analytics asynchronously and notifies listener upon completion
2275+
* Explains the given Data Frame Analytics asynchronously and notifies listener upon completion
22752276
* <p>
22762277
* For additional info
2277-
* see <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/estimate-memory-usage-dfanalytics.html">
2278-
* Estimate Memory Usage for Data Frame Analytics documentation</a>
2278+
* see <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/explain-dfanalytics.html">
2279+
* Explain Data Frame Analytics documentation</a>
22792280
*
2280-
* @param request The {@link PutDataFrameAnalyticsRequest}
2281+
* @param request The {@link ExplainDataFrameAnalyticsRequest}
22812282
* @param options Additional request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized
22822283
* @param listener Listener to be notified upon request completion
22832284
* @return cancellable that may be used to cancel the request
22842285
*/
2285-
public Cancellable estimateMemoryUsageAsync(PutDataFrameAnalyticsRequest request, RequestOptions options,
2286-
ActionListener<EstimateMemoryUsageResponse> listener) {
2286+
public Cancellable explainDataFrameAnalyticsAsync(ExplainDataFrameAnalyticsRequest request, RequestOptions options,
2287+
ActionListener<ExplainDataFrameAnalyticsResponse> listener) {
22872288
return restHighLevelClient.performRequestAsyncAndParseEntity(
22882289
request,
2289-
MLRequestConverters::estimateMemoryUsage,
2290+
MLRequestConverters::explainDataFrameAnalytics,
22902291
options,
2291-
EstimateMemoryUsageResponse::fromXContent,
2292+
ExplainDataFrameAnalyticsResponse::fromXContent,
22922293
listener,
22932294
Collections.emptySet());
22942295
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.elasticsearch.client.ml;
20+
21+
import org.elasticsearch.client.Validatable;
22+
import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsConfig;
23+
import org.elasticsearch.common.Nullable;
24+
25+
import java.util.Objects;
26+
27+
/**
28+
* Request to explain the following about a data frame analytics job:
29+
* <ul>
30+
* <li>field selection: which fields are included or are not in the analysis</li>
31+
* <li>memory estimation: how much memory the job is estimated to require</li>
32+
* </ul>
33+
*/
34+
public class ExplainDataFrameAnalyticsRequest implements Validatable {
35+
36+
private final String id;
37+
private final DataFrameAnalyticsConfig config;
38+
39+
public ExplainDataFrameAnalyticsRequest(String id) {
40+
this.id = Objects.requireNonNull(id);
41+
this.config = null;
42+
}
43+
44+
public ExplainDataFrameAnalyticsRequest(DataFrameAnalyticsConfig config) {
45+
this.id = null;
46+
this.config = Objects.requireNonNull(config);
47+
}
48+
49+
@Nullable
50+
public String getId() {
51+
return id;
52+
}
53+
54+
@Nullable
55+
public DataFrameAnalyticsConfig getConfig() {
56+
return config;
57+
}
58+
59+
@Override
60+
public boolean equals(Object o) {
61+
if (this == o) return true;
62+
if (o == null || getClass() != o.getClass()) return false;
63+
64+
ExplainDataFrameAnalyticsRequest other = (ExplainDataFrameAnalyticsRequest) o;
65+
return Objects.equals(id, other.id) && Objects.equals(config, other.config);
66+
}
67+
68+
@Override
69+
public int hashCode() {
70+
return Objects.hash(id, config);
71+
}
72+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.elasticsearch.client.ml;
20+
21+
import org.elasticsearch.client.ml.dataframe.explain.FieldSelection;
22+
import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimation;
23+
import org.elasticsearch.common.ParseField;
24+
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
25+
import org.elasticsearch.common.xcontent.ToXContentObject;
26+
import org.elasticsearch.common.xcontent.XContentBuilder;
27+
import org.elasticsearch.common.xcontent.XContentParser;
28+
29+
import java.io.IOException;
30+
import java.util.List;
31+
import java.util.Objects;
32+
33+
public class ExplainDataFrameAnalyticsResponse implements ToXContentObject {
34+
35+
public static final ParseField TYPE = new ParseField("explain_data_frame_analytics_response");
36+
37+
public static final ParseField FIELD_SELECTION = new ParseField("field_selection");
38+
public static final ParseField MEMORY_ESTIMATION = new ParseField("memory_estimation");
39+
40+
public static ExplainDataFrameAnalyticsResponse fromXContent(XContentParser parser) throws IOException {
41+
return PARSER.parse(parser, null);
42+
}
43+
44+
@SuppressWarnings("unchecked")
45+
static final ConstructingObjectParser<ExplainDataFrameAnalyticsResponse, Void> PARSER =
46+
new ConstructingObjectParser<>(
47+
TYPE.getPreferredName(), true,
48+
args -> new ExplainDataFrameAnalyticsResponse((List<FieldSelection>) args[0], (MemoryEstimation) args[1]));
49+
50+
static {
51+
PARSER.declareObjectArray(ConstructingObjectParser.constructorArg(), FieldSelection.PARSER, FIELD_SELECTION);
52+
PARSER.declareObject(ConstructingObjectParser.constructorArg(), MemoryEstimation.PARSER, MEMORY_ESTIMATION);
53+
}
54+
55+
private final List<FieldSelection> fieldSelection;
56+
private final MemoryEstimation memoryEstimation;
57+
58+
public ExplainDataFrameAnalyticsResponse(List<FieldSelection> fieldSelection, MemoryEstimation memoryEstimation) {
59+
this.fieldSelection = Objects.requireNonNull(fieldSelection);
60+
this.memoryEstimation = Objects.requireNonNull(memoryEstimation);
61+
}
62+
63+
@Override
64+
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
65+
builder.startObject();
66+
builder.field(FIELD_SELECTION.getPreferredName(), fieldSelection);
67+
builder.field(MEMORY_ESTIMATION.getPreferredName(), memoryEstimation);
68+
builder.endObject();
69+
return builder;
70+
}
71+
72+
@Override
73+
public boolean equals(Object other) {
74+
if (this == other) return true;
75+
if (other == null || getClass() != other.getClass()) return false;
76+
77+
ExplainDataFrameAnalyticsResponse that = (ExplainDataFrameAnalyticsResponse) other;
78+
return Objects.equals(fieldSelection, that.fieldSelection)
79+
&& Objects.equals(memoryEstimation, that.memoryEstimation);
80+
}
81+
82+
@Override
83+
public int hashCode() {
84+
return Objects.hash(fieldSelection, memoryEstimation);
85+
}
86+
87+
public MemoryEstimation getMemoryEstimation() {
88+
return memoryEstimation;
89+
}
90+
91+
public List<FieldSelection> getFieldSelection() {
92+
return fieldSelection;
93+
}
94+
}

0 commit comments

Comments
 (0)