Skip to content

Commit 5fa36da

Browse files
[ML] Machine learning data frame analytics (#43544)
This merges the initial work that adds a framework for performing machine learning analytics on data frames. The feature is currently experimental and requires a platinum license. Note that the original commits can be found in the `feature-ml-data-frame-analytics` branch. A new set of APIs is added which allows the creation of data frame analytics jobs. Configuration allows specifying different types of analysis to be performed on a data frame. At first there is support for outlier detection. The APIs are: - PUT _ml/data_frame/analytics/{id} - GET _ml/data_frame/analytics/{id} - GET _ml/data_frame/analytics/{id}/_stats - POST _ml/data_frame/analytics/{id}/_start - POST _ml/data_frame/analytics/{id}/_stop - DELETE _ml/data_frame/analytics/{id} When a data frame analytics job is started a persistent task is created and started. The main steps of the task are: 1. reindex the source index into the dest index 2. analyze the data through the data_frame_analyzer C++ process 3. merge the results of the process back into the destination index In addition, an evaluation API is added which packages commonly used metrics that provide evaluation of various analyses: - POST _ml/data_frame/_evaluate
1 parent b4f30cf commit 5fa36da

File tree

244 files changed

+20924
-1335
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

244 files changed

+20924
-1335
lines changed

client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java

+116
Original file line numberDiff line numberDiff line change
@@ -32,19 +32,23 @@
3232
import org.elasticsearch.client.ml.DeleteCalendarEventRequest;
3333
import org.elasticsearch.client.ml.DeleteCalendarJobRequest;
3434
import org.elasticsearch.client.ml.DeleteCalendarRequest;
35+
import org.elasticsearch.client.ml.DeleteDataFrameAnalyticsRequest;
3536
import org.elasticsearch.client.ml.DeleteDatafeedRequest;
3637
import org.elasticsearch.client.ml.DeleteExpiredDataRequest;
3738
import org.elasticsearch.client.ml.DeleteFilterRequest;
3839
import org.elasticsearch.client.ml.DeleteForecastRequest;
3940
import org.elasticsearch.client.ml.DeleteJobRequest;
4041
import org.elasticsearch.client.ml.DeleteModelSnapshotRequest;
42+
import org.elasticsearch.client.ml.EvaluateDataFrameRequest;
4143
import org.elasticsearch.client.ml.FindFileStructureRequest;
4244
import org.elasticsearch.client.ml.FlushJobRequest;
4345
import org.elasticsearch.client.ml.ForecastJobRequest;
4446
import org.elasticsearch.client.ml.GetBucketsRequest;
4547
import org.elasticsearch.client.ml.GetCalendarEventsRequest;
4648
import org.elasticsearch.client.ml.GetCalendarsRequest;
4749
import org.elasticsearch.client.ml.GetCategoriesRequest;
50+
import org.elasticsearch.client.ml.GetDataFrameAnalyticsRequest;
51+
import org.elasticsearch.client.ml.GetDataFrameAnalyticsStatsRequest;
4852
import org.elasticsearch.client.ml.GetDatafeedRequest;
4953
import org.elasticsearch.client.ml.GetDatafeedStatsRequest;
5054
import org.elasticsearch.client.ml.GetFiltersRequest;
@@ -61,12 +65,15 @@
6165
import org.elasticsearch.client.ml.PreviewDatafeedRequest;
6266
import org.elasticsearch.client.ml.PutCalendarJobRequest;
6367
import org.elasticsearch.client.ml.PutCalendarRequest;
68+
import org.elasticsearch.client.ml.PutDataFrameAnalyticsRequest;
6469
import org.elasticsearch.client.ml.PutDatafeedRequest;
6570
import org.elasticsearch.client.ml.PutFilterRequest;
6671
import org.elasticsearch.client.ml.PutJobRequest;
6772
import org.elasticsearch.client.ml.RevertModelSnapshotRequest;
6873
import org.elasticsearch.client.ml.SetUpgradeModeRequest;
74+
import org.elasticsearch.client.ml.StartDataFrameAnalyticsRequest;
6975
import org.elasticsearch.client.ml.StartDatafeedRequest;
76+
import org.elasticsearch.client.ml.StopDataFrameAnalyticsRequest;
7077
import org.elasticsearch.client.ml.StopDatafeedRequest;
7178
import org.elasticsearch.client.ml.UpdateDatafeedRequest;
7279
import org.elasticsearch.client.ml.UpdateFilterRequest;
@@ -581,6 +588,115 @@ static Request deleteCalendarEvent(DeleteCalendarEventRequest deleteCalendarEven
581588
return new Request(HttpDelete.METHOD_NAME, endpoint);
582589
}
583590

591+
static Request putDataFrameAnalytics(PutDataFrameAnalyticsRequest putRequest) throws IOException {
592+
String endpoint = new EndpointBuilder()
593+
.addPathPartAsIs("_ml", "data_frame", "analytics")
594+
.addPathPart(putRequest.getConfig().getId())
595+
.build();
596+
Request request = new Request(HttpPut.METHOD_NAME, endpoint);
597+
request.setEntity(createEntity(putRequest, REQUEST_BODY_CONTENT_TYPE));
598+
return request;
599+
}
600+
601+
static Request getDataFrameAnalytics(GetDataFrameAnalyticsRequest getRequest) {
602+
String endpoint = new EndpointBuilder()
603+
.addPathPartAsIs("_ml", "data_frame", "analytics")
604+
.addPathPart(Strings.collectionToCommaDelimitedString(getRequest.getIds()))
605+
.build();
606+
Request request = new Request(HttpGet.METHOD_NAME, endpoint);
607+
RequestConverters.Params params = new RequestConverters.Params();
608+
if (getRequest.getPageParams() != null) {
609+
PageParams pageParams = getRequest.getPageParams();
610+
if (pageParams.getFrom() != null) {
611+
params.putParam(PageParams.FROM.getPreferredName(), pageParams.getFrom().toString());
612+
}
613+
if (pageParams.getSize() != null) {
614+
params.putParam(PageParams.SIZE.getPreferredName(), pageParams.getSize().toString());
615+
}
616+
}
617+
if (getRequest.getAllowNoMatch() != null) {
618+
params.putParam(GetDataFrameAnalyticsRequest.ALLOW_NO_MATCH.getPreferredName(), Boolean.toString(getRequest.getAllowNoMatch()));
619+
}
620+
request.addParameters(params.asMap());
621+
return request;
622+
}
623+
624+
static Request getDataFrameAnalyticsStats(GetDataFrameAnalyticsStatsRequest getStatsRequest) {
625+
String endpoint = new EndpointBuilder()
626+
.addPathPartAsIs("_ml", "data_frame", "analytics")
627+
.addPathPart(Strings.collectionToCommaDelimitedString(getStatsRequest.getIds()))
628+
.addPathPartAsIs("_stats")
629+
.build();
630+
Request request = new Request(HttpGet.METHOD_NAME, endpoint);
631+
RequestConverters.Params params = new RequestConverters.Params();
632+
if (getStatsRequest.getPageParams() != null) {
633+
PageParams pageParams = getStatsRequest.getPageParams();
634+
if (pageParams.getFrom() != null) {
635+
params.putParam(PageParams.FROM.getPreferredName(), pageParams.getFrom().toString());
636+
}
637+
if (pageParams.getSize() != null) {
638+
params.putParam(PageParams.SIZE.getPreferredName(), pageParams.getSize().toString());
639+
}
640+
}
641+
if (getStatsRequest.getAllowNoMatch() != null) {
642+
params.putParam(GetDataFrameAnalyticsStatsRequest.ALLOW_NO_MATCH.getPreferredName(),
643+
Boolean.toString(getStatsRequest.getAllowNoMatch()));
644+
}
645+
request.addParameters(params.asMap());
646+
return request;
647+
}
648+
649+
static Request startDataFrameAnalytics(StartDataFrameAnalyticsRequest startRequest) {
650+
String endpoint = new EndpointBuilder()
651+
.addPathPartAsIs("_ml", "data_frame", "analytics")
652+
.addPathPart(startRequest.getId())
653+
.addPathPartAsIs("_start")
654+
.build();
655+
Request request = new Request(HttpPost.METHOD_NAME, endpoint);
656+
RequestConverters.Params params = new RequestConverters.Params();
657+
if (startRequest.getTimeout() != null) {
658+
params.withTimeout(startRequest.getTimeout());
659+
}
660+
request.addParameters(params.asMap());
661+
return request;
662+
}
663+
664+
static Request stopDataFrameAnalytics(StopDataFrameAnalyticsRequest stopRequest) {
665+
String endpoint = new EndpointBuilder()
666+
.addPathPartAsIs("_ml", "data_frame", "analytics")
667+
.addPathPart(stopRequest.getId())
668+
.addPathPartAsIs("_stop")
669+
.build();
670+
Request request = new Request(HttpPost.METHOD_NAME, endpoint);
671+
RequestConverters.Params params = new RequestConverters.Params();
672+
if (stopRequest.getTimeout() != null) {
673+
params.withTimeout(stopRequest.getTimeout());
674+
}
675+
if (stopRequest.getAllowNoMatch() != null) {
676+
params.putParam(
677+
StopDataFrameAnalyticsRequest.ALLOW_NO_MATCH.getPreferredName(), Boolean.toString(stopRequest.getAllowNoMatch()));
678+
}
679+
request.addParameters(params.asMap());
680+
return request;
681+
}
682+
683+
static Request deleteDataFrameAnalytics(DeleteDataFrameAnalyticsRequest deleteRequest) {
684+
String endpoint = new EndpointBuilder()
685+
.addPathPartAsIs("_ml", "data_frame", "analytics")
686+
.addPathPart(deleteRequest.getId())
687+
.build();
688+
return new Request(HttpDelete.METHOD_NAME, endpoint);
689+
}
690+
691+
static Request evaluateDataFrame(EvaluateDataFrameRequest evaluateRequest) throws IOException {
692+
String endpoint = new EndpointBuilder()
693+
.addPathPartAsIs("_ml", "data_frame", "_evaluate")
694+
.build();
695+
Request request = new Request(HttpPost.METHOD_NAME, endpoint);
696+
request.setEntity(createEntity(evaluateRequest, REQUEST_BODY_CONTENT_TYPE));
697+
return request;
698+
}
699+
584700
static Request putFilter(PutFilterRequest putFilterRequest) throws IOException {
585701
String endpoint = new EndpointBuilder()
586702
.addPathPartAsIs("_ml")

0 commit comments

Comments
 (0)