Skip to content

Commit e8fa1dc

Browse files
committed
ML: Add support for rollup Indexes in Datafeeds (#34654)
* Adding rollup support for datafeeds * Fixing tests and adjusting formatting * minor formatting chagne * fixing some syntax and removing redundancies * Refactoring and fixing failing test * Refactoring, adding paranoid null check * Moving rollup into the aggregation package * making AggregationToJsonProcessor package private again * Addressing test failure * Fixing validations, chunking * Addressing failing test * rolling back RollupJobCaps changes * Adding comment and cleaning up test * Addressing review comments and test failures * Moving builder logic into separate methods * Addressing PR comments, adding test for rollup permissions * Fixing test failure * Adding rollup priv check on datafeed put * Handling missing index when getting caps * Fixing unused import
1 parent 4add939 commit e8fa1dc

File tree

17 files changed

+1298
-224
lines changed

17 files changed

+1298
-224
lines changed

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/datafeed/DatafeedConfig.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -430,9 +430,9 @@ private TimeValue defaultFrequencyTarget(TimeValue bucketSpan) {
430430

431431
public static class Builder {
432432

433+
public static final int DEFAULT_AGGREGATION_CHUNKING_BUCKETS = 1000;
433434
private static final TimeValue MIN_DEFAULT_QUERY_DELAY = TimeValue.timeValueMinutes(1);
434435
private static final TimeValue MAX_DEFAULT_QUERY_DELAY = TimeValue.timeValueMinutes(2);
435-
private static final int DEFAULT_AGGREGATION_CHUNKING_BUCKETS = 1000;
436436

437437
private String id;
438438
private String jobId;

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/datafeed/extractor/ExtractorUtils.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ private static long validateAndGetDateHistogramInterval(DateHistogramAggregation
139139
}
140140
}
141141

142-
static long validateAndGetCalendarInterval(String calendarInterval) {
142+
public static long validateAndGetCalendarInterval(String calendarInterval) {
143143
TimeValue interval;
144144
DateTimeUnit dateTimeUnit = DateHistogramAggregationBuilder.DATE_FIELD_UNITS.get(calendarInterval);
145145
if (dateTimeUnit != null) {

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/rollup/action/RollupSearchAction.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,11 @@ public SearchResponse newResponse() {
3030
return new SearchResponse();
3131
}
3232

33-
static class RequestBuilder extends ActionRequestBuilder<SearchRequest, SearchResponse, RequestBuilder> {
33+
public static class RequestBuilder extends ActionRequestBuilder<SearchRequest, SearchResponse, RequestBuilder> {
34+
public RequestBuilder(ElasticsearchClient client, SearchRequest searchRequest) {
35+
super(client, INSTANCE, searchRequest);
36+
}
37+
3438
RequestBuilder(ElasticsearchClient client) {
3539
super(client, INSTANCE, new SearchRequest());
3640
}

x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/DatafeedJobsRestIT.java

Lines changed: 249 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import org.elasticsearch.test.rest.ESRestTestCase;
1818
import org.elasticsearch.xpack.core.ml.integration.MlRestTestStateCleaner;
1919
import org.elasticsearch.xpack.core.ml.notifications.AuditorField;
20+
import org.elasticsearch.xpack.core.rollup.job.RollupJob;
2021
import org.elasticsearch.xpack.ml.MachineLearning;
2122
import org.junit.After;
2223
import org.junit.Before;
@@ -27,6 +28,7 @@
2728
import java.util.Date;
2829
import java.util.List;
2930
import java.util.Locale;
31+
import java.util.concurrent.TimeUnit;
3032
import java.util.stream.Collectors;
3133

3234
import static org.elasticsearch.xpack.core.security.authc.support.UsernamePasswordToken.basicAuthHeaderValue;
@@ -63,6 +65,16 @@ private void setupDataAccessRole(String index) throws IOException {
6365
client().performRequest(request);
6466
}
6567

68+
private void setupFullAccessRole(String index) throws IOException {
69+
Request request = new Request("PUT", "/_xpack/security/role/test_data_access");
70+
request.setJsonEntity("{"
71+
+ " \"indices\" : ["
72+
+ " { \"names\": [\"" + index + "\"], \"privileges\": [\"all\"] }"
73+
+ " ]"
74+
+ "}");
75+
client().performRequest(request);
76+
}
77+
6678
private void setupUser(String user, List<String> roles) throws IOException {
6779
String password = new String(SecuritySettingsSourceField.TEST_PASSWORD_SECURE_STRING.getChars());
6880

@@ -359,7 +371,75 @@ public void testInsufficientSearchPrivilegesOnPut() throws Exception {
359371

360372
assertThat(e.getMessage(), containsString("Cannot create datafeed"));
361373
assertThat(e.getMessage(),
362-
containsString("user ml_admin lacks permissions on the indices to be searched"));
374+
containsString("user ml_admin lacks permissions on the indices"));
375+
}
376+
377+
public void testInsufficientSearchPrivilegesOnPutWithRollup() throws Exception {
378+
setupDataAccessRole("airline-data-aggs-rollup");
379+
String jobId = "privs-put-job-rollup";
380+
Request createJobRequest = new Request("PUT", MachineLearning.BASE_PATH + "anomaly_detectors/" + jobId);
381+
createJobRequest.setJsonEntity("{\n"
382+
+ " \"description\": \"Aggs job\",\n"
383+
+ " \"analysis_config\": {\n"
384+
+ " \"bucket_span\": \"1h\",\n"
385+
+ " \"summary_count_field_name\": \"doc_count\",\n"
386+
+ " \"detectors\": [\n"
387+
+ " {\n"
388+
+ " \"function\": \"mean\",\n"
389+
+ " \"field_name\": \"responsetime\",\n"
390+
+ " \"by_field_name\": \"airline\"\n"
391+
+ " }\n"
392+
+ " ]\n"
393+
+ " },\n"
394+
+ " \"data_description\": {\"time_field\": \"time stamp\"}\n"
395+
+ "}");
396+
client().performRequest(createJobRequest);
397+
398+
String rollupJobId = "rollup-" + jobId;
399+
Request createRollupRequest = new Request("PUT", "/_xpack/rollup/job/" + rollupJobId);
400+
createRollupRequest.setJsonEntity("{\n"
401+
+ "\"index_pattern\": \"airline-data-aggs\",\n"
402+
+ " \"rollup_index\": \"airline-data-aggs-rollup\",\n"
403+
+ " \"cron\": \"*/30 * * * * ?\",\n"
404+
+ " \"page_size\" :1000,\n"
405+
+ " \"groups\" : {\n"
406+
+ " \"date_histogram\": {\n"
407+
+ " \"field\": \"time stamp\",\n"
408+
+ " \"interval\": \"2m\",\n"
409+
+ " \"delay\": \"7d\"\n"
410+
+ " },\n"
411+
+ " \"terms\": {\n"
412+
+ " \"fields\": [\"airline\"]\n"
413+
+ " }"
414+
+ " },\n"
415+
+ " \"metrics\": [\n"
416+
+ " {\n"
417+
+ " \"field\": \"responsetime\",\n"
418+
+ " \"metrics\": [\"avg\",\"min\",\"max\",\"sum\"]\n"
419+
+ " },\n"
420+
+ " {\n"
421+
+ " \"field\": \"time stamp\",\n"
422+
+ " \"metrics\": [\"min\",\"max\"]\n"
423+
+ " }\n"
424+
+ " ]\n"
425+
+ "}");
426+
client().performRequest(createRollupRequest);
427+
428+
String datafeedId = "datafeed-" + jobId;
429+
String aggregations = "{\"buckets\":{\"date_histogram\":{\"field\":\"time stamp\",\"interval\":3600000},"
430+
+ "\"aggregations\":{"
431+
+ "\"time stamp\":{\"max\":{\"field\":\"time stamp\"}},"
432+
+ "\"responsetime\":{\"avg\":{\"field\":\"responsetime\"}}}}}";
433+
434+
435+
ResponseException e = expectThrows(ResponseException.class, () ->
436+
new DatafeedBuilder(datafeedId, jobId, "airline-data-aggs-rollup", "doc")
437+
.setAggregations(aggregations)
438+
.setAuthHeader(BASIC_AUTH_VALUE_ML_ADMIN_WITH_SOME_DATA_ACCESS) //want to search, but no admin access
439+
.build());
440+
assertThat(e.getMessage(), containsString("Cannot create datafeed"));
441+
assertThat(e.getMessage(),
442+
containsString("user ml_admin_plus_data lacks permissions on the indices"));
363443
}
364444

365445
public void testInsufficientSearchPrivilegesOnPreview() throws Exception {
@@ -615,7 +695,7 @@ public void testLookbackWithoutPermissions() throws Exception {
615695
// There should be a notification saying that there was a problem extracting data
616696
client().performRequest(new Request("POST", "/_refresh"));
617697
Response notificationsResponse = client().performRequest(
618-
new Request("GET", AuditorField.NOTIFICATIONS_INDEX + "/_search?q=job_id:" + jobId));
698+
new Request("GET", AuditorField.NOTIFICATIONS_INDEX + "/_search?size=1000&q=job_id:" + jobId));
619699
String notificationsResponseAsString = EntityUtils.toString(notificationsResponse.getEntity());
620700
assertThat(notificationsResponseAsString, containsString("\"message\":\"Datafeed is encountering errors extracting data: " +
621701
"action [indices:data/read/search] is unauthorized for user [ml_admin_plus_data]\""));
@@ -663,6 +743,171 @@ public void testLookbackWithPipelineBucketAgg() throws Exception {
663743
assertThat(jobStatsResponseAsString, containsString("\"missing_field_count\":0"));
664744
}
665745

746+
public void testLookbackOnlyGivenAggregationsWithHistogramAndRollupIndex() throws Exception {
747+
String jobId = "aggs-histogram-rollup-job";
748+
Request createJobRequest = new Request("PUT", MachineLearning.BASE_PATH + "anomaly_detectors/" + jobId);
749+
createJobRequest.setJsonEntity("{\n"
750+
+ " \"description\": \"Aggs job\",\n"
751+
+ " \"analysis_config\": {\n"
752+
+ " \"bucket_span\": \"1h\",\n"
753+
+ " \"summary_count_field_name\": \"doc_count\",\n"
754+
+ " \"detectors\": [\n"
755+
+ " {\n"
756+
+ " \"function\": \"mean\",\n"
757+
+ " \"field_name\": \"responsetime\",\n"
758+
+ " \"by_field_name\": \"airline\"\n"
759+
+ " }\n"
760+
+ " ]\n"
761+
+ " },\n"
762+
+ " \"data_description\": {\"time_field\": \"time stamp\"}\n"
763+
+ "}");
764+
client().performRequest(createJobRequest);
765+
766+
String rollupJobId = "rollup-" + jobId;
767+
Request createRollupRequest = new Request("PUT", "/_xpack/rollup/job/" + rollupJobId);
768+
createRollupRequest.setJsonEntity("{\n"
769+
+ "\"index_pattern\": \"airline-data-aggs\",\n"
770+
+ " \"rollup_index\": \"airline-data-aggs-rollup\",\n"
771+
+ " \"cron\": \"*/30 * * * * ?\",\n"
772+
+ " \"page_size\" :1000,\n"
773+
+ " \"groups\" : {\n"
774+
+ " \"date_histogram\": {\n"
775+
+ " \"field\": \"time stamp\",\n"
776+
+ " \"interval\": \"2m\",\n"
777+
+ " \"delay\": \"7d\"\n"
778+
+ " },\n"
779+
+ " \"terms\": {\n"
780+
+ " \"fields\": [\"airline\"]\n"
781+
+ " }"
782+
+ " },\n"
783+
+ " \"metrics\": [\n"
784+
+ " {\n"
785+
+ " \"field\": \"responsetime\",\n"
786+
+ " \"metrics\": [\"avg\",\"min\",\"max\",\"sum\"]\n"
787+
+ " },\n"
788+
+ " {\n"
789+
+ " \"field\": \"time stamp\",\n"
790+
+ " \"metrics\": [\"min\",\"max\"]\n"
791+
+ " }\n"
792+
+ " ]\n"
793+
+ "}");
794+
client().performRequest(createRollupRequest);
795+
client().performRequest(new Request("POST", "/_xpack/rollup/job/" + rollupJobId + "/_start"));
796+
797+
assertBusy(() -> {
798+
Response getRollup = client().performRequest(new Request("GET", "/_xpack/rollup/job/" + rollupJobId));
799+
String body = EntityUtils.toString(getRollup.getEntity());
800+
assertThat(body, containsString("\"job_state\":\"started\""));
801+
assertThat(body, containsString("\"rollups_indexed\":4"));
802+
}, 60, TimeUnit.SECONDS);
803+
804+
client().performRequest(new Request("POST", "/_xpack/rollup/job/" + rollupJobId + "/_stop"));
805+
assertBusy(() -> {
806+
Response getRollup = client().performRequest(new Request("GET", "/_xpack/rollup/job/" + rollupJobId));
807+
assertThat(EntityUtils.toString(getRollup.getEntity()), containsString("\"job_state\":\"stopped\""));
808+
}, 60, TimeUnit.SECONDS);
809+
810+
final Request refreshRollupIndex = new Request("POST", "airline-data-aggs-rollup/_refresh");
811+
client().performRequest(refreshRollupIndex);
812+
813+
String datafeedId = "datafeed-" + jobId;
814+
String aggregations = "{\"buckets\":{\"date_histogram\":{\"field\":\"time stamp\",\"interval\":3600000},"
815+
+ "\"aggregations\":{"
816+
+ "\"time stamp\":{\"max\":{\"field\":\"time stamp\"}},"
817+
+ "\"responsetime\":{\"avg\":{\"field\":\"responsetime\"}}}}}";
818+
new DatafeedBuilder(datafeedId, jobId, "airline-data-aggs-rollup", "response").setAggregations(aggregations).build();
819+
openJob(client(), jobId);
820+
821+
startDatafeedAndWaitUntilStopped(datafeedId);
822+
waitUntilJobIsClosed(jobId);
823+
Response jobStatsResponse = client().performRequest(new Request("GET",
824+
MachineLearning.BASE_PATH + "anomaly_detectors/" + jobId + "/_stats"));
825+
String jobStatsResponseAsString = EntityUtils.toString(jobStatsResponse.getEntity());
826+
assertThat(jobStatsResponseAsString, containsString("\"input_record_count\":2"));
827+
assertThat(jobStatsResponseAsString, containsString("\"processed_record_count\":2"));
828+
}
829+
830+
public void testLookbackWithoutPermissionsAndRollup() throws Exception {
831+
setupFullAccessRole("airline-data-aggs-rollup");
832+
String jobId = "rollup-permission-test-network-job";
833+
Request createJobRequest = new Request("PUT", MachineLearning.BASE_PATH + "anomaly_detectors/" + jobId);
834+
createJobRequest.setJsonEntity("{\n"
835+
+ " \"description\": \"Aggs job\",\n"
836+
+ " \"analysis_config\": {\n"
837+
+ " \"bucket_span\": \"1h\",\n"
838+
+ " \"summary_count_field_name\": \"doc_count\",\n"
839+
+ " \"detectors\": [\n"
840+
+ " {\n"
841+
+ " \"function\": \"mean\",\n"
842+
+ " \"field_name\": \"responsetime\",\n"
843+
+ " \"by_field_name\": \"airline\"\n"
844+
+ " }\n"
845+
+ " ]\n"
846+
+ " },\n"
847+
+ " \"data_description\": {\"time_field\": \"time stamp\"}\n"
848+
+ "}");
849+
client().performRequest(createJobRequest);
850+
851+
String rollupJobId = "rollup-" + jobId;
852+
Request createRollupRequest = new Request("PUT", "/_xpack/rollup/job/" + rollupJobId);
853+
createRollupRequest.setJsonEntity("{\n"
854+
+ "\"index_pattern\": \"airline-data-aggs\",\n"
855+
+ " \"rollup_index\": \"airline-data-aggs-rollup\",\n"
856+
+ " \"cron\": \"*/30 * * * * ?\",\n"
857+
+ " \"page_size\" :1000,\n"
858+
+ " \"groups\" : {\n"
859+
+ " \"date_histogram\": {\n"
860+
+ " \"field\": \"time stamp\",\n"
861+
+ " \"interval\": \"2m\",\n"
862+
+ " \"delay\": \"7d\"\n"
863+
+ " },\n"
864+
+ " \"terms\": {\n"
865+
+ " \"fields\": [\"airline\"]\n"
866+
+ " }"
867+
+ " },\n"
868+
+ " \"metrics\": [\n"
869+
+ " {\n"
870+
+ " \"field\": \"responsetime\",\n"
871+
+ " \"metrics\": [\"avg\",\"min\",\"max\",\"sum\"]\n"
872+
+ " },\n"
873+
+ " {\n"
874+
+ " \"field\": \"time stamp\",\n"
875+
+ " \"metrics\": [\"min\",\"max\"]\n"
876+
+ " }\n"
877+
+ " ]\n"
878+
+ "}");
879+
client().performRequest(createRollupRequest);
880+
881+
String datafeedId = "datafeed-" + jobId;
882+
String aggregations = "{\"buckets\":{\"date_histogram\":{\"field\":\"time stamp\",\"interval\":3600000},"
883+
+ "\"aggregations\":{"
884+
+ "\"time stamp\":{\"max\":{\"field\":\"time stamp\"}},"
885+
+ "\"responsetime\":{\"avg\":{\"field\":\"responsetime\"}}}}}";
886+
887+
888+
// At the time we create the datafeed the user can access the network-data index that we have access to
889+
new DatafeedBuilder(datafeedId, jobId, "airline-data-aggs-rollup", "doc")
890+
.setAggregations(aggregations)
891+
.setChunkingTimespan("300s")
892+
.setAuthHeader(BASIC_AUTH_VALUE_ML_ADMIN_WITH_SOME_DATA_ACCESS)
893+
.build();
894+
895+
// Change the role so that the user can no longer access network-data
896+
setupFullAccessRole("some-other-data");
897+
898+
openJob(client(), jobId);
899+
900+
startDatafeedAndWaitUntilStopped(datafeedId, BASIC_AUTH_VALUE_ML_ADMIN_WITH_SOME_DATA_ACCESS);
901+
waitUntilJobIsClosed(jobId);
902+
// There should be a notification saying that there was a problem extracting data
903+
client().performRequest(new Request("POST", "/_refresh"));
904+
Response notificationsResponse = client().performRequest(
905+
new Request("GET", AuditorField.NOTIFICATIONS_INDEX + "/_search?size=1000&q=job_id:" + jobId));
906+
String notificationsResponseAsString = EntityUtils.toString(notificationsResponse.getEntity());
907+
assertThat(notificationsResponseAsString, containsString("\"message\":\"Datafeed is encountering errors extracting data: " +
908+
"action [indices:admin/xpack/rollup/search] is unauthorized for user [ml_admin_plus_data]\""));
909+
}
910+
666911
public void testRealtime() throws Exception {
667912
String jobId = "job-realtime-1";
668913
createJob(jobId, "airline");
@@ -882,7 +1127,8 @@ public static void openJob(RestClient client, String jobId) throws IOException {
8821127
@After
8831128
public void clearMlState() throws Exception {
8841129
new MlRestTestStateCleaner(logger, adminClient()).clearMlMetadata();
885-
ESRestTestCase.waitForPendingTasks(adminClient());
1130+
// Don't check rollup jobs because we clear them in the superclass.
1131+
waitForPendingTasks(adminClient(), taskName -> taskName.startsWith(RollupJob.NAME));
8861132
}
8871133

8881134
private static class DatafeedBuilder {

0 commit comments

Comments (0)