Skip to content

[7.x][ML] Prevent node potentially going out of memory due to loading quantiles #70381

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -654,19 +654,23 @@ public void testGetSnapshots() {
indexModelSnapshot(new ModelSnapshot.Builder(jobId).setSnapshotId("snap_2")
.setTimestamp(Date.from(Instant.ofEpochMilli(10)))
.setMinVersion(Version.V_7_4_0)
.setQuantiles(new Quantiles(jobId, Date.from(Instant.ofEpochMilli(10)), randomAlphaOfLength(20)))
.build());
indexModelSnapshot(new ModelSnapshot.Builder(jobId).setSnapshotId("snap_1")
.setTimestamp(Date.from(Instant.ofEpochMilli(11)))
.setMinVersion(Version.V_7_2_0)
.setQuantiles(new Quantiles(jobId, Date.from(Instant.ofEpochMilli(11)), randomAlphaOfLength(20)))
.build());
indexModelSnapshot(new ModelSnapshot.Builder(jobId).setSnapshotId("other_snap")
.setTimestamp(Date.from(Instant.ofEpochMilli(12)))
.setMinVersion(Version.V_7_3_0)
.setQuantiles(new Quantiles(jobId, Date.from(Instant.ofEpochMilli(12)), randomAlphaOfLength(20)))
.build());
createJob("other_job");
indexModelSnapshot(new ModelSnapshot.Builder("other_job").setSnapshotId("other_snap")
.setTimestamp(Date.from(Instant.ofEpochMilli(10)))
.setMinVersion(Version.V_7_9_0)
.setQuantiles(new Quantiles("other_job", Date.from(Instant.ofEpochMilli(10)), randomAlphaOfLength(20)))
.build());
// Add a snapshot WITHOUT a min version.
client().prepareIndex(AnomalyDetectorsIndex.jobResultsAliasedName("other_job"), MapperService.SINGLE_MAPPING_NAME)
Expand All @@ -682,13 +686,17 @@ public void testGetSnapshots() {
jobProvider.modelSnapshots(jobId, 0, 4, "9", "15", "", false, "snap_2,snap_1", future::onResponse, future::onFailure);
List<ModelSnapshot> snapshots = future.actionGet().results();
assertThat(snapshots.get(0).getSnapshotId(), equalTo("snap_2"));
assertNull(snapshots.get(0).getQuantiles());
assertThat(snapshots.get(1).getSnapshotId(), equalTo("snap_1"));
assertNull(snapshots.get(1).getQuantiles());

future = new PlainActionFuture<>();
jobProvider.modelSnapshots(jobId, 0, 4, "9", "15", "", false, "snap_*", future::onResponse, future::onFailure);
snapshots = future.actionGet().results();
assertThat(snapshots.get(0).getSnapshotId(), equalTo("snap_2"));
assertThat(snapshots.get(1).getSnapshotId(), equalTo("snap_1"));
assertNull(snapshots.get(0).getQuantiles());
assertNull(snapshots.get(1).getQuantiles());

future = new PlainActionFuture<>();
jobProvider.modelSnapshots(jobId, 0, 4, "9", "15", "", false, "snap_*,other_snap", future::onResponse, future::onFailure);
Expand Down Expand Up @@ -721,6 +729,14 @@ public void testGetSnapshots() {
assertThat(snapshots.get(2).getSnapshotId(), equalTo("other_snap"));
assertThat(snapshots.get(3).getSnapshotId(), equalTo("snap_2"));
assertThat(snapshots.get(4).getSnapshotId(), equalTo("other_snap"));

// assert that quantiles are not loaded
assertNull(snapshots.get(0).getQuantiles());
assertNull(snapshots.get(1).getQuantiles());
assertNull(snapshots.get(2).getQuantiles());
assertNull(snapshots.get(3).getQuantiles());
assertNull(snapshots.get(4).getQuantiles());

}

public void testGetAutodetectParams() throws Exception {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,9 @@
import org.elasticsearch.tasks.Task;
import org.elasticsearch.transport.TransportService;
import org.elasticsearch.xpack.core.ml.action.GetModelSnapshotsAction;
import org.elasticsearch.xpack.core.action.util.QueryPage;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.ModelSnapshot;
import org.elasticsearch.xpack.ml.job.JobManager;
import org.elasticsearch.xpack.ml.job.persistence.JobResultsProvider;

import java.util.stream.Collectors;

public class TransportGetModelSnapshotsAction extends HandledTransportAction<GetModelSnapshotsAction.Request,
GetModelSnapshotsAction.Response> {

Expand Down Expand Up @@ -74,16 +70,7 @@ private void getModelSnapshots(GetModelSnapshotsAction.Request request, ActionLi
request.getSort(),
request.getDescOrder(),
request.getSnapshotId(),
page -> listener.onResponse(new GetModelSnapshotsAction.Response(clearQuantiles(page))),
page -> listener.onResponse(new GetModelSnapshotsAction.Response(page)),
listener::onFailure);
}

/**
 * Produces a page whose snapshots carry no quantiles.
 *
 * Each snapshot in the page is rebuilt through {@link ModelSnapshot.Builder} with its
 * quantiles set to {@code null}; the count and results field of the page are carried over.
 *
 * @param page the page of model snapshots to process
 * @return the same {@code page} instance if it has no results, otherwise a new page
 *         holding the rebuilt snapshots
 */
public static QueryPage<ModelSnapshot> clearQuantiles(QueryPage<ModelSnapshot> page) {
    if (page.results() != null) {
        return new QueryPage<>(
            page.results().stream().map(TransportGetModelSnapshotsAction::stripQuantiles).collect(Collectors.toList()),
            page.count(),
            page.getResultsField()
        );
    }
    // nothing to rewrite: hand back the page untouched
    return page;
}

/** Rebuilds a single snapshot with its quantiles set to {@code null}. */
private static ModelSnapshot stripQuantiles(ModelSnapshot snapshot) {
    return new ModelSnapshot.Builder(snapshot).setQuantiles(null).build();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
import org.elasticsearch.search.aggregations.metrics.ExtendedStats;
import org.elasticsearch.search.aggregations.metrics.Stats;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
import org.elasticsearch.search.sort.FieldSortBuilder;
import org.elasticsearch.search.sort.SortBuilders;
import org.elasticsearch.search.sort.SortOrder;
Expand Down Expand Up @@ -156,6 +157,13 @@ public class JobResultsProvider {
private static final double ESTABLISHED_MEMORY_CV_THRESHOLD = 0.1;
public static final Version HIDDEN_INTRODUCED_VERSION = Version.V_7_7_0;

/**
 * Source filter for model snapshot searches: the document source is still fetched
 * ({@code true}) with no include patterns ({@code null}), but the quantiles field is
 * excluded so it is not loaded, avoiding the memory overhead.
 */
private static final FetchSourceContext REMOVE_QUANTILES_FROM_SOURCE =
    new FetchSourceContext(true, null, new String[] { ModelSnapshot.QUANTILES.getPreferredName() });

private final Client client;
private final Settings settings;
private final IndexNameExpressionResolver resolver;
Expand Down Expand Up @@ -1022,6 +1030,8 @@ public void getModelSnapshot(String jobId, @Nullable String modelSnapshotId, Con
/**
* Get model snapshots for the job ordered by descending timestamp (newest first).
*
* Note: quantiles are removed from the results.
*
* @param jobId the job id
* @param from number of snapshots to skip
* @param size number of snapshots to retrieve
Expand All @@ -1034,6 +1044,8 @@ public void modelSnapshots(String jobId, int from, int size, Consumer<QueryPage<
/**
* Get model snapshots for the job ordered by descending restore priority.
*
* Note: quantiles are removed from the results.
*
* @param jobId the job id
* @param from number of snapshots to skip
* @param size number of snapshots to retrieve
Expand Down Expand Up @@ -1098,6 +1110,7 @@ private void modelSnapshots(String jobId,
sourceBuilder.from(from);
sourceBuilder.size(size);
sourceBuilder.trackTotalHits(true);
sourceBuilder.fetchSource(REMOVE_QUANTILES_FROM_SOURCE);
searchRequest.source(sourceBuilder);
executeAsyncWithOrigin(client.threadPool().getThreadContext(), ML_ORIGIN, searchRequest,
ActionListener.<SearchResponse>wrap(searchResponse -> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,9 @@
*/
package org.elasticsearch.xpack.ml.modelsnapshots;

import org.elasticsearch.common.ParseField;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.core.ml.action.GetModelSnapshotsAction;
import org.elasticsearch.xpack.ml.action.TransportGetModelSnapshotsAction;
import org.elasticsearch.xpack.core.action.util.PageParams;
import org.elasticsearch.xpack.core.action.util.QueryPage;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.ModelSnapshot;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.Quantiles;

import java.util.Arrays;
import java.util.Date;
import org.elasticsearch.xpack.core.ml.action.GetModelSnapshotsAction;

public class GetModelSnapshotsTests extends ESTestCase {

Expand All @@ -31,17 +23,4 @@ public void testModelSnapshots_GivenNegativeSize() {
() -> new GetModelSnapshotsAction.Request("foo", null).setPageParams(new PageParams(10, -5)));
assertEquals("Parameter [size] cannot be < 0", e.getMessage());
}

/**
 * Checks that {@code clearQuantiles} keeps every snapshot of the page and leaves none of
 * them with quantiles, whether or not the snapshot had quantiles to begin with.
 */
public void testModelSnapshots_clearQuantiles() {
    // arrange: one snapshot that carries quantiles, one that does not
    ModelSnapshot snapshotWithQuantiles = new ModelSnapshot.Builder("jobId")
        .setQuantiles(new Quantiles("jobId", new Date(), "quantileState"))
        .build();
    ModelSnapshot snapshotWithoutQuantiles = new ModelSnapshot.Builder("jobId").build();
    QueryPage<ModelSnapshot> inputPage = new QueryPage<>(
        Arrays.asList(snapshotWithQuantiles, snapshotWithoutQuantiles),
        2,
        new ParseField("field")
    );

    // act
    QueryPage<ModelSnapshot> clearedPage = TransportGetModelSnapshotsAction.clearQuantiles(inputPage);

    // assert: same number of snapshots, none with quantiles
    assertEquals(2, clearedPage.results().size());
    clearedPage.results().forEach(snapshot -> assertNull(snapshot.getQuantiles()));
}
}