Skip to content

[ML] Add logging for failing PyTorch test #81044

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -234,20 +234,22 @@ public void testDeploymentStats() throws IOException {
// Helper lambda for testDeploymentStats: starts a deployment for modelId (passing the requested
// allocation status as a string), then checks the trained-model stats response, first in its
// default form and then with the ?human flag.
// NOTE(review): this span is a rendered diff without +/- markers (hunk header: 20 old lines,
// 22 new lines), so several statements appear twice. Presumably the variants that go through
// responseMap / humanResponseMap are the added ones — confirm against the merged file.
CheckedBiConsumer<String, AllocationStatus.State, IOException> assertAtLeast = (modelId, state) -> {
startDeployment(modelId, state.toString());
Response response = getTrainedModelStats(modelId);
List<Map<String, Object>> stats = (List<Map<String, Object>>) entityAsMap(response).get("trained_model_stats");
// Keep the parsed response in a variable so it can be reused as the assertion failure message below.
var responseMap = entityAsMap(response);
List<Map<String, Object>> stats = (List<Map<String, Object>>) responseMap.get("trained_model_stats");
// Exactly one stats entry is expected for the requested model id.
assertThat(stats, hasSize(1));
String statusState = (String) XContentMapValues.extractValue("deployment_stats.allocation_status.state", stats.get(0));
assertThat(stats.toString(), statusState, is(not(nullValue())));
// The first argument is the failure reason: here the whole response map is printed if the state is missing.
assertThat(responseMap.toString(), statusState, is(not(nullValue())));
// The reported allocation status must be at least the state the deployment was started with.
assertThat(AllocationStatus.State.fromString(statusState), greaterThanOrEqualTo(state));
Integer byteSize = (Integer) XContentMapValues.extractValue("deployment_stats.model_size_bytes", stats.get(0));
assertThat(byteSize, is(not(nullValue())));
assertThat(responseMap.toString(), byteSize, is(not(nullValue())));
// RAW_MODEL_SIZE is narrowed to int to match the Integer extracted from the response.
assertThat(byteSize, equalTo((int) RAW_MODEL_SIZE));

// Repeat the stats request with ?human and check the human-readable size field.
Response humanResponse = client().performRequest(new Request("GET", "/_ml/trained_models/" + modelId + "/_stats?human"));
stats = (List<Map<String, Object>>) entityAsMap(humanResponse).get("trained_model_stats");
var humanResponseMap = entityAsMap(humanResponse);
stats = (List<Map<String, Object>>) humanResponseMap.get("trained_model_stats");
assertThat(stats, hasSize(1));
String stringBytes = (String) XContentMapValues.extractValue("deployment_stats.model_size", stats.get(0));
assertThat(stringBytes, is(not(nullValue())));
// On failure, print both the plain and the human stats responses for comparison.
assertThat("stats response: " + responseMap + " human stats response" + humanResponseMap, stringBytes, is(not(nullValue())));
assertThat(stringBytes, equalTo("1.5kb"));
// NOTE(review): this stops `model`, not the lambda's `modelId` parameter; `model` is not declared
// in the visible span — confirm this is intended.
stopDeployment(model);
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

package org.elasticsearch.xpack.ml.action;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.FailedNodeException;
import org.elasticsearch.action.TaskOperationFailure;
Expand Down Expand Up @@ -54,6 +56,8 @@ public class TransportGetDeploymentStatsAction extends TransportTasksAction<
GetDeploymentStatsAction.Response,
AllocationStats> {

private static final Logger logger = LogManager.getLogger(TransportGetDeploymentStatsAction.class);

@Inject
public TransportGetDeploymentStatsAction(
TransportService transportService,
Expand Down Expand Up @@ -129,9 +133,6 @@ protected void doExecute(
}
}

// check request has been satisfied
ExpandedIdsMatcher requiredIdsMatcher = new ExpandedIdsMatcher(tokenizedRequestIds, true);
requiredIdsMatcher.filterMatchedIds(matchedDeploymentIds);
if (matchedDeploymentIds.isEmpty()) {
listener.onResponse(
new GetDeploymentStatsAction.Response(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), 0L)
Expand All @@ -154,8 +155,7 @@ protected void doExecute(
.collect(Collectors.toList());
// Set the allocation state and reason if we have it
for (AllocationStats stats : updatedResponse.getStats().results()) {
Optional<TrainedModelAllocation> modelAllocation = Optional.ofNullable(allocation.getModelAllocation(stats.getModelId()));
TrainedModelAllocation trainedModelAllocation = modelAllocation.orElse(null);
TrainedModelAllocation trainedModelAllocation = allocation.getModelAllocation(stats.getModelId());
if (trainedModelAllocation != null) {
stats.setState(trainedModelAllocation.getAllocationState()).setReason(trainedModelAllocation.getReason().orElse(null));
if (trainedModelAllocation.getAllocationState().isAnyOf(AllocationState.STARTED, AllocationState.STARTING)) {
Expand Down Expand Up @@ -274,6 +274,8 @@ static GetDeploymentStatsAction.Response addFailedRoutes(

nodeStats.sort(Comparator.comparing(n -> n.getNode().getId()));

// debug logging added for https://github.com/elastic/elasticsearch/issues/80819
logger.debug("[{}] deployment stats for non-started deployment", modelId);
updatedAllocationStats.add(new AllocationStats(modelId, null, null, null, null, allocation.getStartTime(), nodeStats));
}
}
Expand Down