Skip to content

Commit e5251bc

Browse files
[ML] Skip execution of timed out inference requests waiting in queue (#80087)
If, by the time we get to execute an inference request, the action has already been notified, it means the request timed out while waiting in the queue. We should return early from the `doRun` method to avoid unnecessary work.
1 parent 5a41fa4 commit e5251bc

File tree

1 file changed

+19
-0
lines changed

1 file changed

+19
-0
lines changed

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/deployment/DeploymentManager.java

+19
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,14 @@ public void onFailure(Exception e) {
326326

327327
@Override
328328
protected void doRun() throws Exception {
329+
if (notified.get()) {
330+
// Should not execute request as it has already timed out while waiting in the queue
331+
logger.debug(
332+
() -> new ParameterizedMessage("[{}] skipping inference on request [{}] as it has timed out", modelId, requestId)
333+
);
334+
return;
335+
}
336+
329337
final String requestIdStr = String.valueOf(requestId);
330338
try {
331339
// The request builder expects a list of inputs which are then batched.
@@ -378,6 +386,17 @@ private void processResult(
378386
logger.debug(
379387
() -> new ParameterizedMessage("[{}] retrieved result for request [{}]", processContext.task.getModelId(), requestId)
380388
);
389+
if (notified.get()) {
390+
// The request has timed out. No need to spend cycles processing the result.
391+
logger.debug(
392+
() -> new ParameterizedMessage(
393+
"[{}] skipping result processing for request [{}] as the request has timed out",
394+
processContext.task.getModelId(),
395+
requestId
396+
)
397+
);
398+
return;
399+
}
381400
InferenceResults results = inferenceResultsProcessor.processResult(tokenization, pyTorchResult);
382401
logger.debug(
383402
() -> new ParameterizedMessage("[{}] processed result for request [{}]", processContext.task.getModelId(), requestId)

0 commit comments

Comments (0)