Skip to content

Commit c41357b

Browse files
authored
Log more info when search ops higher than expected (#61108)
We have seen a situation where the total search operations are higher than expected. Unfortunately, we did not have enough info to figure it out. This commit adds the failures to the error to provide more context and adjusts the log level in case of failure to debug.
1 parent 3ad4ee7 commit c41357b

File tree

1 file changed

+9
-25
lines changed

1 file changed

+9
-25
lines changed

server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java

Lines changed: 9 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -377,36 +377,20 @@ private void onShardFailure(final int shardIndex, @Nullable ShardRouting shard,
377377
onShardFailure(shardIndex, shardTarget, e);
378378
final ShardRouting nextShard = shardIt.nextOrNull();
379379
final boolean lastShard = nextShard == null;
380+
logger.debug(() -> new ParameterizedMessage("{}: Failed to execute [{}] lastShard [{}]",
381+
shard != null ? shard.shortSummary() : shardIt.shardId(), request, lastShard), e);
380382
if (lastShard) {
381383
onShardGroupFailure(shardIndex, shardTarget, e);
382384
}
383-
384-
if (totalOps.incrementAndGet() == expectedTotalOps) {
385-
if (logger.isDebugEnabled()) {
386-
if (e != null && !TransportActions.isShardNotAvailableException(e)) {
387-
logger.debug(new ParameterizedMessage(
388-
"{}: Failed to execute [{}]", shard != null ? shard.shortSummary() : shardIt.shardId(), request), e);
389-
} else if (logger.isTraceEnabled()) {
390-
logger.trace(new ParameterizedMessage("{}: Failed to execute [{}]", shard, request), e);
391-
}
392-
}
385+
final int totalOps = this.totalOps.incrementAndGet();
386+
if (totalOps == expectedTotalOps) {
393387
onPhaseDone();
388+
} else if (totalOps > expectedTotalOps) {
389+
throw new AssertionError("unexpected higher total ops [" + totalOps + "] compared to expected [" + expectedTotalOps + "]",
390+
new SearchPhaseExecutionException(getName(), "Shard failures", null, buildShardFailures()));
394391
} else {
395-
// trace log this exception
396-
logger.trace(() -> new ParameterizedMessage(
397-
"{}: Failed to execute [{}] lastShard [{}]",
398-
shard != null ? shard.shortSummary() : shardIt.shardId(), request, lastShard), e);
399392
if (lastShard == false) {
400393
performPhaseOnShard(shardIndex, shardIt, nextShard);
401-
} else {
402-
// no more shards active, add a failure
403-
if (logger.isDebugEnabled() && !logger.isTraceEnabled()) { // do not double log this exception
404-
if (e != null && !TransportActions.isShardNotAvailableException(e)) {
405-
logger.debug(new ParameterizedMessage(
406-
"{}: Failed to execute [{}] lastShard [{}]",
407-
shard != null ? shard.shortSummary() : shardIt.shardId(), request, lastShard), e);
408-
}
409-
}
410394
}
411395
}
412396
}
@@ -505,8 +489,8 @@ private void successfulShardExecution(SearchShardIterator shardsIt) {
505489
if (xTotalOps == expectedTotalOps) {
506490
onPhaseDone();
507491
} else if (xTotalOps > expectedTotalOps) {
508-
throw new AssertionError("unexpected higher total ops [" + xTotalOps + "] compared to expected ["
509-
+ expectedTotalOps + "]");
492+
throw new AssertionError("unexpected higher total ops [" + xTotalOps + "] compared to expected [" + expectedTotalOps + "]",
493+
new SearchPhaseExecutionException(getName(), "Shard failures", null, buildShardFailures()));
510494
}
511495
}
512496

0 commit comments

Comments
 (0)