-
Notifications
You must be signed in to change notification settings - Fork 25.2k
Avoid sending duplicate remote failed shard requests #31313
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
99562c0
251d44c
71a2db4
6a704c7
7601a13
19baf31
dcdaba7
a102531
56da910
496a6bd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,11 +22,11 @@ | |
import com.carrotsearch.hppc.cursors.ObjectCursor; | ||
import org.apache.lucene.index.CorruptIndexException; | ||
import org.elasticsearch.Version; | ||
import org.elasticsearch.cluster.action.shard.ShardStateAction.FailedShardEntry; | ||
import org.elasticsearch.cluster.ClusterName; | ||
import org.elasticsearch.cluster.ClusterState; | ||
import org.elasticsearch.cluster.ClusterStateTaskExecutor; | ||
import org.elasticsearch.cluster.ESAllocationTestCase; | ||
import org.elasticsearch.cluster.action.shard.ShardStateAction.FailedShardEntry; | ||
import org.elasticsearch.cluster.metadata.IndexMetaData; | ||
import org.elasticsearch.cluster.metadata.MetaData; | ||
import org.elasticsearch.cluster.node.DiscoveryNodes; | ||
|
@@ -52,10 +52,10 @@ | |
import java.util.ArrayList; | ||
import java.util.Arrays; | ||
import java.util.Collections; | ||
import java.util.IdentityHashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Set; | ||
import java.util.function.Function; | ||
import java.util.stream.Collectors; | ||
import java.util.stream.IntStream; | ||
|
||
|
@@ -131,9 +131,14 @@ ClusterState applyFailedShards(ClusterState currentState, List<FailedShard> fail | |
tasks.addAll(failingTasks); | ||
tasks.addAll(nonExistentTasks); | ||
ClusterStateTaskExecutor.ClusterTasksResult<FailedShardEntry> result = failingExecutor.execute(currentState, tasks); | ||
Map<FailedShardEntry, ClusterStateTaskExecutor.TaskResult> taskResultMap = | ||
failingTasks.stream().collect(Collectors.toMap(Function.identity(), task -> ClusterStateTaskExecutor.TaskResult.failure(new RuntimeException("simulated applyFailedShards failure")))); | ||
taskResultMap.putAll(nonExistentTasks.stream().collect(Collectors.toMap(Function.identity(), task -> ClusterStateTaskExecutor.TaskResult.success()))); | ||
Map<FailedShardEntry, ClusterStateTaskExecutor.TaskResult> taskResultMap = new IdentityHashMap<>(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what's the reason for turning this into an identity map? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Previously we did not override equals/hashCode on FailedShardEntry, but now we do. I used an identity map because we verify that the size of the resultMap equals the number of tasks. I replaced these maps with a list. I think it's clearer now. |
||
for (FailedShardEntry failingTask : failingTasks) { | ||
taskResultMap.put(failingTask, | ||
ClusterStateTaskExecutor.TaskResult.failure(new RuntimeException("simulated applyFailedShards failure"))); | ||
} | ||
for (FailedShardEntry nonExistentTask : nonExistentTasks) { | ||
taskResultMap.put(nonExistentTask, ClusterStateTaskExecutor.TaskResult.success()); | ||
} | ||
assertTaskResults(taskResultMap, result, currentState, false); | ||
} | ||
|
||
|
@@ -147,12 +152,13 @@ public void testIllegalShardFailureRequests() throws Exception { | |
tasks.add(new FailedShardEntry(failingTask.shardId, failingTask.allocationId, | ||
randomIntBetween(1, (int) primaryTerm - 1), failingTask.message, failingTask.failure, randomBoolean())); | ||
} | ||
Map<FailedShardEntry, ClusterStateTaskExecutor.TaskResult> taskResultMap = | ||
tasks.stream().collect(Collectors.toMap( | ||
Function.identity(), | ||
task -> ClusterStateTaskExecutor.TaskResult.failure(new ShardStateAction.NoLongerPrimaryShardException(task.shardId, | ||
Map<FailedShardEntry, ClusterStateTaskExecutor.TaskResult> taskResultMap = new IdentityHashMap<>(); | ||
for (FailedShardEntry task : tasks) { | ||
taskResultMap.put(task, | ||
ClusterStateTaskExecutor.TaskResult.failure(new ShardStateAction.NoLongerPrimaryShardException(task.shardId, | ||
"primary term [" + task.primaryTerm + "] did not match current primary term [" + | ||
currentState.metaData().index(task.shardId.getIndex()).primaryTerm(task.shardId.id()) + "]")))); | ||
currentState.metaData().index(task.shardId.getIndex()).primaryTerm(task.shardId.id()) + "]"))); | ||
} | ||
ClusterStateTaskExecutor.ClusterTasksResult<FailedShardEntry> result = executor.execute(currentState, tasks); | ||
assertTaskResults(taskResultMap, result, currentState, false); | ||
} | ||
|
@@ -251,8 +257,10 @@ private static void assertTasksSuccessful( | |
ClusterState clusterState, | ||
boolean clusterStateChanged | ||
) { | ||
Map<ShardStateAction.FailedShardEntry, ClusterStateTaskExecutor.TaskResult> taskResultMap = | ||
tasks.stream().collect(Collectors.toMap(Function.identity(), task -> ClusterStateTaskExecutor.TaskResult.success())); | ||
Map<ShardStateAction.FailedShardEntry, ClusterStateTaskExecutor.TaskResult> taskResultMap = new IdentityHashMap<>(); | ||
for (FailedShardEntry task : tasks) { | ||
taskResultMap.put(task, ClusterStateTaskExecutor.TaskResult.success()); | ||
} | ||
assertTaskResults(taskResultMap, result, clusterState, clusterStateChanged); | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Where is this exception going to end up?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's an interesting question. It will be reported as unhandled by the transport threads. The same thing should happen with the current implementation.