Skip to content

Commit 0a28f93

Browse files
committed
Increase timeout for relocation tests (#46554)
There's nothing wrong in the logs from these failures. I think 30 seconds might not be enough to relocate shards with many documents, as CI is quite slow. This change increases the timeout to 60 seconds for these relocation tests. It also dumps the hot threads when a timeout occurs. Closes #46526 Closes #46439
1 parent f5a2971 commit 0a28f93

File tree

3 files changed

+23
-8
lines changed

3 files changed

+23
-8
lines changed

server/src/test/java/org/elasticsearch/indices/recovery/IndexPrimaryRelocationIT.java

+15-3
Original file line numberDiff line numberDiff line change
@@ -21,21 +21,23 @@
2121

2222
import org.elasticsearch.action.DocWriteResponse;
2323
import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
24+
import org.elasticsearch.action.admin.cluster.node.hotthreads.NodeHotThreads;
2425
import org.elasticsearch.action.delete.DeleteResponse;
2526
import org.elasticsearch.action.index.IndexResponse;
2627
import org.elasticsearch.cluster.ClusterState;
2728
import org.elasticsearch.cluster.node.DiscoveryNode;
2829
import org.elasticsearch.cluster.routing.allocation.command.MoveAllocationCommand;
2930
import org.elasticsearch.common.Priority;
3031
import org.elasticsearch.common.settings.Settings;
32+
import org.elasticsearch.common.unit.TimeValue;
3133
import org.elasticsearch.index.query.QueryBuilders;
3234
import org.elasticsearch.test.ESIntegTestCase;
3335
import org.elasticsearch.test.hamcrest.ElasticsearchAssertions;
3436

3537
import java.util.concurrent.atomic.AtomicBoolean;
3638
import java.util.concurrent.atomic.AtomicInteger;
39+
import java.util.stream.Collectors;
3740

38-
import static org.hamcrest.Matchers.equalTo;
3941

4042
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST)
4143
public class IndexPrimaryRelocationIT extends ESIntegTestCase {
@@ -54,7 +56,7 @@ public void testPrimaryRelocationWhileIndexing() throws Exception {
5456
Thread indexingThread = new Thread() {
5557
@Override
5658
public void run() {
57-
while (finished.get() == false) {
59+
while (finished.get() == false && numAutoGenDocs.get() < 10_000) {
5860
IndexResponse indexResponse = client().prepareIndex("test", "type", "id").setSource("field", "value").get();
5961
assertEquals(DocWriteResponse.Result.CREATED, indexResponse.getResult());
6062
DeleteResponse deleteResponse = client().prepareDelete("test", "type", "id").get();
@@ -80,8 +82,18 @@ public void run() {
8082
.add(new MoveAllocationCommand("test", 0, relocationSource.getId(), relocationTarget.getId()))
8183
.execute().actionGet();
8284
ClusterHealthResponse clusterHealthResponse = client().admin().cluster().prepareHealth()
85+
.setTimeout(TimeValue.timeValueSeconds(60))
8386
.setWaitForEvents(Priority.LANGUID).setWaitForNoRelocatingShards(true).execute().actionGet();
84-
assertThat(clusterHealthResponse.isTimedOut(), equalTo(false));
87+
if (clusterHealthResponse.isTimedOut()) {
88+
final String hotThreads = client().admin().cluster().prepareNodesHotThreads().setIgnoreIdleThreads(false).get().getNodes()
89+
.stream().map(NodeHotThreads::getHotThreads).collect(Collectors.joining("\n"));
90+
final ClusterState clusterState = client().admin().cluster().prepareState().get().getState();
91+
logger.info("timed out for waiting for relocation iteration [{}] \ncluster state {} \nhot threads {}",
92+
i, clusterState, hotThreads);
93+
finished.set(true);
94+
indexingThread.join();
95+
throw new AssertionError("timed out waiting for relocation iteration [" + i + "] ");
96+
}
8597
logger.info("--> [iteration {}] relocation complete", i);
8698
relocationSource = relocationTarget;
8799
// indexing process aborted early, no need for more relocations as test has already failed

server/src/test/java/org/elasticsearch/recovery/RelocationIT.java

+2-3
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,6 @@
7878
import java.util.Collection;
7979
import java.util.List;
8080
import java.util.concurrent.CountDownLatch;
81-
import java.util.concurrent.ExecutionException;
8281
import java.util.concurrent.Semaphore;
8382
import java.util.concurrent.TimeUnit;
8483
import java.util.stream.Stream;
@@ -446,7 +445,7 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IO
446445
}
447446
}
448447

449-
public void testIndexAndRelocateConcurrently() throws ExecutionException, InterruptedException {
448+
public void testIndexAndRelocateConcurrently() throws Exception {
450449
int halfNodes = randomIntBetween(1, 3);
451450
Settings[] nodeSettings = Stream.concat(
452451
Stream.generate(() -> Settings.builder().put("node.attr.color", "blue").build()).limit(halfNodes),
@@ -494,7 +493,7 @@ public void testIndexAndRelocateConcurrently() throws ExecutionException, Interr
494493
numDocs *= 2;
495494

496495
logger.info(" --> waiting for relocation to complete");
497-
ensureGreen("test"); // move all shards to the new nodes (it waits on relocation)
496+
ensureGreen(TimeValue.timeValueSeconds(60), "test"); // move all shards to the new nodes (it waits on relocation)
498497

499498
final int numIters = randomIntBetween(10, 20);
500499
for (int i = 0; i < numIters; i++) {

test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java

+6-2
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import org.elasticsearch.action.DocWriteResponse;
3434
import org.elasticsearch.action.admin.cluster.health.ClusterHealthRequest;
3535
import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
36+
import org.elasticsearch.action.admin.cluster.node.hotthreads.NodeHotThreads;
3637
import org.elasticsearch.action.admin.cluster.node.info.NodeInfo;
3738
import org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse;
3839
import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse;
@@ -918,10 +919,13 @@ private ClusterHealthStatus ensureColor(ClusterHealthStatus clusterHealthStatus,
918919

919920
ClusterHealthResponse actionGet = client().admin().cluster().health(healthRequest).actionGet();
920921
if (actionGet.isTimedOut()) {
921-
logger.info("{} timed out, cluster state:\n{}\n{}",
922+
final String hotThreads = client().admin().cluster().prepareNodesHotThreads().setIgnoreIdleThreads(false).get().getNodes()
923+
.stream().map(NodeHotThreads::getHotThreads).collect(Collectors.joining("\n"));
924+
logger.info("{} timed out, cluster state:\n{}\npending tasks:\n{}\nhot threads:\n{}\n",
922925
method,
923926
client().admin().cluster().prepareState().get().getState(),
924-
client().admin().cluster().preparePendingClusterTasks().get());
927+
client().admin().cluster().preparePendingClusterTasks().get(),
928+
hotThreads);
925929
fail("timed out waiting for " + color + " state");
926930
}
927931
assertThat("Expected at least " + clusterHealthStatus + " but got " + actionGet.getStatus(),

0 commit comments

Comments
 (0)