Skip to content

Commit 16ac1b1

Browse files
authored
Increase timeout for relocation tests (#46554)
There's nothing wrong in the logs from these failures. I think 30 seconds might not be enough to relocate shards with many documents, as CI is quite slow. This change increases the timeout to 60 seconds for these relocation tests. It also dumps the hot threads when a wait times out. Closes #46526 Closes #46439
1 parent 5eeb3c5 commit 16ac1b1

File tree

3 files changed

+24
-10
lines changed

3 files changed

+24
-10
lines changed

server/src/test/java/org/elasticsearch/indices/recovery/IndexPrimaryRelocationIT.java

+15-5
Original file line numberDiff line numberDiff line change
@@ -19,32 +19,32 @@
1919

2020
package org.elasticsearch.indices.recovery;
2121

22-
import org.apache.lucene.util.Constants;
2322
import org.elasticsearch.action.DocWriteResponse;
2423
import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
24+
import org.elasticsearch.action.admin.cluster.node.hotthreads.NodeHotThreads;
2525
import org.elasticsearch.action.delete.DeleteResponse;
2626
import org.elasticsearch.action.index.IndexResponse;
2727
import org.elasticsearch.cluster.ClusterState;
2828
import org.elasticsearch.cluster.node.DiscoveryNode;
2929
import org.elasticsearch.cluster.routing.allocation.command.MoveAllocationCommand;
3030
import org.elasticsearch.common.Priority;
3131
import org.elasticsearch.common.settings.Settings;
32+
import org.elasticsearch.common.unit.TimeValue;
3233
import org.elasticsearch.index.query.QueryBuilders;
3334
import org.elasticsearch.test.ESIntegTestCase;
3435
import org.elasticsearch.test.hamcrest.ElasticsearchAssertions;
3536

3637
import java.util.concurrent.atomic.AtomicBoolean;
3738
import java.util.concurrent.atomic.AtomicInteger;
39+
import java.util.stream.Collectors;
3840

39-
import static org.hamcrest.Matchers.equalTo;
4041

4142
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST)
4243
public class IndexPrimaryRelocationIT extends ESIntegTestCase {
4344

4445
private static final int RELOCATION_COUNT = 15;
4546

4647
public void testPrimaryRelocationWhileIndexing() throws Exception {
47-
assumeFalse("https://github.com/elastic/elasticsearch/issues/46526", Constants.MAC_OS_X);
4848
internalCluster().ensureAtLeastNumDataNodes(randomIntBetween(2, 3));
4949
client().admin().indices().prepareCreate("test")
5050
.setSettings(Settings.builder().put("index.number_of_shards", 1).put("index.number_of_replicas", 0))
@@ -56,7 +56,7 @@ public void testPrimaryRelocationWhileIndexing() throws Exception {
5656
Thread indexingThread = new Thread() {
5757
@Override
5858
public void run() {
59-
while (finished.get() == false) {
59+
while (finished.get() == false && numAutoGenDocs.get() < 10_000) {
6060
IndexResponse indexResponse = client().prepareIndex("test", "type", "id").setSource("field", "value").get();
6161
assertEquals(DocWriteResponse.Result.CREATED, indexResponse.getResult());
6262
DeleteResponse deleteResponse = client().prepareDelete("test", "type", "id").get();
@@ -82,8 +82,18 @@ public void run() {
8282
.add(new MoveAllocationCommand("test", 0, relocationSource.getId(), relocationTarget.getId()))
8383
.execute().actionGet();
8484
ClusterHealthResponse clusterHealthResponse = client().admin().cluster().prepareHealth()
85+
.setTimeout(TimeValue.timeValueSeconds(60))
8586
.setWaitForEvents(Priority.LANGUID).setWaitForNoRelocatingShards(true).execute().actionGet();
86-
assertThat(clusterHealthResponse.isTimedOut(), equalTo(false));
87+
if (clusterHealthResponse.isTimedOut()) {
88+
final String hotThreads = client().admin().cluster().prepareNodesHotThreads().setIgnoreIdleThreads(false).get().getNodes()
89+
.stream().map(NodeHotThreads::getHotThreads).collect(Collectors.joining("\n"));
90+
final ClusterState clusterState = client().admin().cluster().prepareState().get().getState();
91+
logger.info("timed out for waiting for relocation iteration [{}] \ncluster state {} \nhot threads {}",
92+
i, clusterState, hotThreads);
93+
finished.set(true);
94+
indexingThread.join();
95+
throw new AssertionError("timed out waiting for relocation iteration [" + i + "] ");
96+
}
8797
logger.info("--> [iteration {}] relocation complete", i);
8898
relocationSource = relocationTarget;
8999
// indexing process aborted early, no need for more relocations as test has already failed

server/src/test/java/org/elasticsearch/recovery/RelocationIT.java

+2-3
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,6 @@
7878
import java.util.Collection;
7979
import java.util.List;
8080
import java.util.concurrent.CountDownLatch;
81-
import java.util.concurrent.ExecutionException;
8281
import java.util.concurrent.Semaphore;
8382
import java.util.concurrent.TimeUnit;
8483
import java.util.stream.Stream;
@@ -446,7 +445,7 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IO
446445
}
447446
}
448447

449-
public void testIndexAndRelocateConcurrently() throws ExecutionException, InterruptedException {
448+
public void testIndexAndRelocateConcurrently() throws Exception {
450449
int halfNodes = randomIntBetween(1, 3);
451450
Settings[] nodeSettings = Stream.concat(
452451
Stream.generate(() -> Settings.builder().put("node.attr.color", "blue").build()).limit(halfNodes),
@@ -494,7 +493,7 @@ public void testIndexAndRelocateConcurrently() throws ExecutionException, Interr
494493
numDocs *= 2;
495494

496495
logger.info(" --> waiting for relocation to complete");
497-
ensureGreen("test"); // move all shards to the new nodes (it waits on relocation)
496+
ensureGreen(TimeValue.timeValueSeconds(60), "test"); // move all shards to the new nodes (it waits on relocation)
498497

499498
final int numIters = randomIntBetween(10, 20);
500499
for (int i = 0; i < numIters; i++) {

test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java

+7-2
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import org.elasticsearch.action.DocWriteResponse;
3434
import org.elasticsearch.action.admin.cluster.health.ClusterHealthRequest;
3535
import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
36+
import org.elasticsearch.action.admin.cluster.node.hotthreads.NodeHotThreads;
3637
import org.elasticsearch.action.admin.cluster.node.info.NodeInfo;
3738
import org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse;
3839
import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse;
@@ -166,6 +167,7 @@
166167
import java.util.concurrent.atomic.AtomicLong;
167168
import java.util.function.BooleanSupplier;
168169
import java.util.function.Function;
170+
import java.util.stream.Collectors;
169171

170172
import static org.elasticsearch.client.Requests.syncedFlushRequest;
171173
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
@@ -880,10 +882,13 @@ private ClusterHealthStatus ensureColor(ClusterHealthStatus clusterHealthStatus,
880882

881883
ClusterHealthResponse actionGet = client().admin().cluster().health(healthRequest).actionGet();
882884
if (actionGet.isTimedOut()) {
883-
logger.info("{} timed out, cluster state:\n{}\n{}",
885+
final String hotThreads = client().admin().cluster().prepareNodesHotThreads().setIgnoreIdleThreads(false).get().getNodes()
886+
.stream().map(NodeHotThreads::getHotThreads).collect(Collectors.joining("\n"));
887+
logger.info("{} timed out, cluster state:\n{}\npending tasks:\n{}\nhot threads:\n{}\n",
884888
method,
885889
client().admin().cluster().prepareState().get().getState(),
886-
client().admin().cluster().preparePendingClusterTasks().get());
890+
client().admin().cluster().preparePendingClusterTasks().get(),
891+
hotThreads);
887892
fail("timed out waiting for " + color + " state");
888893
}
889894
assertThat("Expected at least " + clusterHealthStatus + " but got " + actionGet.getStatus(),

0 commit comments

Comments
 (0)