Skip to content

Commit 918dfaf

Browse files
Increase disruption test publish timeout to 5s (#51803)
With the new mechanism for storing cluster state in Lucene, we store index metadata in multiple data paths too. This causes cluster state publication to time out too frequently with a 1s timeout, so this commit increases the publish timeout to 5s. It also increases the follower check timeout to 5s, since that path sometimes includes an fsync as well, and increases the leader check timeout to 5s for symmetry. Closes #51329
1 parent 8138805 commit 918dfaf

File tree

3 files changed

+5
-9
lines changed

3 files changed

+5
-9
lines changed

server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -123,12 +123,12 @@ List<String> startCluster(int numberOfNodes) {
123123
}
124124

125125
static final Settings DEFAULT_SETTINGS = Settings.builder()
126-
.put(LeaderChecker.LEADER_CHECK_TIMEOUT_SETTING.getKey(), "1s") // for hitting simulated network failures quickly
126+
.put(LeaderChecker.LEADER_CHECK_TIMEOUT_SETTING.getKey(), "5s") // for hitting simulated network failures quickly
127127
.put(LeaderChecker.LEADER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) // for hitting simulated network failures quickly
128-
.put(FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING.getKey(), "1s") // for hitting simulated network failures quickly
128+
.put(FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING.getKey(), "5s") // for hitting simulated network failures quickly
129129
.put(FollowersChecker.FOLLOWER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) // for hitting simulated network failures quickly
130130
.put(JoinHelper.JOIN_TIMEOUT_SETTING.getKey(), "10s") // still long enough to induce failures, but not too long, so the test won't time out
131-
.put(Coordinator.PUBLISH_TIMEOUT_SETTING.getKey(), "1s") // <-- for hitting simulated network failures quickly
131+
.put(Coordinator.PUBLISH_TIMEOUT_SETTING.getKey(), "5s") // <-- for hitting simulated network failures quickly
132132
.put(TransportSettings.CONNECT_TIMEOUT.getKey(), "10s") // Network delay disruption waits for the min between this
133133
// value and the time of disruption and does not recover immediately
134134
// when disruption is stopped. We should make sure we recover faster

server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionIT.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -107,9 +107,8 @@ static ConflictMode randomMode() {
107107
@TestIssueLogging(value = "_root:DEBUG,org.elasticsearch.action.bulk:TRACE,org.elasticsearch.action.get:TRACE," +
108108
"org.elasticsearch.discovery:TRACE,org.elasticsearch.action.support.replication:TRACE," +
109109
"org.elasticsearch.cluster.service:TRACE,org.elasticsearch.indices.recovery:TRACE," +
110-
"org.elasticsearch.indices.cluster:TRACE,org.elasticsearch.index.shard:TRACE," +
111-
"org.elasticsearch.gateway.PersistedClusterStateService:TRACE",
112-
issueUrl = "https://github.com/elastic/elasticsearch/issues/41068,https://github.com/elastic/elasticsearch/issues/51329")
110+
"org.elasticsearch.indices.cluster:TRACE,org.elasticsearch.index.shard:TRACE",
111+
issueUrl = "https://github.com/elastic/elasticsearch/issues/41068")
113112
public void testAckedIndexing() throws Exception {
114113

115114
final int seconds = !(TEST_NIGHTLY && rarely()) ? 1 : 5;

server/src/test/java/org/elasticsearch/versioning/ConcurrentSeqNoVersioningIT.java

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@
3838
import org.elasticsearch.index.engine.VersionConflictEngineException;
3939
import org.elasticsearch.test.ESIntegTestCase;
4040
import org.elasticsearch.test.disruption.ServiceDisruptionScheme;
41-
import org.elasticsearch.test.junit.annotations.TestIssueLogging;
4241
import org.elasticsearch.threadpool.Scheduler;
4342
import org.elasticsearch.threadpool.ThreadPool;
4443

@@ -132,8 +131,6 @@ public class ConcurrentSeqNoVersioningIT extends AbstractDisruptionTestCase {
132131
// multiple threads doing CAS updates.
133132
// Wait up to 1 minute (+10s in thread to ensure it does not time out) for threads to complete previous round before initiating next
134133
// round.
135-
@TestIssueLogging(value = "org.elasticsearch.gateway.PersistedClusterStateService:TRACE",
136-
issueUrl = "https://github.com/elastic/elasticsearch/issues/51329")
137134
public void testSeqNoCASLinearizability() {
138135
final int disruptTimeSeconds = scaledRandomIntBetween(1, 8);
139136

0 commit comments

Comments
 (0)