Increase disruption test publish timeout to 5s (#51803)

henningandersen · henningandersen · commit 918dfaff1fbf · 2020-02-03T13:57:57.000+01:00
With the new mechanism for storing cluster state in Lucene, we store index metadata in multiple data paths too. This causes cluster state publication to time out too frequently with a 1s timeout, so this change increases the publish timeout to 5s. It also increases the follower check timeout to 5s, since that check sometimes has an fsync in its timeout path as well, and increases the leader check timeout to 5s for symmetry. Closes #51329
diff --git a/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java b/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java
@@ -123,12 +123,12 @@ List<String> startCluster(int numberOfNodes) {
     }
 
     static final Settings DEFAULT_SETTINGS = Settings.builder()
-            .put(LeaderChecker.LEADER_CHECK_TIMEOUT_SETTING.getKey(), "1s") // for hitting simulated network failures quickly
+            .put(LeaderChecker.LEADER_CHECK_TIMEOUT_SETTING.getKey(), "5s") // for hitting simulated network failures quickly
             .put(LeaderChecker.LEADER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) // for hitting simulated network failures quickly
-            .put(FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING.getKey(), "1s") // for hitting simulated network failures quickly
+            .put(FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING.getKey(), "5s") // for hitting simulated network failures quickly
             .put(FollowersChecker.FOLLOWER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) // for hitting simulated network failures quickly
             .put(JoinHelper.JOIN_TIMEOUT_SETTING.getKey(), "10s") // still long to induce failures but to long so test won't time out
-            .put(Coordinator.PUBLISH_TIMEOUT_SETTING.getKey(), "1s") // <-- for hitting simulated network failures quickly
+            .put(Coordinator.PUBLISH_TIMEOUT_SETTING.getKey(), "5s") // <-- for hitting simulated network failures quickly
             .put(TransportSettings.CONNECT_TIMEOUT.getKey(), "10s") // Network delay disruption waits for the min between this
             // value and the time of disruption and does not recover immediately
             // when disruption is stop. We should make sure we recover faster
diff --git a/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionIT.java b/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionIT.java
@@ -107,9 +107,8 @@ static ConflictMode randomMode() {
     @TestIssueLogging(value = "_root:DEBUG,org.elasticsearch.action.bulk:TRACE,org.elasticsearch.action.get:TRACE," +
         "org.elasticsearch.discovery:TRACE,org.elasticsearch.action.support.replication:TRACE," +
         "org.elasticsearch.cluster.service:TRACE,org.elasticsearch.indices.recovery:TRACE," +
-        "org.elasticsearch.indices.cluster:TRACE,org.elasticsearch.index.shard:TRACE," +
-        "org.elasticsearch.gateway.PersistedClusterStateService:TRACE",
-        issueUrl = "https://github.com/elastic/elasticsearch/issues/41068,https://github.com/elastic/elasticsearch/issues/51329")
+        "org.elasticsearch.indices.cluster:TRACE,org.elasticsearch.index.shard:TRACE",
+        issueUrl = "https://github.com/elastic/elasticsearch/issues/41068")
     public void testAckedIndexing() throws Exception {
 
         final int seconds = !(TEST_NIGHTLY && rarely()) ? 1 : 5;
diff --git a/server/src/test/java/org/elasticsearch/versioning/ConcurrentSeqNoVersioningIT.java b/server/src/test/java/org/elasticsearch/versioning/ConcurrentSeqNoVersioningIT.java
@@ -38,7 +38,6 @@
 import org.elasticsearch.index.engine.VersionConflictEngineException;
 import org.elasticsearch.test.ESIntegTestCase;
 import org.elasticsearch.test.disruption.ServiceDisruptionScheme;
-import org.elasticsearch.test.junit.annotations.TestIssueLogging;
 import org.elasticsearch.threadpool.Scheduler;
 import org.elasticsearch.threadpool.ThreadPool;
 
@@ -132,8 +131,6 @@ public class ConcurrentSeqNoVersioningIT extends AbstractDisruptionTestCase {
     // multiple threads doing CAS updates.
     // Wait up to 1 minute (+10s in thread to ensure it does not time out) for threads to complete previous round before initiating next
     // round.
-    @TestIssueLogging(value = "org.elasticsearch.gateway.PersistedClusterStateService:TRACE",
-        issueUrl = "https://github.com/elastic/elasticsearch/issues/51329")
     public void testSeqNoCASLinearizability() {
         final int disruptTimeSeconds = scaledRandomIntBetween(1, 8);