
Commit f243ff3

[ML] Increase assertBusy timeout in ML node failure tests (#52425)
Following the change to store cluster state in Lucene indices (#50907), it can take longer for all the cluster state updates associated with node failure scenarios to be processed during internal cluster tests, where several nodes all run in the same JVM.
1 parent: 0563065

1 file changed (+5 -1)


x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/integration/MlDistributedFailureIT.java

Lines changed: 5 additions & 1 deletion
@@ -449,6 +449,10 @@ private void run(String jobId, CheckedRunnable<Exception> disrupt) throws Exception
         // else.
         persistentTasksClusterService.setRecheckInterval(TimeValue.timeValueMillis(200));
 
+        // The timeout here was increased from 10 seconds to 20 seconds in response to the changes in
+        // https://github.com/elastic/elasticsearch/pull/50907 - now that the cluster state is stored
+        // in a Lucene index it can take a while to update when there are many updates in quick
+        // succession, like we see in internal cluster tests of node failure scenarios
         assertBusy(() -> {
             ClusterState clusterState = client().admin().cluster().prepareState().get().getState();
             PersistentTasksCustomMetaData tasks = clusterState.metaData().custom(PersistentTasksCustomMetaData.TYPE);
@@ -469,7 +473,7 @@ private void run(String jobId, CheckedRunnable<Exception> disrupt) throws Exception
             .getResponse().results().get(0);
         assertEquals(DatafeedState.STARTED, datafeedStats.getDatafeedState());
         assertNotNull(datafeedStats.getNode());
-        });
+        }, 20, TimeUnit.SECONDS);
 
         long numDocs2 = randomIntBetween(2, 64);
         long now2 = System.currentTimeMillis();
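
For context on the helper being tuned here: assertBusy comes from Elasticsearch's ESTestCase base class and repeatedly runs an assertion block until it passes or an overall timeout elapses; the single-argument overload waits at most 10 seconds, which is why the fix switches to the overload that takes an explicit timeout and TimeUnit. Below is a minimal, hypothetical sketch of that retry-until-timeout pattern, not the actual ESTestCase implementation; the class name, backoff constants, and the demo in main are illustrative assumptions.

    import java.util.concurrent.TimeUnit;

    // Hypothetical, simplified sketch of the behaviour behind ESTestCase.assertBusy:
    // run the assertion repeatedly, backing off between attempts, until it passes
    // or the overall timeout (e.g. the 20 seconds chosen in this commit) elapses.
    public final class AssertBusySketch {

        @FunctionalInterface
        public interface CheckedRunnable<E extends Exception> {
            void run() throws E;
        }

        public static void assertBusy(CheckedRunnable<Exception> codeBlock,
                                      long maxWaitTime, TimeUnit unit) throws Exception {
            long deadlineNanos = System.nanoTime() + unit.toNanos(maxWaitTime);
            long sleepMillis = 1;
            AssertionError lastFailure;
            do {
                try {
                    codeBlock.run();
                    return; // assertion passed within the allowed time
                } catch (AssertionError e) {
                    lastFailure = e; // remember the failure and retry after a pause
                }
                Thread.sleep(sleepMillis);
                sleepMillis = Math.min(sleepMillis * 2, 1_000); // capped exponential backoff
            } while (System.nanoTime() < deadlineNanos);
            throw lastFailure; // timed out: surface the most recent assertion failure
        }

        public static void main(String[] args) throws Exception {
            long start = System.currentTimeMillis();
            // Passes once at least two seconds have elapsed; well within a 20-second budget.
            assertBusy(() -> {
                if (System.currentTimeMillis() - start < 2_000) {
                    throw new AssertionError("cluster state not yet converged");
                }
            }, 20, TimeUnit.SECONDS);
            System.out.println("condition met after ~2s");
        }
    }

With a loop of this shape, raising the budget from 10 to 20 seconds simply allows more retry iterations before the most recent AssertionError is rethrown, which is what the slower Lucene-backed cluster state publication needs in these several-nodes-in-one-JVM tests.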
