Skip to content

Commit d3aee75

Browse files
committed
Drain all disruption events in AbstractCoordinatorTestCase until all nodes are healthy.
Closes elastic#61711
1 parent b8a10b3 commit d3aee75

File tree

1 file changed

+12
-2
lines changed

1 file changed

+12
-2
lines changed

test/framework/src/main/java/org/elasticsearch/cluster/coordination/AbstractCoordinatorTestCase.java

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,14 @@ void runRandomly(boolean allowReboots, boolean coolDown, long delayVariability)
381381
}
382382
}
383383

384+
// Drain all the disruption events during the cool-down period until all nodes are healthy.
385+
// This prevents some edge cases where a disruption is scheduled to run some time after the stabilization period
386+
// (e.g. black-holed connections throw a disconnected exception after 1 day), preventing the cluster from reaching
387+
// a stable state. See #61711 for a particular instance of this scenario.
388+
if (finishTime != -1 && (disconnectedNodes.isEmpty() == false || blackholedNodes.isEmpty() == false)) {
389+
finishTime = deterministicTaskQueue.getLatestDeferredExecutionTime();
390+
}
391+
384392
try {
385393
if (finishTime == -1 && randomBoolean() && randomBoolean() && randomBoolean()) {
386394
final ClusterNode clusterNode = getAnyNodePreferringLeaders();
@@ -445,12 +453,14 @@ public String toString() {
445453
}
446454
break;
447455
case 1:
448-
if (clusterNode.disconnect()) {
456+
// Avoid introducing new disruptions during the cool-down period
457+
if (finishTime == -1 && clusterNode.disconnect()) {
449458
logger.debug("----> [runRandomly {}] disconnecting {}", step, clusterNode.getId());
450459
}
451460
break;
452461
case 2:
453-
if (clusterNode.blackhole()) {
462+
// Avoid introducing new disruptions during the cool-down period
463+
if (finishTime == -1 && clusterNode.blackhole()) {
454464
logger.debug("----> [runRandomly {}] blackholing {}", step, clusterNode.getId());
455465
}
456466
break;

0 commit comments

Comments
 (0)