-
Notifications
You must be signed in to change notification settings - Fork 25.2k
Zen2: Move disruption tests to Zen2 #35724
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,6 +22,10 @@ | |
import org.elasticsearch.cluster.ClusterState; | ||
import org.elasticsearch.cluster.block.ClusterBlock; | ||
import org.elasticsearch.cluster.block.ClusterBlockLevel; | ||
import org.elasticsearch.cluster.coordination.Coordinator; | ||
import org.elasticsearch.cluster.coordination.FollowersChecker; | ||
import org.elasticsearch.cluster.coordination.JoinHelper; | ||
import org.elasticsearch.cluster.coordination.LeaderChecker; | ||
import org.elasticsearch.cluster.node.DiscoveryNodes; | ||
import org.elasticsearch.common.Nullable; | ||
import org.elasticsearch.common.settings.Settings; | ||
|
@@ -63,7 +67,6 @@ public abstract class AbstractDisruptionTestCase extends ESIntegTestCase { | |
@Override | ||
protected Settings nodeSettings(int nodeOrdinal) { | ||
return Settings.builder().put(super.nodeSettings(nodeOrdinal)).put(DEFAULT_SETTINGS) | ||
.put(TestZenDiscovery.USE_ZEN2.getKey(), false) // requires more work | ||
.put(TestZenDiscovery.USE_MOCK_PINGS.getKey(), false).build(); | ||
} | ||
|
||
|
@@ -114,18 +117,31 @@ List<String> startCluster(int numberOfNodes) { | |
ensureStableCluster(numberOfNodes); | ||
|
||
// TODO: this is a temporary solution so that nodes will not base their reaction to a partition based on previous successful results | ||
ZenPing zenPing = ((TestZenDiscovery) internalCluster().getInstance(Discovery.class)).getZenPing(); | ||
if (zenPing instanceof UnicastZenPing) { | ||
((UnicastZenPing) zenPing).clearTemporalResponses(); | ||
} | ||
clearTemporalResponses(); | ||
return nodes; | ||
} | ||
|
||
protected void clearTemporalResponses() { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This thing is weird: it clears out the received pings from a single randomly-chosen node in the cluster. It was weird before this change too. But it will go away soon. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I agree it's weird. I did not think it was worth investigating further there because this is going away soon. |
||
final Discovery discovery = internalCluster().getInstance(Discovery.class); | ||
if (discovery instanceof TestZenDiscovery) { | ||
ZenPing zenPing = ((TestZenDiscovery) discovery).getZenPing(); | ||
if (zenPing instanceof UnicastZenPing) { | ||
((UnicastZenPing) zenPing).clearTemporalResponses(); | ||
} | ||
} | ||
} | ||
|
||
static final Settings DEFAULT_SETTINGS = Settings.builder() | ||
.put(FaultDetection.PING_TIMEOUT_SETTING.getKey(), "1s") // for hitting simulated network failures quickly | ||
.put(FaultDetection.PING_RETRIES_SETTING.getKey(), "1") // for hitting simulated network failures quickly | ||
.put(LeaderChecker.LEADER_CHECK_TIMEOUT_SETTING.getKey(), "1s") // for hitting simulated network failures quickly | ||
.put(LeaderChecker.LEADER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) // for hitting simulated network failures quickly | ||
.put(FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING.getKey(), "1s") // for hitting simulated network failures quickly | ||
.put(FollowersChecker.FOLLOWER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) // for hitting simulated network failures quickly | ||
.put("discovery.zen.join_timeout", "10s") // still long to induce failures but not too long so test won't time out | ||
.put(JoinHelper.JOIN_TIMEOUT_SETTING.getKey(), "10s") // still long to induce failures but not too long so test won't time out | ||
.put(DiscoverySettings.PUBLISH_TIMEOUT_SETTING.getKey(), "1s") // <-- for hitting simulated network failures quickly | ||
.put(Coordinator.PUBLISH_TIMEOUT_SETTING.getKey(), "1s") // <-- for hitting simulated network failures quickly | ||
.put(TransportService.TCP_CONNECT_TIMEOUT.getKey(), "10s") // Network delay disruption waits for the min between this | ||
// value and the time of disruption and does not recover immediately | ||
// when disruption is stopped. We should make sure we recover faster | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -39,6 +39,7 @@ | |
import org.elasticsearch.discovery.zen.ZenDiscovery; | ||
import org.elasticsearch.monitor.jvm.HotThreads; | ||
import org.elasticsearch.test.ESIntegTestCase; | ||
import org.elasticsearch.test.discovery.TestZenDiscovery; | ||
import org.elasticsearch.test.disruption.BlockMasterServiceOnMaster; | ||
import org.elasticsearch.test.disruption.IntermittentLongGCDisruption; | ||
import org.elasticsearch.test.disruption.LongGCDisruption; | ||
|
@@ -379,7 +380,8 @@ public void testIsolateMasterAndVerifyClusterStateConsensus() throws Exception { | |
* Verify that the proper block is applied when nodes lose their master | ||
*/ | ||
public void testVerifyApiBlocksDuringPartition() throws Exception { | ||
startCluster(3); | ||
// TODO: NO_MASTER_BLOCKS not dynamic in Zen2 yet | ||
internalCluster().startNodes(3, Settings.builder().put(TestZenDiscovery.USE_ZEN2.getKey(), false).build()); | ||
|
||
// Makes sure that the get request can be executed on each node locally: | ||
assertAcked(prepareCreate("test").setSettings(Settings.builder() | ||
|
@@ -511,7 +513,13 @@ void assertDiscoveryCompleted(List<String> nodes) throws InterruptedException { | |
assertTrue( | ||
"node [" + node + "] is still joining master", | ||
awaitBusy( | ||
() -> !((ZenDiscovery) internalCluster().getInstance(Discovery.class, node)).joiningCluster(), | ||
() -> { | ||
final Discovery discovery = internalCluster().getInstance(Discovery.class, node); | ||
if (discovery instanceof ZenDiscovery) { | ||
return !((ZenDiscovery) discovery).joiningCluster(); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ☠️ But it will go away soon. |
||
} | ||
return true; | ||
}, | ||
30, | ||
TimeUnit.SECONDS | ||
) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oops.