Skip to content

Commit d8d01ee

Browse files
committed
QA: Switch rolling upgrade to 3 nodes
Switches the rolling upgrade tests from upgrading two nodes to upgrading three nodes, which is much more realistic and much better able to find unexpected bugs. It upgrades the nodes one at a time and runs tests between each upgrade. As such, this now has four test runs: (1) old, (2) one third upgraded, (3) two thirds upgraded, and (4) upgraded. It sets system properties so the tests can figure out which stage they are in. It reuses the same yml tests for the "one third" and "two thirds" cases because they are *almost* the same case. This rewrites the yml-based indexing tests to be Java-based because the yml-based tests can't handle different expected values for the counts, and the indexing tests need that when they are run twice. Closes #25336
1 parent 4f1b6fa commit d8d01ee

File tree

8 files changed

+352
-333
lines changed

8 files changed

+352
-333
lines changed

qa/rolling-upgrade/build.gradle

Lines changed: 61 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,26 @@ task bwcTest {
3030
}
3131

3232
for (Version version : bwcVersions.wireCompatible) {
33+
/*
34+
* The goal here is to:
35+
* <ul>
36+
* <li>start three nodes on the old version
37+
* <li>run tests with systemProperty 'tests.rest.suite', 'old_cluster'
38+
* <li>shut down one node
39+
* <li>start a node with the new version
40+
* <li>run tests with systemProperty 'tests.rest.suite', 'mixed_cluster'
41+
* <li>shut down one node on the old version
42+
* <li>start a node with the new version
43+
* <li>run tests with systemProperty 'tests.rest.suite', 'mixed_cluster' again
44+
* <li>shut down the last node with the old version
45+
* <li>start a node with the new version
46+
* <li>run tests with systemProperty 'tests.rest.suite', 'upgraded_cluster'
47+
* <li>shut down the entire cluster
48+
* </ul>
49+
*
50+
* Be careful: gradle dry run spits out tasks in the wrong order but,
51+
* strangely, running the tasks works properly.
52+
*/
3353
String baseName = "v${version}"
3454

3555
Task oldClusterTest = tasks.create(name: "${baseName}#oldClusterTest", type: RestIntegTestTask) {
@@ -39,8 +59,8 @@ for (Version version : bwcVersions.wireCompatible) {
3959
Object extension = extensions.findByName("${baseName}#oldClusterTestCluster")
4060
configure(extensions.findByName("${baseName}#oldClusterTestCluster")) {
4161
bwcVersion = version
42-
numBwcNodes = 2
43-
numNodes = 2
62+
numBwcNodes = 3
63+
numNodes = 3
4464
clusterName = 'rolling-upgrade'
4565
setting 'repositories.url.allowed_urls', 'http://snapshot.test*'
4666
setting 'node.attr.gen', 'old'
@@ -54,45 +74,57 @@ for (Version version : bwcVersions.wireCompatible) {
5474
systemProperty 'tests.rest.suite', 'old_cluster'
5575
}
5676

57-
Task mixedClusterTest = tasks.create(name: "${baseName}#mixedClusterTest", type: RestIntegTestTask)
77+
Closure configureUpgradeCluster = {String name, Task lastRunner, int stopNode, Closure unicastSeed ->
78+
configure(extensions.findByName("${baseName}#${name}")) {
79+
dependsOn lastRunner, "${baseName}#oldClusterTestCluster#node${stopNode}.stop"
80+
clusterName = 'rolling-upgrade'
81+
unicastTransportUri = { seedNode, node, ant -> unicastSeed() }
82+
minimumMasterNodes = { 3 }
83+
/* Override the data directory so the new node always gets the data
84+
* directory of the node we just stopped. */
85+
dataDir = { nodeNumber -> oldClusterTest.nodes[stopNode].dataDir }
86+
setting 'repositories.url.allowed_urls', 'http://snapshot.test*'
87+
}
88+
}
5889

59-
configure(extensions.findByName("${baseName}#mixedClusterTestCluster")) {
60-
dependsOn oldClusterTestRunner, "${baseName}#oldClusterTestCluster#node1.stop"
61-
clusterName = 'rolling-upgrade'
62-
unicastTransportUri = { seedNode, node, ant -> oldClusterTest.nodes.get(0).transportUri() }
63-
minimumMasterNodes = { 2 }
64-
/* Override the data directory so the new node always gets the node we
65-
* just stopped's data directory. */
66-
dataDir = { nodeNumber -> oldClusterTest.nodes[1].dataDir }
67-
setting 'repositories.url.allowed_urls', 'http://snapshot.test*'
68-
setting 'node.attr.gen', 'new'
90+
Task oneThirdUpgradedTest = tasks.create(name: "${baseName}#oneThirdUpgradedTest", type: RestIntegTestTask)
91+
92+
configureUpgradeCluster("oneThirdUpgradedTestCluster", oldClusterTestRunner,
93+
0, { oldClusterTest.nodes.get(1).transportUri() })
94+
95+
Task oneThirdUpgradedTestRunner = tasks.getByName("${baseName}#oneThirdUpgradedTestRunner")
96+
oneThirdUpgradedTestRunner.configure {
97+
systemProperty 'tests.rest.suite', 'mixed_cluster'
98+
systemProperty 'tests.first_round', 'true'
99+
finalizedBy "${baseName}#oldClusterTestCluster#node1.stop"
69100
}
70101

71-
Task mixedClusterTestRunner = tasks.getByName("${baseName}#mixedClusterTestRunner")
72-
mixedClusterTestRunner.configure {
102+
Task twoThirdsUpgradedTest = tasks.create(name: "${baseName}#twoThirdsUpgradedTest", type: RestIntegTestTask)
103+
104+
configureUpgradeCluster("twoThirdsUpgradedTestCluster", oneThirdUpgradedTestRunner,
105+
1, { oneThirdUpgradedTest.nodes.get(0).transportUri() })
106+
107+
Task twoThirdsUpgradedTestRunner = tasks.getByName("${baseName}#twoThirdsUpgradedTestRunner")
108+
twoThirdsUpgradedTestRunner.configure {
73109
systemProperty 'tests.rest.suite', 'mixed_cluster'
74-
finalizedBy "${baseName}#oldClusterTestCluster#node0.stop"
110+
systemProperty 'tests.first_round', 'false'
111+
finalizedBy "${baseName}#oldClusterTestCluster#node2.stop"
75112
}
76113

77114
Task upgradedClusterTest = tasks.create(name: "${baseName}#upgradedClusterTest", type: RestIntegTestTask)
78115

79-
configure(extensions.findByName("${baseName}#upgradedClusterTestCluster")) {
80-
dependsOn mixedClusterTestRunner, "${baseName}#oldClusterTestCluster#node0.stop"
81-
clusterName = 'rolling-upgrade'
82-
unicastTransportUri = { seedNode, node, ant -> mixedClusterTest.nodes.get(0).transportUri() }
83-
minimumMasterNodes = { 2 }
84-
/* Override the data directory so the new node always gets the node we
85-
* just stopped's data directory. */
86-
dataDir = { nodeNumber -> oldClusterTest.nodes[0].dataDir}
87-
setting 'repositories.url.allowed_urls', 'http://snapshot.test*'
88-
setting 'node.attr.gen', 'new'
89-
}
116+
configureUpgradeCluster("upgradedClusterTestCluster", twoThirdsUpgradedTestRunner,
117+
2, { twoThirdsUpgradedTest.nodes.get(0).transportUri() })
90118

91119
Task upgradedClusterTestRunner = tasks.getByName("${baseName}#upgradedClusterTestRunner")
92120
upgradedClusterTestRunner.configure {
93121
systemProperty 'tests.rest.suite', 'upgraded_cluster'
94-
// only need to kill the mixed cluster tests node here because we explicitly told it to not stop nodes upon completion
95-
finalizedBy "${baseName}#mixedClusterTestCluster#stop"
122+
/*
123+
* Force stopping all the upgraded nodes after the test runner
124+
* so they are alive during the test.
125+
*/
126+
finalizedBy "${baseName}#oneThirdUpgradedTestCluster#stop"
127+
finalizedBy "${baseName}#twoThirdsUpgradedTestCluster#stop"
96128
}
97129

98130
Task versionBwcTest = tasks.create(name: "${baseName}#bwcTest") {
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.elasticsearch.upgrades;
20+
21+
import org.apache.http.entity.ContentType;
22+
import org.apache.http.entity.StringEntity;
23+
import org.elasticsearch.Version;
24+
import org.elasticsearch.action.support.PlainActionFuture;
25+
import org.elasticsearch.client.Response;
26+
import org.elasticsearch.cluster.metadata.IndexMetaData;
27+
import org.elasticsearch.common.settings.Settings;
28+
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
29+
import org.elasticsearch.test.rest.ESRestTestCase;
30+
import org.elasticsearch.test.rest.yaml.ObjectPath;
31+
32+
import java.io.IOException;
33+
import java.util.ArrayList;
34+
import java.util.Collections;
35+
import java.util.List;
36+
import java.util.Map;
37+
import java.util.concurrent.Future;
38+
import java.util.function.Predicate;
39+
40+
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomAsciiOfLength;
41+
import static java.util.Collections.emptyMap;
42+
import static org.elasticsearch.cluster.routing.UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING;
43+
import static org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider.INDEX_ROUTING_ALLOCATION_ENABLE_SETTING;
44+
import static org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY;
45+
import static org.hamcrest.Matchers.equalTo;
46+
import static org.hamcrest.Matchers.hasSize;
47+
import static org.hamcrest.Matchers.notNullValue;
48+
49+
public abstract class AbstractRollingTestCase extends ESRestTestCase {
50+
protected enum ClusterType {
51+
OLD,
52+
MIXED,
53+
UPGRADED;
54+
55+
public static ClusterType parse(String value) {
56+
switch (value) {
57+
case "old_cluster":
58+
return OLD;
59+
case "mixed_cluster":
60+
return MIXED;
61+
case "upgraded_cluster":
62+
return UPGRADED;
63+
default:
64+
throw new AssertionError("unknown cluster type: " + value);
65+
}
66+
}
67+
}
68+
69+
protected static final ClusterType CLUSTER_TYPE = ClusterType.parse(System.getProperty("tests.rest.suite"));
70+
71+
@Override
72+
protected final boolean preserveIndicesUponCompletion() {
73+
return true;
74+
}
75+
76+
@Override
77+
protected final boolean preserveReposUponCompletion() {
78+
return true;
79+
}
80+
81+
@Override
82+
protected final Settings restClientSettings() {
83+
return Settings.builder().put(super.restClientSettings())
84+
// increase the timeout here to 90 seconds to handle long waits for a green
85+
// cluster health. the waits for green need to be longer than a minute to
86+
// account for delayed shards
87+
.put(ESRestTestCase.CLIENT_RETRY_TIMEOUT, "90s")
88+
.put(ESRestTestCase.CLIENT_SOCKET_TIMEOUT, "90s")
89+
.build();
90+
}
91+
}
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.elasticsearch.upgrades;
20+
21+
import org.apache.http.util.EntityUtils;
22+
import org.elasticsearch.common.Booleans;
23+
import org.elasticsearch.client.Request;
24+
import org.elasticsearch.client.Response;
25+
26+
import java.io.IOException;
27+
import java.nio.charset.StandardCharsets;
28+
29+
/**
30+
* Basic test that indexed documents survive the rolling restart. See
31+
* {@link RecoveryIT} for much more in depth testing of the mechanism
32+
* by which they survive.
33+
*/
34+
public class IndexingIT extends AbstractRollingTestCase {
35+
public void testIndexing() throws IOException {
36+
switch (CLUSTER_TYPE) {
37+
case OLD:
38+
break;
39+
case MIXED:
40+
Request waitForYellow = new Request("GET", "/_cluster/health");
41+
waitForYellow.addParameter("wait_for_nodes", "3");
42+
waitForYellow.addParameter("wait_for_status", "yellow");
43+
client().performRequest(waitForYellow);
44+
break;
45+
case UPGRADED:
46+
Request waitForGreen = new Request("GET", "/_cluster/health/test_index,index_with_replicas,empty_index");
47+
waitForGreen.addParameter("wait_for_nodes", "3");
48+
waitForGreen.addParameter("wait_for_status", "green");
49+
// wait for long enough that we give delayed unassigned shards to stop being delayed
50+
waitForGreen.addParameter("timeout", "70s");
51+
waitForGreen.addParameter("level", "shards");
52+
client().performRequest(waitForGreen);
53+
break;
54+
default:
55+
throw new UnsupportedOperationException("Unknown cluster type [" + CLUSTER_TYPE + "]");
56+
}
57+
58+
if (CLUSTER_TYPE == ClusterType.OLD) {
59+
Request createTestIndex = new Request("PUT", "/test_index");
60+
createTestIndex.setJsonEntity("{\"settings\": {\"index.number_of_replicas\": 0}}");
61+
client().performRequest(createTestIndex);
62+
63+
String recoverQuickly = "{\"settings\": {\"index.unassigned.node_left.delayed_timeout\": \"100ms\"}}";
64+
Request createIndexWithReplicas = new Request("PUT", "/index_with_replicas");
65+
createIndexWithReplicas.setJsonEntity(recoverQuickly);
66+
client().performRequest(createIndexWithReplicas);
67+
68+
Request createEmptyIndex = new Request("PUT", "/empty_index");
69+
// Ask for recovery to be quick
70+
createEmptyIndex.setJsonEntity(recoverQuickly);
71+
client().performRequest(createEmptyIndex);
72+
73+
bulk("test_index", "_OLD", 5);
74+
bulk("index_with_replicas", "_OLD", 5);
75+
}
76+
77+
int expectedCount;
78+
switch (CLUSTER_TYPE) {
79+
case OLD:
80+
expectedCount = 5;
81+
break;
82+
case MIXED:
83+
if (Booleans.parseBoolean(System.getProperty("tests.first_round"))) {
84+
expectedCount = 5;
85+
} else {
86+
expectedCount = 10;
87+
}
88+
break;
89+
case UPGRADED:
90+
expectedCount = 15;
91+
break;
92+
default:
93+
throw new UnsupportedOperationException("Unknown cluster type [" + CLUSTER_TYPE + "]");
94+
}
95+
96+
assertCount("test_index", expectedCount);
97+
assertCount("index_with_replicas", 5);
98+
assertCount("empty_index", 0);
99+
100+
if (CLUSTER_TYPE != ClusterType.OLD) {
101+
bulk("test_index", "_" + CLUSTER_TYPE, 5);
102+
Request toBeDeleted = new Request("PUT", "/test_index/doc/to_be_deleted");
103+
toBeDeleted.addParameter("refresh", "true");
104+
toBeDeleted.setJsonEntity("{\"f1\": \"delete-me\"}");
105+
client().performRequest(toBeDeleted);
106+
assertCount("test_index", expectedCount + 6);
107+
108+
Request delete = new Request("DELETE", "/test_index/doc/to_be_deleted");
109+
delete.addParameter("refresh", "true");
110+
client().performRequest(delete);
111+
112+
assertCount("test_index", expectedCount + 5);
113+
}
114+
}
115+
116+
private void bulk(String index, String valueSuffix, int count) throws IOException {
117+
StringBuilder b = new StringBuilder();
118+
for (int i = 0; i < count; i++) {
119+
b.append("{\"index\": {\"_index\": \"").append(index).append("\", \"_type\": \"doc\"}}\n");
120+
b.append("{\"f1\": \"v").append(i).append(valueSuffix).append("\", \"f2\": ").append(i).append("}\n");
121+
}
122+
Request bulk = new Request("POST", "/_bulk");
123+
bulk.addParameter("refresh", "true");
124+
bulk.setJsonEntity(b.toString());
125+
client().performRequest(bulk);
126+
}
127+
128+
private void assertCount(String index, int count) throws IOException {
129+
Request searchTestIndexRequest = new Request("POST", "/" + index + "/_search");
130+
searchTestIndexRequest.addParameter("filter_path", "hits.total");
131+
Response searchTestIndexResponse = client().performRequest(searchTestIndexRequest);
132+
assertEquals("{\"hits\":{\"total\":" + count + "}}",
133+
EntityUtils.toString(searchTestIndexResponse.getEntity(), StandardCharsets.UTF_8));
134+
}
135+
}

0 commit comments

Comments
 (0)