|
| 1 | +/* |
| 2 | + * Licensed to Elasticsearch under one or more contributor |
| 3 | + * license agreements. See the NOTICE file distributed with |
| 4 | + * this work for additional information regarding copyright |
| 5 | + * ownership. Elasticsearch licenses this file to you under |
| 6 | + * the Apache License, Version 2.0 (the "License"); you may |
| 7 | + * not use this file except in compliance with the License. |
| 8 | + * You may obtain a copy of the License at |
| 9 | + * |
| 10 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | + * |
| 12 | + * Unless required by applicable law or agreed to in writing, |
| 13 | + * software distributed under the License is distributed on an |
| 14 | + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 15 | + * KIND, either express or implied. See the License for the |
| 16 | + * specific language governing permissions and limitations |
| 17 | + * under the License. |
| 18 | + */ |
| 19 | + |
| 20 | +package org.elasticsearch.cluster.routing; |
| 21 | + |
| 22 | +import org.apache.lucene.store.SimpleFSDirectory; |
| 23 | +import org.elasticsearch.action.admin.cluster.allocation.ClusterAllocationExplanation; |
| 24 | +import org.elasticsearch.action.admin.indices.stats.ShardStats; |
| 25 | +import org.elasticsearch.action.index.IndexRequestBuilder; |
| 26 | +import org.elasticsearch.client.Requests; |
| 27 | +import org.elasticsearch.cluster.ClusterState; |
| 28 | +import org.elasticsearch.cluster.health.ClusterHealthStatus; |
| 29 | +import org.elasticsearch.cluster.metadata.IndexMetaData; |
| 30 | +import org.elasticsearch.cluster.routing.allocation.AllocationDecision; |
| 31 | +import org.elasticsearch.cluster.routing.allocation.ShardAllocationDecision; |
| 32 | +import org.elasticsearch.cluster.routing.allocation.command.AllocateStalePrimaryAllocationCommand; |
| 33 | +import org.elasticsearch.common.settings.Settings; |
| 34 | +import org.elasticsearch.index.IndexService; |
| 35 | +import org.elasticsearch.index.IndexSettings; |
| 36 | +import org.elasticsearch.index.MockEngineFactoryPlugin; |
| 37 | +import org.elasticsearch.index.engine.Engine; |
| 38 | +import org.elasticsearch.index.shard.RemoveCorruptedShardDataCommandIT; |
| 39 | +import org.elasticsearch.index.shard.ShardId; |
| 40 | +import org.elasticsearch.index.shard.ShardPath; |
| 41 | +import org.elasticsearch.index.store.Store; |
| 42 | +import org.elasticsearch.indices.IndicesService; |
| 43 | +import org.elasticsearch.plugins.Plugin; |
| 44 | +import org.elasticsearch.test.DummyShardLock; |
| 45 | +import org.elasticsearch.test.ESIntegTestCase; |
| 46 | +import org.elasticsearch.test.InternalSettingsPlugin; |
| 47 | +import org.elasticsearch.test.InternalTestCluster; |
| 48 | +import org.elasticsearch.test.transport.MockTransportService; |
| 49 | + |
| 50 | +import java.io.IOException; |
| 51 | +import java.nio.file.Path; |
| 52 | +import java.util.Arrays; |
| 53 | +import java.util.Collection; |
| 54 | +import java.util.Set; |
| 55 | +import java.util.concurrent.ExecutionException; |
| 56 | +import java.util.stream.Collectors; |
| 57 | + |
| 58 | +import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery; |
| 59 | +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; |
| 60 | +import static org.hamcrest.Matchers.equalTo; |
| 61 | +import static org.hamcrest.Matchers.greaterThan; |
| 62 | +import static org.hamcrest.Matchers.hasSize; |
| 63 | +import static org.hamcrest.Matchers.is; |
| 64 | +import static org.hamcrest.Matchers.not; |
| 65 | + |
/**
 * Integration test verifying that a failed recovery after an {@code AllocateStalePrimary}
 * reroute command does NOT leave the shard allocatable with its old (stale) allocation id:
 * the command stores a fake allocation id, so any recovery failure forces the operator to
 * issue {@code AllocateStalePrimary} again before the shard can be assigned.
 */
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.SUITE, numDataNodes = 0)
public class AllocationIdIT extends ESIntegTestCase {

    @Override
    protected Collection<Class<? extends Plugin>> nodePlugins() {
        // MockTransportService + MockEngineFactory + InternalSettings are required so the test
        // can control transport behavior, engine internals, and otherwise-private index settings.
        return Arrays.asList(MockTransportService.TestPlugin.class, MockEngineFactoryPlugin.class, InternalSettingsPlugin.class);
    }

    public void testFailedRecoveryOnAllocateStalePrimaryRequiresAnotherAllocateStalePrimary() throws Exception {
        /*
         * Allocation id is put on start of shard while historyUUID is adjusted after recovery is done.
         *
         * If during execution of AllocateStalePrimary a proper allocation id is stored in allocation id set and recovery is failed
         * shard restart skips the stage where historyUUID is changed.
         *
         * That leads to situation where allocated stale primary and its replica belongs to the same historyUUID and
         * replica will receive operations after local checkpoint while documents before checkpoints could be significant different.
         *
         * Therefore, on AllocateStalePrimary we put some fake allocation id (no real one could be generated like that)
         * and any failure during recovery requires extra AllocateStalePrimary command to be executed.
         */

        // initial set up
        final String indexName = "index42";
        final String master = internalCluster().startMasterOnlyNode();
        String node1 = internalCluster().startNode();
        // single primary + one replica; "checksum" startup check makes a corruption marker fatal on recovery
        createIndex(indexName, Settings.builder()
            .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
            .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1)
            .put(IndexSettings.INDEX_CHECK_ON_STARTUP.getKey(), "checksum").build());
        final int numDocs = indexDocs(indexName, "foo", "bar");
        final IndexSettings indexSettings = getIndexSettings(indexName, node1);
        final Set<String> allocationIds = getAllocationIds(indexName);
        final ShardId shardId = new ShardId(resolveIndex(indexName), 0);
        final Path indexPath = getIndexPath(node1, shardId);
        assertThat(allocationIds, hasSize(1));
        final String historyUUID = historyUUID(node1, indexName);
        String node2 = internalCluster().startNode();
        ensureGreen(indexName);
        internalCluster().assertSameDocIdsOnShards();
        // initial set up is done

        internalCluster().stopRandomNode(InternalTestCluster.nameFilter(node1));

        // index more docs to node2 that marks node1 as stale
        int numExtraDocs = indexDocs(indexName, "foo", "bar2");
        assertHitCount(client(node2).prepareSearch(indexName).setQuery(matchAllQuery()).get(), numDocs + numExtraDocs);

        internalCluster().stopRandomNode(InternalTestCluster.nameFilter(node2));

        // create fake corrupted marker on node1
        putFakeCorruptionMarker(indexSettings, shardId, indexPath);

        // thanks to master node1 is out of sync
        node1 = internalCluster().startNode();

        // there is only _stale_ primary
        checkNoValidShardCopy(indexName, shardId);

        // allocate stale primary (acceptDataLoss = true); the fake allocation id is stored at this point
        client(node1).admin().cluster().prepareReroute()
            .add(new AllocateStalePrimaryAllocationCommand(indexName, 0, node1, true))
            .get();

        // allocation fails due to corruption marker
        assertBusy(() -> {
            final ClusterState state = client().admin().cluster().prepareState().get().getState();
            final ShardRouting shardRouting = state.routingTable().index(indexName).shard(shardId.id()).primaryShard();
            assertThat(shardRouting.state(), equalTo(ShardRoutingState.UNASSIGNED));
            assertThat(shardRouting.unassignedInfo().getReason(), equalTo(UnassignedInfo.Reason.ALLOCATION_FAILED));
        });

        // clear the corruption marker so the store itself is usable again
        try(Store store = new Store(shardId, indexSettings, new SimpleFSDirectory(indexPath), new DummyShardLock(shardId))) {
            store.removeCorruptionMarker();
        }

        // index is red: no any shard is allocated (allocation id is a fake id that does not match to anything)
        checkHealthStatus(indexName, ClusterHealthStatus.RED);
        checkNoValidShardCopy(indexName, shardId);

        internalCluster().restartNode(node1, InternalTestCluster.EMPTY_CALLBACK);

        // index is still red due to mismatch of allocation id
        checkHealthStatus(indexName, ClusterHealthStatus.RED);
        checkNoValidShardCopy(indexName, shardId);

        // no any valid shard is there; have to invoke AllocateStalePrimary again
        client().admin().cluster().prepareReroute()
            .add(new AllocateStalePrimaryAllocationCommand(indexName, 0, node1, true))
            .get();

        ensureYellow(indexName);

        // bring node2 back
        node2 = internalCluster().startNode();
        ensureGreen(indexName);

        // a fresh historyUUID proves the stale-primary recovery fully completed this time,
        // and primary/replica now share that new history
        assertThat(historyUUID(node1, indexName), not(equalTo(historyUUID)));
        assertThat(historyUUID(node1, indexName), equalTo(historyUUID(node2, indexName)));

        internalCluster().assertSameDocIdsOnShards();
    }

    /**
     * Asserts that the cluster health of {@code indexName} equals the expected {@code healthStatus}.
     */
    public void checkHealthStatus(String indexName, ClusterHealthStatus healthStatus) {
        final ClusterHealthStatus indexHealthStatus = client().admin().cluster()
            .health(Requests.clusterHealthRequest(indexName)).actionGet().getStatus();
        assertThat(indexHealthStatus, is(healthStatus));
    }

    /**
     * Indexes a random number of batches of documents (all with the same {@code source}
     * field/value pairs) into {@code indexName} and returns the total number of docs indexed.
     */
    private int indexDocs(String indexName, Object ... source) throws InterruptedException, ExecutionException {
        // index some docs in several segments
        int numDocs = 0;
        for (int k = 0, attempts = randomIntBetween(5, 10); k < attempts; k++) {
            final int numExtraDocs = between(10, 100);
            IndexRequestBuilder[] builders = new IndexRequestBuilder[numExtraDocs];
            for (int i = 0; i < builders.length; i++) {
                builders[i] = client().prepareIndex(indexName, "type").setSource(source);
            }

            indexRandom(true, false, true, Arrays.asList(builders));
            numDocs += numExtraDocs;
        }

        return numDocs;
    }

    /**
     * Resolves the on-disk index directory of {@code shardId} on node {@code nodeName};
     * asserts exactly one such directory exists.
     */
    private Path getIndexPath(String nodeName, ShardId shardId) {
        final Set<Path> indexDirs = RemoveCorruptedShardDataCommandIT.getDirs(nodeName, shardId, ShardPath.INDEX_FOLDER_NAME);
        assertThat(indexDirs, hasSize(1));
        return indexDirs.iterator().next();
    }

    /**
     * Returns the in-sync allocation ids for shard 0 of {@code indexName} from the cluster state.
     */
    private Set<String> getAllocationIds(String indexName) {
        final ClusterState state = client().admin().cluster().prepareState().get().getState();
        final Set<String> allocationIds = state.metaData().index(indexName).inSyncAllocationIds(0);
        return allocationIds;
    }

    /**
     * Fetches the {@link IndexSettings} of {@code indexName} as seen by node {@code nodeName}.
     */
    private IndexSettings getIndexSettings(String indexName, String nodeName) {
        final IndicesService indicesService = internalCluster().getInstance(IndicesService.class, nodeName);
        final IndexService indexService = indicesService.indexService(resolveIndex(indexName));
        return indexService.getIndexSettings();
    }

    /**
     * Returns the history UUID of {@code indexName} as reported by node {@code node};
     * asserts that all shard copies on that node agree on a single UUID.
     */
    private String historyUUID(String node, String indexName) {
        final ShardStats[] shards = client(node).admin().indices().prepareStats(indexName).clear().get().getShards();
        assertThat(shards.length, greaterThan(0));
        final Set<String> historyUUIDs = Arrays.stream(shards)
            .map(shard -> shard.getCommitStats().getUserData().get(Engine.HISTORY_UUID_KEY))
            .collect(Collectors.toSet());
        assertThat(historyUUIDs, hasSize(1));
        return historyUUIDs.iterator().next();
    }

    /**
     * Writes a corruption marker into the shard's store at {@code indexPath} so that the next
     * recovery with "checksum" startup checks fails as if real corruption had been detected.
     */
    private void putFakeCorruptionMarker(IndexSettings indexSettings, ShardId shardId, Path indexPath) throws IOException {
        try(Store store = new Store(shardId, indexSettings, new SimpleFSDirectory(indexPath), new DummyShardLock(shardId))) {
            store.markStoreCorrupted(new IOException("fake ioexception"));
        }
    }

    /**
     * Asserts via the allocation-explain API that the primary of {@code shardId} has no valid
     * shard copy anywhere in the cluster (decision {@code NO_VALID_SHARD_COPY}).
     */
    private void checkNoValidShardCopy(String indexName, ShardId shardId) throws Exception {
        final ClusterAllocationExplanation explanation =
            client().admin().cluster().prepareAllocationExplain()
                .setIndex(indexName).setShard(shardId.id()).setPrimary(true)
                .get().getExplanation();

        final ShardAllocationDecision shardAllocationDecision = explanation.getShardAllocationDecision();
        assertThat(shardAllocationDecision.isDecisionTaken(), equalTo(true));
        assertThat(shardAllocationDecision.getAllocateDecision().getAllocationDecision(),
            equalTo(AllocationDecision.NO_VALID_SHARD_COPY));
    }

}
0 commit comments