Skip to content

Commit 1a50af7

Browse files
committed
Do not close bad indices on startup (#39500)
With #17187, we verified IndexService creation during initial state recovery on the master and if the recovery failed the index was imported as closed, not allocating any shards. This was mainly done to prevent endless allocation loops and full log files on data-nodes when the indexmetadata contained broken settings / analyzers. Zen2 loads the cluster state eagerly, and this check currently runs on all nodes (not only the elected master), which can significantly slow down startup on data nodes. Furthermore, with replicated closed indices (#33888) on the horizon, importing the index as closed will no longer not allocate any shards. Fortunately, the original issue for endless allocation loops is no longer a problem due to #18467, where we limit the retries of failed allocations. The solution here is therefore to just undo #17187, as it's no longer necessary, and covered by #18467, which will solve the issue for Zen2 and replicated closed indices as well.
1 parent b9b46fd commit 1a50af7

File tree

6 files changed

+52
-67
lines changed

6 files changed

+52
-67
lines changed

server/src/main/java/org/elasticsearch/gateway/ClusterStateUpdaters.java

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,6 @@
3131
import org.elasticsearch.cluster.node.DiscoveryNodes;
3232
import org.elasticsearch.cluster.routing.RoutingTable;
3333
import org.elasticsearch.common.settings.ClusterSettings;
34-
import org.elasticsearch.index.Index;
35-
import org.elasticsearch.indices.IndicesService;
3634

3735
import java.util.Map;
3836

@@ -92,26 +90,6 @@ static ClusterState recoverClusterBlocks(final ClusterState state) {
9290
return ClusterState.builder(state).blocks(blocks).build();
9391
}
9492

95-
static ClusterState closeBadIndices(final ClusterState clusterState, final IndicesService indicesService) {
96-
final MetaData.Builder builder = MetaData.builder(clusterState.metaData()).removeAllIndices();
97-
98-
for (IndexMetaData metaData : clusterState.metaData()) {
99-
try {
100-
if (metaData.getState() == IndexMetaData.State.OPEN) {
101-
// verify that we can actually create this index - if not we recover it as closed with lots of warn logs
102-
indicesService.verifyIndexMetadata(metaData, metaData);
103-
}
104-
} catch (final Exception e) {
105-
final Index electedIndex = metaData.getIndex();
106-
logger.warn(() -> new ParameterizedMessage("recovering index {} failed - recovering as closed", electedIndex), e);
107-
metaData = IndexMetaData.builder(metaData).state(IndexMetaData.State.CLOSE).build();
108-
}
109-
builder.put(metaData, false);
110-
}
111-
112-
return ClusterState.builder(clusterState).metaData(builder).build();
113-
}
114-
11593
static ClusterState updateRoutingTable(final ClusterState state) {
11694
// initialize all index routing tables as empty
11795
final RoutingTable.Builder routingTableBuilder = RoutingTable.builder(state.routingTable());

server/src/main/java/org/elasticsearch/gateway/Gateway.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,6 @@ public void performStateRecovery(final GatewayStateRecoveredListener listener) t
126126
}
127127
ClusterState recoveredState = Function.<ClusterState>identity()
128128
.andThen(state -> ClusterStateUpdaters.upgradeAndArchiveUnknownOrInvalidSettings(state, clusterService.getClusterSettings()))
129-
.andThen(state -> ClusterStateUpdaters.closeBadIndices(state, indicesService))
130129
.apply(ClusterState.builder(clusterService.getClusterName()).metaData(metaDataBuilder).build());
131130

132131
listener.onSuccess(recoveredState);

server/src/main/java/org/elasticsearch/gateway/GatewayMetaState.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,6 @@ public void applyClusterStateUpdaters() {
137137
.andThen(ClusterStateUpdaters::addStateNotRecoveredBlock)
138138
.andThen(state -> ClusterStateUpdaters.setLocalNode(state, transportService.getLocalNode()))
139139
.andThen(state -> ClusterStateUpdaters.upgradeAndArchiveUnknownOrInvalidSettings(state, clusterService.getClusterSettings()))
140-
.andThen(state -> ClusterStateUpdaters.closeBadIndices(state, indicesService))
141140
.andThen(ClusterStateUpdaters::recoverClusterBlocks)
142141
.apply(previousClusterState);
143142
}

server/src/test/java/org/elasticsearch/action/admin/indices/create/CreateIndexIT.java

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@
3434
import org.elasticsearch.cluster.metadata.MappingMetaData;
3535
import org.elasticsearch.cluster.metadata.MetaData;
3636
import org.elasticsearch.cluster.node.DiscoveryNode;
37+
import org.elasticsearch.cluster.routing.IndexRoutingTable;
38+
import org.elasticsearch.cluster.routing.IndexShardRoutingTable;
39+
import org.elasticsearch.cluster.routing.RoutingTable;
40+
import org.elasticsearch.cluster.routing.UnassignedInfo;
3741
import org.elasticsearch.common.collect.ImmutableOpenMap;
3842
import org.elasticsearch.common.settings.Settings;
3943
import org.elasticsearch.common.unit.TimeValue;
@@ -50,6 +54,7 @@
5054
import java.util.HashSet;
5155
import java.util.Set;
5256
import java.util.concurrent.CountDownLatch;
57+
import java.util.concurrent.TimeUnit;
5358
import java.util.concurrent.atomic.AtomicInteger;
5459
import java.util.function.BiFunction;
5560

@@ -60,6 +65,7 @@
6065
import static org.hamcrest.Matchers.allOf;
6166
import static org.hamcrest.Matchers.containsString;
6267
import static org.hamcrest.Matchers.equalTo;
68+
import static org.hamcrest.Matchers.greaterThan;
6369
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
6470
import static org.hamcrest.Matchers.hasToString;
6571
import static org.hamcrest.Matchers.instanceOf;
@@ -419,11 +425,20 @@ public Settings onNodeStopped(String nodeName) throws Exception {
419425
return super.onNodeStopped(nodeName);
420426
}
421427
});
422-
ensureGreen(metaData.getIndex().getName()); // we have to wait for the index to show up in the metadata or we will fail in a race
423-
final ClusterState stateAfterRestart = client().admin().cluster().prepareState().get().getState();
424428

425-
// the index should not be open after we restart and recover the broken index metadata
426-
assertThat(stateAfterRestart.getMetaData().index(metaData.getIndex()).getState(), equalTo(IndexMetaData.State.CLOSE));
429+
// check that the cluster does not keep reallocating shards
430+
assertBusy(() -> {
431+
final RoutingTable routingTable = client().admin().cluster().prepareState().get().getState().routingTable();
432+
final IndexRoutingTable indexRoutingTable = routingTable.index("test");
433+
assertNotNull(indexRoutingTable);
434+
for (IndexShardRoutingTable shardRoutingTable : indexRoutingTable) {
435+
assertTrue(shardRoutingTable.primaryShard().unassigned());
436+
assertEquals(UnassignedInfo.AllocationStatus.DECIDERS_NO,
437+
shardRoutingTable.primaryShard().unassignedInfo().getLastAllocationStatus());
438+
assertThat(shardRoutingTable.primaryShard().unassignedInfo().getNumFailedAllocations(), greaterThan(0));
439+
}
440+
}, 60, TimeUnit.SECONDS);
441+
client().admin().indices().prepareClose("test").get();
427442

428443
// try to open the index
429444
final ElasticsearchException e =

server/src/test/java/org/elasticsearch/gateway/ClusterStateUpdatersTests.java

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,8 @@
3333
import org.elasticsearch.common.settings.Settings;
3434
import org.elasticsearch.common.util.set.Sets;
3535
import org.elasticsearch.index.Index;
36-
import org.elasticsearch.indices.IndicesService;
3736
import org.elasticsearch.test.ESTestCase;
3837

39-
import java.io.IOException;
4038
import java.util.Arrays;
4139
import java.util.Collections;
4240
import java.util.Set;
@@ -47,7 +45,6 @@
4745

4846
import static org.elasticsearch.cluster.metadata.MetaData.CLUSTER_READ_ONLY_BLOCK;
4947
import static org.elasticsearch.gateway.ClusterStateUpdaters.addStateNotRecoveredBlock;
50-
import static org.elasticsearch.gateway.ClusterStateUpdaters.closeBadIndices;
5148
import static org.elasticsearch.gateway.ClusterStateUpdaters.hideStateIfNotRecovered;
5249
import static org.elasticsearch.gateway.ClusterStateUpdaters.mixCurrentStateAndRecoveredState;
5350
import static org.elasticsearch.gateway.ClusterStateUpdaters.recoverClusterBlocks;
@@ -59,8 +56,6 @@
5956
import static org.hamcrest.Matchers.equalTo;
6057
import static org.hamcrest.Matchers.is;
6158
import static org.hamcrest.Matchers.not;
62-
import static org.mockito.Mockito.doThrow;
63-
import static org.mockito.Mockito.mock;
6459

6560
public class ClusterStateUpdatersTests extends ESTestCase {
6661

@@ -201,32 +196,6 @@ public void testAddStateNotRecoveredBlock() {
201196
assertTrue(newState.blocks().hasGlobalBlock(STATE_NOT_RECOVERED_BLOCK));
202197
}
203198

204-
public void testCloseBadIndices() throws IOException {
205-
final IndicesService indicesService = mock(IndicesService.class);
206-
final IndexMetaData good = createIndexMetaData("good", Settings.EMPTY);
207-
final IndexMetaData bad = createIndexMetaData("bad", Settings.EMPTY);
208-
final IndexMetaData ugly = IndexMetaData.builder(createIndexMetaData("ugly", Settings.EMPTY))
209-
.state(IndexMetaData.State.CLOSE)
210-
.build();
211-
212-
final ClusterState initialState = ClusterState
213-
.builder(ClusterState.EMPTY_STATE)
214-
.metaData(MetaData.builder()
215-
.put(good, false)
216-
.put(bad, false)
217-
.put(ugly, false)
218-
.build())
219-
.build();
220-
221-
doThrow(new RuntimeException("test")).when(indicesService).verifyIndexMetadata(bad, bad);
222-
doThrow(new AssertionError("ugly index is already closed")).when(indicesService).verifyIndexMetadata(ugly, ugly);
223-
224-
final ClusterState newState = closeBadIndices(initialState, indicesService);
225-
assertThat(newState.metaData().index(good.getIndex()).getState(), equalTo(IndexMetaData.State.OPEN));
226-
assertThat(newState.metaData().index(bad.getIndex()).getState(), equalTo(IndexMetaData.State.CLOSE));
227-
assertThat(newState.metaData().index(ugly.getIndex()).getState(), equalTo(IndexMetaData.State.CLOSE));
228-
}
229-
230199
public void testUpdateRoutingTable() {
231200
final int numOfShards = randomIntBetween(1, 10);
232201

server/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,11 @@
3434
import org.elasticsearch.cluster.metadata.IndexMetaData;
3535
import org.elasticsearch.cluster.metadata.MappingMetaData;
3636
import org.elasticsearch.cluster.metadata.MetaData;
37+
import org.elasticsearch.cluster.routing.IndexRoutingTable;
38+
import org.elasticsearch.cluster.routing.IndexShardRoutingTable;
39+
import org.elasticsearch.cluster.routing.RoutingTable;
3740
import org.elasticsearch.cluster.routing.ShardRoutingState;
41+
import org.elasticsearch.cluster.routing.UnassignedInfo;
3842
import org.elasticsearch.common.Priority;
3943
import org.elasticsearch.common.settings.Settings;
4044
import org.elasticsearch.common.xcontent.XContentFactory;
@@ -51,12 +55,14 @@
5155

5256
import java.io.IOException;
5357
import java.util.List;
58+
import java.util.concurrent.TimeUnit;
5459

5560
import static org.elasticsearch.action.support.WriteRequest.RefreshPolicy.IMMEDIATE;
5661
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
5762
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
5863
import static org.hamcrest.Matchers.containsString;
5964
import static org.hamcrest.Matchers.equalTo;
65+
import static org.hamcrest.Matchers.greaterThan;
6066
import static org.hamcrest.Matchers.nullValue;
6167

6268
@ClusterScope(scope = Scope.TEST, numDataNodes = 0)
@@ -369,9 +375,20 @@ public Settings onNodeStopped(String nodeName) throws Exception {
369375
}
370376
});
371377

372-
// ensureGreen(closedIndex) waits for the index to show up in the metadata
373-
// this is crucial otherwise the state call below might not contain the index yet
374-
ensureGreen(metaData.getIndex().getName());
378+
// check that the cluster does not keep reallocating shards
379+
assertBusy(() -> {
380+
final RoutingTable routingTable = client().admin().cluster().prepareState().get().getState().routingTable();
381+
final IndexRoutingTable indexRoutingTable = routingTable.index("test");
382+
assertNotNull(indexRoutingTable);
383+
for (IndexShardRoutingTable shardRoutingTable : indexRoutingTable) {
384+
assertTrue(shardRoutingTable.primaryShard().unassigned());
385+
assertEquals(UnassignedInfo.AllocationStatus.DECIDERS_NO,
386+
shardRoutingTable.primaryShard().unassignedInfo().getLastAllocationStatus());
387+
assertThat(shardRoutingTable.primaryShard().unassignedInfo().getNumFailedAllocations(), greaterThan(0));
388+
}
389+
}, 60, TimeUnit.SECONDS);
390+
client().admin().indices().prepareClose("test").get();
391+
375392
state = client().admin().cluster().prepareState().get().getState();
376393
assertEquals(IndexMetaData.State.CLOSE, state.getMetaData().index(metaData.getIndex()).getState());
377394
assertEquals("classic", state.getMetaData().index(metaData.getIndex()).getSettings().get("archived.index.similarity.BM25.type"));
@@ -432,11 +449,19 @@ public Settings onNodeStopped(String nodeName) throws Exception {
432449
}
433450
});
434451

435-
// ensureGreen(closedIndex) waits for the index to show up in the metadata
436-
// this is crucial otherwise the state call below might not contain the index yet
437-
ensureGreen(metaData.getIndex().getName());
438-
state = client().admin().cluster().prepareState().get().getState();
439-
assertEquals(IndexMetaData.State.CLOSE, state.getMetaData().index(metaData.getIndex()).getState());
452+
// check that the cluster does not keep reallocating shards
453+
assertBusy(() -> {
454+
final RoutingTable routingTable = client().admin().cluster().prepareState().get().getState().routingTable();
455+
final IndexRoutingTable indexRoutingTable = routingTable.index("test");
456+
assertNotNull(indexRoutingTable);
457+
for (IndexShardRoutingTable shardRoutingTable : indexRoutingTable) {
458+
assertTrue(shardRoutingTable.primaryShard().unassigned());
459+
assertEquals(UnassignedInfo.AllocationStatus.DECIDERS_NO,
460+
shardRoutingTable.primaryShard().unassignedInfo().getLastAllocationStatus());
461+
assertThat(shardRoutingTable.primaryShard().unassignedInfo().getNumFailedAllocations(), greaterThan(0));
462+
}
463+
}, 60, TimeUnit.SECONDS);
464+
client().admin().indices().prepareClose("test").get();
440465

441466
// try to open it with the broken setting - fail again!
442467
ElasticsearchException ex = expectThrows(ElasticsearchException.class, () -> client().admin().indices().prepareOpen("test").get());

0 commit comments

Comments
 (0)