Skip to content

Commit 1390f36

Browse files
authored
[CCR] Only auto follow indices when all primary shards have started (elastic#35814)
This change adds an extra check that verifies that all primary shards of an index that is about to be auto-followed have been started. If not all primary shards of an index have been started, then the next auto-follow run will try to auto-follow this index again. Closes elastic#35480
1 parent fbdfec4 commit 1390f36

File tree

2 files changed

+97
-22
lines changed

2 files changed

+97
-22
lines changed

x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/AutoFollowCoordinator.java

+10-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import org.elasticsearch.cluster.ClusterStateUpdateTask;
2020
import org.elasticsearch.cluster.metadata.IndexMetaData;
2121
import org.elasticsearch.cluster.metadata.MetaData;
22+
import org.elasticsearch.cluster.routing.IndexRoutingTable;
2223
import org.elasticsearch.cluster.service.ClusterService;
2324
import org.elasticsearch.common.collect.Tuple;
2425
import org.elasticsearch.common.settings.Settings;
@@ -164,6 +165,7 @@ void getLeaderClusterState(final String remoteCluster,
164165
final ClusterStateRequest request = new ClusterStateRequest();
165166
request.clear();
166167
request.metaData(true);
168+
request.routingTable(true);
167169
// TODO: set non-compliant status on auto-follow coordination that can be viewed via a stats API
168170
ccrLicenseChecker.checkRemoteClusterLicenseAndFetchClusterState(
169171
client,
@@ -367,7 +369,14 @@ static List<Index> getLeaderIndicesToFollow(String remoteCluster,
367369
List<Index> leaderIndicesToFollow = new ArrayList<>();
368370
for (IndexMetaData leaderIndexMetaData : leaderClusterState.getMetaData()) {
369371
if (autoFollowPattern.match(leaderIndexMetaData.getIndex().getName())) {
370-
if (followedIndexUUIDs.contains(leaderIndexMetaData.getIndex().getUUID()) == false) {
372+
IndexRoutingTable indexRoutingTable = leaderClusterState.routingTable().index(leaderIndexMetaData.getIndex());
373+
if (indexRoutingTable != null &&
374+
// Leader indices can be in the cluster state, but not all primary shards may be ready yet.
375+
// This check ensures all primary shards have started, so that index following does not fail.
376+
// If not all primary shards are ready, then the next time the auto follow coordinator runs
377+
// this index will be auto followed.
378+
indexRoutingTable.allPrimaryShardsActive() &&
379+
followedIndexUUIDs.contains(leaderIndexMetaData.getIndex().getUUID()) == false) {
371380
// TODO: iterate over the indices in the followerClusterState and check whether an IndexMetaData
372381
// has a leader index uuid custom metadata entry that matches with uuid of leaderIndexMetaData variable
373382
// If so then handle it differently: not follow it, but just add an entry to

x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/action/AutoFollowCoordinatorTests.java

+87-21
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,11 @@
1111
import org.elasticsearch.cluster.ClusterState;
1212
import org.elasticsearch.cluster.metadata.IndexMetaData;
1313
import org.elasticsearch.cluster.metadata.MetaData;
14+
import org.elasticsearch.cluster.routing.IndexRoutingTable;
15+
import org.elasticsearch.cluster.routing.RoutingTable;
16+
import org.elasticsearch.cluster.routing.ShardRouting;
17+
import org.elasticsearch.cluster.routing.ShardRoutingState;
18+
import org.elasticsearch.cluster.routing.TestShardRouting;
1419
import org.elasticsearch.cluster.service.ClusterService;
1520
import org.elasticsearch.common.collect.Tuple;
1621
import org.elasticsearch.common.settings.Settings;
@@ -49,12 +54,7 @@ public void testAutoFollower() {
4954
Client client = mock(Client.class);
5055
when(client.getRemoteClusterClient(anyString())).thenReturn(client);
5156

52-
ClusterState leaderState = ClusterState.builder(new ClusterName("remote"))
53-
.metaData(MetaData.builder().put(IndexMetaData.builder("logs-20190101")
54-
.settings(settings(Version.CURRENT).put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true))
55-
.numberOfShards(1)
56-
.numberOfReplicas(0)))
57-
.build();
57+
ClusterState leaderState = createRemoteClusterState("logs-20190101");
5858

5959
AutoFollowPattern autoFollowPattern = new AutoFollowPattern("remote", Collections.singletonList("logs-*"),
6060
null, null, null, null, null, null, null, null, null, null, null);
@@ -168,13 +168,7 @@ void updateAutoFollowMetadata(Function<ClusterState, ClusterState> updateFunctio
168168
public void testAutoFollowerUpdateClusterStateFailure() {
169169
Client client = mock(Client.class);
170170
when(client.getRemoteClusterClient(anyString())).thenReturn(client);
171-
172-
ClusterState leaderState = ClusterState.builder(new ClusterName("remote"))
173-
.metaData(MetaData.builder().put(IndexMetaData.builder("logs-20190101")
174-
.settings(settings(Version.CURRENT).put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true))
175-
.numberOfShards(1)
176-
.numberOfReplicas(0)))
177-
.build();
171+
ClusterState leaderState = createRemoteClusterState("logs-20190101");
178172

179173
AutoFollowPattern autoFollowPattern = new AutoFollowPattern("remote", Collections.singletonList("logs-*"),
180174
null, null, null, null, null, null, null, null, null, null, null);
@@ -230,13 +224,7 @@ void updateAutoFollowMetadata(Function<ClusterState, ClusterState> updateFunctio
230224
public void testAutoFollowerCreateAndFollowApiCallFailure() {
231225
Client client = mock(Client.class);
232226
when(client.getRemoteClusterClient(anyString())).thenReturn(client);
233-
234-
ClusterState leaderState = ClusterState.builder(new ClusterName("remote"))
235-
.metaData(MetaData.builder().put(IndexMetaData.builder("logs-20190101")
236-
.settings(settings(Version.CURRENT).put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true))
237-
.numberOfShards(1)
238-
.numberOfReplicas(0)))
239-
.build();
227+
ClusterState leaderState = createRemoteClusterState("logs-20190101");
240228

241229
AutoFollowPattern autoFollowPattern = new AutoFollowPattern("remote", Collections.singletonList("logs-*"),
242230
null, null, null, null, null, null, null, null, null, null, null);
@@ -299,24 +287,39 @@ public void testGetLeaderIndicesToFollow() {
299287
new AutoFollowMetadata(Collections.singletonMap("remote", autoFollowPattern), Collections.emptyMap(), headers)))
300288
.build();
301289

290+
RoutingTable.Builder routingTableBuilder = RoutingTable.builder();
302291
MetaData.Builder imdBuilder = MetaData.builder();
303292
for (int i = 0; i < 5; i++) {
293+
String indexName = "metrics-" + i;
304294
Settings.Builder builder = Settings.builder()
305295
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
306-
.put(IndexMetaData.SETTING_INDEX_UUID, "metrics-" + i)
296+
.put(IndexMetaData.SETTING_INDEX_UUID, indexName)
307297
.put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), i % 2 == 0);
308298
imdBuilder.put(IndexMetaData.builder("metrics-" + i)
309299
.settings(builder)
310300
.numberOfShards(1)
311301
.numberOfReplicas(0));
302+
303+
ShardRouting shardRouting =
304+
TestShardRouting.newShardRouting(indexName, 0, "1", true, ShardRoutingState.INITIALIZING).moveToStarted();
305+
IndexRoutingTable indexRoutingTable = IndexRoutingTable.builder(imdBuilder.get(indexName).getIndex())
306+
.addShard(shardRouting)
307+
.build();
308+
routingTableBuilder.add(indexRoutingTable);
312309
}
310+
313311
imdBuilder.put(IndexMetaData.builder("logs-0")
314312
.settings(settings(Version.CURRENT))
315313
.numberOfShards(1)
316314
.numberOfReplicas(0));
315+
ShardRouting shardRouting =
316+
TestShardRouting.newShardRouting("logs-0", 0, "1", true, ShardRoutingState.INITIALIZING).moveToStarted();
317+
IndexRoutingTable indexRoutingTable = IndexRoutingTable.builder(imdBuilder.get("logs-0").getIndex()).addShard(shardRouting).build();
318+
routingTableBuilder.add(indexRoutingTable);
317319

318320
ClusterState leaderState = ClusterState.builder(new ClusterName("remote"))
319321
.metaData(imdBuilder)
322+
.routingTable(routingTableBuilder.build())
320323
.build();
321324

322325
List<Index> result = AutoFollower.getLeaderIndicesToFollow("remote", autoFollowPattern, leaderState, followerState,
@@ -335,6 +338,52 @@ public void testGetLeaderIndicesToFollow() {
335338
assertThat(result.get(1).getName(), equalTo("metrics-4"));
336339
}
337340

341+
/**
 * Checks that {@code AutoFollower.getLeaderIndicesToFollow} leaves out a leader index whose
 * primary shard is still INITIALIZING, and returns it once that shard has been moved to started.
 */
public void testGetLeaderIndicesToFollow_shardsNotStarted() {
342+
AutoFollowPattern autoFollowPattern = new AutoFollowPattern("remote", Collections.singletonList("*"),
343+
null, null, null, null, null, null, null, null, null, null, null);
344+
Map<String, Map<String, String>> headers = new HashMap<>();
345+
ClusterState followerState = ClusterState.builder(new ClusterName("remote"))
346+
.metaData(MetaData.builder().putCustom(AutoFollowMetadata.TYPE,
347+
new AutoFollowMetadata(Collections.singletonMap("remote", autoFollowPattern), Collections.emptyMap(), headers)))
348+
.build();
349+
350+
// Leader state with two indices: the primary shard of index1 is started,
// while the primary shard of index2 is still initializing:
351+
ClusterState leaderState = createRemoteClusterState("index1");
352+
MetaData.Builder mBuilder= MetaData.builder(leaderState.metaData());
353+
mBuilder.put(IndexMetaData.builder("index2")
354+
.settings(settings(Version.CURRENT).put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true))
355+
.numberOfShards(1)
356+
.numberOfReplicas(0));
357+
ShardRouting shardRouting =
358+
TestShardRouting.newShardRouting("index2", 0, "1", true, ShardRoutingState.INITIALIZING);
359+
IndexRoutingTable indexRoutingTable = IndexRoutingTable.builder(mBuilder.get("index2").getIndex()
360+
).addShard(shardRouting).build();
361+
leaderState = ClusterState.builder(leaderState.getClusterName())
362+
.metaData(mBuilder)
363+
.routingTable(RoutingTable.builder(leaderState.routingTable()).add(indexRoutingTable).build())
364+
.build();
365+
366+
// Only index1 is expected here; index2 is not returned while its primary shard is initializing.
List<Index> result = AutoFollower.getLeaderIndicesToFollow("remote", autoFollowPattern, leaderState, followerState,
367+
Collections.emptyList());
368+
assertThat(result.size(), equalTo(1));
369+
assertThat(result.get(0).getName(), equalTo("index1"));
370+
371+
// Start the primary shard of index2 and rebuild the leader state with the updated routing table:
372+
shardRouting = shardRouting.moveToStarted();
373+
indexRoutingTable = IndexRoutingTable.builder(leaderState.metaData().indices().get("index2").getIndex())
374+
.addShard(shardRouting).build();
375+
leaderState = ClusterState.builder(leaderState.getClusterName())
376+
.metaData(leaderState.metaData())
377+
.routingTable(RoutingTable.builder(leaderState.routingTable()).add(indexRoutingTable).build())
378+
.build();
379+
380+
// Both indices are expected now that the primary shard of index2 has started.
result = AutoFollower.getLeaderIndicesToFollow("remote", autoFollowPattern, leaderState, followerState, Collections.emptyList());
381+
assertThat(result.size(), equalTo(2));
382+
// Sort by name so the positional assertions below do not depend on the order of the returned list.
result.sort(Comparator.comparing(Index::getName));
383+
assertThat(result.get(0).getName(), equalTo("index1"));
384+
assertThat(result.get(1).getName(), equalTo("index2"));
385+
}
386+
338387
public void testGetFollowerIndexName() {
339388
AutoFollowPattern autoFollowPattern = new AutoFollowPattern("remote", Collections.singletonList("metrics-*"), null, null,
340389
null, null, null, null, null, null, null, null, null);
@@ -408,4 +457,21 @@ public void testStats() {
408457
assertThat(autoFollowStats.getRecentAutoFollowErrors().get("_alias2:index2").getCause().getMessage(), equalTo("error"));
409458
}
410459

460+
private static ClusterState createRemoteClusterState(String indexName) {
461+
IndexMetaData indexMetaData = IndexMetaData.builder(indexName)
462+
.settings(settings(Version.CURRENT).put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true))
463+
.numberOfShards(1)
464+
.numberOfReplicas(0)
465+
.build();
466+
ClusterState.Builder csBuilder = ClusterState.builder(new ClusterName("remote"))
467+
.metaData(MetaData.builder().put(indexMetaData, true));
468+
469+
ShardRouting shardRouting =
470+
TestShardRouting.newShardRouting(indexName, 0, "1", true, ShardRoutingState.INITIALIZING).moveToStarted();
471+
IndexRoutingTable indexRoutingTable = IndexRoutingTable.builder(indexMetaData.getIndex()).addShard(shardRouting).build();
472+
csBuilder.routingTable(RoutingTable.builder().add(indexRoutingTable).build()).build();
473+
474+
return csBuilder.build();
475+
}
476+
411477
}

0 commit comments

Comments
 (0)