Skip to content

Commit 1f72f2e

Browse files
authored
Propagate last node to reinitialized routing tables (#91549)
When closing or opening an index, or restoring a snapshot over a closed index, we reinitialize its routing table from scratch and expect the gateway allocators to select the appropriate node for each shard copy. With this commit we also keep track of the last-allocated node ID for each copy which makes it more likely that the desired balance of these shards remains unchanged too. Closes #91472
1 parent 089ee1d commit 1f72f2e

File tree

4 files changed

+265
-26
lines changed

4 files changed

+265
-26
lines changed

server/src/main/java/org/elasticsearch/cluster/routing/IndexRoutingTable.java

Lines changed: 70 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -340,35 +340,35 @@ public Builder(Index index) {
340340
* Initializes a new empty index, as if it was created from an API.
341341
*/
342342
public Builder initializeAsNew(IndexMetadata indexMetadata) {
343-
return initializeEmpty(indexMetadata, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null));
343+
return initializeEmpty(indexMetadata, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null), null);
344344
}
345345

346346
/**
347347
* Initializes an existing index.
348348
*/
349349
public Builder initializeAsRecovery(IndexMetadata indexMetadata) {
350-
return initializeEmpty(indexMetadata, new UnassignedInfo(UnassignedInfo.Reason.CLUSTER_RECOVERED, null));
350+
return initializeEmpty(indexMetadata, new UnassignedInfo(UnassignedInfo.Reason.CLUSTER_RECOVERED, null), null);
351351
}
352352

353353
/**
354354
* Initializes a new index caused by dangling index imported.
355355
*/
356356
public Builder initializeAsFromDangling(IndexMetadata indexMetadata) {
357-
return initializeEmpty(indexMetadata, new UnassignedInfo(UnassignedInfo.Reason.DANGLING_INDEX_IMPORTED, null));
357+
return initializeEmpty(indexMetadata, new UnassignedInfo(UnassignedInfo.Reason.DANGLING_INDEX_IMPORTED, null), null);
358358
}
359359

360360
/**
361361
* Initializes a new empty index, as a result of opening a closed index.
362362
*/
363-
public Builder initializeAsFromCloseToOpen(IndexMetadata indexMetadata) {
364-
return initializeEmpty(indexMetadata, new UnassignedInfo(UnassignedInfo.Reason.INDEX_REOPENED, null));
363+
public Builder initializeAsFromCloseToOpen(IndexMetadata indexMetadata, IndexRoutingTable indexRoutingTable) {
364+
return initializeEmpty(indexMetadata, new UnassignedInfo(UnassignedInfo.Reason.INDEX_REOPENED, null), indexRoutingTable);
365365
}
366366

367367
/**
368368
* Initializes a new empty index, as a result of closing an opened index.
369369
*/
370-
public Builder initializeAsFromOpenToClose(IndexMetadata indexMetadata) {
371-
return initializeEmpty(indexMetadata, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CLOSED, null));
370+
public Builder initializeAsFromOpenToClose(IndexMetadata indexMetadata, IndexRoutingTable indexRoutingTable) {
371+
return initializeEmpty(indexMetadata, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CLOSED, null), indexRoutingTable);
372372
}
373373

374374
/**
@@ -387,13 +387,17 @@ public Builder initializeAsNewRestore(
387387
+ recoverySource.snapshot().getSnapshotId().getName()
388388
+ "]"
389389
);
390-
return initializeAsRestore(indexMetadata, recoverySource, ignoreShards, true, unassignedInfo);
390+
return initializeAsRestore(indexMetadata, recoverySource, ignoreShards, true, unassignedInfo, null);
391391
}
392392

393393
/**
394394
* Initializes an existing index, to be restored from a snapshot
395395
*/
396-
public Builder initializeAsRestore(IndexMetadata indexMetadata, SnapshotRecoverySource recoverySource) {
396+
public Builder initializeAsRestore(
397+
IndexMetadata indexMetadata,
398+
SnapshotRecoverySource recoverySource,
399+
IndexRoutingTable previousIndexRoutingTable
400+
) {
397401
final UnassignedInfo unassignedInfo = new UnassignedInfo(
398402
UnassignedInfo.Reason.EXISTING_INDEX_RESTORED,
399403
"restore_source["
@@ -402,7 +406,7 @@ public Builder initializeAsRestore(IndexMetadata indexMetadata, SnapshotRecovery
402406
+ recoverySource.snapshot().getSnapshotId().getName()
403407
+ "]"
404408
);
405-
return initializeAsRestore(indexMetadata, recoverySource, null, false, unassignedInfo);
409+
return initializeAsRestore(indexMetadata, recoverySource, null, false, unassignedInfo, previousIndexRoutingTable);
406410
}
407411

408412
/**
@@ -413,7 +417,8 @@ private Builder initializeAsRestore(
413417
SnapshotRecoverySource recoverySource,
414418
Set<Integer> ignoreShards,
415419
boolean asNew,
416-
UnassignedInfo unassignedInfo
420+
UnassignedInfo unassignedInfo,
421+
@Nullable IndexRoutingTable previousIndexRoutingTable
417422
) {
418423
assert indexMetadata.getIndex().equals(index);
419424
if (shards != null) {
@@ -422,6 +427,7 @@ private Builder initializeAsRestore(
422427
shards = new IndexShardRoutingTable.Builder[indexMetadata.getNumberOfShards()];
423428
for (int shardNumber = 0; shardNumber < indexMetadata.getNumberOfShards(); shardNumber++) {
424429
ShardId shardId = new ShardId(index, shardNumber);
430+
final var previousNodes = getPreviousNodes(previousIndexRoutingTable, shardNumber);
425431
IndexShardRoutingTable.Builder indexShardRoutingBuilder = IndexShardRoutingTable.builder(shardId);
426432
for (int i = 0; i <= indexMetadata.getNumberOfReplicas(); i++) {
427433
boolean primary = i == 0;
@@ -441,7 +447,7 @@ private Builder initializeAsRestore(
441447
shardId,
442448
primary,
443449
primary ? recoverySource : PeerRecoverySource.INSTANCE,
444-
unassignedInfo
450+
withLastAllocatedNodeId(unassignedInfo, previousNodes, i)
445451
)
446452
);
447453
}
@@ -454,14 +460,20 @@ private Builder initializeAsRestore(
454460
/**
455461
* Initializes a new empty index, with an option to control if its from an API or not.
456462
*/
457-
private Builder initializeEmpty(IndexMetadata indexMetadata, UnassignedInfo unassignedInfo) {
463+
private Builder initializeEmpty(
464+
IndexMetadata indexMetadata,
465+
UnassignedInfo unassignedInfo,
466+
@Nullable IndexRoutingTable previousIndexRoutingTable
467+
) {
458468
assert indexMetadata.getIndex().equals(index);
469+
assert previousIndexRoutingTable == null || previousIndexRoutingTable.size() == indexMetadata.getNumberOfShards();
459470
if (shards != null) {
460471
throw new IllegalStateException("trying to initialize an index with fresh shards, but already has shards created");
461472
}
462473
shards = new IndexShardRoutingTable.Builder[indexMetadata.getNumberOfShards()];
463474
for (int shardNumber = 0; shardNumber < indexMetadata.getNumberOfShards(); shardNumber++) {
464475
ShardId shardId = new ShardId(index, shardNumber);
476+
final var previousNodes = getPreviousNodes(previousIndexRoutingTable, shardNumber);
465477
final RecoverySource primaryRecoverySource;
466478
if (indexMetadata.inSyncAllocationIds(shardNumber).isEmpty() == false) {
467479
// we have previous valid copies for this shard. use them for recovery
@@ -481,7 +493,7 @@ private Builder initializeEmpty(IndexMetadata indexMetadata, UnassignedInfo unas
481493
shardId,
482494
primary,
483495
primary ? primaryRecoverySource : PeerRecoverySource.INSTANCE,
484-
unassignedInfo
496+
withLastAllocatedNodeId(unassignedInfo, previousNodes, i)
485497
)
486498
);
487499
}
@@ -490,6 +502,50 @@ private Builder initializeEmpty(IndexMetadata indexMetadata, UnassignedInfo unas
490502
return this;
491503
}
492504

505+
private static List<String> getPreviousNodes(@Nullable IndexRoutingTable previousIndexRoutingTable, int shardId) {
506+
if (previousIndexRoutingTable == null) {
507+
return null;
508+
}
509+
final var previousShardRoutingTable = previousIndexRoutingTable.shard(shardId);
510+
if (previousShardRoutingTable == null) {
511+
return null;
512+
}
513+
final var primaryNodeId = previousShardRoutingTable.primaryShard().currentNodeId();
514+
if (primaryNodeId == null) {
515+
return null;
516+
}
517+
final var previousNodes = new ArrayList<String>(previousShardRoutingTable.size());
518+
previousNodes.add(primaryNodeId); // primary is recreated first, so re-use its location
519+
for (final var assignedShard : previousShardRoutingTable.assignedShards()) {
520+
if (assignedShard.initializing() && assignedShard.relocatingNodeId() != null) {
521+
continue;
522+
}
523+
final var currentNodeId = assignedShard.currentNodeId();
524+
assert currentNodeId != null;
525+
if (primaryNodeId.equals(currentNodeId) == false) {
526+
previousNodes.add(currentNodeId);
527+
}
528+
}
529+
return previousNodes;
530+
}
531+
532+
private static UnassignedInfo withLastAllocatedNodeId(UnassignedInfo unassignedInfo, List<String> previousNodes, int shardCopy) {
533+
return previousNodes == null || previousNodes.size() <= shardCopy
534+
? unassignedInfo
535+
: new UnassignedInfo(
536+
unassignedInfo.getReason(),
537+
unassignedInfo.getMessage(),
538+
unassignedInfo.getFailure(),
539+
unassignedInfo.getNumFailedAllocations(),
540+
unassignedInfo.getUnassignedTimeInNanos(),
541+
unassignedInfo.getUnassignedTimeInMillis(),
542+
unassignedInfo.isDelayed(),
543+
unassignedInfo.getLastAllocationStatus(),
544+
unassignedInfo.getFailedNodeIds(),
545+
previousNodes.get(shardCopy)
546+
);
547+
}
548+
493549
public Builder addReplica() {
494550
assert shards != null;
495551
for (IndexShardRoutingTable.Builder existing : shards) {

server/src/main/java/org/elasticsearch/cluster/routing/RoutingTable.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -537,7 +537,7 @@ public Builder addAsFromDangling(IndexMetadata indexMetadata) {
537537
public Builder addAsFromCloseToOpen(IndexMetadata indexMetadata) {
538538
if (indexMetadata.getState() == IndexMetadata.State.OPEN) {
539539
IndexRoutingTable.Builder indexRoutingBuilder = new IndexRoutingTable.Builder(indexMetadata.getIndex())
540-
.initializeAsFromCloseToOpen(indexMetadata);
540+
.initializeAsFromCloseToOpen(indexMetadata, indicesRouting.get(indexMetadata.getIndex().getName()));
541541
add(indexRoutingBuilder);
542542
}
543543
return this;
@@ -546,14 +546,15 @@ public Builder addAsFromCloseToOpen(IndexMetadata indexMetadata) {
546546
public Builder addAsFromOpenToClose(IndexMetadata indexMetadata) {
547547
assert isIndexVerifiedBeforeClosed(indexMetadata);
548548
IndexRoutingTable.Builder indexRoutingBuilder = new IndexRoutingTable.Builder(indexMetadata.getIndex())
549-
.initializeAsFromOpenToClose(indexMetadata);
549+
.initializeAsFromOpenToClose(indexMetadata, indicesRouting.get(indexMetadata.getIndex().getName()));
550550
return add(indexRoutingBuilder);
551551
}
552552

553553
public Builder addAsRestore(IndexMetadata indexMetadata, SnapshotRecoverySource recoverySource) {
554554
IndexRoutingTable.Builder indexRoutingBuilder = new IndexRoutingTable.Builder(indexMetadata.getIndex()).initializeAsRestore(
555555
indexMetadata,
556-
recoverySource
556+
recoverySource,
557+
indicesRouting.get(indexMetadata.getIndex().getName())
557558
);
558559
add(indexRoutingBuilder);
559560
return this;

server/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -492,6 +492,9 @@ public String shortSummary() {
492492
sb.append(", failed_nodes[").append(failedNodeIds).append("]");
493493
}
494494
sb.append(", delayed=").append(delayed);
495+
if (lastAllocatedNodeId != null) {
496+
sb.append(", last_node[").append(lastAllocatedNodeId).append("]");
497+
}
495498
String details = getDetails();
496499

497500
if (details != null) {

0 commit comments

Comments
 (0)