Skip to content

Propagate last node to reinitialized routing tables #91549

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -340,35 +340,35 @@ public Builder(Index index) {
* Initializes a new empty index, as if it was created from an API.
*/
public Builder initializeAsNew(IndexMetadata indexMetadata) {
return initializeEmpty(indexMetadata, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null));
return initializeEmpty(indexMetadata, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null), null);
}

/**
* Initializes an existing index.
*/
public Builder initializeAsRecovery(IndexMetadata indexMetadata) {
return initializeEmpty(indexMetadata, new UnassignedInfo(UnassignedInfo.Reason.CLUSTER_RECOVERED, null));
return initializeEmpty(indexMetadata, new UnassignedInfo(UnassignedInfo.Reason.CLUSTER_RECOVERED, null), null);
}

/**
* Initializes a new index caused by dangling index imported.
*/
public Builder initializeAsFromDangling(IndexMetadata indexMetadata) {
return initializeEmpty(indexMetadata, new UnassignedInfo(UnassignedInfo.Reason.DANGLING_INDEX_IMPORTED, null));
return initializeEmpty(indexMetadata, new UnassignedInfo(UnassignedInfo.Reason.DANGLING_INDEX_IMPORTED, null), null);
}

/**
* Initializes a new empty index, as a result of opening a closed index.
*/
public Builder initializeAsFromCloseToOpen(IndexMetadata indexMetadata) {
return initializeEmpty(indexMetadata, new UnassignedInfo(UnassignedInfo.Reason.INDEX_REOPENED, null));
public Builder initializeAsFromCloseToOpen(IndexMetadata indexMetadata, IndexRoutingTable indexRoutingTable) {
return initializeEmpty(indexMetadata, new UnassignedInfo(UnassignedInfo.Reason.INDEX_REOPENED, null), indexRoutingTable);
}

/**
* Initializes a new empty index, as a result of closing an opened index.
*/
public Builder initializeAsFromOpenToClose(IndexMetadata indexMetadata) {
return initializeEmpty(indexMetadata, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CLOSED, null));
public Builder initializeAsFromOpenToClose(IndexMetadata indexMetadata, IndexRoutingTable indexRoutingTable) {
return initializeEmpty(indexMetadata, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CLOSED, null), indexRoutingTable);
}

/**
Expand All @@ -387,13 +387,17 @@ public Builder initializeAsNewRestore(
+ recoverySource.snapshot().getSnapshotId().getName()
+ "]"
);
return initializeAsRestore(indexMetadata, recoverySource, ignoreShards, true, unassignedInfo);
return initializeAsRestore(indexMetadata, recoverySource, ignoreShards, true, unassignedInfo, null);
}

/**
* Initializes an existing index, to be restored from a snapshot
*/
public Builder initializeAsRestore(IndexMetadata indexMetadata, SnapshotRecoverySource recoverySource) {
public Builder initializeAsRestore(
IndexMetadata indexMetadata,
SnapshotRecoverySource recoverySource,
IndexRoutingTable previousIndexRoutingTable
) {
final UnassignedInfo unassignedInfo = new UnassignedInfo(
UnassignedInfo.Reason.EXISTING_INDEX_RESTORED,
"restore_source["
Expand All @@ -402,7 +406,7 @@ public Builder initializeAsRestore(IndexMetadata indexMetadata, SnapshotRecovery
+ recoverySource.snapshot().getSnapshotId().getName()
+ "]"
);
return initializeAsRestore(indexMetadata, recoverySource, null, false, unassignedInfo);
return initializeAsRestore(indexMetadata, recoverySource, null, false, unassignedInfo, previousIndexRoutingTable);
}

/**
Expand All @@ -413,7 +417,8 @@ private Builder initializeAsRestore(
SnapshotRecoverySource recoverySource,
Set<Integer> ignoreShards,
boolean asNew,
UnassignedInfo unassignedInfo
UnassignedInfo unassignedInfo,
@Nullable IndexRoutingTable previousIndexRoutingTable
) {
assert indexMetadata.getIndex().equals(index);
if (shards != null) {
Expand All @@ -422,6 +427,7 @@ private Builder initializeAsRestore(
shards = new IndexShardRoutingTable.Builder[indexMetadata.getNumberOfShards()];
for (int shardNumber = 0; shardNumber < indexMetadata.getNumberOfShards(); shardNumber++) {
ShardId shardId = new ShardId(index, shardNumber);
final var previousNodes = getPreviousNodes(previousIndexRoutingTable, shardNumber);
IndexShardRoutingTable.Builder indexShardRoutingBuilder = IndexShardRoutingTable.builder(shardId);
for (int i = 0; i <= indexMetadata.getNumberOfReplicas(); i++) {
boolean primary = i == 0;
Expand All @@ -441,7 +447,7 @@ private Builder initializeAsRestore(
shardId,
primary,
primary ? recoverySource : PeerRecoverySource.INSTANCE,
unassignedInfo
withLastAllocatedNodeId(unassignedInfo, previousNodes, i)
)
);
}
Expand All @@ -454,14 +460,20 @@ private Builder initializeAsRestore(
/**
* Initializes a new empty index, with an option to control if its from an API or not.
*/
private Builder initializeEmpty(IndexMetadata indexMetadata, UnassignedInfo unassignedInfo) {
private Builder initializeEmpty(
IndexMetadata indexMetadata,
UnassignedInfo unassignedInfo,
@Nullable IndexRoutingTable previousIndexRoutingTable
) {
assert indexMetadata.getIndex().equals(index);
assert previousIndexRoutingTable == null || previousIndexRoutingTable.size() == indexMetadata.getNumberOfShards();
if (shards != null) {
throw new IllegalStateException("trying to initialize an index with fresh shards, but already has shards created");
}
shards = new IndexShardRoutingTable.Builder[indexMetadata.getNumberOfShards()];
for (int shardNumber = 0; shardNumber < indexMetadata.getNumberOfShards(); shardNumber++) {
ShardId shardId = new ShardId(index, shardNumber);
final var previousNodes = getPreviousNodes(previousIndexRoutingTable, shardNumber);
final RecoverySource primaryRecoverySource;
if (indexMetadata.inSyncAllocationIds(shardNumber).isEmpty() == false) {
// we have previous valid copies for this shard. use them for recovery
Expand All @@ -481,7 +493,7 @@ private Builder initializeEmpty(IndexMetadata indexMetadata, UnassignedInfo unas
shardId,
primary,
primary ? primaryRecoverySource : PeerRecoverySource.INSTANCE,
unassignedInfo
withLastAllocatedNodeId(unassignedInfo, previousNodes, i)
)
);
}
Expand All @@ -490,6 +502,50 @@ private Builder initializeEmpty(IndexMetadata indexMetadata, UnassignedInfo unas
return this;
}

private static List<String> getPreviousNodes(@Nullable IndexRoutingTable previousIndexRoutingTable, int shardId) {
if (previousIndexRoutingTable == null) {
return null;
}
final var previousShardRoutingTable = previousIndexRoutingTable.shard(shardId);
if (previousShardRoutingTable == null) {
return null;
}
final var primaryNodeId = previousShardRoutingTable.primaryShard().currentNodeId();
if (primaryNodeId == null) {
return null;
}
final var previousNodes = new ArrayList<String>(previousShardRoutingTable.size());
previousNodes.add(primaryNodeId); // primary is recreated first, so re-use its location
for (final var assignedShard : previousShardRoutingTable.assignedShards()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This also includes the target of relocations. I wonder if we should only look at active shards, since anything less will anyway not be considered good enough by the gateway allocator?

The problem I see with this is that if a relocation is ongoing, we risk a copy having a last allocated node id that is much worse than it could be (i.e., a node that only has just started the recovery)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, thanks - see bd12ab9.

if (assignedShard.initializing() && assignedShard.relocatingNodeId() != null) {
continue;
}
final var currentNodeId = assignedShard.currentNodeId();
assert currentNodeId != null;
if (primaryNodeId.equals(currentNodeId) == false) {
previousNodes.add(currentNodeId);
}
}
return previousNodes;
}

private static UnassignedInfo withLastAllocatedNodeId(UnassignedInfo unassignedInfo, List<String> previousNodes, int shardCopy) {
return previousNodes == null || previousNodes.size() <= shardCopy
? unassignedInfo
: new UnassignedInfo(
unassignedInfo.getReason(),
unassignedInfo.getMessage(),
unassignedInfo.getFailure(),
unassignedInfo.getNumFailedAllocations(),
unassignedInfo.getUnassignedTimeInNanos(),
unassignedInfo.getUnassignedTimeInMillis(),
unassignedInfo.isDelayed(),
unassignedInfo.getLastAllocationStatus(),
unassignedInfo.getFailedNodeIds(),
previousNodes.get(shardCopy)
);
}

public Builder addReplica() {
assert shards != null;
for (IndexShardRoutingTable.Builder existing : shards) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,7 @@ public Builder addAsFromDangling(IndexMetadata indexMetadata) {
public Builder addAsFromCloseToOpen(IndexMetadata indexMetadata) {
if (indexMetadata.getState() == IndexMetadata.State.OPEN) {
IndexRoutingTable.Builder indexRoutingBuilder = new IndexRoutingTable.Builder(indexMetadata.getIndex())
.initializeAsFromCloseToOpen(indexMetadata);
.initializeAsFromCloseToOpen(indexMetadata, indicesRouting.get(indexMetadata.getIndex().getName()));
add(indexRoutingBuilder);
}
return this;
Expand All @@ -546,14 +546,15 @@ public Builder addAsFromCloseToOpen(IndexMetadata indexMetadata) {
public Builder addAsFromOpenToClose(IndexMetadata indexMetadata) {
assert isIndexVerifiedBeforeClosed(indexMetadata);
IndexRoutingTable.Builder indexRoutingBuilder = new IndexRoutingTable.Builder(indexMetadata.getIndex())
.initializeAsFromOpenToClose(indexMetadata);
.initializeAsFromOpenToClose(indexMetadata, indicesRouting.get(indexMetadata.getIndex().getName()));
return add(indexRoutingBuilder);
}

public Builder addAsRestore(IndexMetadata indexMetadata, SnapshotRecoverySource recoverySource) {
IndexRoutingTable.Builder indexRoutingBuilder = new IndexRoutingTable.Builder(indexMetadata.getIndex()).initializeAsRestore(
indexMetadata,
recoverySource
recoverySource,
indicesRouting.get(indexMetadata.getIndex().getName())
);
add(indexRoutingBuilder);
return this;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,9 @@ public String shortSummary() {
sb.append(", failed_nodes[").append(failedNodeIds).append("]");
}
sb.append(", delayed=").append(delayed);
if (lastAllocatedNodeId != null) {
sb.append(", last_node[").append(lastAllocatedNodeId).append("]");
}
String details = getDetails();

if (details != null) {
Expand Down
Loading