Skip to content

Reset starting seqno if fail to read last commit #45106

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -309,25 +309,6 @@ public RecoveryResponse read(StreamInput in) throws IOException {
}
}

/**
* Obtains a snapshot of the store metadata for the recovery target.
*
* @param recoveryTarget the target of the recovery
* @return a snapshot of the store metadata
*/
private static Store.MetadataSnapshot getStoreMetadataSnapshot(final Logger logger, final RecoveryTarget recoveryTarget) {
try {
return recoveryTarget.indexShard().snapshotStoreMetadata();
} catch (final org.apache.lucene.index.IndexNotFoundException e) {
// happens on an empty folder. no need to log
logger.trace("{} shard folder empty, recovering all files", recoveryTarget);
return Store.MetadataSnapshot.EMPTY;
} catch (final IOException e) {
logger.warn("error while listing local files, recovering as if there are none", e);
return Store.MetadataSnapshot.EMPTY;
}
}

/**
* Prepare the start recovery request.
*
Expand All @@ -343,7 +324,24 @@ public static StartRecoveryRequest getStartRecoveryRequest(Logger logger, Discov
final StartRecoveryRequest request;
logger.trace("{} collecting local files for [{}]", recoveryTarget.shardId(), recoveryTarget.sourceNode());

final Store.MetadataSnapshot metadataSnapshot = getStoreMetadataSnapshot(logger, recoveryTarget);
Store.MetadataSnapshot metadataSnapshot;
try {
metadataSnapshot = recoveryTarget.indexShard().snapshotStoreMetadata();
} catch (final org.apache.lucene.index.IndexNotFoundException e) {
// happens on an empty folder. no need to log
assert startingSeqNo == UNASSIGNED_SEQ_NO : startingSeqNo;
logger.trace("{} shard folder empty, recovering all files", recoveryTarget);
metadataSnapshot = Store.MetadataSnapshot.EMPTY;
} catch (final IOException e) {
if (startingSeqNo != UNASSIGNED_SEQ_NO) {
logger.warn(new ParameterizedMessage("error while listing local files, resetting the starting sequence number from {} " +
"to unassigned and recovering as if there are none", startingSeqNo), e);
startingSeqNo = UNASSIGNED_SEQ_NO;
} else {
logger.warn("error while listing local files, recovering as if there are none", e);
}
metadataSnapshot = Store.MetadataSnapshot.EMPTY;
}
logger.trace("{} local file count [{}]", recoveryTarget.shardId(), metadataSnapshot.size());
request = new StartRecoveryRequest(
recoveryTarget.shardId(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -258,4 +258,24 @@ public void testClosedIndexSkipsLocalRecovery() throws Exception {
assertThat(replica.getLastKnownGlobalCheckpoint(), equalTo(UNASSIGNED_SEQ_NO));
closeShards(replica);
}

public void testResetStartingSeqNoIfLastCommitCorrupted() throws Exception {
IndexShard shard = newStartedShard(false);
populateRandomData(shard);
DiscoveryNode pNode = new DiscoveryNode("foo", buildNewFakeTransportAddress(),
Collections.emptyMap(), Collections.emptySet(), Version.CURRENT);
DiscoveryNode rNode = new DiscoveryNode("foo", buildNewFakeTransportAddress(),
Collections.emptyMap(), Collections.emptySet(), Version.CURRENT);
shard = reinitShard(shard, ShardRoutingHelper.initWithSameId(shard.routingEntry(), RecoverySource.PeerRecoverySource.INSTANCE));
shard.markAsRecovering("peer recovery", new RecoveryState(shard.routingEntry(), pNode, rNode));
shard.prepareForIndexRecovery();
long startingSeqNo = shard.recoverLocallyUpToGlobalCheckpoint();
shard.store().markStoreCorrupted(new IOException("simulated"));
RecoveryTarget recoveryTarget = new RecoveryTarget(shard, null, null);
StartRecoveryRequest request = PeerRecoveryTargetService.getStartRecoveryRequest(logger, rNode, recoveryTarget, startingSeqNo);
assertThat(request.startingSeqNo(), equalTo(UNASSIGNED_SEQ_NO));
assertThat(request.metadataSnapshot().size(), equalTo(0));
recoveryTarget.decRef();
closeShards(shard);
}
}