@@ -37,6 +37,7 @@
 import org.elasticsearch.test.IndexSettingsModule;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
@@ -50,6 +51,7 @@
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.function.BiConsumer;
+import java.util.function.Consumer;
 import java.util.function.Function;
 import java.util.function.LongConsumer;
 import java.util.stream.Collectors;
@@ -61,6 +63,9 @@
 import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_SEQ_NO;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.greaterThan;
+import static org.hamcrest.Matchers.greaterThanOrEqualTo;
+import static org.hamcrest.Matchers.hasItem;
+import static org.hamcrest.Matchers.lessThanOrEqualTo;
 import static org.hamcrest.Matchers.not;
 
 public class ReplicationTrackerTests extends ReplicationTrackerTestCase {
@@ -975,4 +980,158 @@ private static void addPeerRecoveryRetentionLease(final ReplicationTracker track
         addPeerRecoveryRetentionLease(tracker, AllocationId.newInitializing(allocationId));
     }
 
+    public void testPeerRecoveryRetentionLeaseCreationAndRenewal() {
+
+        final int numberOfActiveAllocationsIds = randomIntBetween(1, 8);
+        final int numberOfInitializingIds = randomIntBetween(0, 8);
+        final Tuple<Set<AllocationId>, Set<AllocationId>> activeAndInitializingAllocationIds =
+            randomActiveAndInitializingAllocationIds(numberOfActiveAllocationsIds, numberOfInitializingIds);
+        final Set<AllocationId> activeAllocationIds = activeAndInitializingAllocationIds.v1();
+        final Set<AllocationId> initializingAllocationIds = activeAndInitializingAllocationIds.v2();
+
+        final AllocationId primaryId = activeAllocationIds.iterator().next();
+
+        final long initialClusterStateVersion = randomNonNegativeLong();
+
+        final AtomicLong currentTimeMillis = new AtomicLong(0L);
+        final ReplicationTracker tracker = newTracker(primaryId, updatedGlobalCheckpoint::set, currentTimeMillis::get);
+
+        final long retentionLeaseExpiryTimeMillis = tracker.indexSettings().getRetentionLeaseMillis();
+        final long peerRecoveryRetentionLeaseRenewalTimeMillis = retentionLeaseExpiryTimeMillis / 2;
+
+        final long maximumTestTimeMillis = 13 * retentionLeaseExpiryTimeMillis;
+        final long testStartTimeMillis = randomLongBetween(0L, Long.MAX_VALUE - maximumTestTimeMillis);
+        currentTimeMillis.set(testStartTimeMillis);
+
+        final Function<AllocationId, RetentionLease> retentionLeaseFromAllocationId = allocationId
+            -> new RetentionLease(ReplicationTracker.getPeerRecoveryRetentionLeaseId(nodeIdFromAllocationId(allocationId)),
+            0L, currentTimeMillis.get(), ReplicationTracker.PEER_RECOVERY_RETENTION_LEASE_SOURCE);
+
+        final List<RetentionLease> initialLeases = new ArrayList<>();
+        if (randomBoolean()) {
+            initialLeases.add(retentionLeaseFromAllocationId.apply(primaryId));
+        }
+        for (final AllocationId replicaId : initializingAllocationIds) {
+            if (randomBoolean()) {
+                initialLeases.add(retentionLeaseFromAllocationId.apply(replicaId));
+            }
+        }
+        for (int i = randomIntBetween(0, 5); i > 0; i--) {
+            initialLeases.add(retentionLeaseFromAllocationId.apply(AllocationId.newInitializing()));
+        }
+        tracker.updateRetentionLeasesOnReplica(new RetentionLeases(randomNonNegativeLong(), randomNonNegativeLong(), initialLeases));
+
+        IndexShardRoutingTable routingTable = routingTable(initializingAllocationIds, primaryId);
+        tracker.updateFromMaster(initialClusterStateVersion, ids(activeAllocationIds), routingTable);
+        tracker.activatePrimaryMode(NO_OPS_PERFORMED);
+        assertTrue("primary's retention lease should exist",
+            tracker.getRetentionLeases().contains(ReplicationTracker.getPeerRecoveryRetentionLeaseId(routingTable.primaryShard())));
+
+        final Consumer<Runnable> assertAsTimePasses = assertion -> {
+            final long startTime = currentTimeMillis.get();
+            while (currentTimeMillis.get() < startTime + retentionLeaseExpiryTimeMillis * 2) {
+                currentTimeMillis.addAndGet(randomLongBetween(0L, retentionLeaseExpiryTimeMillis * 2));
+                tracker.renewPeerRecoveryRetentionLeases();
+                tracker.getRetentionLeases(true);
+                assertion.run();
+            }
+        };
+
+        assertAsTimePasses.accept(() -> {
+            // Leases for assigned replicas do not expire
+            final RetentionLeases retentionLeases = tracker.getRetentionLeases();
+            for (final AllocationId replicaId : initializingAllocationIds) {
+                final String leaseId = retentionLeaseFromAllocationId.apply(replicaId).id();
+                assertTrue("should not have removed lease for " + replicaId + " in " + retentionLeases,
+                    initialLeases.stream().noneMatch(l -> l.id().equals(leaseId)) || retentionLeases.contains(leaseId));
+            }
+        });
+
+        // Leases that don't correspond to assigned replicas, however, are expired by this time.
+        final Set<String> expectedLeaseIds = Stream.concat(Stream.of(primaryId), initializingAllocationIds.stream())
+            .map(allocationId -> retentionLeaseFromAllocationId.apply(allocationId).id()).collect(Collectors.toSet());
+        for (final RetentionLease retentionLease : tracker.getRetentionLeases().leases()) {
+            assertThat(expectedLeaseIds, hasItem(retentionLease.id()));
+        }
+
+        for (AllocationId replicaId : initializingAllocationIds) {
+            markAsTrackingAndInSyncQuietly(tracker, replicaId.getId(), NO_OPS_PERFORMED);
+        }
+
+        assertThat(tracker.getRetentionLeases().leases().stream().map(RetentionLease::id).collect(Collectors.toSet()),
+            equalTo(expectedLeaseIds));
+
+        assertAsTimePasses.accept(() -> {
+            // Leases still don't expire
+            assertThat(tracker.getRetentionLeases().leases().stream().map(RetentionLease::id).collect(Collectors.toSet()),
+                equalTo(expectedLeaseIds));
+
+            // Also leases are renewed before reaching half the expiry time
+            //noinspection OptionalGetWithoutIsPresent
+            assertThat(tracker.getRetentionLeases() + " renewed before too long",
+                tracker.getRetentionLeases().leases().stream().mapToLong(RetentionLease::timestamp).min().getAsLong(),
+                greaterThanOrEqualTo(currentTimeMillis.get() - peerRecoveryRetentionLeaseRenewalTimeMillis));
+        });
+
+        IndexShardRoutingTable.Builder routingTableBuilder = new IndexShardRoutingTable.Builder(routingTable);
+        for (ShardRouting replicaShard : routingTable.replicaShards()) {
+            routingTableBuilder.removeShard(replicaShard);
+            routingTableBuilder.addShard(replicaShard.moveToStarted());
+        }
+        routingTable = routingTableBuilder.build();
+        activeAllocationIds.addAll(initializingAllocationIds);
+
+        tracker.updateFromMaster(initialClusterStateVersion + randomLongBetween(1, 10), ids(activeAllocationIds), routingTable);
+
+        assertAsTimePasses.accept(() -> {
+            // Leases still don't expire
+            assertThat(tracker.getRetentionLeases().leases().stream().map(RetentionLease::id).collect(Collectors.toSet()),
+                equalTo(expectedLeaseIds));
+            // ... and any extra peer recovery retention leases are expired immediately since the shard is fully active
+            tracker.addPeerRecoveryRetentionLease(randomAlphaOfLength(10), randomNonNegativeLong(), ActionListener.wrap(() -> {}));
+        });
+
+        tracker.renewPeerRecoveryRetentionLeases();
+        assertTrue("expired extra lease", tracker.getRetentionLeases(true).v1());
+
+        final AllocationId advancingAllocationId
+            = initializingAllocationIds.isEmpty() || rarely() ? primaryId : randomFrom(initializingAllocationIds);
+        final String advancingLeaseId = retentionLeaseFromAllocationId.apply(advancingAllocationId).id();
+
+        final long initialGlobalCheckpoint
+            = Math.max(NO_OPS_PERFORMED, tracker.getTrackedLocalCheckpointForShard(advancingAllocationId.getId()).globalCheckpoint);
+        assertThat(tracker.getRetentionLeases().get(advancingLeaseId).retainingSequenceNumber(), equalTo(initialGlobalCheckpoint + 1));
+        final long newGlobalCheckpoint = initialGlobalCheckpoint + randomLongBetween(1, 1000);
+        tracker.updateGlobalCheckpointForShard(advancingAllocationId.getId(), newGlobalCheckpoint);
+        tracker.renewPeerRecoveryRetentionLeases();
+        assertThat("lease was renewed because the shard advanced its global checkpoint",
+            tracker.getRetentionLeases().get(advancingLeaseId).retainingSequenceNumber(), equalTo(newGlobalCheckpoint + 1));
+
+        final long initialVersion = tracker.getRetentionLeases().version();
+        tracker.renewPeerRecoveryRetentionLeases();
+        assertThat("immediate renewal is a no-op", tracker.getRetentionLeases().version(), equalTo(initialVersion));
+
+        //noinspection OptionalGetWithoutIsPresent
+        final long millisUntilFirstRenewal
+            = tracker.getRetentionLeases().leases().stream().mapToLong(RetentionLease::timestamp).min().getAsLong()
+            + peerRecoveryRetentionLeaseRenewalTimeMillis
+            - currentTimeMillis.get();
+
+        if (millisUntilFirstRenewal != 0) {
+            final long shorterThanRenewalTime = randomLongBetween(0L, millisUntilFirstRenewal - 1);
+            currentTimeMillis.addAndGet(shorterThanRenewalTime);
+            tracker.renewPeerRecoveryRetentionLeases();
+            assertThat("renewal is a no-op after a short time", tracker.getRetentionLeases().version(), equalTo(initialVersion));
+            currentTimeMillis.addAndGet(millisUntilFirstRenewal - shorterThanRenewalTime);
+        }
+
+        tracker.renewPeerRecoveryRetentionLeases();
+        assertThat("renewal happens after a sufficiently long time", tracker.getRetentionLeases().version(), greaterThan(initialVersion));
+        assertTrue("all leases were renewed",
+            tracker.getRetentionLeases().leases().stream().allMatch(l -> l.timestamp() == currentTimeMillis.get()));
+
+        assertThat("test ran for too long, potentially leading to overflow",
+            currentTimeMillis.get(), lessThanOrEqualTo(testStartTimeMillis + maximumTestTimeMillis));
+    }
+
 }