|
23 | 23 | import com.carrotsearch.hppc.ObjectLongMap;
|
24 | 24 | import org.elasticsearch.Version;
|
25 | 25 | import org.elasticsearch.action.ActionListener;
|
| 26 | +import org.elasticsearch.action.support.GroupedActionListener; |
26 | 27 | import org.elasticsearch.action.support.replication.ReplicationResponse;
|
27 | 28 | import org.elasticsearch.cluster.metadata.IndexMetaData;
|
28 | 29 | import org.elasticsearch.cluster.routing.AllocationId;
|
@@ -201,6 +202,14 @@ public class ReplicationTracker extends AbstractIndexShardComponent implements L
|
201 | 202 | */
|
202 | 203 | private long persistedRetentionLeasesVersion;
|
203 | 204 |
|
| 205 | + /** |
| 206 | + * Whether there should be a peer recovery retention lease (PRRL) for every tracked shard copy. Always true on indices created from |
| 207 | + * {@link Version#V_7_4_0} onwards, because these versions create PRRLs properly. May be false on indices created in an earlier version |
| 208 | + * if we recently did a rolling upgrade and {@link ReplicationTracker#createMissingPeerRecoveryRetentionLeases(ActionListener)} has not |
| 209 | + * yet completed. Is only permitted to change from false to true; can be removed once support for pre-PRRL indices is no longer needed. |
| 210 | + */ |
| 211 | + private boolean hasAllPeerRecoveryRetentionLeases; |
| 212 | + |
204 | 213 | /**
|
205 | 214 | * Get all retention leases tracked on this shard.
|
206 | 215 | *
|
@@ -486,10 +495,10 @@ public synchronized void renewPeerRecoveryRetentionLeases() {
|
486 | 495 | if (retentionLease == null) {
|
487 | 496 | /*
|
488 | 497 | * If this shard copy is tracked then we got here via a rolling upgrade from an older version that doesn't
|
489 |
| - * create peer recovery retention leases for every shard copy. TODO create leases lazily in that situation. |
| 498 | + * create peer recovery retention leases for every shard copy. |
490 | 499 | */
|
491 | 500 | assert checkpoints.get(shardRouting.allocationId().getId()).tracked == false
|
492 |
| - || indexSettings.getIndexVersionCreated().before(Version.V_7_4_0); |
| 501 | + || hasAllPeerRecoveryRetentionLeases == false; |
493 | 502 | return false;
|
494 | 503 | }
|
495 | 504 | return retentionLease.timestamp() <= renewalTimeMillis
|
@@ -752,7 +761,7 @@ private boolean invariant() {
|
752 | 761 | if (primaryMode
|
753 | 762 | && indexSettings.isSoftDeleteEnabled()
|
754 | 763 | && indexSettings.getIndexMetaData().getState() == IndexMetaData.State.OPEN
|
755 |
| - && indexSettings.getIndexVersionCreated().onOrAfter(Version.V_7_4_0)) { |
| 764 | + && hasAllPeerRecoveryRetentionLeases) { |
756 | 765 | // all tracked shard copies have a corresponding peer-recovery retention lease
|
757 | 766 | for (final ShardRouting shardRouting : routingTable.assignedShards()) {
|
758 | 767 | if (checkpoints.get(shardRouting.allocationId().getId()).tracked) {
|
@@ -819,6 +828,7 @@ public ReplicationTracker(
|
819 | 828 | this.pendingInSync = new HashSet<>();
|
820 | 829 | this.routingTable = null;
|
821 | 830 | this.replicationGroup = null;
|
| 831 | + this.hasAllPeerRecoveryRetentionLeases = indexSettings.getIndexVersionCreated().onOrAfter(Version.V_7_4_0); |
822 | 832 | assert Version.V_EMPTY.equals(indexSettings.getIndexVersionCreated()) == false;
|
823 | 833 | assert invariant();
|
824 | 834 | }
|
@@ -913,30 +923,51 @@ public synchronized void activatePrimaryMode(final long localCheckpoint) {
|
913 | 923 | updateGlobalCheckpointOnPrimary();
|
914 | 924 |
|
915 | 925 | if (indexSettings.isSoftDeleteEnabled()) {
|
| 926 | + addPeerRecoveryRetentionLeaseForSolePrimary(); |
| 927 | + } |
| 928 | + |
| 929 | + assert invariant(); |
| 930 | + } |
| 931 | + |
| 932 | + /** |
| 933 | + * Creates a peer recovery retention lease for this shard, if one does not already exist and this shard is the sole shard copy in the |
| 934 | + * replication group. If one does not already exist and yet there are other shard copies in this group then we must have just done |
| 935 | + * a rolling upgrade from a version before {@link Version#V_7_4_0}, in which case the missing leases should be created asynchronously |
| 936 | + * by the caller using {@link ReplicationTracker#createMissingPeerRecoveryRetentionLeases(ActionListener)}. |
| 937 | + */ |
| 938 | + private void addPeerRecoveryRetentionLeaseForSolePrimary() { |
| 939 | + assert primaryMode; |
| 940 | + assert Thread.holdsLock(this); |
| 941 | + |
| 942 | + if (indexSettings().getIndexMetaData().getState() == IndexMetaData.State.OPEN) { |
916 | 943 | final ShardRouting primaryShard = routingTable.primaryShard();
|
917 | 944 | final String leaseId = getPeerRecoveryRetentionLeaseId(primaryShard);
|
918 | 945 | if (retentionLeases.get(leaseId) == null) {
|
919 |
| - /* |
920 |
| - * We might have got here here via a rolling upgrade from an older version that doesn't create peer recovery retention |
921 |
| - * leases for every shard copy, but in this case we do not expect any leases to exist. |
922 |
| - */ |
923 |
| - if (indexSettings.getIndexVersionCreated().onOrAfter(Version.V_7_4_0)) { |
924 |
| - // We are starting up the whole replication group from scratch: if we were not (i.e. this is a replica promotion) then |
925 |
| - // this copy must already be in-sync and active and therefore holds a retention lease for itself. |
926 |
| - assert routingTable.activeShards().equals(Collections.singletonList(primaryShard)) : routingTable.activeShards(); |
| 946 | + if (replicationGroup.getReplicationTargets().equals(Collections.singletonList(primaryShard))) { |
927 | 947 | assert primaryShard.allocationId().getId().equals(shardAllocationId)
|
928 |
| - : routingTable.activeShards() + " vs " + shardAllocationId; |
929 |
| - assert replicationGroup.getReplicationTargets().equals(Collections.singletonList(primaryShard)); |
930 |
| - |
| 948 | + : routingTable.assignedShards() + " vs " + shardAllocationId; |
931 | 949 | // Safe to call innerAddRetentionLease() without a subsequent sync since there are no other members of this replication
|
932 | 950 | // group.
|
| 951 | + logger.trace("addPeerRecoveryRetentionLeaseForSolePrimary: adding lease [{}]", leaseId); |
933 | 952 | innerAddRetentionLease(leaseId, Math.max(0L, checkpoints.get(shardAllocationId).globalCheckpoint + 1),
|
934 | 953 | PEER_RECOVERY_RETENTION_LEASE_SOURCE);
|
| 954 | + hasAllPeerRecoveryRetentionLeases = true; |
| 955 | + } else { |
| 956 | + /* |
| 957 | + * We got here via a rolling upgrade from an older version that doesn't create peer recovery retention |
| 958 | + * leases for every shard copy, but in this case we do not expect any leases to exist. |
| 959 | + */ |
| 960 | + assert hasAllPeerRecoveryRetentionLeases == false : routingTable + " vs " + retentionLeases; |
| 961 | + logger.debug("{} becoming primary of {} with missing lease: {}", primaryShard, routingTable, retentionLeases); |
935 | 962 | }
|
| 963 | + } else if (hasAllPeerRecoveryRetentionLeases == false && routingTable.assignedShards().stream().allMatch(shardRouting -> |
| 964 | + retentionLeases.contains(getPeerRecoveryRetentionLeaseId(shardRouting)) |
| 965 | + || checkpoints.get(shardRouting.allocationId().getId()).tracked == false)) { |
| 966 | + // Although this index is old enough not to have all the expected peer recovery retention leases, in fact it does, so we |
| 967 | + // don't need to do any more work. |
| 968 | + hasAllPeerRecoveryRetentionLeases = true; |
936 | 969 | }
|
937 | 970 | }
|
938 |
| - |
939 |
| - assert invariant(); |
940 | 971 | }
|
941 | 972 |
|
942 | 973 | /**
|
@@ -1239,9 +1270,54 @@ public synchronized void activateWithPrimaryContext(PrimaryContext primaryContex
|
1239 | 1270 | // note that if there was no cluster state update between start of the engine of this shard and the call to
|
1240 | 1271 | // initializeWithPrimaryContext, we might still have missed a cluster state update. This is best effort.
|
1241 | 1272 | runAfter.run();
|
| 1273 | + |
| 1274 | + if (indexSettings.isSoftDeleteEnabled()) { |
| 1275 | + addPeerRecoveryRetentionLeaseForSolePrimary(); |
| 1276 | + } |
| 1277 | + |
| 1278 | + assert invariant(); |
| 1279 | + } |
| 1280 | + |
| 1281 | + private synchronized void setHasAllPeerRecoveryRetentionLeases() { |
| 1282 | + hasAllPeerRecoveryRetentionLeases = true; |
1242 | 1283 | assert invariant();
|
1243 | 1284 | }
|
1244 | 1285 |
|
| 1286 | + /** |
| 1287 | + * Create any required peer-recovery retention leases that do not currently exist because we just did a rolling upgrade from a version |
| 1288 | + * prior to {@link Version#V_7_4_0} that does not create peer-recovery retention leases. |
| 1289 | + */ |
| 1290 | + public synchronized void createMissingPeerRecoveryRetentionLeases(ActionListener<Void> listener) { |
| 1291 | + if (hasAllPeerRecoveryRetentionLeases == false) { |
| 1292 | + final List<ShardRouting> shardRoutings = routingTable.assignedShards(); |
| 1293 | + final GroupedActionListener<ReplicationResponse> groupedActionListener = new GroupedActionListener<>(ActionListener.wrap(vs -> { |
| 1294 | + setHasAllPeerRecoveryRetentionLeases(); |
| 1295 | + listener.onResponse(null); |
| 1296 | + }, listener::onFailure), shardRoutings.size()); |
| 1297 | + for (ShardRouting shardRouting : shardRoutings) { |
| 1298 | + if (retentionLeases.contains(getPeerRecoveryRetentionLeaseId(shardRouting))) { |
| 1299 | + groupedActionListener.onResponse(null); |
| 1300 | + } else { |
| 1301 | + final CheckpointState checkpointState = checkpoints.get(shardRouting.allocationId().getId()); |
| 1302 | + if (checkpointState.tracked == false) { |
| 1303 | + groupedActionListener.onResponse(null); |
| 1304 | + } else { |
| 1305 | + logger.trace("createMissingPeerRecoveryRetentionLeases: adding missing lease for {}", shardRouting); |
| 1306 | + try { |
| 1307 | + addPeerRecoveryRetentionLease(shardRouting.currentNodeId(), |
| 1308 | + Math.max(SequenceNumbers.NO_OPS_PERFORMED, checkpointState.globalCheckpoint), groupedActionListener); |
| 1309 | + } catch (Exception e) { |
| 1310 | + groupedActionListener.onFailure(e); |
| 1311 | + } |
| 1312 | + } |
| 1313 | + } |
| 1314 | + } |
| 1315 | + } else { |
| 1316 | + logger.trace("createMissingPeerRecoveryRetentionLeases: nothing to do"); |
| 1317 | + listener.onResponse(null); |
| 1318 | + } |
| 1319 | + } |
| 1320 | + |
1245 | 1321 | private Runnable getMasterUpdateOperationFromCurrentState() {
|
1246 | 1322 | assert primaryMode == false;
|
1247 | 1323 | final long lastAppliedClusterStateVersion = appliedClusterStateVersion;
|
|
0 commit comments