|
29 | 29 | import org.apache.lucene.store.IndexInput;
|
30 | 30 | import org.apache.lucene.store.RateLimiter;
|
31 | 31 | import org.apache.lucene.util.ArrayUtil;
|
32 |
| -import org.elasticsearch.core.internal.io.IOUtils; |
33 | 32 | import org.elasticsearch.ExceptionsHelper;
|
34 | 33 | import org.elasticsearch.Version;
|
35 |
| -import org.elasticsearch.action.support.PlainActionFuture; |
| 34 | +import org.elasticsearch.action.ActionListener; |
36 | 35 | import org.elasticsearch.cluster.routing.IndexShardRoutingTable;
|
37 | 36 | import org.elasticsearch.cluster.routing.ShardRouting;
|
38 | 37 | import org.elasticsearch.common.Nullable;
|
|
44 | 43 | import org.elasticsearch.common.settings.Settings;
|
45 | 44 | import org.elasticsearch.common.unit.ByteSizeValue;
|
46 | 45 | import org.elasticsearch.common.util.CancellableThreads;
|
| 46 | +import org.elasticsearch.common.util.concurrent.FutureUtils; |
| 47 | +import org.elasticsearch.core.internal.io.IOUtils; |
47 | 48 | import org.elasticsearch.core.internal.io.Streams;
|
48 | 49 | import org.elasticsearch.index.engine.Engine;
|
49 | 50 | import org.elasticsearch.index.engine.RecoveryEngineException;
|
|
67 | 68 | import java.util.Comparator;
|
68 | 69 | import java.util.List;
|
69 | 70 | import java.util.Locale;
|
| 71 | +import java.util.concurrent.CompletableFuture; |
70 | 72 | import java.util.concurrent.atomic.AtomicLong;
|
71 | 73 | import java.util.function.Function;
|
72 | 74 | import java.util.function.Supplier;
|
@@ -142,7 +144,7 @@ public RecoveryResponse recoverToTarget() throws IOException {
|
142 | 144 | throw new DelayRecoveryException("source node does not have the shard listed in its state as allocated on the node");
|
143 | 145 | }
|
144 | 146 | assert targetShardRouting.initializing() : "expected recovery target to be initializing but was " + targetShardRouting;
|
145 |
| - }, shardId + " validating recovery target ["+ request.targetAllocationId() + "] registered "); |
| 147 | + }, shardId + " validating recovery target ["+ request.targetAllocationId() + "] registered ", shard, cancellableThreads, logger); |
146 | 148 |
|
147 | 149 | try (Closeable ignored = shard.acquireTranslogRetentionLock()) {
|
148 | 150 | final long startingSeqNo;
|
@@ -196,7 +198,7 @@ public RecoveryResponse recoverToTarget() throws IOException {
|
196 | 198 | * all documents up to maxSeqNo in phase2.
|
197 | 199 | */
|
198 | 200 | runUnderPrimaryPermit(() -> shard.initiateTracking(request.targetAllocationId()),
|
199 |
| - shardId + " initiating tracking of " + request.targetAllocationId()); |
| 201 | + shardId + " initiating tracking of " + request.targetAllocationId(), shard, cancellableThreads, logger); |
200 | 202 |
|
201 | 203 | final long endingSeqNo = shard.seqNoStats().getMaxSeqNo();
|
202 | 204 | /*
|
@@ -227,17 +229,41 @@ private boolean isTargetSameHistory() {
|
227 | 229 | return targetHistoryUUID != null && targetHistoryUUID.equals(shard.getHistoryUUID());
|
228 | 230 | }
|
229 | 231 |
|
230 |
| - private void runUnderPrimaryPermit(CancellableThreads.Interruptable runnable, String reason) { |
/**
 * Runs {@code runnable} while holding a primary operation permit on {@code primary}.
 * <p>
 * The whole sequence executes inside {@code cancellableThreads.execute(...)}. A permit is requested through
 * {@code primary.acquirePrimaryOperationPermit(...)} with {@code ThreadPool.Names.SAME} and {@code reason}. The
 * listener hands the acquired {@code Releasable} (or the failure) to a {@code CompletableFuture}, whose value is
 * then obtained via {@code FutureUtils.get(permit)}. With the permit held, the shard is checked for primary mode
 * before {@code runnable.run()} is invoked.
 * <p>
 * In this revision the method becomes static and receives the shard, the cancellable threads and the logger as
 * explicit arguments instead of relying on the enclosing instance. It also replaces the {@code PlainActionFuture}
 * wait with a {@code CompletableFuture} plus a {@code finally} clean-up.
 *
 * @param runnable           the work to run once the permit is held
 * @param reason             description passed to the permit acquisition call
 * @param primary            shard from which the permit is acquired and whose primary mode is checked
 * @param cancellableThreads wrapper inside which the acquisition and the runnable are executed
 * @param logger             used to trace-log a failed permit future in the clean-up callback
 * @throws IndexShardRelocatedException raised inside the cancellable section when {@code primary.isPrimaryMode()}
 *         is false; NOTE(review): how it surfaces to the caller depends on CancellableThreads.execute - confirm
 */
| 232 | + static void runUnderPrimaryPermit(CancellableThreads.Interruptable runnable, String reason, |
| 233 | + IndexShard primary, CancellableThreads cancellableThreads, Logger logger) { |
231 | 234 | cancellableThreads.execute(() -> {
|
232 |
| - final PlainActionFuture<Releasable> onAcquired = new PlainActionFuture<>(); |
233 |
| - shard.acquirePrimaryOperationPermit(onAcquired, ThreadPool.Names.SAME, reason); |
234 |
| - try (Releasable ignored = onAcquired.actionGet()) { |
| 235 | + CompletableFuture<Releasable> permit = new CompletableFuture<>(); |
| 236 | + final ActionListener<Releasable> onAcquired = new ActionListener<Releasable>() { |
| 237 | + @Override |
| 238 | + public void onResponse(Releasable releasable) { |
/* complete(...) returns false when the future was already completed; the permit is then released right away */
| 239 | + if (permit.complete(releasable) == false) { |
| 240 | + releasable.close(); |
| 241 | + } |
| 242 | + } |
| 243 | + |
| 244 | + @Override |
| 245 | + public void onFailure(Exception e) { |
| 246 | + permit.completeExceptionally(e); |
| 247 | + } |
| 248 | + }; |
| 249 | + primary.acquirePrimaryOperationPermit(onAcquired, ThreadPool.Names.SAME, reason); |
| 250 | + try (Releasable ignored = FutureUtils.get(permit)) { |
235 | 251 | // check that the IndexShard still has the primary authority. This needs to be checked under operation permit to prevent
|
236 | 252 | // races, as IndexShard will switch its authority only when it holds all operation permits, see IndexShard.relocated()
|
237 |
| - if (shard.isPrimaryMode() == false) { |
238 |
| - throw new IndexShardRelocatedException(shard.shardId()); |
| 253 | + if (primary.isPrimaryMode() == false) { |
| 254 | + throw new IndexShardRelocatedException(primary.shardId()); |
239 | 255 | }
|
240 | 256 | runnable.run();
|
/*
 * The callback below runs when the permit future completes, which may be after this block has already exited.
 * It closes a permit that was delivered and trace-logs a failure instead of rethrowing it.
 * NOTE(review): on the normal path the same Releasable is closed by the try-with-resources above and again by
 * this callback; presumably close() is idempotent for operation permits - confirm.
 */
| 257 | + } finally { |
| 258 | + // just in case we got an exception (likely interrupted) while waiting for the get |
| 259 | + permit.whenComplete((r, e) -> { |
| 260 | + if (r != null) { |
| 261 | + r.close(); |
| 262 | + } |
| 263 | + if (e != null) { |
| 264 | + logger.trace("suppressing exception on completion (it was already bubbled up or the operation was aborted)", e); |
| 265 | + } |
| 266 | + }); |
241 | 267 | }
|
242 | 268 | });
|
243 | 269 | }
|
@@ -489,11 +515,11 @@ public void finalizeRecovery(final long targetLocalCheckpoint) throws IOExceptio
|
489 | 515 | * the permit then the state of the shard will be relocated and this recovery will fail.
|
490 | 516 | */
|
491 | 517 | runUnderPrimaryPermit(() -> shard.markAllocationIdAsInSync(request.targetAllocationId(), targetLocalCheckpoint),
|
492 |
| - shardId + " marking " + request.targetAllocationId() + " as in sync"); |
| 518 | + shardId + " marking " + request.targetAllocationId() + " as in sync", shard, cancellableThreads, logger); |
493 | 519 | final long globalCheckpoint = shard.getGlobalCheckpoint();
|
494 | 520 | cancellableThreads.executeIO(() -> recoveryTarget.finalizeRecovery(globalCheckpoint));
|
495 | 521 | runUnderPrimaryPermit(() -> shard.updateGlobalCheckpointForShard(request.targetAllocationId(), globalCheckpoint),
|
496 |
| - shardId + " updating " + request.targetAllocationId() + "'s global checkpoint"); |
| 522 | + shardId + " updating " + request.targetAllocationId() + "'s global checkpoint", shard, cancellableThreads, logger); |
497 | 523 |
|
498 | 524 | if (request.isPrimaryRelocation()) {
|
499 | 525 | logger.trace("performing relocation hand-off");
|
|
0 commit comments