@@ -31,7 +31,6 @@
 import org.apache.lucene.util.ArrayUtil;
 import org.elasticsearch.ExceptionsHelper;
 import org.elasticsearch.action.ActionListener;
-import org.elasticsearch.action.ActionRunnable;
 import org.elasticsearch.action.StepListener;
 import org.elasticsearch.cluster.routing.IndexShardRoutingTable;
 import org.elasticsearch.cluster.routing.ShardRouting;
@@ -47,7 +46,7 @@
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.common.util.CancellableThreads;
-import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
+import org.elasticsearch.common.util.concurrent.AsyncIOProcessor;
 import org.elasticsearch.common.util.concurrent.FutureUtils;
 import org.elasticsearch.core.internal.io.IOUtils;
 import org.elasticsearch.index.engine.Engine;
@@ -68,6 +67,7 @@
 
 import java.io.Closeable;
 import java.io.IOException;
+import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -79,10 +79,8 @@
 import java.util.Objects;
 import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.CopyOnWriteArrayList;
-import java.util.concurrent.Executor;
-import java.util.concurrent.Semaphore;
+import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.atomic.AtomicReference;
 import java.util.function.Consumer;
 import java.util.function.IntSupplier;
 import java.util.stream.StreamSupport;
@@ -113,12 +111,10 @@ public class RecoverySourceHandler {
     private final int maxConcurrentFileChunks;
     private final CancellableThreads cancellableThreads = new CancellableThreads();
     private final List<Closeable> resources = new CopyOnWriteArrayList<>();
-    private final Executor sendFileExecutor;
 
     public RecoverySourceHandler(IndexShard shard, RecoveryTargetHandler recoveryTarget, StartRecoveryRequest request,
-                                 Executor sendFileExecutor, int fileChunkSizeInBytes, int maxConcurrentFileChunks) {
+                                 int fileChunkSizeInBytes, int maxConcurrentFileChunks) {
         this.shard = shard;
-        this.sendFileExecutor = sendFileExecutor;
         this.recoveryTarget = recoveryTarget;
         this.request = request;
         this.shardId = this.request.shardId().id();
@@ -701,77 +697,81 @@ public String toString() {
      * one of the networking threads which receive/handle the acknowledgments of the current pending file chunk requests. This process will
      * continue until all chunks are sent and acknowledged.
      */
-    private class MultiFileSender extends ActionRunnable<Void> implements Closeable {
+    private class MultiFileSender extends AsyncIOProcessor<FileChunkResponse> implements Closeable {
         private final Store store;
         private final IntSupplier translogOps;
+        private final AtomicBoolean done = new AtomicBoolean(false);
+        private final ActionListener<Void> listener;
         private final LocalCheckpointTracker requestSeqIdTracker = new LocalCheckpointTracker(NO_OPS_PERFORMED, NO_OPS_PERFORMED);
-        private final Semaphore semaphore = new Semaphore(0);
         private final Iterator<StoreFileMetaData> remainingFiles;
         private StoreFileMetaData currentFile;
         private InputStreamIndexInput currentInput = null;
         private long currentChunkPosition = 0;
-        private final Deque<byte[]> recycledBuffers = ConcurrentCollections.newDeque();
-        private final AtomicReference<Tuple<StoreFileMetaData, Exception>> error = new AtomicReference<>();
+        private final Deque<byte[]> recycledBuffers = new ArrayDeque<>();
+        private final FileChunkResponse INITIAL_RESPONSE = new FileChunkResponse(SequenceNumbers.UNASSIGNED_SEQ_NO, null, null);
 
         MultiFileSender(Store store, IntSupplier translogOps, StoreFileMetaData[] files, ActionListener<Void> listener) {
-            super(ActionListener.notifyOnce(listener));
+            super(logger, maxConcurrentFileChunks * 2, shard.getThreadPool().getThreadContext());
             this.store = store;
            this.translogOps = translogOps;
             this.remainingFiles = Arrays.asList(files).iterator();
+            this.listener = ActionListener.wrap(
+                r -> {
+                    if (done.compareAndSet(false, true)) {
+                        listener.onResponse(r);
+                    }
+                },
+                e -> {
+                    if (done.compareAndSet(false, true)) {
+                        listener.onFailure(e);
+                    }
+                });
+        }
+
+        void start() {
+            put(INITIAL_RESPONSE, e -> {});
         }
 
         @Override
-        protected void doRun() throws Exception {
-            assert ThreadPool.assertCurrentMethodIsNotCalledRecursively();
-            assert Transports.assertNotTransportThread(RecoverySourceHandler.this + "[send file chunk]");
-            while (true) {
-                assert semaphore.availablePermits() == 0;
-                cancellableThreads.checkForCancel();
-                if (canSendMore() == false) {
-                    semaphore.release();
-                    // Here we have to retry before abort to avoid a race situation where the other threads have flipped `canSendMore`
-                    // condition but they are not going to resume the sending process because this thread still holds the semaphore.
-                    final boolean changed = canSendMore() || error.get() != null;
-                    if (changed == false || semaphore.tryAcquire() == false) {
-                        break;
+        protected void write(List<Tuple<FileChunkResponse, Consumer<Exception>>> responses) {
+            assert Transports.assertNotTransportThread(RecoverySourceHandler.this + "[send file chunks]");
+            if (done.get()) {
+                return;
+            }
+            try {
+                for (Tuple<FileChunkResponse, Consumer<Exception>> response : responses) {
+                    if (response.v1() == INITIAL_RESPONSE) {
+                        continue; // not an actual response, a marker to initialize the sending process.
+                    }
+                    requestSeqIdTracker.markSeqNoAsProcessed(response.v1().seqNo);
+                    response.v1().chunk.close();
+                    if (response.v1().failure != null) {
+                        handleErrorOnSendFiles(store, response.v1().failure, new StoreFileMetaData[]{response.v1().chunk.md});
+                        throw response.v1().failure;
                     }
                 }
-                if (error.get() != null) {
-                    handleErrorOnSendFiles(store, error.get().v2(), new StoreFileMetaData[]{error.get().v1()});
-                    throw error.get().v2();
-                }
-                final FileChunk chunk = readNextChunk();
-                if (chunk == null) {
-                    semaphore.release(); // allow other threads respond if we are not done yet.
-                    if (requestSeqIdTracker.getMaxSeqNo() == requestSeqIdTracker.getProcessedCheckpoint() && semaphore.tryAcquire()) {
-                        listener.onResponse(null);
+                while (requestSeqIdTracker.getMaxSeqNo() - requestSeqIdTracker.getProcessedCheckpoint() < maxConcurrentFileChunks) {
+                    cancellableThreads.checkForCancel();
+                    final FileChunk chunk = readNextChunk();
+                    if (chunk == null) {
+                        if (requestSeqIdTracker.getProcessedCheckpoint() == requestSeqIdTracker.getMaxSeqNo()) {
+                            listener.onResponse(null);
+                        }
+                        return;
                     }
-                    break;
+                    final long requestSeqId = requestSeqIdTracker.generateSeqNo();
+                    cancellableThreads.execute(() -> recoveryTarget.writeFileChunk(chunk.md, chunk.position, chunk.content, chunk.lastChunk,
+                        translogOps.getAsInt(), ActionListener.wrap(
+                            r -> this.put(new FileChunkResponse(requestSeqId, chunk, null), ignored -> {}),
+                            e -> this.put(new FileChunkResponse(requestSeqId, chunk, e), ignored -> {})
+                        )));
                 }
-                final long requestSeqId = requestSeqIdTracker.generateSeqNo();
-                cancellableThreads.execute(() ->
-                    recoveryTarget.writeFileChunk(chunk.md, chunk.position, chunk.content, chunk.lastChunk, translogOps.getAsInt(),
-                        ActionListener.wrap(
-                            r -> {
-                                chunk.close(); // release the buffer so we can reuse to reduce allocation
-                                requestSeqIdTracker.markSeqNoAsProcessed(requestSeqId);
-                                if (canSendMore() && semaphore.tryAcquire()) {
-                                    sendFileExecutor.execute(this); // fork off from the network thread
-                                }
-                            },
-                            e -> {
-                                if (error.compareAndSet(null, Tuple.tuple(chunk.md, e)) && semaphore.tryAcquire()) {
-                                    // have to fork as handleErrorOnSendFiles can read file which should not happen on the network thread.
-                                    sendFileExecutor.execute(this);
-                                }
-                            })
-                    )
-                );
+            } catch (Exception e) {
+                listener.onFailure(e);
             }
         }
 
-        FileChunk readNextChunk() throws Exception {
-            assert semaphore.availablePermits() == 0;
+        private FileChunk readNextChunk() throws Exception {
             try {
                 if (currentInput == null) {
                     if (remainingFiles.hasNext() == false) {
@@ -808,13 +808,8 @@ public void close() throws IOException {
             }
         }
 
-        boolean canSendMore() {
-            return requestSeqIdTracker.getMaxSeqNo() - requestSeqIdTracker.getProcessedCheckpoint() < maxConcurrentFileChunks;
-        }
-
         @Override
         public void close() throws IOException {
-            assert semaphore.availablePermits() == 0;
             IOUtils.close(recycledBuffers::clear, currentInput, () -> currentInput = null);
         }
     }
@@ -840,6 +835,18 @@ public void close() {
         }
     }
 
+    private static class FileChunkResponse {
+        final long seqNo;
+        final FileChunk chunk;
+        final Exception failure;
+
+        FileChunkResponse(long seqNo, FileChunk chunk, Exception failure) {
+            this.seqNo = seqNo;
+            this.chunk = chunk;
+            this.failure = failure;
+        }
+    }
+
     void sendFiles(Store store, StoreFileMetaData[] files, IntSupplier translogOps, ActionListener<Void> listener) {
         ArrayUtil.timSort(files, Comparator.comparingLong(StoreFileMetaData::length)); // send smallest first
         StepListener<Void> wrappedListener = new StepListener<>();
@@ -852,7 +859,7 @@ void sendFiles(Store store, StoreFileMetaData[] files, IntSupplier translogOps,
             listener.onFailure(e);
         });
         resources.add(multiFileSender);
-        multiFileSender.run();
+        multiFileSender.start();
     }
 
     private void cleanFiles(Store store, Store.MetadataSnapshot sourceMetadata, IntSupplier translogOps,
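Taken together, the diff replaces the previous Semaphore-plus-sendFileExecutor hand-off with an AsyncIOProcessor subclass: chunk acknowledgments are put() back into the processor, write() drains them in batches on whichever acknowledging thread gets there first, and the LocalCheckpointTracker window keeps at most maxConcurrentFileChunks requests in flight. The sketch below illustrates that flow-control pattern using only plain JDK types; ChunkSender, Response, sendChunk and the counters are hypothetical names chosen for illustration, not the Elasticsearch classes touched by this change.

```java
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.List;
import java.util.Queue;
import java.util.concurrent.CompletableFuture;
import java.util.function.Consumer;

/** Hypothetical sketch of a single-consumer, windowed chunk sender (not the actual Elasticsearch API). */
public class ChunkSender {

    /** A completed (or failed) chunk request, playing the role of FileChunkResponse above. */
    static final class Response {
        final long seqNo;          // -1 marks the initial "start sending" marker
        final Exception failure;
        Response(long seqNo, Exception failure) {
            this.seqNo = seqNo;
            this.failure = failure;
        }
    }

    private final int maxInFlight;                      // analogous to maxConcurrentFileChunks
    private final Consumer<Exception> completion;       // analogous to the wrapped ActionListener<Void>
    private final Queue<Response> queue = new ArrayDeque<>();
    private boolean draining = false;                   // only one thread drains the queue at a time
    private boolean done = false;                       // guards against completing twice
    private long issued = 0;                            // requests generated, like generateSeqNo()
    private long acked = 0;                             // requests acknowledged, like the processed checkpoint
    private int remainingChunks;                        // stands in for reading chunks from the store

    ChunkSender(int maxInFlight, int totalChunks, Consumer<Exception> completion) {
        this.maxInFlight = maxInFlight;
        this.remainingChunks = totalChunks;
        this.completion = completion;
    }

    /** Kick off sending, mirroring MultiFileSender.start() putting an initial marker response. */
    void start() {
        onResponse(new Response(-1, null));
    }

    /** Called from acknowledgment callbacks; the winning caller drains the queue for everyone. */
    void onResponse(Response response) {
        synchronized (this) {
            queue.add(response);
        }
        while (true) {
            List<Response> batch = new ArrayList<>();
            synchronized (this) {
                if (draining || queue.isEmpty()) {
                    return;                             // someone else is draining, or nothing left to do
                }
                draining = true;
                while (queue.isEmpty() == false) {
                    batch.add(queue.poll());
                }
            }
            try {
                process(batch);
            } finally {
                synchronized (this) {
                    draining = false;                   // loop once more in case responses arrived meanwhile
                }
            }
        }
    }

    /** Runs on one thread at a time: account for acks, then top the send window back up. */
    private void process(List<Response> batch) {
        if (done) {
            return;
        }
        for (Response r : batch) {
            if (r.seqNo >= 0) {
                acked++;                                // like markSeqNoAsProcessed advancing the checkpoint
            }
            if (r.failure != null) {
                done = true;
                completion.accept(r.failure);
                return;
            }
        }
        while (issued - acked < maxInFlight && remainingChunks > 0) {
            remainingChunks--;
            sendChunk(issued++);                        // async; its callback feeds back into onResponse()
        }
        if (remainingChunks == 0 && acked == issued) {
            done = true;
            completion.accept(null);                    // everything sent and acknowledged
        }
    }

    private void sendChunk(long seqNo) {
        // Simulated asynchronous transport call; a real sender would issue writeFileChunk here.
        CompletableFuture.runAsync(() -> onResponse(new Response(seqNo, null)));
    }
}
```

Under these assumptions, `new ChunkSender(5, 42, e -> {...}).start()` would send 42 simulated chunks with at most five outstanding at a time. The sketch mirrors the trade-off in the diff: draining responses on the acknowledging thread removes the need to fork back onto a dedicated send-file executor after every response, while the done flag (an AtomicBoolean in the actual change) ensures the outer listener is completed exactly once.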