70
70
import java .io .Closeable ;
71
71
import java .io .IOException ;
72
72
import java .io .InputStream ;
73
- import java .util .ArrayDeque ;
74
73
import java .util .ArrayList ;
75
74
import java .util .Collections ;
76
- import java .util .Comparator ;
77
75
import java .util .HashMap ;
78
76
import java .util .List ;
79
77
import java .util .Map ;
@@ -375,16 +373,6 @@ void restoreFiles() throws IOException {
375
373
restore (snapshotFiles );
376
374
}
377
375
378
- private static class FileSession {
379
- FileSession (long lastTrackedSeqNo , long lastOffset ) {
380
- this .lastTrackedSeqNo = lastTrackedSeqNo ;
381
- this .lastOffset = lastOffset ;
382
- }
383
-
384
- final long lastTrackedSeqNo ;
385
- final long lastOffset ;
386
- }
387
-
388
376
@ Override
389
377
protected void restoreFiles (List <FileInfo > filesToRecover , Store store ) throws IOException {
390
378
logger .trace ("[{}] starting CCR restore of {} files" , shardId , filesToRecover );
@@ -393,116 +381,61 @@ protected void restoreFiles(List<FileInfo> filesToRecover, Store store) throws I
393
381
final LocalCheckpointTracker requestSeqIdTracker = new LocalCheckpointTracker (NO_OPS_PERFORMED , NO_OPS_PERFORMED );
394
382
final AtomicReference <Tuple <StoreFileMetaData , Exception >> error = new AtomicReference <>();
395
383
396
- final ArrayDeque <FileInfo > remainingFiles = new ArrayDeque <>(filesToRecover );
397
- final Map <FileInfo , FileSession > inFlightRequests = new HashMap <>();
398
- final Object mutex = new Object ();
399
-
400
- while (true ) {
401
- if (error .get () != null ) {
402
- break ;
403
- }
404
- final FileInfo fileToRecover ;
405
- final FileSession prevFileSession ;
406
- synchronized (mutex ) {
407
- if (inFlightRequests .isEmpty () && remainingFiles .isEmpty ()) {
408
- break ;
409
- }
410
- final long maxConcurrentFileChunks = ccrSettings .getMaxConcurrentFileChunks ();
411
- if (remainingFiles .isEmpty () == false && inFlightRequests .size () < maxConcurrentFileChunks ) {
412
- for (int i = 0 ; i < maxConcurrentFileChunks ; i ++) {
413
- if (remainingFiles .isEmpty ()) {
414
- break ;
415
- }
416
- inFlightRequests .put (remainingFiles .pop (), new FileSession (NO_OPS_PERFORMED , 0 ));
417
- }
418
- }
419
- final Map .Entry <FileInfo , FileSession > minEntry =
420
- inFlightRequests .entrySet ().stream ().min (Comparator .comparingLong (e -> e .getValue ().lastTrackedSeqNo )).get ();
421
- prevFileSession = minEntry .getValue ();
422
- fileToRecover = minEntry .getKey ();
423
- }
424
- try {
425
- requestSeqIdTracker .waitForOpsToComplete (prevFileSession .lastTrackedSeqNo );
426
- final FileSession fileSession ;
427
- synchronized (mutex ) {
428
- fileSession = inFlightRequests .get (fileToRecover );
429
- // if file has been removed in the mean-while, it means that restore of this file completed, so start working
430
- // on the next one
431
- if (fileSession == null ) {
432
- continue ;
433
- }
434
- }
384
+ for (FileInfo fileInfo : filesToRecover ) {
385
+ final long fileLength = fileInfo .length ();
386
+ long offset = 0 ;
387
+ while (offset < fileLength && error .get () == null ) {
435
388
final long requestSeqId = requestSeqIdTracker .generateSeqNo ();
436
389
try {
437
- synchronized (mutex ) {
438
- inFlightRequests .put (fileToRecover , new FileSession (requestSeqId , fileSession .lastOffset ));
390
+ requestSeqIdTracker .waitForOpsToComplete (requestSeqId - ccrSettings .getMaxConcurrentFileChunks ());
391
+
392
+ if (error .get () != null ) {
393
+ requestSeqIdTracker .markSeqNoAsCompleted (requestSeqId );
394
+ break ;
439
395
}
440
- final int bytesRequested = Math .toIntExact (Math .min (ccrSettings .getChunkSize ().getBytes (),
441
- fileToRecover .length () - fileSession .lastOffset ));
396
+
397
+ final int bytesRequested = Math .toIntExact (
398
+ Math .min (ccrSettings .getChunkSize ().getBytes (), fileLength - offset ));
399
+ offset += bytesRequested ;
400
+
442
401
final GetCcrRestoreFileChunkRequest request =
443
- new GetCcrRestoreFileChunkRequest (node , sessionUUID , fileToRecover .name (), bytesRequested );
402
+ new GetCcrRestoreFileChunkRequest (node , sessionUUID , fileInfo .name (), bytesRequested );
444
403
logger .trace ("[{}] [{}] fetching chunk for file [{}], expected offset: {}, size: {}" , shardId , snapshotId ,
445
- fileToRecover .name (), fileSession . lastOffset , bytesRequested );
404
+ fileInfo .name (), offset , bytesRequested );
446
405
447
406
remoteClient .execute (GetCcrRestoreFileChunkAction .INSTANCE , request ,
448
407
ActionListener .wrap (
449
408
r -> threadPool .generic ().execute (new AbstractRunnable () {
450
409
@ Override
451
410
public void onFailure (Exception e ) {
452
- error .compareAndSet (null , Tuple .tuple (fileToRecover .metadata (), e ));
411
+ error .compareAndSet (null , Tuple .tuple (fileInfo .metadata (), e ));
453
412
requestSeqIdTracker .markSeqNoAsCompleted (requestSeqId );
454
413
}
455
414
456
415
@ Override
457
416
protected void doRun () throws Exception {
458
417
final int actualChunkSize = r .getChunk ().length ();
459
418
logger .trace ("[{}] [{}] got response for file [{}], offset: {}, length: {}" , shardId ,
460
- snapshotId , fileToRecover .name (), r .getOffset (), actualChunkSize );
419
+ snapshotId , fileInfo .name (), r .getOffset (), actualChunkSize );
461
420
final long nanosPaused = ccrSettings .getRateLimiter ().maybePause (actualChunkSize );
462
421
throttleListener .accept (nanosPaused );
463
- final long newOffset = r .getOffset () + actualChunkSize ;
464
-
465
- assert r .getOffset () == fileSession .lastOffset ;
466
- assert actualChunkSize == bytesRequested ;
467
- assert newOffset <= fileToRecover .length ();
468
- final boolean lastChunk = newOffset >= fileToRecover .length ();
469
- multiFileWriter .writeFileChunk (fileToRecover .metadata (), r .getOffset (), r .getChunk (),
470
- lastChunk );
471
- if (lastChunk ) {
472
- synchronized (mutex ) {
473
- final FileSession removed = inFlightRequests .remove (fileToRecover );
474
- assert removed != null : "session disappeared for " + fileToRecover .name ();
475
- assert removed .lastTrackedSeqNo == requestSeqId ;
476
- assert removed .lastOffset == fileSession .lastOffset ;
477
- }
478
- } else {
479
- synchronized (mutex ) {
480
- final FileSession replaced = inFlightRequests .replace (fileToRecover ,
481
- new FileSession (requestSeqId , newOffset ));
482
- assert replaced != null : "session disappeared for " + fileToRecover .name ();
483
- assert replaced .lastTrackedSeqNo == requestSeqId ;
484
- assert replaced .lastOffset == fileSession .lastOffset ;
485
- }
486
- }
422
+ final boolean lastChunk = r .getOffset () + actualChunkSize >= fileLength ;
423
+ multiFileWriter .writeFileChunk (fileInfo .metadata (), r .getOffset (), r .getChunk (), lastChunk );
487
424
requestSeqIdTracker .markSeqNoAsCompleted (requestSeqId );
488
425
}
489
426
}),
490
427
e -> {
491
- error .compareAndSet (null , Tuple .tuple (fileToRecover .metadata (), e ));
428
+ error .compareAndSet (null , Tuple .tuple (fileInfo .metadata (), e ));
492
429
requestSeqIdTracker .markSeqNoAsCompleted (requestSeqId );
493
430
}
494
431
));
495
432
} catch (Exception e ) {
496
- error .compareAndSet (null , Tuple .tuple (fileToRecover .metadata (), e ));
433
+ error .compareAndSet (null , Tuple .tuple (fileInfo .metadata (), e ));
497
434
requestSeqIdTracker .markSeqNoAsCompleted (requestSeqId );
498
- throw e ;
499
435
}
500
- } catch (Exception e ) {
501
- error .compareAndSet (null , Tuple .tuple (fileToRecover .metadata (), e ));
502
- break ;
503
436
}
504
-
505
437
}
438
+
506
439
try {
507
440
requestSeqIdTracker .waitForOpsToComplete (requestSeqIdTracker .getMaxSeqNo ());
508
441
} catch (InterruptedException e ) {
0 commit comments