70
70
import java .io .Closeable ;
71
71
import java .io .IOException ;
72
72
import java .io .InputStream ;
73
- import java .util .ArrayDeque ;
74
73
import java .util .ArrayList ;
75
74
import java .util .Collections ;
76
- import java .util .Comparator ;
77
75
import java .util .HashMap ;
78
76
import java .util .List ;
79
77
import java .util .Map ;
@@ -381,16 +379,6 @@ void restoreFiles() throws IOException {
381
379
restore (snapshotFiles );
382
380
}
383
381
384
- private static class FileSession {
385
- FileSession (long lastTrackedSeqNo , long lastOffset ) {
386
- this .lastTrackedSeqNo = lastTrackedSeqNo ;
387
- this .lastOffset = lastOffset ;
388
- }
389
-
390
- final long lastTrackedSeqNo ;
391
- final long lastOffset ;
392
- }
393
-
394
382
@ Override
395
383
protected void restoreFiles (List <FileInfo > filesToRecover , Store store ) throws IOException {
396
384
logger .trace ("[{}] starting CCR restore of {} files" , shardId , filesToRecover );
@@ -399,116 +387,61 @@ protected void restoreFiles(List<FileInfo> filesToRecover, Store store) throws I
399
387
final LocalCheckpointTracker requestSeqIdTracker = new LocalCheckpointTracker (NO_OPS_PERFORMED , NO_OPS_PERFORMED );
400
388
final AtomicReference <Tuple <StoreFileMetaData , Exception >> error = new AtomicReference <>();
401
389
402
- final ArrayDeque <FileInfo > remainingFiles = new ArrayDeque <>(filesToRecover );
403
- final Map <FileInfo , FileSession > inFlightRequests = new HashMap <>();
404
- final Object mutex = new Object ();
405
-
406
- while (true ) {
407
- if (error .get () != null ) {
408
- break ;
409
- }
410
- final FileInfo fileToRecover ;
411
- final FileSession prevFileSession ;
412
- synchronized (mutex ) {
413
- if (inFlightRequests .isEmpty () && remainingFiles .isEmpty ()) {
414
- break ;
415
- }
416
- final long maxConcurrentFileChunks = ccrSettings .getMaxConcurrentFileChunks ();
417
- if (remainingFiles .isEmpty () == false && inFlightRequests .size () < maxConcurrentFileChunks ) {
418
- for (int i = 0 ; i < maxConcurrentFileChunks ; i ++) {
419
- if (remainingFiles .isEmpty ()) {
420
- break ;
421
- }
422
- inFlightRequests .put (remainingFiles .pop (), new FileSession (NO_OPS_PERFORMED , 0 ));
423
- }
424
- }
425
- final Map .Entry <FileInfo , FileSession > minEntry =
426
- inFlightRequests .entrySet ().stream ().min (Comparator .comparingLong (e -> e .getValue ().lastTrackedSeqNo )).get ();
427
- prevFileSession = minEntry .getValue ();
428
- fileToRecover = minEntry .getKey ();
429
- }
430
- try {
431
- requestSeqIdTracker .waitForOpsToComplete (prevFileSession .lastTrackedSeqNo );
432
- final FileSession fileSession ;
433
- synchronized (mutex ) {
434
- fileSession = inFlightRequests .get (fileToRecover );
435
- // if file has been removed in the mean-while, it means that restore of this file completed, so start working
436
- // on the next one
437
- if (fileSession == null ) {
438
- continue ;
439
- }
440
- }
390
+ for (FileInfo fileInfo : filesToRecover ) {
391
+ final long fileLength = fileInfo .length ();
392
+ long offset = 0 ;
393
+ while (offset < fileLength && error .get () == null ) {
441
394
final long requestSeqId = requestSeqIdTracker .generateSeqNo ();
442
395
try {
443
- synchronized (mutex ) {
444
- inFlightRequests .put (fileToRecover , new FileSession (requestSeqId , fileSession .lastOffset ));
396
+ requestSeqIdTracker .waitForOpsToComplete (requestSeqId - ccrSettings .getMaxConcurrentFileChunks ());
397
+
398
+ if (error .get () != null ) {
399
+ requestSeqIdTracker .markSeqNoAsCompleted (requestSeqId );
400
+ break ;
445
401
}
446
- final int bytesRequested = Math .toIntExact (Math .min (ccrSettings .getChunkSize ().getBytes (),
447
- fileToRecover .length () - fileSession .lastOffset ));
402
+
403
+ final int bytesRequested = Math .toIntExact (
404
+ Math .min (ccrSettings .getChunkSize ().getBytes (), fileLength - offset ));
405
+ offset += bytesRequested ;
406
+
448
407
final GetCcrRestoreFileChunkRequest request =
449
- new GetCcrRestoreFileChunkRequest (node , sessionUUID , fileToRecover .name (), bytesRequested );
408
+ new GetCcrRestoreFileChunkRequest (node , sessionUUID , fileInfo .name (), bytesRequested );
450
409
logger .trace ("[{}] [{}] fetching chunk for file [{}], expected offset: {}, size: {}" , shardId , snapshotId ,
451
- fileToRecover .name (), fileSession . lastOffset , bytesRequested );
410
+ fileInfo .name (), offset , bytesRequested );
452
411
453
412
remoteClient .execute (GetCcrRestoreFileChunkAction .INSTANCE , request ,
454
413
ActionListener .wrap (
455
414
r -> threadPool .generic ().execute (new AbstractRunnable () {
456
415
@ Override
457
416
public void onFailure (Exception e ) {
458
- error .compareAndSet (null , Tuple .tuple (fileToRecover .metadata (), e ));
417
+ error .compareAndSet (null , Tuple .tuple (fileInfo .metadata (), e ));
459
418
requestSeqIdTracker .markSeqNoAsCompleted (requestSeqId );
460
419
}
461
420
462
421
@ Override
463
422
protected void doRun () throws Exception {
464
423
final int actualChunkSize = r .getChunk ().length ();
465
424
logger .trace ("[{}] [{}] got response for file [{}], offset: {}, length: {}" , shardId ,
466
- snapshotId , fileToRecover .name (), r .getOffset (), actualChunkSize );
425
+ snapshotId , fileInfo .name (), r .getOffset (), actualChunkSize );
467
426
final long nanosPaused = ccrSettings .getRateLimiter ().maybePause (actualChunkSize );
468
427
throttleListener .accept (nanosPaused );
469
- final long newOffset = r .getOffset () + actualChunkSize ;
470
-
471
- assert r .getOffset () == fileSession .lastOffset ;
472
- assert actualChunkSize == bytesRequested ;
473
- assert newOffset <= fileToRecover .length ();
474
- final boolean lastChunk = newOffset >= fileToRecover .length ();
475
- multiFileWriter .writeFileChunk (fileToRecover .metadata (), r .getOffset (), r .getChunk (),
476
- lastChunk );
477
- if (lastChunk ) {
478
- synchronized (mutex ) {
479
- final FileSession removed = inFlightRequests .remove (fileToRecover );
480
- assert removed != null : "session disappeared for " + fileToRecover .name ();
481
- assert removed .lastTrackedSeqNo == requestSeqId ;
482
- assert removed .lastOffset == fileSession .lastOffset ;
483
- }
484
- } else {
485
- synchronized (mutex ) {
486
- final FileSession replaced = inFlightRequests .replace (fileToRecover ,
487
- new FileSession (requestSeqId , newOffset ));
488
- assert replaced != null : "session disappeared for " + fileToRecover .name ();
489
- assert replaced .lastTrackedSeqNo == requestSeqId ;
490
- assert replaced .lastOffset == fileSession .lastOffset ;
491
- }
492
- }
428
+ final boolean lastChunk = r .getOffset () + actualChunkSize >= fileLength ;
429
+ multiFileWriter .writeFileChunk (fileInfo .metadata (), r .getOffset (), r .getChunk (), lastChunk );
493
430
requestSeqIdTracker .markSeqNoAsCompleted (requestSeqId );
494
431
}
495
432
}),
496
433
e -> {
497
- error .compareAndSet (null , Tuple .tuple (fileToRecover .metadata (), e ));
434
+ error .compareAndSet (null , Tuple .tuple (fileInfo .metadata (), e ));
498
435
requestSeqIdTracker .markSeqNoAsCompleted (requestSeqId );
499
436
}
500
437
));
501
438
} catch (Exception e ) {
502
- error .compareAndSet (null , Tuple .tuple (fileToRecover .metadata (), e ));
439
+ error .compareAndSet (null , Tuple .tuple (fileInfo .metadata (), e ));
503
440
requestSeqIdTracker .markSeqNoAsCompleted (requestSeqId );
504
- throw e ;
505
441
}
506
- } catch (Exception e ) {
507
- error .compareAndSet (null , Tuple .tuple (fileToRecover .metadata (), e ));
508
- break ;
509
442
}
510
-
511
443
}
444
+
512
445
try {
513
446
requestSeqIdTracker .waitForOpsToComplete (requestSeqIdTracker .getMaxSeqNo ());
514
447
} catch (InterruptedException e ) {
0 commit comments