@@ -38,19 +38,18 @@ type Monitor struct {
38
38
}
39
39
40
40
type Config struct {
41
- // Max time to wait for other side to accept open channel request before attempting restart
41
+ // Max time to wait for other side to accept open channel request before attempting restart.
42
+ // Set to 0 to disable timeout.
42
43
AcceptTimeout time.Duration
43
44
// Debounce when restart is triggered by multiple errors
44
45
RestartDebounce time.Duration
45
46
// Backoff after restarting
46
47
RestartBackoff time.Duration
47
48
// Number of times to try to restart before failing
48
49
MaxConsecutiveRestarts uint32
49
- // Max time to wait for the peer to acknowledge a restart request.
50
- // Note: Does not include the time taken to reconnect to the peer.
51
- RestartAckTimeout time.Duration
52
50
// Max time to wait for the responder to send a Complete message once all
53
- // data has been sent
51
+ // data has been sent.
52
+ // Set to 0 to disable timeout.
54
53
CompleteTimeout time.Duration
55
54
// Called when a restart completes successfully
56
55
OnRestartComplete func (id datatransfer.ChannelID )
@@ -74,17 +73,14 @@ func checkConfig(cfg *Config) {
74
73
}
75
74
76
75
prefix := "data-transfer channel monitor config "
77
- if cfg .AcceptTimeout <= 0 {
78
- panic (fmt .Sprintf (prefix + "AcceptTimeout is %s but must be > 0" , cfg .AcceptTimeout ))
76
+ if cfg .AcceptTimeout < 0 {
77
+ panic (fmt .Sprintf (prefix + "AcceptTimeout is %s but must be >= 0" , cfg .AcceptTimeout ))
79
78
}
80
79
if cfg .MaxConsecutiveRestarts == 0 {
81
80
panic (fmt .Sprintf (prefix + "MaxConsecutiveRestarts is %d but must be > 0" , cfg .MaxConsecutiveRestarts ))
82
81
}
83
- if cfg .RestartAckTimeout <= 0 {
84
- panic (fmt .Sprintf (prefix + "RestartAckTimeout is %s but must be > 0" , cfg .RestartAckTimeout ))
85
- }
86
- if cfg .CompleteTimeout <= 0 {
87
- panic (fmt .Sprintf (prefix + "CompleteTimeout is %s but must be > 0" , cfg .CompleteTimeout ))
82
+ if cfg .CompleteTimeout < 0 {
83
+ panic (fmt .Sprintf (prefix + "CompleteTimeout is %s but must be >= 0" , cfg .CompleteTimeout ))
88
84
}
89
85
}
90
86
@@ -275,6 +271,11 @@ func (mc *monitoredChannel) start() {
275
271
// an Accept to our open channel request before the accept timeout.
276
272
// Returns a function that can be used to cancel the timer.
277
273
func (mc * monitoredChannel ) watchForResponderAccept () func () {
274
+ // Check if the accept timeout is disabled
275
+ if mc .cfg .AcceptTimeout == 0 {
276
+ return func () {}
277
+ }
278
+
278
279
// Start a timer for the accept timeout
279
280
timer := time .NewTimer (mc .cfg .AcceptTimeout )
280
281
@@ -297,6 +298,11 @@ func (mc *monitoredChannel) watchForResponderAccept() func() {
297
298
298
299
// Wait up to the configured timeout for the responder to send a Complete message
299
300
func (mc * monitoredChannel ) watchForResponderComplete () {
301
+ // Check if the complete timeout is disabled
302
+ if mc .cfg .CompleteTimeout == 0 {
303
+ return
304
+ }
305
+
300
306
// Start a timer for the complete timeout
301
307
timer := time .NewTimer (mc .cfg .CompleteTimeout )
302
308
defer timer .Stop ()
@@ -308,7 +314,7 @@ func (mc *monitoredChannel) watchForResponderComplete() {
308
314
case <- timer .C :
309
315
// Timer expired before we received a Complete message from the responder
310
316
err := xerrors .Errorf ("%s: timed out waiting %s for Complete message from remote peer" ,
311
- mc .chid , mc .cfg .AcceptTimeout )
317
+ mc .chid , mc .cfg .CompleteTimeout )
312
318
mc .closeChannelAndShutdown (err )
313
319
}
314
320
}
@@ -414,8 +420,7 @@ func (mc *monitoredChannel) doRestartChannel() error {
414
420
err := mc .sendRestartMessage (restartCount )
415
421
if err != nil {
416
422
log .Warnf ("%s: restart failed, trying again: %s" , mc .chid , err )
417
- // If the restart message could not be sent, or there was a timeout
418
- // waiting for the restart to be acknowledged, try again
423
+ // If the restart message could not be sent, try again
419
424
return mc .doRestartChannel ()
420
425
}
421
426
log .Infof ("%s: restart completed successfully" , mc .chid )
@@ -438,25 +443,12 @@ func (mc *monitoredChannel) sendRestartMessage(restartCount int) error {
438
443
log .Infof ("%s: re-established connection to %s in %s" , mc .chid , p , time .Since (start ))
439
444
440
445
// Send a restart message for the channel
441
- restartResult := mc .waitForRestartResponse ()
442
446
log .Infof ("%s: sending restart message to %s (%d consecutive restarts)" , mc .chid , p , restartCount )
443
447
err = mc .mgr .RestartDataTransferChannel (mc .ctx , mc .chid )
444
448
if err != nil {
445
449
return xerrors .Errorf ("%s: failed to send restart message to %s: %w" , mc .chid , p , err )
446
450
}
447
451
448
- // The restart message is fire and forget, so we need to watch for a
449
- // restart response to know that the restart message reached the peer.
450
- select {
451
- case <- mc .ctx .Done ():
452
- return nil // channel shutdown so just bail out
453
- case err = <- restartResult :
454
- if err != nil {
455
- return xerrors .Errorf ("%s: failed to send restart message to %s: %w" , mc .chid , p , err )
456
- }
457
- }
458
- log .Infof ("%s: received restart response from %s" , mc .chid , p )
459
-
460
452
// The restart message was sent successfully.
461
453
// If a restart backoff is configured, backoff after a restart before
462
454
// attempting another.
@@ -490,47 +482,3 @@ func (mc *monitoredChannel) closeChannelAndShutdown(cherr error) {
490
482
log .Errorf ("error closing data-transfer channel %s: %s" , mc .chid , err )
491
483
}
492
484
}
493
-
494
- // Wait for the peer to send an acknowledgement to the restart request
495
- func (mc * monitoredChannel ) waitForRestartResponse () chan error {
496
- restartFired := make (chan struct {})
497
- restarted := make (chan error , 1 )
498
- timer := time .NewTimer (mc .cfg .RestartAckTimeout )
499
-
500
- unsub := mc .mgr .SubscribeToEvents (func (event datatransfer.Event , channelState datatransfer.ChannelState ) {
501
- if channelState .ChannelID () != mc .chid {
502
- return
503
- }
504
-
505
- // The Restart event is fired when we receive an acknowledgement
506
- // from the peer that it has received a restart request
507
- if event .Code == datatransfer .Restart {
508
- close (restartFired )
509
- }
510
- })
511
-
512
- go func () {
513
- defer unsub ()
514
- defer timer .Stop ()
515
-
516
- select {
517
-
518
- // Restart ack received from peer
519
- case <- restartFired :
520
- restarted <- nil
521
-
522
- // Channel monitor shutdown, just bail out
523
- case <- mc .ctx .Done ():
524
- restarted <- nil
525
-
526
- // Timer expired before receiving a restart ack from peer
527
- case <- timer .C :
528
- p := mc .chid .OtherParty (mc .mgr .PeerID ())
529
- err := xerrors .Errorf ("did not receive response to restart request from %s after %s" ,
530
- p , mc .cfg .RestartAckTimeout )
531
- restarted <- err
532
- }
533
- }()
534
-
535
- return restarted
536
- }
0 commit comments