@@ -39,7 +39,7 @@ use crate::routing::gossip::NodeId;
#[cfg(feature = "std")]
use {
	crate::util::time::tests::SinceEpoch,
- 	std::time::{SystemTime, Duration}
+ 	std::time::{SystemTime, Instant, Duration}
};

#[test]
@@ -2616,3 +2616,166 @@ fn test_simple_partial_retry() {
	expect_pending_htlcs_forwardable!(nodes[2]);
	expect_payment_claimable!(nodes[2], payment_hash, payment_secret, amt_msat);
}
+ 
+ 
+ #[test]
+ #[cfg(feature = "std")]
+ fn test_threaded_payment_retries() {
+ 	// In the first version of the in-`ChannelManager` payment retries, retries weren't limited to
+ 	// a single thread and would happily let multiple threads run retries at the same time. Because
+ 	// retries are done by first calculating the amount we need to retry, then dropping the
+ 	// relevant lock, then actually sending, we would happily let multiple threads retry the same
+ 	// amount at the same time, overpaying our original HTLC!
+ 	let chanmon_cfgs = create_chanmon_cfgs(4);
+ 	let node_cfgs = create_node_cfgs(4, &chanmon_cfgs);
+ 	let node_chanmgrs = create_node_chanmgrs(4, &node_cfgs, &[None, None, None, None]);
+ 	let nodes = create_network(4, &node_cfgs, &node_chanmgrs);
+ 
+ 	// There is one mitigating guardrail when retrying payments - we can never over-pay by more
+ 	// than 10% of the original value. Thus, we want all our retries to be below that. In order to
+ 	// keep things simple, we route one HTLC for 0.1% of the payment over channel 1 and the rest
+ 	// out over channel 3+4. This will let us ignore 99% of the payment value and deal with only
+ 	// one channel.
+ 	let chan_1_scid = create_announced_chan_between_nodes_with_value(&nodes, 0, 1, 10_000_000, 0).0.contents.short_channel_id;
+ 	create_announced_chan_between_nodes_with_value(&nodes, 1, 3, 10_000_000, 0);
+ 	let chan_3_scid = create_announced_chan_between_nodes_with_value(&nodes, 0, 2, 10_000_000, 0).0.contents.short_channel_id;
+ 	let chan_4_scid = create_announced_chan_between_nodes_with_value(&nodes, 2, 3, 10_000_000, 0).0.contents.short_channel_id;
+ 
+ 	let amt_msat = 100_000_000;
+ 	let (_, payment_hash, _, payment_secret) = get_route_and_payment_hash!(&nodes[0], nodes[2], amt_msat);
+ 	#[cfg(feature = "std")]
+ 	let payment_expiry_secs = SystemTime::UNIX_EPOCH.elapsed().unwrap().as_secs() + 60 * 60;
+ 	#[cfg(not(feature = "std"))]
+ 	let payment_expiry_secs = 60 * 60;
+ 	let mut invoice_features = InvoiceFeatures::empty();
+ 	invoice_features.set_variable_length_onion_required();
+ 	invoice_features.set_payment_secret_required();
+ 	invoice_features.set_basic_mpp_optional();
+ 	let payment_params = PaymentParameters::from_node_id(nodes[1].node.get_our_node_id(), TEST_FINAL_CLTV)
+ 		.with_expiry_time(payment_expiry_secs as u64)
+ 		.with_features(invoice_features);
+ 	let mut route_params = RouteParameters {
+ 		payment_params,
+ 		final_value_msat: amt_msat,
+ 		final_cltv_expiry_delta: TEST_FINAL_CLTV,
+ 	};
+ 
+ 	let mut route = Route {
+ 		paths: vec![
+ 			vec![RouteHop {
+ 				pubkey: nodes[1].node.get_our_node_id(),
+ 				node_features: nodes[1].node.node_features(),
+ 				short_channel_id: chan_1_scid,
+ 				channel_features: nodes[1].node.channel_features(),
+ 				fee_msat: 0,
+ 				cltv_expiry_delta: 100,
+ 			}, RouteHop {
+ 				pubkey: nodes[3].node.get_our_node_id(),
+ 				node_features: nodes[2].node.node_features(),
+ 				short_channel_id: 42, // Set a random SCID which nodes[1] will fail as unknown
+ 				channel_features: nodes[2].node.channel_features(),
+ 				fee_msat: amt_msat / 1000,
+ 				cltv_expiry_delta: 100,
+ 			}],
+ 			vec![RouteHop {
+ 				pubkey: nodes[2].node.get_our_node_id(),
+ 				node_features: nodes[2].node.node_features(),
+ 				short_channel_id: chan_3_scid,
+ 				channel_features: nodes[2].node.channel_features(),
+ 				fee_msat: 100_000,
+ 				cltv_expiry_delta: 100,
+ 			}, RouteHop {
+ 				pubkey: nodes[3].node.get_our_node_id(),
+ 				node_features: nodes[3].node.node_features(),
+ 				short_channel_id: chan_4_scid,
+ 				channel_features: nodes[3].node.channel_features(),
+ 				fee_msat: amt_msat - amt_msat / 1000,
+ 				cltv_expiry_delta: 100,
+ 			}]
+ 		],
+ 		payment_params: Some(PaymentParameters::from_node_id(nodes[2].node.get_our_node_id(), TEST_FINAL_CLTV)),
+ 	};
+ 	nodes[0].router.expect_find_route(route_params.clone(), Ok(route.clone()));
+ 
+ 	nodes[0].node.send_payment_with_retry(payment_hash, &Some(payment_secret), PaymentId(payment_hash.0), route_params.clone(), Retry::Attempts(0xdeadbeef)).unwrap();
+ 	check_added_monitors!(nodes[0], 2);
+ 	let mut send_msg_events = nodes[0].node.get_and_clear_pending_msg_events();
+ 	assert_eq!(send_msg_events.len(), 2);
+ 	send_msg_events.retain(|msg|
+ 		if let MessageSendEvent::UpdateHTLCs { node_id, .. } = msg {
+ 			// Drop the commitment update for nodes[2], we can just let that one sit pending
+ 			// forever.
+ 			*node_id == nodes[1].node.get_our_node_id()
+ 		} else { panic!(); }
+ 	);
+ 
+ 	// from here on out, the retry `RouteParameters` amount will be amt/1000
+ 	route_params.final_value_msat /= 1000;
+ 	route.paths.pop();
+ 
+ 	let end_time = Instant::now() + Duration::from_secs(1);
+ 	macro_rules! thread_body { () => { {
+ 		// We really want std::thread::scope, but it's not stable until 1.63. Until then, we get unsafe.
+ 		let node_ref = NodePtr::from_node(&nodes[0]);
+ 		move || {
+ 			let node_a = unsafe { &*node_ref.0 };
+ 			while Instant::now() < end_time {
+ 				node_a.node.get_and_clear_pending_events(); // wipe the PendingHTLCsForwardable
+ 				// Ignore if we have any pending events, just always pretend we just got a
+ 				// PendingHTLCsForwardable
+ 				node_a.node.process_pending_htlc_forwards();
+ 			}
+ 		}
+ 	} } }
+ 	let mut threads = Vec::new();
+ 	for _ in 0..16 { threads.push(std::thread::spawn(thread_body!())); }
+ 
+ 	// Back in the main thread, poll pending messages and make sure that we never have more than
+ 	// one HTLC pending at a time. Note that the commitment_signed_dance will fail horribly if
+ 	// there are HTLC messages shoved in while it's running. This allows us to test that we never
+ 	// generate an additional update_add_htlc until we've fully failed the first.
+ 	let mut previously_failed_channels = Vec::new();
+ 	loop {
+ 		assert_eq!(send_msg_events.len(), 1);
+ 		let send_event = SendEvent::from_event(send_msg_events.pop().unwrap());
+ 		assert_eq!(send_event.msgs.len(), 1);
+ 
+ 		nodes[1].node.handle_update_add_htlc(&nodes[0].node.get_our_node_id(), &send_event.msgs[0]);
+ 		commitment_signed_dance!(nodes[1], nodes[0], send_event.commitment_msg, false, true);
+ 
+ 		// Note that we only push one route into `expect_find_route` at a time, because that's all
+ 		// the retries (should) need. If the bug is reintroduced "real" routes may be selected, but
+ 		// we should still ultimately fail for the same reason - because we're trying to send too
+ 		// many HTLCs at once.
+ 		let mut new_route_params = route_params.clone();
+ 		previously_failed_channels.push(route.paths[0][1].short_channel_id);
+ 		new_route_params.payment_params.previously_failed_channels = previously_failed_channels.clone();
+ 		route.paths[0][1].short_channel_id += 1;
+ 		nodes[0].router.expect_find_route(new_route_params, Ok(route.clone()));
+ 
+ 		let bs_fail_updates = get_htlc_update_msgs!(nodes[1], nodes[0].node.get_our_node_id());
+ 		nodes[0].node.handle_update_fail_htlc(&nodes[1].node.get_our_node_id(), &bs_fail_updates.update_fail_htlcs[0]);
+ 		// The "normal" commitment_signed_dance delivers the final RAA and then calls
+ 		// `check_added_monitors` to ensure only the one RAA-generated monitor update was created.
+ 		// This races with our other threads which may generate an add-HTLCs commitment update via
+ 		// `process_pending_htlc_forwards`. Instead, we defer the monitor update check until after
+ 		// *we've* called `process_pending_htlc_forwards` when it's guaranteed to have two updates.
+ 		let last_raa = commitment_signed_dance!(nodes[0], nodes[1], bs_fail_updates.commitment_signed, false, true, false, true);
+ 		nodes[0].node.handle_revoke_and_ack(&nodes[1].node.get_our_node_id(), &last_raa);
+ 
+ 		let cur_time = Instant::now();
+ 		if cur_time > end_time {
+ 			for thread in threads.drain(..) { thread.join().unwrap(); }
+ 		}
+ 
+ 		// Make sure we have some events to handle when we go around...
+ 		nodes[0].node.get_and_clear_pending_events(); // wipe the PendingHTLCsForwardable
+ 		nodes[0].node.process_pending_htlc_forwards();
+ 		send_msg_events = nodes[0].node.get_and_clear_pending_msg_events();
+ 		check_added_monitors!(nodes[0], 2);
+ 
+ 		if cur_time > end_time {
+ 			break;
+ 		}
+ 	}
+ }
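
The `thread_body!` macro above hands a reference to `nodes[0]` across threads through a `NodePtr` helper because `std::thread::scope` was not yet stable when this test was written. `NodePtr` is defined elsewhere in the file and is not part of this hunk; the following is only a hedged sketch of what such a lifetime-erasing pointer wrapper could look like, with the exact signature and lifetime bounds being assumptions rather than the upstream definition.

```rust
// Hypothetical sketch of a `NodePtr`-style helper (illustration only, not the
// upstream definition). It erases the `Node` lifetimes behind a raw pointer so
// the test threads can all poke at `&nodes[0]`.
struct NodePtr(*const Node<'static, 'static, 'static>);

impl NodePtr {
	fn from_node<'a, 'b: 'a, 'c: 'b>(node: &Node<'a, 'b, 'c>) -> Self {
		// Cast away the lifetimes; only the raw pointer crosses threads.
		Self((node as *const Node<'a, 'b, 'c>).cast())
	}
}

// SAFETY: sound only because the test joins every spawned thread before
// `nodes` is dropped, so the pointer never outlives the data it points to.
unsafe impl Send for NodePtr {}
unsafe impl Sync for NodePtr {}
```

On Rust 1.63 and newer, `std::thread::scope` would let the worker threads borrow `nodes[0]` directly and drop the `unsafe` entirely, since scoped threads are guaranteed to be joined before the borrowed data goes out of scope.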