Commit 3536508

Fix the Router's Ability to Prune the Mesh Periodically (#589)
When a new peer wants to graft us into their mesh, we check our current mesh size to determine whether we can accept any more peers. This is done to keep our mesh size from exceeding `Dhi` and to prevent mesh takeover attacks: https://github.com/libp2p/go-libp2p-pubsub/blob/c06df2f9a38e9382e644b241adf0e96e5ca00955/gossipsub.go#L943

During every heartbeat we check our mesh size, and if it is **greater** than `Dhi` we prune the mesh back down to `D`: https://github.com/libp2p/go-libp2p-pubsub/blob/c06df2f9a38e9382e644b241adf0e96e5ca00955/gossipsub.go#L1608

Looking closely at both lines reveals a problematic end result: we stop grafting new peers into our mesh once its size is **greater than or equal to** `Dhi`, but we only prune peers when its size is strictly greater than `Dhi`. Rather than floating between `D` and `Dhi`, the mesh reaches `Dhi` and stays there, which effectively raises the node's target degree from `D` to `Dhi`. This behavior was observed on Ethereum mainnet by recording mesh interactions and message fulfillment from those peers.

This PR fixes the issue by adding an equality check to the prune conditional so that the mesh is periodically pruned back down to `D`, and adds a regression test for this case.
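To make the off-by-one concrete, here is a minimal, self-contained sketch. It is hypothetical: an integer counter stands in for the router's per-topic peer map, using the regression test's `Dhi = 8` and the default `D = 6`.

```go
package main

import "fmt"

func main() {
	const D, Dhi = 6, 8 // target degree and upper bound (illustrative values)

	// GRAFT handling: new peers are accepted only while the mesh is
	// strictly below Dhi, so the mesh tops out at exactly Dhi.
	mesh := 0
	for candidates := 0; candidates < 12; candidates++ {
		if mesh >= Dhi {
			break // graft refused once the mesh reaches Dhi
		}
		mesh++
	}

	// Old heartbeat: prune only when the mesh EXCEEDS Dhi.
	// This never fires at exactly Dhi, so the mesh is stuck there.
	if mesh > Dhi {
		mesh = D
	}
	fmt.Println("old heartbeat leaves mesh at:", mesh) // 8 (== Dhi)

	// Fixed heartbeat: prune when the mesh REACHES Dhi.
	if mesh >= Dhi {
		mesh = D
	}
	fmt.Println("fixed heartbeat prunes mesh to:", mesh) // 6 (== D)
}
```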
1 parent c06df2f commit 3536508

2 files changed (+48, -1)

gossipsub.go (+1, -1)
@@ -1605,7 +1605,7 @@ func (gs *GossipSubRouter) heartbeat() {
 		}
 
 		// do we have too many peers?
-		if len(peers) > gs.params.Dhi {
+		if len(peers) >= gs.params.Dhi {
 			plst := peerMapToList(peers)
 
 			// sort by score (but shuffle first for the case we don't use the score)
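For context, once this condition fires the heartbeat selects which peers to keep. Below is a simplified sketch of that selection, with plain strings standing in for peer IDs; the real heartbeat additionally protects the top-scoring peers and a minimum number of outbound connections.

```go
package main

import (
	"fmt"
	"math/rand"
	"sort"
)

// prunePeers sketches the prune branch: keep the best d peers, prune the rest.
func prunePeers(peers []string, score func(string) float64, d int) (keep, prune []string) {
	// Shuffle first so the selection is random when scoring is disabled
	// (all scores equal) rather than biased by the input ordering.
	rand.Shuffle(len(peers), func(i, j int) { peers[i], peers[j] = peers[j], peers[i] })
	// Stable sort by descending score; ties keep the shuffled order.
	sort.SliceStable(peers, func(i, j int) bool { return score(peers[i]) > score(peers[j]) })
	return peers[:d], peers[d:]
}

func main() {
	peers := []string{"p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8"}
	zero := func(string) float64 { return 0 } // scoring disabled
	keep, prune := prunePeers(peers, zero, 6)
	fmt.Println("keep:", keep, "prune:", prune)
}
```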

gossipsub_test.go (+47)
@@ -3176,6 +3176,53 @@ func TestGossipsubIdontwantClear(t *testing.T) {
 	<-ctx.Done()
 }
 
+func TestGossipsubPruneMeshCorrectly(t *testing.T) {
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+	hosts := getDefaultHosts(t, 9)
+
+	msgID := func(pmsg *pb.Message) string {
+		// silly content-based test message-ID: just use the data as a whole
+		return base64.URLEncoding.EncodeToString(pmsg.Data)
+	}
+
+	params := DefaultGossipSubParams()
+	params.Dhi = 8
+
+	psubs := make([]*PubSub, 9)
+	for i := 0; i < 9; i++ {
+		psubs[i] = getGossipsub(ctx, hosts[i],
+			WithGossipSubParams(params),
+			WithMessageIdFn(msgID))
+	}
+
+	topic := "foobar"
+	for _, ps := range psubs {
+		_, err := ps.Subscribe(topic)
+		if err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	// Connect the first peer with the 8 other peers.
+	for i := 1; i < 9; i++ {
+		connect(t, hosts[0], hosts[i])
+	}
+
+	// Wait for 2 heartbeats to allow pruning excess peers back down to D.
+	totalTimeToWait := params.HeartbeatInitialDelay + 2*params.HeartbeatInterval
+	time.Sleep(totalTimeToWait)
+
+	meshPeers, ok := psubs[0].rt.(*GossipSubRouter).mesh[topic]
+	if !ok {
+		t.Fatal("mesh does not exist for topic")
+	}
+	if len(meshPeers) != params.D {
+		t.Fatalf("mesh does not have the correct number of peers. Wanted %d but got %d", params.D, len(meshPeers))
+	}
+}
+
 func BenchmarkAllocDoDropRPC(b *testing.B) {
 	gs := GossipSubRouter{tracer: &pubsubTracer{}}
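A note on the test's design: with `Dhi = 8` and eight connections made to `hosts[0]`, the first peer's mesh can fill to exactly `Dhi` via grafts. Before the fix, the heartbeat never pruned a mesh of exactly `Dhi` peers, so the final `len(meshPeers) != params.D` check would fail on the old code; with the `>=` comparison it passes once the heartbeats have run. The test can be run in isolation with `go test -run TestGossipsubPruneMeshCorrectly`.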
