Skip to content

Commit 77595e4

Browse files
net: if a DNS lookup times out, forget that it is in flight
Before this CL, if the system resolver does a very slow DNS lookup for a particular host, all subsequent requests for that host will hang waiting for that lookup to complete. That is more or less expected when Dial is called with no deadline. When Dial has a deadline, though, we can accumulate a large number of goroutines waiting for that slow DNS lookup. Try to avoid this problem by restarting the DNS lookup when it is redone after a deadline is passed. This CL also avoids creating an extra goroutine purely to handle the deadline. No test because we would have to simulate a slow DNS lookup followed by a fast DNS lookup. Fixes #8602. LGTM=bradfitz R=bradfitz, mikioh.mikioh CC=golang-codereviews, r, rsc https://golang.org/cl/154610044
1 parent 78082df commit 77595e4

File tree

2 files changed

+89
-28
lines changed

2 files changed

+89
-28
lines changed

src/net/lookup.go

Lines changed: 28 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,16 @@ func lookupIPMerge(host string) (addrs []IP, err error) {
4040
addrsi, err, shared := lookupGroup.Do(host, func() (interface{}, error) {
4141
return lookupIP(host)
4242
})
43+
return lookupIPReturn(addrsi, err, shared)
44+
}
45+
46+
// lookupIPReturn turns the return values from singleflight.Do into
47+
// the return values from LookupIP.
48+
func lookupIPReturn(addrsi interface{}, err error, shared bool) ([]IP, error) {
4349
if err != nil {
4450
return nil, err
4551
}
46-
addrs = addrsi.([]IP)
52+
addrs := addrsi.([]IP)
4753
if shared {
4854
clone := make([]IP, len(addrs))
4955
copy(clone, addrs)
@@ -52,41 +58,40 @@ func lookupIPMerge(host string) (addrs []IP, err error) {
5258
return addrs, nil
5359
}
5460

61+
// lookupIPDeadline looks up a hostname with a deadline.
5562
func lookupIPDeadline(host string, deadline time.Time) (addrs []IP, err error) {
5663
if deadline.IsZero() {
5764
return lookupIPMerge(host)
5865
}
5966

60-
// TODO(bradfitz): consider pushing the deadline down into the
61-
// name resolution functions. But that involves fixing it for
62-
// the native Go resolver, cgo, Windows, etc.
63-
//
64-
// In the meantime, just use a goroutine. Most users affected
65-
// by http://golang.org/issue/2631 are due to TCP connections
66-
// to unresponsive hosts, not DNS.
67+
// We could push the deadline down into the name resolution
68+
// functions. However, the most commonly used implementation
69+
// calls getaddrinfo, which has no timeout.
70+
6771
timeout := deadline.Sub(time.Now())
6872
if timeout <= 0 {
69-
err = errTimeout
70-
return
73+
return nil, errTimeout
7174
}
7275
t := time.NewTimer(timeout)
7376
defer t.Stop()
74-
type res struct {
75-
addrs []IP
76-
err error
77-
}
78-
resc := make(chan res, 1)
79-
go func() {
80-
a, err := lookupIPMerge(host)
81-
resc <- res{a, err}
82-
}()
77+
78+
ch := lookupGroup.DoChan(host, func() (interface{}, error) {
79+
return lookupIP(host)
80+
})
81+
8382
select {
8483
case <-t.C:
85-
err = errTimeout
86-
case r := <-resc:
87-
addrs, err = r.addrs, r.err
84+
// The DNS lookup timed out for some reason. Force
85+
// future requests to start the DNS lookup again
86+
// rather than waiting for the current lookup to
87+
// complete. See issue 8602.
88+
lookupGroup.Forget(host)
89+
90+
return nil, errTimeout
91+
92+
case r := <-ch:
93+
return lookupIPReturn(r.v, r.err, r.shared)
8894
}
89-
return
9095
}
9196

9297
// LookupPort looks up the port for the given network and service.

src/net/singleflight.go

Lines changed: 61 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,18 @@ import "sync"
88

99
// call is an in-flight or completed singleflight.Do call
1010
type call struct {
11-
wg sync.WaitGroup
12-
val interface{}
13-
err error
14-
dups int
11+
wg sync.WaitGroup
12+
13+
// These fields are written once before the WaitGroup is done
14+
// and are only read after the WaitGroup is done.
15+
val interface{}
16+
err error
17+
18+
// These fields are read and written with the singleflight
19+
// mutex held before the WaitGroup is done, and are read but
20+
// not written after the WaitGroup is done.
21+
dups int
22+
chans []chan<- singleflightResult
1523
}
1624

1725
// singleflight represents a class of work and forms a namespace in
@@ -21,6 +29,14 @@ type singleflight struct {
2129
m map[string]*call // lazily initialized
2230
}
2331

32+
// singleflightResult holds the results of Do, so they can be passed
33+
// on a channel.
34+
type singleflightResult struct {
35+
v interface{}
36+
err error
37+
shared bool
38+
}
39+
2440
// Do executes and returns the results of the given function, making
2541
// sure that only one execution is in-flight for a given key at a
2642
// time. If a duplicate comes in, the duplicate caller waits for the
@@ -42,12 +58,52 @@ func (g *singleflight) Do(key string, fn func() (interface{}, error)) (v interfa
4258
g.m[key] = c
4359
g.mu.Unlock()
4460

61+
g.doCall(c, key, fn)
62+
return c.val, c.err, c.dups > 0
63+
}
64+
65+
// DoChan is like Do but returns a channel that will receive the
66+
// results when they are ready.
67+
func (g *singleflight) DoChan(key string, fn func() (interface{}, error)) <-chan singleflightResult {
68+
ch := make(chan singleflightResult, 1)
69+
g.mu.Lock()
70+
if g.m == nil {
71+
g.m = make(map[string]*call)
72+
}
73+
if c, ok := g.m[key]; ok {
74+
c.dups++
75+
c.chans = append(c.chans, ch)
76+
g.mu.Unlock()
77+
return ch
78+
}
79+
c := &call{chans: []chan<- singleflightResult{ch}}
80+
c.wg.Add(1)
81+
g.m[key] = c
82+
g.mu.Unlock()
83+
84+
go g.doCall(c, key, fn)
85+
86+
return ch
87+
}
88+
89+
// doCall handles the single call for a key.
90+
func (g *singleflight) doCall(c *call, key string, fn func() (interface{}, error)) {
4591
c.val, c.err = fn()
4692
c.wg.Done()
4793

4894
g.mu.Lock()
4995
delete(g.m, key)
96+
for _, ch := range c.chans {
97+
ch <- singleflightResult{c.val, c.err, c.dups > 0}
98+
}
5099
g.mu.Unlock()
100+
}
51101

52-
return c.val, c.err, c.dups > 0
102+
// Forget tells the singleflight to forget about a key. Future calls
103+
// to Do for this key will call the function rather than waiting for
104+
// an earlier call to complete.
105+
func (g *singleflight) Forget(key string) {
106+
g.mu.Lock()
107+
delete(g.m, key)
108+
g.mu.Unlock()
53109
}

0 commit comments

Comments
 (0)