Skip to content

Commit 6e383c4

Browse files
committed
quic: add qlog recovery metrics
Log events for various congestion control and loss recovery metrics. For golang/go#58547 Change-Id: Ife3b3897f6ca731049c78b934a7123aa1ed4aee2 Reviewed-on: https://go-review.googlesource.com/c/net/+/564016 LUCI-TryBot-Result: Go LUCI <[email protected]> Reviewed-by: Jonathan Amsterdam <[email protected]>
1 parent 840656f commit 6e383c4

12 files changed

+222
-37
lines changed

internal/quic/congestion_reno.go

+56-3
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
package quic
88

99
import (
10+
"context"
11+
"log/slog"
1012
"math"
1113
"time"
1214
)
@@ -40,6 +42,9 @@ type ccReno struct {
4042
// true if we haven't sent that packet yet.
4143
sendOnePacketInRecovery bool
4244

45+
// inRecovery is set when we are in the recovery state.
46+
inRecovery bool
47+
4348
// underutilized is set if the congestion window is underutilized
4449
// due to insufficient application data, flow control limits, or
4550
// anti-amplification limits.
@@ -100,12 +105,19 @@ func (c *ccReno) canSend() bool {
100105
// congestion controller permits sending data, but no data is sent.
101106
//
102107
// https://www.rfc-editor.org/rfc/rfc9002#section-7.8
103-
func (c *ccReno) setUnderutilized(v bool) {
108+
func (c *ccReno) setUnderutilized(log *slog.Logger, v bool) {
109+
if c.underutilized == v {
110+
return
111+
}
112+
oldState := c.state()
104113
c.underutilized = v
114+
if logEnabled(log, QLogLevelPacket) {
115+
logCongestionStateUpdated(log, oldState, c.state())
116+
}
105117
}
106118

107119
// packetSent indicates that a packet has been sent.
108-
func (c *ccReno) packetSent(now time.Time, space numberSpace, sent *sentPacket) {
120+
func (c *ccReno) packetSent(now time.Time, log *slog.Logger, space numberSpace, sent *sentPacket) {
109121
if !sent.inFlight {
110122
return
111123
}
@@ -185,7 +197,11 @@ func (c *ccReno) packetLost(now time.Time, space numberSpace, sent *sentPacket,
185197
}
186198

187199
// packetBatchEnd is called at the end of processing a batch of acked or lost packets.
188-
func (c *ccReno) packetBatchEnd(now time.Time, space numberSpace, rtt *rttState, maxAckDelay time.Duration) {
200+
func (c *ccReno) packetBatchEnd(now time.Time, log *slog.Logger, space numberSpace, rtt *rttState, maxAckDelay time.Duration) {
201+
if logEnabled(log, QLogLevelPacket) {
202+
oldState := c.state()
203+
defer func() { logCongestionStateUpdated(log, oldState, c.state()) }()
204+
}
189205
if !c.ackLastLoss.IsZero() && !c.ackLastLoss.Before(c.recoveryStartTime) {
190206
// Enter the recovery state.
191207
// https://www.rfc-editor.org/rfc/rfc9002.html#section-7.3.2
@@ -196,8 +212,10 @@ func (c *ccReno) packetBatchEnd(now time.Time, space numberSpace, rtt *rttState,
196212
// Clear congestionPendingAcks to avoid increasing the congestion
197213
// window based on acks in a frame that sends us into recovery.
198214
c.congestionPendingAcks = 0
215+
c.inRecovery = true
199216
} else if c.congestionPendingAcks > 0 {
200217
// We are in slow start or congestion avoidance.
218+
c.inRecovery = false
201219
if c.congestionWindow < c.slowStartThreshold {
202220
// When the congestion window is less than the slow start threshold,
203221
// we are in slow start and increase the window by the number of
@@ -253,3 +271,38 @@ func (c *ccReno) minimumCongestionWindow() int {
253271
// https://www.rfc-editor.org/rfc/rfc9002.html#section-7.2-4
254272
return 2 * c.maxDatagramSize
255273
}
274+
275+
func logCongestionStateUpdated(log *slog.Logger, oldState, newState congestionState) {
276+
if oldState == newState {
277+
return
278+
}
279+
log.LogAttrs(context.Background(), QLogLevelPacket,
280+
"recovery:congestion_state_updated",
281+
slog.String("old", oldState.String()),
282+
slog.String("new", newState.String()),
283+
)
284+
}
285+
286+
type congestionState string
287+
288+
func (s congestionState) String() string { return string(s) }
289+
290+
const (
291+
congestionSlowStart = congestionState("slow_start")
292+
congestionCongestionAvoidance = congestionState("congestion_avoidance")
293+
congestionApplicationLimited = congestionState("application_limited")
294+
congestionRecovery = congestionState("recovery")
295+
)
296+
297+
func (c *ccReno) state() congestionState {
298+
switch {
299+
case c.inRecovery:
300+
return congestionRecovery
301+
case c.underutilized:
302+
return congestionApplicationLimited
303+
case c.congestionWindow < c.slowStartThreshold:
304+
return congestionSlowStart
305+
default:
306+
return congestionCongestionAvoidance
307+
}
308+
}

internal/quic/congestion_reno_test.go

+3-3
Original file line numberDiff line numberDiff line change
@@ -470,7 +470,7 @@ func (c *ccTest) setRTT(smoothedRTT, rttvar time.Duration) {
470470
func (c *ccTest) setUnderutilized(v bool) {
471471
c.t.Helper()
472472
c.t.Logf("set underutilized = %v", v)
473-
c.cc.setUnderutilized(v)
473+
c.cc.setUnderutilized(nil, v)
474474
}
475475

476476
func (c *ccTest) packetSent(space numberSpace, size int, fns ...func(*sentPacket)) *sentPacket {
@@ -488,7 +488,7 @@ func (c *ccTest) packetSent(space numberSpace, size int, fns ...func(*sentPacket
488488
f(sent)
489489
}
490490
c.t.Logf("packet sent: num=%v.%v, size=%v", space, sent.num, sent.size)
491-
c.cc.packetSent(c.now, space, sent)
491+
c.cc.packetSent(c.now, nil, space, sent)
492492
return sent
493493
}
494494

@@ -519,7 +519,7 @@ func (c *ccTest) packetDiscarded(space numberSpace, sent *sentPacket) {
519519
func (c *ccTest) packetBatchEnd(space numberSpace) {
520520
c.t.Helper()
521521
c.t.Logf("(end of batch)")
522-
c.cc.packetBatchEnd(c.now, space, &c.rtt, c.maxAckDelay)
522+
c.cc.packetBatchEnd(c.now, nil, space, &c.rtt, c.maxAckDelay)
523523
}
524524

525525
func (c *ccTest) wantCanSend(want bool) {

internal/quic/conn.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ func (c *Conn) discardKeys(now time.Time, space numberSpace) {
210210
case handshakeSpace:
211211
c.keysHandshake.discard()
212212
}
213-
c.loss.discardKeys(now, space)
213+
c.loss.discardKeys(now, c.log, space)
214214
}
215215

216216
// receiveTransportParameters applies transport parameters sent by the peer.

internal/quic/conn_loss.go

+4
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@ import "fmt"
2020
// See RFC 9000, Section 13.3 for a complete list of information which is retransmitted on loss.
2121
// https://www.rfc-editor.org/rfc/rfc9000#section-13.3
2222
func (c *Conn) handleAckOrLoss(space numberSpace, sent *sentPacket, fate packetFate) {
23+
if fate == packetLost && c.logEnabled(QLogLevelPacket) {
24+
c.logPacketLost(space, sent)
25+
}
26+
2327
// The list of frames in a sent packet is marshaled into a buffer in the sentPacket
2428
// by the packetWriter. Unmarshal that buffer here. This code must be kept in sync with
2529
// packetWriter.append*.

internal/quic/conn_recv.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ func (c *Conn) handleRetry(now time.Time, pkt []byte) {
192192
c.connIDState.handleRetryPacket(p.srcConnID)
193193
// We need to resend any data we've already sent in Initial packets.
194194
// We must not reuse already sent packet numbers.
195-
c.loss.discardPackets(initialSpace, c.handleAckOrLoss)
195+
c.loss.discardPackets(initialSpace, c.log, c.handleAckOrLoss)
196196
// TODO: Discard 0-RTT packets as well, once we support 0-RTT.
197197
}
198198

@@ -416,7 +416,7 @@ func (c *Conn) handleAckFrame(now time.Time, space numberSpace, payload []byte)
416416
if c.peerAckDelayExponent >= 0 {
417417
delay = ackDelay.Duration(uint8(c.peerAckDelayExponent))
418418
}
419-
c.loss.receiveAckEnd(now, space, delay, c.handleAckOrLoss)
419+
c.loss.receiveAckEnd(now, c.log, space, delay, c.handleAckOrLoss)
420420
if space == appDataSpace {
421421
c.keysAppData.handleAckFor(largest)
422422
}

internal/quic/conn_send.go

+13-8
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,10 @@ func (c *Conn) maybeSend(now time.Time) (next time.Time) {
2222
// Assumption: The congestion window is not underutilized.
2323
// If congestion control, pacing, and anti-amplification all permit sending,
2424
// but we have no packet to send, then we will declare the window underutilized.
25-
c.loss.cc.setUnderutilized(false)
25+
underutilized := false
26+
defer func() {
27+
c.loss.cc.setUnderutilized(c.log, underutilized)
28+
}()
2629

2730
// Send one datagram on each iteration of this loop,
2831
// until we hit a limit or run out of data to send.
@@ -80,7 +83,6 @@ func (c *Conn) maybeSend(now time.Time) (next time.Time) {
8083
}
8184
sentInitial = c.w.finishProtectedLongHeaderPacket(pnumMaxAcked, c.keysInitial.w, p)
8285
if sentInitial != nil {
83-
c.idleHandlePacketSent(now, sentInitial)
8486
// Client initial packets and ack-eliciting server initial packaets
8587
// need to be sent in a datagram padded to at least 1200 bytes.
8688
// We can't add the padding yet, however, since we may want to
@@ -111,8 +113,7 @@ func (c *Conn) maybeSend(now time.Time) (next time.Time) {
111113
c.logPacketSent(packetTypeHandshake, pnum, p.srcConnID, p.dstConnID, c.w.packetLen(), c.w.payload())
112114
}
113115
if sent := c.w.finishProtectedLongHeaderPacket(pnumMaxAcked, c.keysHandshake.w, p); sent != nil {
114-
c.idleHandlePacketSent(now, sent)
115-
c.loss.packetSent(now, handshakeSpace, sent)
116+
c.packetSent(now, handshakeSpace, sent)
116117
if c.side == clientSide {
117118
// "[...] a client MUST discard Initial keys when it first
118119
// sends a Handshake packet [...]"
@@ -142,8 +143,7 @@ func (c *Conn) maybeSend(now time.Time) (next time.Time) {
142143
c.logPacketSent(packetType1RTT, pnum, nil, dstConnID, c.w.packetLen(), c.w.payload())
143144
}
144145
if sent := c.w.finish1RTTPacket(pnum, pnumMaxAcked, dstConnID, &c.keysAppData); sent != nil {
145-
c.idleHandlePacketSent(now, sent)
146-
c.loss.packetSent(now, appDataSpace, sent)
146+
c.packetSent(now, appDataSpace, sent)
147147
}
148148
}
149149

@@ -152,7 +152,7 @@ func (c *Conn) maybeSend(now time.Time) (next time.Time) {
152152
if limit == ccOK {
153153
// We have nothing to send, and congestion control does not
154154
// block sending. The congestion window is underutilized.
155-
c.loss.cc.setUnderutilized(true)
155+
underutilized = true
156156
}
157157
return next
158158
}
@@ -175,14 +175,19 @@ func (c *Conn) maybeSend(now time.Time) (next time.Time) {
175175
// with a Handshake packet, then we've discarded Initial keys
176176
// since constructing the packet and shouldn't record it as in-flight.
177177
if c.keysInitial.canWrite() {
178-
c.loss.packetSent(now, initialSpace, sentInitial)
178+
c.packetSent(now, initialSpace, sentInitial)
179179
}
180180
}
181181

182182
c.endpoint.sendDatagram(buf, c.peerAddr)
183183
}
184184
}
185185

186+
func (c *Conn) packetSent(now time.Time, space numberSpace, sent *sentPacket) {
187+
c.idleHandlePacketSent(now, sent)
188+
c.loss.packetSent(now, c.log, space, sent)
189+
}
190+
186191
func (c *Conn) appendFrames(now time.Time, space numberSpace, pnum packetNumber, limit ccLimit) {
187192
if c.lifetime.localErr != nil {
188193
c.appendConnectionCloseFrame(now, space, c.lifetime.localErr)

internal/quic/loss.go

+41-6
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
package quic
88

99
import (
10+
"context"
11+
"log/slog"
1012
"math"
1113
"time"
1214
)
@@ -179,21 +181,24 @@ func (c *lossState) nextNumber(space numberSpace) packetNumber {
179181
}
180182

181183
// packetSent records a sent packet.
182-
func (c *lossState) packetSent(now time.Time, space numberSpace, sent *sentPacket) {
184+
func (c *lossState) packetSent(now time.Time, log *slog.Logger, space numberSpace, sent *sentPacket) {
183185
sent.time = now
184186
c.spaces[space].add(sent)
185187
size := sent.size
186188
if c.antiAmplificationLimit != antiAmplificationUnlimited {
187189
c.antiAmplificationLimit = max(0, c.antiAmplificationLimit-size)
188190
}
189191
if sent.inFlight {
190-
c.cc.packetSent(now, space, sent)
192+
c.cc.packetSent(now, log, space, sent)
191193
c.pacer.packetSent(now, size, c.cc.congestionWindow, c.rtt.smoothedRTT)
192194
if sent.ackEliciting {
193195
c.spaces[space].lastAckEliciting = sent.num
194196
c.ptoExpired = false // reset expired PTO timer after sending probe
195197
}
196198
c.scheduleTimer(now)
199+
if logEnabled(log, QLogLevelPacket) {
200+
logBytesInFlight(log, c.cc.bytesInFlight)
201+
}
197202
}
198203
if sent.ackEliciting {
199204
c.consecutiveNonAckElicitingPackets = 0
@@ -267,7 +272,7 @@ func (c *lossState) receiveAckRange(now time.Time, space numberSpace, rangeIndex
267272

268273
// receiveAckEnd finishes processing an ack frame.
269274
// The lossf function is called for each packet newly detected as lost.
270-
func (c *lossState) receiveAckEnd(now time.Time, space numberSpace, ackDelay time.Duration, lossf func(numberSpace, *sentPacket, packetFate)) {
275+
func (c *lossState) receiveAckEnd(now time.Time, log *slog.Logger, space numberSpace, ackDelay time.Duration, lossf func(numberSpace, *sentPacket, packetFate)) {
271276
c.spaces[space].sentPacketList.clean()
272277
// Update the RTT sample when the largest acknowledged packet in the ACK frame
273278
// is newly acknowledged, and at least one newly acknowledged packet is ack-eliciting.
@@ -286,24 +291,44 @@ func (c *lossState) receiveAckEnd(now time.Time, space numberSpace, ackDelay tim
286291
// https://www.rfc-editor.org/rfc/rfc9002.html#section-6.2.2.1-3
287292
c.timer = time.Time{}
288293
c.detectLoss(now, lossf)
289-
c.cc.packetBatchEnd(now, space, &c.rtt, c.maxAckDelay)
294+
c.cc.packetBatchEnd(now, log, space, &c.rtt, c.maxAckDelay)
295+
296+
if logEnabled(log, QLogLevelPacket) {
297+
var ssthresh slog.Attr
298+
if c.cc.slowStartThreshold != math.MaxInt {
299+
ssthresh = slog.Int("ssthresh", c.cc.slowStartThreshold)
300+
}
301+
log.LogAttrs(context.Background(), QLogLevelPacket,
302+
"recovery:metrics_updated",
303+
slog.Duration("min_rtt", c.rtt.minRTT),
304+
slog.Duration("smoothed_rtt", c.rtt.smoothedRTT),
305+
slog.Duration("latest_rtt", c.rtt.latestRTT),
306+
slog.Duration("rtt_variance", c.rtt.rttvar),
307+
slog.Int("congestion_window", c.cc.congestionWindow),
308+
slog.Int("bytes_in_flight", c.cc.bytesInFlight),
309+
ssthresh,
310+
)
311+
}
290312
}
291313

292314
// discardPackets declares that packets within a number space will not be delivered
293315
// and that data contained in them should be resent.
294316
// For example, after receiving a Retry packet we discard already-sent Initial packets.
295-
func (c *lossState) discardPackets(space numberSpace, lossf func(numberSpace, *sentPacket, packetFate)) {
317+
func (c *lossState) discardPackets(space numberSpace, log *slog.Logger, lossf func(numberSpace, *sentPacket, packetFate)) {
296318
for i := 0; i < c.spaces[space].size; i++ {
297319
sent := c.spaces[space].nth(i)
298320
sent.lost = true
299321
c.cc.packetDiscarded(sent)
300322
lossf(numberSpace(space), sent, packetLost)
301323
}
302324
c.spaces[space].clean()
325+
if logEnabled(log, QLogLevelPacket) {
326+
logBytesInFlight(log, c.cc.bytesInFlight)
327+
}
303328
}
304329

305330
// discardKeys is called when dropping packet protection keys for a number space.
306-
func (c *lossState) discardKeys(now time.Time, space numberSpace) {
331+
func (c *lossState) discardKeys(now time.Time, log *slog.Logger, space numberSpace) {
307332
// https://www.rfc-editor.org/rfc/rfc9002.html#section-6.4
308333
for i := 0; i < c.spaces[space].size; i++ {
309334
sent := c.spaces[space].nth(i)
@@ -313,6 +338,9 @@ func (c *lossState) discardKeys(now time.Time, space numberSpace) {
313338
c.spaces[space].maxAcked = -1
314339
c.spaces[space].lastAckEliciting = -1
315340
c.scheduleTimer(now)
341+
if logEnabled(log, QLogLevelPacket) {
342+
logBytesInFlight(log, c.cc.bytesInFlight)
343+
}
316344
}
317345

318346
func (c *lossState) lossDuration() time.Duration {
@@ -459,3 +487,10 @@ func (c *lossState) ptoBasePeriod() time.Duration {
459487
}
460488
return pto
461489
}
490+
491+
func logBytesInFlight(log *slog.Logger, bytesInFlight int) {
492+
log.LogAttrs(context.Background(), QLogLevelPacket,
493+
"recovery:metrics_updated",
494+
slog.Int("bytes_in_flight", bytesInFlight),
495+
)
496+
}

internal/quic/loss_test.go

+5-5
Original file line numberDiff line numberDiff line change
@@ -1060,7 +1060,7 @@ func TestLossPersistentCongestion(t *testing.T) {
10601060
maxDatagramSize: 1200,
10611061
})
10621062
test.send(initialSpace, 0, testSentPacketSize(1200))
1063-
test.c.cc.setUnderutilized(true)
1063+
test.c.cc.setUnderutilized(nil, true)
10641064

10651065
test.advance(10 * time.Millisecond)
10661066
test.ack(initialSpace, 0*time.Millisecond, i64range[packetNumber]{0, 1})
@@ -1377,7 +1377,7 @@ func (c *lossTest) setRTTVar(d time.Duration) {
13771377

13781378
func (c *lossTest) setUnderutilized(v bool) {
13791379
c.t.Logf("set congestion window underutilized: %v", v)
1380-
c.c.cc.setUnderutilized(v)
1380+
c.c.cc.setUnderutilized(nil, v)
13811381
}
13821382

13831383
func (c *lossTest) advance(d time.Duration) {
@@ -1438,7 +1438,7 @@ func (c *lossTest) send(spaceID numberSpace, opts ...any) {
14381438
sent := &sentPacket{}
14391439
*sent = prototype
14401440
sent.num = num
1441-
c.c.packetSent(c.now, spaceID, sent)
1441+
c.c.packetSent(c.now, nil, spaceID, sent)
14421442
}
14431443
}
14441444

@@ -1462,7 +1462,7 @@ func (c *lossTest) ack(spaceID numberSpace, ackDelay time.Duration, rs ...i64ran
14621462
c.t.Logf("ack %v delay=%v [%v,%v)", spaceID, ackDelay, r.start, r.end)
14631463
c.c.receiveAckRange(c.now, spaceID, i, r.start, r.end, c.onAckOrLoss)
14641464
}
1465-
c.c.receiveAckEnd(c.now, spaceID, ackDelay, c.onAckOrLoss)
1465+
c.c.receiveAckEnd(c.now, nil, spaceID, ackDelay, c.onAckOrLoss)
14661466
}
14671467

14681468
func (c *lossTest) onAckOrLoss(space numberSpace, sent *sentPacket, fate packetFate) {
@@ -1491,7 +1491,7 @@ func (c *lossTest) discardKeys(spaceID numberSpace) {
14911491
c.t.Helper()
14921492
c.checkUnexpectedEvents()
14931493
c.t.Logf("discard %s keys", spaceID)
1494-
c.c.discardKeys(c.now, spaceID)
1494+
c.c.discardKeys(c.now, nil, spaceID)
14951495
}
14961496

14971497
func (c *lossTest) setMaxAckDelay(d time.Duration) {

0 commit comments

Comments
 (0)