Commit 7cfb814
runtime: add ReadMemStats latency benchmark
This change adds a benchmark to the runtime which measures ReadMemStats
latencies. It generates allocations with lots of pointers to keep the GC
busy while hitting ReadMemStats and measuring the time it takes to
complete.

Updates #19812.

Change-Id: I7a76aaf497ba5324d3c7a7b3df32461b3e6c3ac8
Reviewed-on: https://go-review.googlesource.com/c/go/+/220177
Run-TryBot: Michael Knyszek <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Austin Clements <[email protected]>
Reviewed-by: Emmanuel Odeke <[email protected]>
1 parent 79b43fa
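For context (an editorial note, not part of the commit): one plausible way to run just this benchmark against the runtime package is

    go test runtime -run '^$' -bench BenchmarkReadMemStatsLatency -count 5

where -run '^$' skips the regular tests and -count 5 repeats the benchmark so the reported percentiles can be compared across runs. Note that the benchmark skips itself when GOMAXPROCS is 1. The exact invocation is a suggestion, not something stated in the commit.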

1 file changed: +85 −0

src/runtime/gc_test.go (+85)
@@ -10,6 +10,7 @@ import (
 	"reflect"
 	"runtime"
 	"runtime/debug"
+	"sort"
 	"sync"
 	"sync/atomic"
 	"testing"
@@ -506,6 +507,90 @@ func BenchmarkReadMemStats(b *testing.B) {
 	hugeSink = nil
 }
 
+func BenchmarkReadMemStatsLatency(b *testing.B) {
+	// We'll apply load to the runtime with maxProcs-1 goroutines
+	// and use one more to actually benchmark. It doesn't make sense
+	// to try to run this test with only 1 P (that's what
+	// BenchmarkReadMemStats is for).
+	maxProcs := runtime.GOMAXPROCS(-1)
+	if maxProcs == 1 {
+		b.Skip("This benchmark can only be run with GOMAXPROCS > 1")
+	}
+
+	// Code to build a big tree with lots of pointers.
+	type node struct {
+		children [16]*node
+	}
+	var buildTree func(depth int) *node
+	buildTree = func(depth int) *node {
+		tree := new(node)
+		if depth != 0 {
+			for i := range tree.children {
+				tree.children[i] = buildTree(depth - 1)
+			}
+		}
+		return tree
+	}
+
+	// Keep the GC busy by continuously generating large trees.
+	done := make(chan struct{})
+	var wg sync.WaitGroup
+	for i := 0; i < maxProcs-1; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			var hold *node
+		loop:
+			for {
+				hold = buildTree(5)
+				select {
+				case <-done:
+					break loop
+				default:
+				}
+			}
+			runtime.KeepAlive(hold)
+		}()
+	}
+
+	// Collect the latency of each ReadMemStats call here.
+	latencies := make([]time.Duration, 0, 1024)
+
+	// Run for b.N iterations, hitting ReadMemStats continuously
+	// and measuring the latency.
+	b.ResetTimer()
+	var ms runtime.MemStats
+	for i := 0; i < b.N; i++ {
+		// Sleep for a bit, otherwise we're just going to keep
+		// stopping the world and no one will get to do anything.
+		time.Sleep(100 * time.Millisecond)
+		start := time.Now()
+		runtime.ReadMemStats(&ms)
+		latencies = append(latencies, time.Now().Sub(start))
+	}
+	close(done)
+	// Make sure to stop the timer before we wait! The goroutines above
+	// are very heavy-weight and not easy to stop, so we could end up
+	// confusing the benchmarking framework for small b.N.
+	b.StopTimer()
+	wg.Wait()
+
+	// Disable the default */op metrics.
+	// ns/op doesn't mean anything here because it's an average, and
+	// the sleep in the b.N loop above skews it significantly.
+	b.ReportMetric(0, "ns/op")
+	b.ReportMetric(0, "B/op")
+	b.ReportMetric(0, "allocs/op")
+
+	// Sort the latencies, then report percentiles.
+	sort.Slice(latencies, func(i, j int) bool {
+		return latencies[i] < latencies[j]
+	})
+	b.ReportMetric(float64(latencies[len(latencies)*50/100]), "p50-ns")
+	b.ReportMetric(float64(latencies[len(latencies)*90/100]), "p90-ns")
+	b.ReportMetric(float64(latencies[len(latencies)*99/100]), "p99-ns")
+}
+
 func TestUserForcedGC(t *testing.T) {
 	// Test that runtime.GC() triggers a GC even if GOGC=off.
 	defer debug.SetGCPercent(debug.SetGCPercent(-1))
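Two editorial notes on the code above, based on my own arithmetic rather than anything stated in the commit. First, each node is an array of 16 pointers (128 bytes on a 64-bit platform), and buildTree(5) allocates 1 + 16 + 16^2 + ... + 16^5 = 1,118,481 nodes, roughly 136 MiB of pointer-dense heap per tree, so each background goroutine keeps a large live set for the GC to scan on every rebuild. Second, since ns/op, B/op, and allocs/op are zeroed out via b.ReportMetric, the meaningful output columns are the custom p50-ns, p90-ns, and p99-ns metrics; tools such as benchstat, which aggregate arbitrary value/unit columns, should be able to summarize them across -count runs.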
