
Commit 87abb4a

nsrip-dd authored and cherrymui committed
runtime: avoid multiple records with identical stacks from MutexProfile
When using frame pointer unwinding, we defer frame skipping and inline expansion for call stacks until profile reporting time. We can end up with records which have different stacks if no frames are skipped, but identical stacks once skipping is taken into account. Returning multiple records with the same stack (but different values) has broken programs which rely on the records already being fully aggregated by call stack when returned from runtime.MutexProfile. This CL addresses the problem by handling skipping at recording time. We do full inline expansion to correctly skip the desired number of frames when recording the call stack, and then handle the rest of inline expansion when reporting the profile.

The regression test in this CL is adapted from the reproducer in grafana/pyroscope-go#103, authored by Tolya Korniltsev.

Fixes #67548

This reapplies CL 595966. The original version of this CL introduced a bounds error in MutexProfile and failed to correctly expand inlined frames from that call. This CL applies the original CL, fixing the bounds error and adding a test for the output of MutexProfile to ensure the frames are expanded properly.

Change-Id: I5ef8aafb9f88152a704034065c0742ba767c4dbb
Reviewed-on: https://go-review.googlesource.com/c/go/+/598515
LUCI-TryBot-Result: Go LUCI <[email protected]>
Reviewed-by: Carlos Amedee <[email protected]>
Reviewed-by: Cherry Mui <[email protected]>
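
For context, the kind of consumer this change protects looks roughly like the sketch below. It is illustrative only (not code from the issue or from this CL): it keys a map by each record's call stack and assumes runtime.MutexProfile never returns two records with the same stack, which is the invariant the duplicate records broke. With this CL the map sees each stack at most once, so values can be summed without re-aggregating.

// Sketch of a consumer that relies on MutexProfile records being fully
// aggregated by call stack. Illustrative only; not part of this commit.
package main

import (
	"fmt"
	"runtime"
)

func main() {
	// Grow the slice until all records fit, as the MutexProfile
	// documentation suggests.
	var records []runtime.BlockProfileRecord
	n, ok := runtime.MutexProfile(nil)
	for !ok {
		records = make([]runtime.BlockProfileRecord, n*2+1)
		n, ok = runtime.MutexProfile(records)
	}
	records = records[:n]

	// Key each record by its call stack. Before this fix, frame pointer
	// unwinding could yield several records that collapse to the same
	// stack once skipped frames are accounted for, so entries here could
	// collide or double-count.
	byStack := make(map[[32]uintptr]int64)
	for _, r := range records {
		if _, dup := byStack[r.Stack0]; dup {
			fmt.Println("duplicate stack in MutexProfile output")
		}
		byStack[r.Stack0] += r.Cycles
	}
	fmt.Println("unique stacks:", len(byStack))
}
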
1 parent 8c88f0c commit 87abb4a

File tree: 3 files changed, +282 -18 lines changed


src/runtime/mprof.go

+97 -16

@@ -9,6 +9,7 @@ package runtime
 
 import (
 	"internal/abi"
+	"internal/goarch"
 	"internal/profilerecord"
 	"internal/runtime/atomic"
 	"runtime/internal/sys"
@@ -542,36 +543,80 @@ func saveblockevent(cycles, rate int64, skip int, which bucketType) {
 	gp := getg()
 	mp := acquirem() // we must not be preempted while accessing profstack
 
-	nstk := 1
+	var nstk int
 	if tracefpunwindoff() || gp.m.hasCgoOnStack() {
-		mp.profStack[0] = logicalStackSentinel
 		if gp.m.curg == nil || gp.m.curg == gp {
-			nstk = callers(skip, mp.profStack[1:])
+			nstk = callers(skip, mp.profStack)
 		} else {
-			nstk = gcallers(gp.m.curg, skip, mp.profStack[1:])
+			nstk = gcallers(gp.m.curg, skip, mp.profStack)
 		}
 	} else {
-		mp.profStack[0] = uintptr(skip)
 		if gp.m.curg == nil || gp.m.curg == gp {
 			if skip > 0 {
 				// We skip one fewer frame than the provided value for frame
 				// pointer unwinding because the skip value includes the current
 				// frame, whereas the saved frame pointer will give us the
 				// caller's return address first (so, not including
 				// saveblockevent)
-				mp.profStack[0] -= 1
+				skip -= 1
 			}
-			nstk += fpTracebackPCs(unsafe.Pointer(getfp()), mp.profStack[1:])
+			nstk = fpTracebackPartialExpand(skip, unsafe.Pointer(getfp()), mp.profStack)
 		} else {
-			mp.profStack[1] = gp.m.curg.sched.pc
-			nstk += 1 + fpTracebackPCs(unsafe.Pointer(gp.m.curg.sched.bp), mp.profStack[2:])
+			mp.profStack[0] = gp.m.curg.sched.pc
+			nstk = 1 + fpTracebackPartialExpand(skip, unsafe.Pointer(gp.m.curg.sched.bp), mp.profStack[1:])
 		}
 	}
 
 	saveBlockEventStack(cycles, rate, mp.profStack[:nstk], which)
 	releasem(mp)
 }
 
+// fpTracebackPartialExpand records a call stack obtained starting from fp.
+// This function will skip the given number of frames, properly accounting for
+// inlining, and save remaining frames as "physical" return addresses. The
+// consumer should later use CallersFrames or similar to expand inline frames.
+func fpTracebackPartialExpand(skip int, fp unsafe.Pointer, pcBuf []uintptr) int {
+	var n int
+	lastFuncID := abi.FuncIDNormal
+	skipOrAdd := func(retPC uintptr) bool {
+		if skip > 0 {
+			skip--
+		} else if n < len(pcBuf) {
+			pcBuf[n] = retPC
+			n++
+		}
+		return n < len(pcBuf)
+	}
+	for n < len(pcBuf) && fp != nil {
+		// return addr sits one word above the frame pointer
+		pc := *(*uintptr)(unsafe.Pointer(uintptr(fp) + goarch.PtrSize))
+
+		if skip > 0 {
+			callPC := pc - 1
+			fi := findfunc(callPC)
+			u, uf := newInlineUnwinder(fi, callPC)
+			for ; uf.valid(); uf = u.next(uf) {
+				sf := u.srcFunc(uf)
+				if sf.funcID == abi.FuncIDWrapper && elideWrapperCalling(lastFuncID) {
+					// ignore wrappers
+				} else if more := skipOrAdd(uf.pc + 1); !more {
+					return n
+				}
+				lastFuncID = sf.funcID
+			}
+		} else {
+			// We've skipped the desired number of frames, so no need
+			// to perform further inline expansion now.
+			pcBuf[n] = pc
+			n++
+		}
+
+		// follow the frame pointer to the next one
+		fp = unsafe.Pointer(*(*uintptr)(fp))
+	}
+	return n
+}
+
 // lockTimer assists with profiling contention on runtime-internal locks.
 //
 // There are several steps between the time that an M experiences contention and
@@ -1075,10 +1120,34 @@ type BlockProfileRecord struct {
 // the [testing] package's -test.blockprofile flag instead
 // of calling BlockProfile directly.
 func BlockProfile(p []BlockProfileRecord) (n int, ok bool) {
-	return blockProfileInternal(len(p), func(r profilerecord.BlockProfileRecord) {
-		copyBlockProfileRecord(&p[0], r)
-		p = p[1:]
+	var m int
+	n, ok = blockProfileInternal(len(p), func(r profilerecord.BlockProfileRecord) {
+		copyBlockProfileRecord(&p[m], r)
+		m++
 	})
+	if ok {
+		expandFrames(p[:n])
+	}
+	return
+}
+
+func expandFrames(p []BlockProfileRecord) {
+	expandedStack := makeProfStack()
+	for i := range p {
+		cf := CallersFrames(p[i].Stack())
+		j := 0
+		for ; j < len(expandedStack); j++ {
+			f, more := cf.Next()
+			// f.PC is a "call PC", but later consumers will expect
+			// "return PCs"
+			expandedStack[j] = f.PC + 1
+			if !more {
+				break
+			}
+		}
+		k := copy(p[i].Stack0[:], expandedStack[:j])
+		clear(p[i].Stack0[k:])
+	}
 }
 
 // blockProfileInternal returns the number of records n in the profile. If there
@@ -1111,6 +1180,9 @@ func blockProfileInternal(size int, copyFn func(profilerecord.BlockProfileRecord
 	return
 }
 
+// copyBlockProfileRecord copies the sample values and call stack from src to dst.
+// The call stack is copied as-is. The caller is responsible for handling inline
+// expansion, needed when the call stack was collected with frame pointer unwinding.
 func copyBlockProfileRecord(dst *BlockProfileRecord, src profilerecord.BlockProfileRecord) {
 	dst.Count = src.Count
 	dst.Cycles = src.Cycles
@@ -1123,7 +1195,11 @@ func copyBlockProfileRecord(dst *BlockProfileRecord, src profilerecord.BlockProf
 	if asanenabled {
 		asanwrite(unsafe.Pointer(&dst.Stack0[0]), unsafe.Sizeof(dst.Stack0))
 	}
-	i := fpunwindExpand(dst.Stack0[:], src.Stack)
+	// We just copy the stack here without inline expansion
+	// (needed if frame pointer unwinding is used)
+	// since this function is called under the profile lock,
+	// and doing something that might allocate can violate lock ordering.
+	i := copy(dst.Stack0[:], src.Stack)
 	clear(dst.Stack0[i:])
 }
 
@@ -1142,10 +1218,15 @@ func pprof_blockProfileInternal(p []profilerecord.BlockProfileRecord) (n int, ok
 // Most clients should use the [runtime/pprof] package
 // instead of calling MutexProfile directly.
 func MutexProfile(p []BlockProfileRecord) (n int, ok bool) {
-	return mutexProfileInternal(len(p), func(r profilerecord.BlockProfileRecord) {
-		copyBlockProfileRecord(&p[0], r)
-		p = p[1:]
+	var m int
+	n, ok = mutexProfileInternal(len(p), func(r profilerecord.BlockProfileRecord) {
+		copyBlockProfileRecord(&p[m], r)
+		m++
 	})
+	if ok {
+		expandFrames(p[:n])
+	}
+	return
 }
 
 // mutexProfileInternal returns the number of records n in the profile. If there
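
Note the PC convention that expandFrames relies on: BlockProfileRecord stacks hold return addresses, so each expanded frame is stored as f.PC + 1, and a consumer symbolizing those PCs subtracts one again. A minimal consumer-side sketch of that convention, assuming records obtained from runtime.MutexProfile (illustrative only, not part of this commit):

// Sketch: symbolizing return PCs from a BlockProfileRecord. The record
// stores return addresses, so subtract 1 to land inside the call
// instruction before looking up function and line information.
// Illustrative only; not part of this commit.
package main

import (
	"fmt"
	"runtime"
)

func printRecord(r runtime.BlockProfileRecord) {
	for _, pc := range r.Stack() {
		f := runtime.FuncForPC(pc - 1) // return PC -> call PC
		if f == nil {
			continue
		}
		file, line := f.FileLine(pc - 1)
		fmt.Printf("%s\n\t%s:%d\n", f.Name(), file, line)
	}
}

func main() {
	records := make([]runtime.BlockProfileRecord, 64)
	if n, ok := runtime.MutexProfile(records); ok {
		for _, r := range records[:n] {
			printRecord(r)
		}
	}
}
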

src/runtime/pprof/pprof.go

+21 -2

@@ -404,6 +404,25 @@ type countProfile interface {
 	Label(i int) *labelMap
 }
 
+// expandInlinedFrames copies the call stack from pcs into dst, expanding any
+// PCs corresponding to inlined calls into the corresponding PCs for the inlined
+// functions. Returns the number of frames copied to dst.
+func expandInlinedFrames(dst, pcs []uintptr) int {
+	cf := runtime.CallersFrames(pcs)
+	var n int
+	for n < len(dst) {
+		f, more := cf.Next()
+		// f.PC is a "call PC", but later consumers will expect
+		// "return PCs"
+		dst[n] = f.PC + 1
+		n++
+		if !more {
+			break
+		}
+	}
+	return n
+}
+
 // printCountCycleProfile outputs block profile records (for block or mutex profiles)
 // as the pprof-proto format output. Translations from cycle count to time duration
 // are done because The proto expects count and time (nanoseconds) instead of count
@@ -426,7 +445,7 @@ func printCountCycleProfile(w io.Writer, countName, cycleName string, records []
 		values[1] = int64(float64(r.Cycles) / cpuGHz)
 		// For count profiles, all stack addresses are
 		// return PCs, which is what appendLocsForStack expects.
-		n := pprof_fpunwindExpand(expandedStack[:], r.Stack)
+		n := expandInlinedFrames(expandedStack, r.Stack)
 		locs = b.appendLocsForStack(locs[:0], expandedStack[:n])
 		b.pbSample(values, locs, nil)
 	}
@@ -935,7 +954,7 @@ func writeProfileInternal(w io.Writer, debug int, name string, runtimeProfile fu
 	for i := range p {
 		r := &p[i]
 		fmt.Fprintf(w, "%v %v @", r.Cycles, r.Count)
-		n := pprof_fpunwindExpand(expandedStack, r.Stack)
+		n := expandInlinedFrames(expandedStack, r.Stack)
 		stack := expandedStack[:n]
 		for _, pc := range stack {
 			fmt.Fprintf(w, " %#x", pc)
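
expandInlinedFrames is built on runtime.CallersFrames, which is also how code outside the runtime expands inlined calls from raw PCs. A hedged sketch of the same expansion applied to PCs collected with runtime.Callers (illustrative only; the callStack helper is made up for this example):

// Sketch: expanding inlined frames from raw PCs with runtime.CallersFrames.
// Each inlined call becomes its own Frame even though several frames may
// share one physical PC. Illustrative only; not part of this commit.
package main

import (
	"fmt"
	"runtime"
)

func callStack() []string {
	pcs := make([]uintptr, 32)
	n := runtime.Callers(2, pcs) // skip runtime.Callers and callStack itself
	frames := runtime.CallersFrames(pcs[:n])
	var out []string
	for {
		f, more := frames.Next()
		out = append(out, fmt.Sprintf("%s (%s:%d)", f.Function, f.File, f.Line))
		if !more {
			break
		}
	}
	return out
}

func main() {
	for _, line := range callStack() {
		fmt.Println(line)
	}
}
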

src/runtime/pprof/pprof_test.go

+164

@@ -21,6 +21,7 @@ import (
 	"regexp"
 	"runtime"
 	"runtime/debug"
+	"strconv"
 	"strings"
 	"sync"
 	"sync/atomic"
@@ -2578,3 +2579,166 @@ func produceProfileEvents(t *testing.T, depth int) {
 	runtime.GC()
 	goroutineDeep(t, depth-4) // -4 for produceProfileEvents, **, chanrecv1, chanrev, gopark
 }
+
+func getProfileStacks(collect func([]runtime.BlockProfileRecord) (int, bool), fileLine bool) []string {
+	var n int
+	var ok bool
+	var p []runtime.BlockProfileRecord
+	for {
+		p = make([]runtime.BlockProfileRecord, n)
+		n, ok = collect(p)
+		if ok {
+			p = p[:n]
+			break
+		}
+	}
+	var stacks []string
+	for _, r := range p {
+		var stack strings.Builder
+		for i, pc := range r.Stack() {
+			if i > 0 {
+				stack.WriteByte('\n')
+			}
+			// Use FuncForPC instead of CallersFrames,
+			// because we want to see the info for exactly
+			// the PCs returned by the mutex profile to
+			// ensure inlined calls have already been properly
+			// expanded.
+			f := runtime.FuncForPC(pc - 1)
+			stack.WriteString(f.Name())
+			if fileLine {
+				stack.WriteByte(' ')
+				file, line := f.FileLine(pc - 1)
+				stack.WriteString(file)
+				stack.WriteByte(':')
+				stack.WriteString(strconv.Itoa(line))
+			}
+		}
+		stacks = append(stacks, stack.String())
+	}
+	return stacks
+}
+
+func TestMutexBlockFullAggregation(t *testing.T) {
+	// This regression test is adapted from
+	// https://github.com/grafana/pyroscope-go/issues/103,
+	// authored by Tolya Korniltsev
+
+	var m sync.Mutex
+
+	prev := runtime.SetMutexProfileFraction(-1)
+	defer runtime.SetMutexProfileFraction(prev)
+
+	const fraction = 1
+	const iters = 100
+	const workers = 2
+
+	runtime.SetMutexProfileFraction(fraction)
+	runtime.SetBlockProfileRate(1)
+	defer runtime.SetBlockProfileRate(0)
+
+	wg := sync.WaitGroup{}
+	wg.Add(workers)
+	for j := 0; j < workers; j++ {
+		go func() {
+			for i := 0; i < iters; i++ {
+				m.Lock()
+				// Wait at least 1 millisecond to pass the
+				// starvation threshold for the mutex
+				time.Sleep(time.Millisecond)
+				m.Unlock()
+			}
+			wg.Done()
+		}()
+	}
+	wg.Wait()
+
+	assertNoDuplicates := func(name string, collect func([]runtime.BlockProfileRecord) (int, bool)) {
+		stacks := getProfileStacks(collect, true)
+		seen := make(map[string]struct{})
+		for _, s := range stacks {
+			if _, ok := seen[s]; ok {
+				t.Errorf("saw duplicate entry in %s profile with stack:\n%s", name, s)
+			}
+			seen[s] = struct{}{}
+		}
+		if len(seen) == 0 {
+			t.Errorf("did not see any samples in %s profile for this test", name)
+		}
+	}
+	t.Run("mutex", func(t *testing.T) {
+		assertNoDuplicates("mutex", runtime.MutexProfile)
+	})
+	t.Run("block", func(t *testing.T) {
+		assertNoDuplicates("block", runtime.BlockProfile)
+	})
+}
+
+func inlineA(mu *sync.Mutex, wg *sync.WaitGroup) { inlineB(mu, wg) }
+func inlineB(mu *sync.Mutex, wg *sync.WaitGroup) { inlineC(mu, wg) }
+func inlineC(mu *sync.Mutex, wg *sync.WaitGroup) {
+	defer wg.Done()
+	mu.Lock()
+	mu.Unlock()
+}
+
+func inlineD(mu *sync.Mutex, wg *sync.WaitGroup) { inlineE(mu, wg) }
+func inlineE(mu *sync.Mutex, wg *sync.WaitGroup) { inlineF(mu, wg) }
+func inlineF(mu *sync.Mutex, wg *sync.WaitGroup) {
+	defer wg.Done()
+	mu.Unlock()
+}
+
+func TestBlockMutexProfileInlineExpansion(t *testing.T) {
+	runtime.SetBlockProfileRate(1)
+	defer runtime.SetBlockProfileRate(0)
+	prev := runtime.SetMutexProfileFraction(1)
+	defer runtime.SetMutexProfileFraction(prev)
+
+	var mu sync.Mutex
+	var wg sync.WaitGroup
+	wg.Add(2)
+	mu.Lock()
+	go inlineA(&mu, &wg)
+	awaitBlockedGoroutine(t, "sync.Mutex.Lock", "inlineC", 1)
+	// inlineD will unblock inlineA
+	go inlineD(&mu, &wg)
+	wg.Wait()
+
+	tcs := []struct {
+		Name     string
+		Collect  func([]runtime.BlockProfileRecord) (int, bool)
+		SubStack string
+	}{
+		{
+			Name:    "mutex",
+			Collect: runtime.MutexProfile,
+			SubStack: `sync.(*Mutex).Unlock
+runtime/pprof.inlineF
+runtime/pprof.inlineE
+runtime/pprof.inlineD`,
+		},
+		{
+			Name:    "block",
+			Collect: runtime.BlockProfile,
+			SubStack: `sync.(*Mutex).Lock
+runtime/pprof.inlineC
+runtime/pprof.inlineB
+runtime/pprof.inlineA`,
+		},
+	}
+
+	for _, tc := range tcs {
+		t.Run(tc.Name, func(t *testing.T) {
+			stacks := getProfileStacks(tc.Collect, false)
+			for _, s := range stacks {
+				if strings.Contains(s, tc.SubStack) {
+					return
+				}
+			}
+			t.Error("did not see expected stack")
+			t.Logf("wanted:\n%s", tc.SubStack)
+			t.Logf("got: %s", stacks)
+		})
+	}
+}
