Commit 6948b4d

Revert "runtime: avoid multiple records with identical stacks from MutexProfile"

This reverts CL 595966.

Reason for revert: This CL contains a bug. See the comment in
https://go-review.googlesource.com/c/go/+/595966/8#message-57f4c1f9570b5fe912e06f4ae3b52817962533c0

Change-Id: I48030907ded173ae20a8965bf1b41a713dd06059
Reviewed-on: https://go-review.googlesource.com/c/go/+/598219
Reviewed-by: Than McIntosh <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
Reviewed-by: Carlos Amedee <[email protected]>

1 parent 8f1ec59 commit 6948b4d
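The linked Gerrit comment appears to concern the expandFrames helper that this revert deletes from src/runtime/mprof.go (see the first hunk below): it writes the expanded PCs as expandedStack[i] = f.PC + 1, indexing the scratch buffer by the record index i rather than the frame index j, so most of each record's "expanded" stack is stale data left over from earlier records. The sketch below shows the intended expansion with frame-index addressing; it mirrors the expandInlinedFrames helper removed from src/runtime/pprof/pprof.go further down, but the names expandInlined and the demo main are illustrative, not code from the CL or from any later re-land.

package main

import (
	"fmt"
	"runtime"
)

// expandInlined copies pcs into dst, expanding inlined calls into one entry
// per logical frame and converting each frame's "call PC" back into a
// "return PC" (PC+1), which is what profile consumers expect. The write is
// indexed by n, the running frame count, not by any per-record index.
func expandInlined(dst, pcs []uintptr) int {
	cf := runtime.CallersFrames(pcs)
	n := 0
	for n < len(dst) {
		f, more := cf.Next()
		dst[n] = f.PC + 1
		n++
		if !more {
			break
		}
	}
	return n
}

func main() {
	pcs := make([]uintptr, 32)
	got := runtime.Callers(1, pcs)
	out := make([]uintptr, 32)
	n := expandInlined(out, pcs[:got])
	fmt.Printf("expanded %d frames from %d raw PCs\n", n, got)
}

The rest of the revert simply restores the earlier fpunwindExpand / pprof_fpunwindExpand expansion path, visible in the + lines of the diffs below.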

3 files changed: +14 -192 lines

src/runtime/mprof.go (+12 -93)
@@ -9,7 +9,6 @@ package runtime
 
 import (
 	"internal/abi"
-	"internal/goarch"
 	"internal/profilerecord"
 	"internal/runtime/atomic"
 	"runtime/internal/sys"
@@ -543,80 +542,36 @@ func saveblockevent(cycles, rate int64, skip int, which bucketType) {
 	gp := getg()
 	mp := acquirem() // we must not be preempted while accessing profstack
 
-	var nstk int
+	nstk := 1
 	if tracefpunwindoff() || gp.m.hasCgoOnStack() {
+		mp.profStack[0] = logicalStackSentinel
 		if gp.m.curg == nil || gp.m.curg == gp {
-			nstk = callers(skip, mp.profStack)
+			nstk = callers(skip, mp.profStack[1:])
 		} else {
-			nstk = gcallers(gp.m.curg, skip, mp.profStack)
+			nstk = gcallers(gp.m.curg, skip, mp.profStack[1:])
 		}
 	} else {
+		mp.profStack[0] = uintptr(skip)
 		if gp.m.curg == nil || gp.m.curg == gp {
 			if skip > 0 {
 				// We skip one fewer frame than the provided value for frame
 				// pointer unwinding because the skip value includes the current
 				// frame, whereas the saved frame pointer will give us the
 				// caller's return address first (so, not including
 				// saveblockevent)
-				skip -= 1
+				mp.profStack[0] -= 1
			}
-			nstk = fpTracebackPartialExpand(skip, unsafe.Pointer(getfp()), mp.profStack)
+			nstk += fpTracebackPCs(unsafe.Pointer(getfp()), mp.profStack[1:])
 		} else {
-			mp.profStack[0] = gp.m.curg.sched.pc
-			nstk = 1 + fpTracebackPartialExpand(skip, unsafe.Pointer(gp.m.curg.sched.bp), mp.profStack[1:])
+			mp.profStack[1] = gp.m.curg.sched.pc
+			nstk += 1 + fpTracebackPCs(unsafe.Pointer(gp.m.curg.sched.bp), mp.profStack[2:])
 		}
 	}
 
 	saveBlockEventStack(cycles, rate, mp.profStack[:nstk], which)
 	releasem(mp)
 }
 
-// fpTracebackPartialExpand records a call stack obtained starting from fp.
-// This function will skip the given number of frames, properly accounting for
-// inlining, and save remaining frames as "physical" return addresses. The
-// consumer should later use CallersFrames or similar to expand inline frames.
-func fpTracebackPartialExpand(skip int, fp unsafe.Pointer, pcBuf []uintptr) int {
-	var n int
-	lastFuncID := abi.FuncIDNormal
-	skipOrAdd := func(retPC uintptr) bool {
-		if skip > 0 {
-			skip--
-		} else if n < len(pcBuf) {
-			pcBuf[n] = retPC
-			n++
-		}
-		return n < len(pcBuf)
-	}
-	for n < len(pcBuf) && fp != nil {
-		// return addr sits one word above the frame pointer
-		pc := *(*uintptr)(unsafe.Pointer(uintptr(fp) + goarch.PtrSize))
-
-		if skip > 0 {
-			callPC := pc - 1
-			fi := findfunc(callPC)
-			u, uf := newInlineUnwinder(fi, callPC)
-			for ; uf.valid(); uf = u.next(uf) {
-				sf := u.srcFunc(uf)
-				if sf.funcID == abi.FuncIDWrapper && elideWrapperCalling(lastFuncID) {
-					// ignore wrappers
-				} else if more := skipOrAdd(uf.pc + 1); !more {
-					return n
-				}
-				lastFuncID = sf.funcID
-			}
-		} else {
-			// We've skipped the desired number of frames, so no need
-			// to perform further inline expansion now.
-			pcBuf[n] = pc
-			n++
-		}
-
-		// follow the frame pointer to the next one
-		fp = unsafe.Pointer(*(*uintptr)(fp))
-	}
-	return n
-}
-
 // lockTimer assists with profiling contention on runtime-internal locks.
 //
 // There are several steps between the time that an M experiences contention and
@@ -1120,34 +1075,10 @@ type BlockProfileRecord struct {
 // the [testing] package's -test.blockprofile flag instead
 // of calling BlockProfile directly.
 func BlockProfile(p []BlockProfileRecord) (n int, ok bool) {
-	n, ok = blockProfileInternal(len(p), func(r profilerecord.BlockProfileRecord) {
+	return blockProfileInternal(len(p), func(r profilerecord.BlockProfileRecord) {
 		copyBlockProfileRecord(&p[0], r)
 		p = p[1:]
 	})
-	if !ok {
-		return
-	}
-	expandFrames(p[:n])
-	return
-}
-
-func expandFrames(p []BlockProfileRecord) {
-	expandedStack := makeProfStack()
-	for i := range p {
-		cf := CallersFrames(p[i].Stack())
-		j := 0
-		for ; j < len(expandedStack); j++ {
-			f, more := cf.Next()
-			// f.PC is a "call PC", but later consumers will expect
-			// "return PCs"
-			expandedStack[i] = f.PC + 1
-			if !more {
-				break
-			}
-		}
-		k := copy(p[i].Stack0[:], expandedStack[:j])
-		clear(p[i].Stack0[k:])
-	}
 }
 
 // blockProfileInternal returns the number of records n in the profile. If there
@@ -1180,9 +1111,6 @@ func blockProfileInternal(size int, copyFn func(profilerecord.BlockProfileRecord
 	return
 }
 
-// copyBlockProfileRecord copies the sample values and call stack from src to dst.
-// The call stack is copied as-is. The caller is responsible for handling inline
-// expansion, needed when the call stack was collected with frame pointer unwinding.
 func copyBlockProfileRecord(dst *BlockProfileRecord, src profilerecord.BlockProfileRecord) {
 	dst.Count = src.Count
 	dst.Cycles = src.Cycles
@@ -1195,11 +1123,7 @@ func copyBlockProfileRecord(dst *BlockProfileRecord, src profilerecord.BlockProf
 	if asanenabled {
 		asanwrite(unsafe.Pointer(&dst.Stack0[0]), unsafe.Sizeof(dst.Stack0))
 	}
-	// We just copy the stack here without inline expansion
-	// (needed if frame pointer unwinding is used)
-	// since this function is called under the profile lock,
-	// and doing something that might allocate can violate lock ordering.
-	i := copy(dst.Stack0[:], src.Stack)
+	i := fpunwindExpand(dst.Stack0[:], src.Stack)
 	clear(dst.Stack0[i:])
 }
 
@@ -1218,15 +1142,10 @@ func pprof_blockProfileInternal(p []profilerecord.BlockProfileRecord) (n int, ok
 // Most clients should use the [runtime/pprof] package
 // instead of calling MutexProfile directly.
 func MutexProfile(p []BlockProfileRecord) (n int, ok bool) {
-	n, ok = mutexProfileInternal(len(p), func(r profilerecord.BlockProfileRecord) {
+	return mutexProfileInternal(len(p), func(r profilerecord.BlockProfileRecord) {
 		copyBlockProfileRecord(&p[0], r)
 		p = p[1:]
 	})
-	if !ok {
-		return
-	}
-	expandFrames(p[:n])
-	return
 }
 
 // mutexProfileInternal returns the number of records n in the profile. If there

src/runtime/pprof/pprof.go (+2 -21)
@@ -404,25 +404,6 @@ type countProfile interface {
 	Label(i int) *labelMap
 }
 
-// expandInlinedFrames copies the call stack from pcs into dst, expanding any
-// PCs corresponding to inlined calls into the corresponding PCs for the inlined
-// functions. Returns the number of frames copied to dst.
-func expandInlinedFrames(dst, pcs []uintptr) int {
-	cf := runtime.CallersFrames(pcs)
-	var n int
-	for n < len(dst) {
-		f, more := cf.Next()
-		// f.PC is a "call PC", but later consumers will expect
-		// "return PCs"
-		dst[n] = f.PC + 1
-		n++
-		if !more {
-			break
-		}
-	}
-	return n
-}
-
 // printCountCycleProfile outputs block profile records (for block or mutex profiles)
 // as the pprof-proto format output. Translations from cycle count to time duration
 // are done because The proto expects count and time (nanoseconds) instead of count
@@ -445,7 +426,7 @@ func printCountCycleProfile(w io.Writer, countName, cycleName string, records []
 		values[1] = int64(float64(r.Cycles) / cpuGHz)
 		// For count profiles, all stack addresses are
 		// return PCs, which is what appendLocsForStack expects.
-		n := expandInlinedFrames(expandedStack, r.Stack)
+		n := pprof_fpunwindExpand(expandedStack[:], r.Stack)
 		locs = b.appendLocsForStack(locs[:0], expandedStack[:n])
 		b.pbSample(values, locs, nil)
 	}
@@ -954,7 +935,7 @@ func writeProfileInternal(w io.Writer, debug int, name string, runtimeProfile fu
 	for i := range p {
 		r := &p[i]
 		fmt.Fprintf(w, "%v %v @", r.Cycles, r.Count)
-		n := expandInlinedFrames(expandedStack, r.Stack)
+		n := pprof_fpunwindExpand(expandedStack, r.Stack)
 		stack := expandedStack[:n]
 		for _, pc := range stack {
 			fmt.Fprintf(w, " %#x", pc)

src/runtime/pprof/pprof_test.go (-78)
@@ -2578,81 +2578,3 @@ func produceProfileEvents(t *testing.T, depth int) {
 	runtime.GC()
 	goroutineDeep(t, depth-4) // -4 for produceProfileEvents, **, chanrecv1, chanrev, gopark
 }
-
-func TestMutexBlockFullAggregation(t *testing.T) {
-	// This regression test is adapted from
-	// https://github.com/grafana/pyroscope-go/issues/103,
-	// authored by Tolya Korniltsev
-
-	var m sync.Mutex
-
-	prev := runtime.SetMutexProfileFraction(-1)
-	defer runtime.SetMutexProfileFraction(prev)
-
-	const fraction = 1
-	const iters = 100
-	const workers = 2
-
-	runtime.SetMutexProfileFraction(fraction)
-	runtime.SetBlockProfileRate(1)
-	defer runtime.SetBlockProfileRate(0)
-
-	wg := sync.WaitGroup{}
-	wg.Add(workers)
-	for j := 0; j < workers; j++ {
-		go func() {
-			for i := 0; i < iters; i++ {
-				m.Lock()
-				// Wait at least 1 millisecond to pass the
-				// starvation threshold for the mutex
-				time.Sleep(time.Millisecond)
-				m.Unlock()
-			}
-			wg.Done()
-		}()
-	}
-	wg.Wait()
-
-	assertNoDuplicates := func(name string, collect func([]runtime.BlockProfileRecord) (int, bool)) {
-		var p []runtime.BlockProfileRecord
-		n, ok := collect(nil)
-		for {
-			p = make([]runtime.BlockProfileRecord, n+50)
-			n, ok = collect(p)
-			if ok {
-				p = p[:n]
-				break
-			}
-		}
-		seen := make(map[string]struct{})
-		for _, r := range p {
-			cf := runtime.CallersFrames(r.Stack())
-			var stack strings.Builder
-			for {
-				f, more := cf.Next()
-				stack.WriteString(f.Func.Name())
-				if !more {
-					break
-				}
-				stack.WriteByte('\n')
-			}
-			s := stack.String()
-			if !strings.Contains(s, "TestMutexBlockFullAggregation") {
-				continue
-			}
-			if _, ok := seen[s]; ok {
-				t.Errorf("saw duplicate entry in %s profile with stack:\n%s", name, s)
-			}
-			seen[s] = struct{}{}
-		}
-		if len(seen) == 0 {
-			t.Errorf("did not see any samples in %s profile for this test", name)
-		}
-	}
-	t.Run("mutex", func(t *testing.T) {
-		assertNoDuplicates("mutex", runtime.MutexProfile)
-	})
-	t.Run("block", func(t *testing.T) {
-		assertNoDuplicates("block", runtime.BlockProfile)
-	})
-}
