Skip to content

Commit fadbf74

Browse files
committed
runtime/pprof: expand final stack frame to avoid truncation
When generating stacks, the runtime automatically expands inline functions to inline all inline frames in the stack. However, due to the stack size limit, the final frame may be truncated in the middle of several inline frames at the same location. As-is, we assume that the final frame is a normal function, and emit and cache a Location for it. If we later receive a complete stack frame, we will first use the cached Location for the inlined function and then generate a new Location for the "caller" frame, in violation of the pprof requirement to merge inlined functions into the same Location. As a result, we: 1. Nondeterministically may generate a profile with the different stacks combined or split, depending on which is encountered first. This is particularly problematic when performing a diff of profiles. 2. When split stacks are generated, we lose the inlining information. We avoid both of these problems by performing a second expansion of the last stack frame to recover additional inline frames that may have been lost. This expansion is a bit simpler than the one done by the runtime because we don't have to handle skipping, and we know that the last emitted frame is not an elided wrapper, since it by definition is already included in the stack. Fixes #37446 Change-Id: If3ca2af25b21d252cf457cc867dd932f107d4c61 Reviewed-on: https://go-review.googlesource.com/c/go/+/221577 Run-TryBot: Michael Pratt <[email protected]> TryBot-Result: Gobot Gobot <[email protected]> Reviewed-by: Heschi Kreinick <[email protected]> Reviewed-by: Keith Randall <[email protected]> Reviewed-by: Hyang-Ah Hana Kim <[email protected]>
1 parent 4a70ff4 commit fadbf74

File tree

5 files changed

+115
-31
lines changed

5 files changed

+115
-31
lines changed

src/runtime/pprof/map.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,8 @@ Search:
6868
if len(m.freeStk) < len(stk) {
6969
m.freeStk = make([]uintptr, 1024)
7070
}
71-
e.stk = m.freeStk[:len(stk)]
71+
// Limit cap to prevent append from clobbering freeStk.
72+
e.stk = m.freeStk[:len(stk):len(stk)]
7273
m.freeStk = m.freeStk[len(stk):]
7374

7475
for j := range stk {

src/runtime/pprof/pprof_test.go

+40-7
Original file line numberDiff line numberDiff line change
@@ -1172,16 +1172,25 @@ func TestTryAdd(t *testing.T) {
11721172
{Value: []int64{20, 20 * period}, Location: []*profile.Location{{ID: 1}}},
11731173
},
11741174
}, {
1175-
name: "recursive_inlined_funcs",
1175+
// If a function is called recursively then it must not be
1176+
// inlined in the caller.
1177+
//
1178+
// N.B. We're generating an impossible profile here, with a
1179+
// recursive inlineCallee call. This is simulating a non-Go
1180+
// function that looks like an inlined Go function other than
1181+
// its recursive property. See pcDeck.tryAdd.
1182+
name: "recursive_func_is_not_inlined",
11761183
input: []uint64{
11771184
3, 0, 500, // hz = 500. Must match the period.
11781185
5, 0, 30, inlinedCalleePtr, inlinedCalleePtr,
11791186
4, 0, 40, inlinedCalleePtr,
11801187
},
1181-
wantLocs: [][]string{{"runtime/pprof.inlinedCallee"}},
1188+
// inlinedCaller shows up here because
1189+
// runtime_expandFinalInlineFrame adds it to the stack frame.
1190+
wantLocs: [][]string{{"runtime/pprof.inlinedCallee"}, {"runtime/pprof.inlinedCaller"}},
11821191
wantSamples: []*profile.Sample{
1183-
{Value: []int64{30, 30 * period}, Location: []*profile.Location{{ID: 1}, {ID: 1}}},
1184-
{Value: []int64{40, 40 * period}, Location: []*profile.Location{{ID: 1}}},
1192+
{Value: []int64{30, 30 * period}, Location: []*profile.Location{{ID: 1}, {ID: 1}, {ID: 2}}},
1193+
{Value: []int64{40, 40 * period}, Location: []*profile.Location{{ID: 1}, {ID: 2}}},
11851194
},
11861195
}, {
11871196
name: "truncated_stack_trace_later",
@@ -1202,12 +1211,36 @@ func TestTryAdd(t *testing.T) {
12021211
4, 0, 70, inlinedCalleePtr,
12031212
5, 0, 80, inlinedCalleePtr, inlinedCallerPtr,
12041213
},
1205-
wantLocs: [][]string{ // the inline info is screwed up, but better than a crash.
1206-
{"runtime/pprof.inlinedCallee"},
1214+
wantLocs: [][]string{{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}},
1215+
wantSamples: []*profile.Sample{
1216+
{Value: []int64{70, 70 * period}, Location: []*profile.Location{{ID: 1}}},
1217+
{Value: []int64{80, 80 * period}, Location: []*profile.Location{{ID: 1}}},
1218+
},
1219+
}, {
1220+
// We can recover the inlined caller from a truncated stack.
1221+
name: "truncated_stack_trace_only",
1222+
input: []uint64{
1223+
3, 0, 500, // hz = 500. Must match the period.
1224+
4, 0, 70, inlinedCalleePtr,
1225+
},
1226+
wantLocs: [][]string{{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}},
1227+
wantSamples: []*profile.Sample{
1228+
{Value: []int64{70, 70 * period}, Location: []*profile.Location{{ID: 1}}},
1229+
},
1230+
}, {
1231+
// The same location is used for duplicated stacks.
1232+
name: "truncated_stack_trace_twice",
1233+
input: []uint64{
1234+
3, 0, 500, // hz = 500. Must match the period.
1235+
4, 0, 70, inlinedCalleePtr,
1236+
5, 0, 80, inlinedCallerPtr, inlinedCalleePtr,
1237+
},
1238+
wantLocs: [][]string{
1239+
{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"},
12071240
{"runtime/pprof.inlinedCaller"}},
12081241
wantSamples: []*profile.Sample{
12091242
{Value: []int64{70, 70 * period}, Location: []*profile.Location{{ID: 1}}},
1210-
{Value: []int64{80, 80 * period}, Location: []*profile.Location{{ID: 1}, {ID: 2}}},
1243+
{Value: []int64{80, 80 * period}, Location: []*profile.Location{{ID: 2}, {ID: 1}}},
12111244
},
12121245
}}
12131246

src/runtime/pprof/proto.go

+17-23
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,10 @@ func (b *profileBuilder) build() {
384384
// It may emit to b.pb, so there must be no message encoding in progress.
385385
func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLocs []uint64) {
386386
b.deck.reset()
387+
388+
// The last frame might be truncated. Recover lost inline frames.
389+
stk = runtime_expandFinalInlineFrame(stk)
390+
387391
for len(stk) > 0 {
388392
addr := stk[0]
389393
if l, ok := b.locs[addr]; ok {
@@ -395,22 +399,12 @@ func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLo
395399
// then, record the cached location.
396400
locs = append(locs, l.id)
397401

398-
// The stk may be truncated due to the stack depth limit
399-
// (e.g. See maxStack and maxCPUProfStack in runtime) or
400-
// bugs in runtime. Avoid the crash in either case.
401-
// TODO(hyangah): The correct fix may require using the exact
402-
// pcs as the key for b.locs cache management instead of just
403-
// relying on the very first pc. We are late in the go1.14 dev
404-
// cycle, so this is a workaround with little code change.
405-
if len(l.pcs) > len(stk) {
406-
stk = nil
407-
// TODO(hyangah): would be nice if we can enable
408-
// debug print out on demand and report the problematic
409-
// cached location entry and stack traces. Do we already
410-
// have such facility to utilize (e.g. GODEBUG)?
411-
} else {
412-
stk = stk[len(l.pcs):] // skip the matching pcs.
413-
}
402+
// Skip the matching pcs.
403+
//
404+
// Even if stk was truncated due to the stack depth
405+
// limit, expandFinalInlineFrame above has already
406+
// fixed the truncation, ensuring it is long enough.
407+
stk = stk[len(l.pcs):]
414408
continue
415409
}
416410

@@ -427,9 +421,9 @@ func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLo
427421
stk = stk[1:]
428422
continue
429423
}
430-
// add failed because this addr is not inlined with
431-
// the existing PCs in the deck. Flush the deck and retry to
432-
// handle this pc.
424+
// add failed because this addr is not inlined with the
425+
// existing PCs in the deck. Flush the deck and retry handling
426+
// this pc.
433427
if id := b.emitLocation(); id > 0 {
434428
locs = append(locs, id)
435429
}
@@ -463,8 +457,8 @@ func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLo
463457
// the fake pcs and restore the inlined and entry functions. Inlined functions
464458
// have the following properties:
465459
// Frame's Func is nil (note: also true for non-Go functions), and
466-
// Frame's Entry matches its entry function frame's Entry. (note: could also be true for recursive calls and non-Go functions),
467-
// Frame's Name does not match its entry function frame's name.
460+
// Frame's Entry matches its entry function frame's Entry (note: could also be true for recursive calls and non-Go functions), and
461+
// Frame's Name does not match its entry function frame's name (note: inlined functions cannot be recursive).
468462
//
469463
// As reading and processing the pcs in a stack trace one by one (from leaf to the root),
470464
// we use pcDeck to temporarily hold the observed pcs and their expanded frames
@@ -486,8 +480,8 @@ func (d *pcDeck) reset() {
486480
// to the deck. If it fails the caller needs to flush the deck and retry.
487481
func (d *pcDeck) tryAdd(pc uintptr, frames []runtime.Frame, symbolizeResult symbolizeFlag) (success bool) {
488482
if existing := len(d.pcs); existing > 0 {
489-
// 'frames' are all expanded from one 'pc' and represent all inlined functions
490-
// so we check only the last one.
483+
// 'd.frames' are all expanded from one 'pc' and represent all
484+
// inlined functions so we check only the last one.
491485
newFrame := frames[0]
492486
last := d.frames[existing-1]
493487
if last.Func != nil { // the last frame can't be inlined. Flush.

src/runtime/pprof/runtime.go

+3
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ import (
99
"unsafe"
1010
)
1111

12+
// runtime_expandFinalInlineFrame is defined in runtime/symtab.go.
13+
func runtime_expandFinalInlineFrame(stk []uintptr) []uintptr
14+
1215
// runtime_setProfLabel is defined in runtime/proflabel.go.
1316
func runtime_setProfLabel(labels unsafe.Pointer)
1417

src/runtime/symtab.go

+53
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,59 @@ func (ci *Frames) Next() (frame Frame, more bool) {
148148
return
149149
}
150150

151+
// runtime_expandFinalInlineFrame expands the final pc in stk to include all
152+
// "callers" if pc is inline.
153+
//
154+
//go:linkname runtime_expandFinalInlineFrame runtime/pprof.runtime_expandFinalInlineFrame
155+
func runtime_expandFinalInlineFrame(stk []uintptr) []uintptr {
156+
pc := stk[len(stk)-1]
157+
tracepc := pc - 1
158+
159+
f := findfunc(tracepc)
160+
if !f.valid() {
161+
// Not a Go function.
162+
return stk
163+
}
164+
165+
inldata := funcdata(f, _FUNCDATA_InlTree)
166+
if inldata == nil {
167+
// Nothing inline in f.
168+
return stk
169+
}
170+
171+
// Treat the previous func as normal. We haven't actually checked, but
172+
// since this pc was included in the stack, we know it shouldn't be
173+
// elided.
174+
lastFuncID := funcID_normal
175+
176+
// Remove pc from stk; we'll re-add it below.
177+
stk = stk[:len(stk)-1]
178+
179+
// See inline expansion in gentraceback.
180+
var cache pcvalueCache
181+
inltree := (*[1 << 20]inlinedCall)(inldata)
182+
for {
183+
ix := pcdatavalue(f, _PCDATA_InlTreeIndex, tracepc, &cache)
184+
if ix < 0 {
185+
break
186+
}
187+
if inltree[ix].funcID == funcID_wrapper && elideWrapperCalling(lastFuncID) {
188+
// ignore wrappers
189+
} else {
190+
stk = append(stk, pc)
191+
}
192+
lastFuncID = inltree[ix].funcID
193+
// Back up to an instruction in the "caller".
194+
tracepc = f.entry + uintptr(inltree[ix].parentPc)
195+
pc = tracepc + 1
196+
}
197+
198+
// N.B. we want to keep the last parentPC which is not inline.
199+
stk = append(stk, pc)
200+
201+
return stk
202+
}
203+
151204
// expandCgoFrames expands frame information for pc, known to be
152205
// a non-Go function, using the cgoSymbolizer hook. expandCgoFrames
153206
// returns nil if pc could not be expanded.

0 commit comments

Comments
 (0)