package pprof

import (
	"bytes"
	"reflect"
	"sort"
	"unsafe"

	"golang.org/x/exp/slices"

	profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1"
)

const (
	// Only groups of at least minGroupSize samples
	// are considered as donors of missing frames.
	minGroupSize = 2

	// A stack trace suffix of suffixLen locations is tokenized into
	// overlapping n-grams: windows of tokenLen locations, each shifted
	// by one location from the previous one.
	tokens    = 8
	tokenLen  = 16
	suffixLen = tokens + tokenLen

	// Location IDs are uint64, i.e. 8 bytes each.
	tokenBytesLen  = tokenLen * 8
	suffixBytesLen = suffixLen * 8
)
| 24 | + |
| 25 | +// MayHaveGoHeapTruncatedStacktraces reports whether there are |
| 26 | +// any chances that the profile may have truncated stack traces. |
| 27 | +func MayHaveGoHeapTruncatedStacktraces(p *profilev1.Profile) bool { |
| 28 | + if !hasGoHeapSampleTypes(p) { |
| 29 | + return false |
| 30 | + } |
| 31 | + // Some truncated stacks have depth less than the depth limit (32). |
| 32 | + const minDepth = 28 |
| 33 | + for _, s := range p.Sample { |
| 34 | + if len(s.LocationId) >= minDepth { |
| 35 | + return true |
| 36 | + } |
| 37 | + } |
| 38 | + return false |
| 39 | +} |

func hasGoHeapSampleTypes(p *profilev1.Profile) bool {
	for _, st := range p.SampleType {
		switch p.StringTable[st.Type] {
		case
			"alloc_objects",
			"alloc_space",
			"inuse_objects",
			"inuse_space":
			return true
		}
	}
	return false
}
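
// A minimal sketch of the intended call pattern; fixProfile is a
// hypothetical caller, not part of this package:
//
//	func fixProfile(p *profilev1.Profile) {
//		if MayHaveGoHeapTruncatedStacktraces(p) {
//			RepairGoHeapTruncatedStacktraces(p)
//		}
//	}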

// RepairGoHeapTruncatedStacktraces repairs truncated stack traces
// in Go heap profiles.
//
// The Go heap profile has a depth limit of 32 frames, which often
// renders profiles unreadable and also increases the cardinality
// of stack traces.
//
// The function guesses truncated frames based on neighbors and
// repairs a stack trace if there is a high chance that the missing
// part is present elsewhere in the profile. The heuristic is as follows:
//
// For each stack trace S taller than 24 frames: if there is another
// stack trace R taller than 24 frames that overlaps with the given
// one by at least 16 frames in a row from the top, and that has frames
// above its root, stack S is considered truncated, and the missing
// part is copied from R.
func RepairGoHeapTruncatedStacktraces(p *profilev1.Profile) {
	// Group stack traces by their bottom (closest to the root) locations.
	// Typically, there are very few groups (a hundred or two).
	samples, groups := split(p)
	// Each group's suffix is then tokenized: each token is shifted by one
	// location from the previous one (like n-grams).
	// Tokens are written into a token => group map, where the value is the
	// index of the group in which the token is found at the furthest
	// position from the root (across all groups).
	m := make(map[string]group, len(groups)/2)
	for i := 0; i < len(groups); i++ {
		g := groups[i]
		n := len(samples)
		if i+1 < len(groups) {
			n = groups[i+1]
		}
		if s := n - g; s < minGroupSize {
			continue
		}
		// We take the suffix of the first sample in the group.
		s := suffix(samples[g].LocationId)
		// Tokenize the suffix. Token position is relative to the stack
		// trace root: 0 means that the token is the closest to the root.
		// TODO: unroll?
		// j : lo : hi (byte offsets within the suffix)
		// 0 : 64 : 192 // Not needed; see below.
		// 1 : 56 : 184
		// 2 : 48 : 176
		// 3 : 40 : 168
		// 4 : 32 : 160
		// 5 : 24 : 152
		// 6 : 16 : 144
		// 7 :  8 : 136
		// 8 :  0 : 128
		//
		// We skip the top/right-most token (j=0), as it is of no use:
		// a match at the root offset cannot point to a more complete
		// stack trace.
		for j := uint32(1); j <= tokens; j++ {
			hi := suffixBytesLen - j*8 // Shift by one 8-byte location per step.
			lo := hi - tokenBytesLen
			// By taking a string representation of the slice,
			// we eliminate the need to hash the token explicitly:
			// the Go map will do it anyway.
			k := unsafeString(s[lo:hi])
			// Current candidate: the group where the token is
			// located at the furthest position from the root.
			c, ok := m[k]
			if !ok || j > c.off {
				// This group has more complete stack traces:
				m[k] = group{
					gid: uint32(i),
					off: j,
				}
			}
		}
	}
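
	// For intuition (hypothetical values): if group 3 contains a token T
	// at offset 2 and group 7 contains the same token at offset 5, the
	// loop above leaves m[T] = group{gid: 7, off: 5}: group 7 has more
	// frames between the token and its root, making it the better donor.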

	// Now we handle chaining. Consider the following stacks:
	// 1   2   3   4
	// a   b  [d] (f)
	// b   c  [e] (g)
	// c  [d] (f)  h
	// d  [e] (g)  i
	//
	// We can't associate the 3rd stack with the 1st one because their
	// tokens do not overlap (given the token size is 2). However, we can
	// associate it transitively through the 2nd stack.
	//
	// Dependencies:
	//  - group i depends on d[i].
	//  - d[i] depends on d[d[i].gid].
	d := make([]group, len(groups))
	for i := 0; i < len(groups); i++ {
		g := groups[i]
		t := topToken(samples[g].LocationId)
		k := unsafeString(t)
		c, ok := m[k]
		if !ok || c.off == 0 || groups[c.gid] == g {
			// The current group has the most complete stack trace.
			continue
		}
		d[i] = c
	}
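
	// Continuing the example above: the walk in the loop below follows the
	// chain 1 -> 2 -> 3 (and further, if stack 3 has a donor of its own),
	// summing the per-link offsets, so that the missing root frames are
	// copied once, from the most complete stack trace reached.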

	// Then, for each group, we check whether there is another group with
	// a more complete suffix that overlaps with the given one by at least
	// one token. If such a stack trace exists, all stack traces of the
	// group are appended with the missing part.
	for i := 0; i < len(groups); i++ {
		g := groups[i]
		c := d[i]
		var off uint32
		for c.off > 0 {
			off += c.off
			n := d[c.gid]
			if n.off == 0 {
				// Stop early to preserve c.
				break
			}
			c = n
		}
		if off == 0 {
			// The current group has the most complete stack trace.
			continue
		}
		// The reference stack trace.
		appx := samples[groups[c.gid]].LocationId
		// It's possible that the reference stack trace does not
		// include the part we're looking for. In this case, we
		// simply ignore the group. Although it's possible to infer
		// this piece from other stacks, this is left for further
		// improvements.
		if int(off) >= len(appx) {
			continue
		}
		appx = appx[uint32(len(appx))-off:]
		// Now we append the missing part to all stack traces of the group.
		n := len(samples)
		if i+1 < len(groups) {
			n = groups[i+1]
		}
		for j := g; j < n; j++ {
			// Locations typically already have some extra capacity,
			// therefore no major allocations are expected here.
			samples[j].LocationId = append(samples[j].LocationId, appx...)
		}
	}
}

type group struct {
	// Index of the group in groups.
	gid uint32
	// Offset of the token from the root, in locations.
	off uint32
}

// suffix returns the last suffixLen locations
// of the given stack trace represented as bytes.
// The returned slice is always suffixBytesLen long.
// The function panics if s is shorter than suffixLen.
func suffix(s []uint64) []byte {
	return locBytes(s[len(s)-suffixLen:])
}

// topToken returns the last tokenLen locations
// of the given stack trace represented as bytes.
// The returned slice is always tokenBytesLen long.
// The function panics if s is shorter than tokenLen.
func topToken(s []uint64) []byte {
	return locBytes(s[len(s)-tokenLen:])
}

// locBytes reinterprets the given slice of locations as raw bytes,
// without copying. The byte slice aliases s and must not outlive
// mutations of its contents.
func locBytes(s []uint64) []byte {
	size := len(s) * 8
	h := (*reflect.SliceHeader)(unsafe.Pointer(&s))
	h.Len = size
	h.Cap = size
	return *(*[]byte)(unsafe.Pointer(h))
}
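
// On Go 1.17+, the same conversion can be written without
// reflect.SliceHeader (which is deprecated); a sketch, assuming
// callers guarantee a non-empty slice:
//
//	func locBytes(s []uint64) []byte {
//		return unsafe.Slice((*byte)(unsafe.Pointer(&s[0])), len(s)*8)
//	}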

// unsafeString returns a string that shares the underlying bytes of b;
// b must not be modified while the string is in use.
func unsafeString(b []byte) string {
	return *(*string)(unsafe.Pointer(&b))
}
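
// On Go 1.20+, unsafe.String(&b[0], len(b)), guarded by an emptiness
// check, expresses the same conversion.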

// split groups samples by stack trace suffix. The returned slice
// contains the index of the first sample of each group within the
// returned collection of selected samples. Samples with stack traces
// shorter than suffixLen are sorted to the front and excluded.
func split(p *profilev1.Profile) ([]*profilev1.Sample, []int) {
	slices.SortFunc(p.Sample, func(a, b *profilev1.Sample) int {
		if len(a.LocationId) < suffixLen {
			return -1
		}
		if len(b.LocationId) < suffixLen {
			return 1
		}
		return bytes.Compare(
			suffix(a.LocationId),
			suffix(b.LocationId),
		)
	})
	o := sort.Search(len(p.Sample), func(i int) bool {
		return len(p.Sample[i].LocationId) >= suffixLen
	})
	if o == len(p.Sample) {
		return nil, nil
	}
	samples := p.Sample[o:]
	const avgGroupSize = 16 // Estimate.
	groups := make([]int, 0, len(samples)/avgGroupSize)
	var prev []byte
	for i := 0; i < len(samples); i++ {
		cur := suffix(samples[i].LocationId)
		if !bytes.Equal(cur, prev) {
			groups = append(groups, i)
			prev = cur
		}
	}
	return samples, groups
}
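
// A minimal sketch of how split's return values are consumed
// (forEachGroup is a hypothetical helper, not part of the repair path):
// each group spans samples[groups[i]:end], where end is groups[i+1],
// or len(samples) for the last group.
//
//	func forEachGroup(samples []*profilev1.Sample, groups []int, fn func([]*profilev1.Sample)) {
//		for i, g := range groups {
//			end := len(samples)
//			if i+1 < len(groups) {
//				end = groups[i+1]
//			}
//			fn(samples[g:end])
//		}
//	}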