Skip to content

Commit 5ccb743

Browse files
committed
profiler: log cpuProfileRate when starting a trace
This will improve our ability to analyze OS scheduling latency. See golang/go#60701 for a better approach in the long run. For PROF-7728
1 parent e3272aa commit 5ccb743

File tree

2 files changed

+35
-0
lines changed

2 files changed

+35
-0
lines changed

profiler/profile.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ package profiler
88
import (
99
"bytes"
1010
"compress/gzip"
11+
"context"
1112
"errors"
1213
"fmt"
1314
"io"
@@ -194,6 +195,7 @@ var profileTypes = map[ProfileType]profileType{
194195
if err := trace.Start(lt); err != nil {
195196
return nil, err
196197
}
198+
traceLogCPUProfileRate(p.cfg.cpuProfileRate)
197199
select {
198200
case <-p.exit: // Profiling was stopped
199201
case <-time.After(p.cfg.period): // The profiling cycle has ended
@@ -205,6 +207,19 @@ var profileTypes = map[ProfileType]profileType{
205207
},
206208
}
207209

210+
// traceLogCPUProfileRate logs the cpuProfileRate to the execution tracer if
211+
// its not 0. This gives us a better chance to correctly guess the CPU duration
212+
// of traceEvCPUSample events. It will not work correctly if the user is
213+
// calling runtime.SetCPUProfileRate() themselves, and there is no way to
214+
// handle this scenario given the current APIs. See
215+
// https://github.com/golang/go/issues/60701 for a proposal to improve the
216+
// situation.
217+
func traceLogCPUProfileRate(cpuProfileRate int) {
218+
if cpuProfileRate != 0 {
219+
trace.Log(context.Background(), "cpuProfileRate", fmt.Sprintf("%d", cpuProfileRate))
220+
}
221+
}
222+
208223
// defaultExecutionTraceSizeLimit is the default upper bound, in bytes,
209224
// of an executiont trace.
210225
//

profiler/profiler_test.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,12 @@
66
package profiler
77

88
import (
9+
"bytes"
910
"context"
1011
"encoding/json"
1112
"fmt"
1213
"io"
14+
"math/rand"
1315
"net"
1416
"net/http"
1517
"net/http/httptest"
@@ -451,12 +453,22 @@ func TestExecutionTrace(t *testing.T) {
451453
server := httptest.NewServer(&mockBackend{t: t, profiles: got})
452454
defer server.Close()
453455

456+
// cpuProfileRate is picked randomly so we can check for it in the trace
457+
// data to reduce the chance that it occurs in the trace data for some other
458+
// reduce. In theory we could use the entire int64 space, but when we do
459+
// this the runtime can crash with the error shown below.
460+
//
461+
// runtime: kevent on fd 3 failed with 60
462+
// fatal error: runtime: netpoll failed
463+
cpuProfileRate := int(9999 + rand.Int63n(9999))
464+
454465
t.Setenv("DD_PROFILING_EXECUTION_TRACE_ENABLED", "true")
455466
t.Setenv("DD_PROFILING_EXECUTION_TRACE_PERIOD", "3s")
456467
err := Start(
457468
WithAgentAddr(server.Listener.Addr().String()),
458469
WithProfileTypes(CPUProfile),
459470
WithPeriod(1*time.Second),
471+
CPUProfileRate(int(cpuProfileRate)),
460472
)
461473
require.NoError(t, err)
462474
defer Stop()
@@ -475,6 +487,7 @@ func TestExecutionTrace(t *testing.T) {
475487
t.Log(m.event.Attachments, m.tags)
476488
if contains(m.event.Attachments, "go.trace") && contains(m.tags, "go_execution_traced:yes") {
477489
seenTraces++
490+
assertContainsCPUProfileRateLog(t, m.attachments["go.trace"], cpuProfileRate)
478491
}
479492
}
480493
// With a trace frequency of 3 seconds and a profiling period of 1
@@ -485,6 +498,13 @@ func TestExecutionTrace(t *testing.T) {
485498
}
486499
}
487500

501+
// assertContainsCPUProfileRateLog checks for the presence of the log written by
502+
// traceLogCPUProfileRate. It's a bit hacky, but probably good enough for now :).
503+
func assertContainsCPUProfileRateLog(t *testing.T, traceData []byte, cpuProfileRate int) {
504+
assert.True(t, bytes.Contains(traceData, []byte("cpuProfileRate")))
505+
assert.True(t, bytes.Contains(traceData, []byte(fmt.Sprintf("%d", cpuProfileRate))))
506+
}
507+
488508
// TestEndpointCounts verfies that the unit of work feature works end to end.
489509
func TestEndpointCounts(t *testing.T) {
490510
for _, enabled := range []bool{true, false} {

0 commit comments

Comments
 (0)