Skip to content

Commit 2b24697

Browse files
cecile75Cecile Terpinbm1549
authored
Add a built-in trace interceptor for keeping traces depending on their latency (#8040)
* add latency trace interceptor * Fix test * add comments * improve comments * changes after review * Add "experimental" in configuration key Co-authored-by: Brian Marks <[email protected]> * Add "experimental" in configuration key (for tests) Co-authored-by: Brian Marks <[email protected]> * spotlessApply --------- Co-authored-by: Cecile Terpin <“[email protected]”> Co-authored-by: Brian Marks <[email protected]>
1 parent 39e43da commit 2b24697

File tree

7 files changed

+126
-1
lines changed

7 files changed

+126
-1
lines changed

dd-trace-api/src/main/java/datadog/trace/api/ConfigDefaults.java

+2
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ public final class ConfigDefaults {
6969
static final boolean DEFAULT_DB_CLIENT_HOST_SPLIT_BY_INSTANCE_TYPE_SUFFIX = false;
7070
static final boolean DEFAULT_DB_CLIENT_HOST_SPLIT_BY_HOST = false;
7171
static final String DEFAULT_DB_DBM_PROPAGATION_MODE_MODE = "disabled";
72+
// Default value is 0, which disables the latency trace interceptor
73+
static final int DEFAULT_TRACE_KEEP_LATENCY_THRESHOLD_MS = 0;
7274
static final int DEFAULT_SCOPE_DEPTH_LIMIT = 100;
7375
static final int DEFAULT_SCOPE_ITERATION_KEEP_ALIVE = 30; // in seconds
7476
static final int DEFAULT_PARTIAL_FLUSH_MIN_SPANS = 1000;

dd-trace-api/src/main/java/datadog/trace/api/config/TracerConfig.java

+3-1
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,9 @@ public final class TracerConfig {
7373
public static final String TRACE_HTTP_CLIENT_ERROR_STATUSES = "trace.http.client.error.statuses";
7474

7575
public static final String SPLIT_BY_TAGS = "trace.split-by-tags";
76-
76+
// Latency threshold used by the trace latency interceptor; the value is expressed in milliseconds
77+
public static final String TRACE_KEEP_LATENCY_THRESHOLD_MS =
78+
"trace.experimental.keep.latency.threshold.ms";
7779
public static final String SCOPE_DEPTH_LIMIT = "trace.scope.depth.limit";
7880
public static final String SCOPE_STRICT_MODE = "trace.scope.strict.mode";
7981
public static final String SCOPE_ITERATION_KEEP_ALIVE = "trace.scope.iteration.keep.alive";

dd-trace-api/src/main/java/datadog/trace/api/interceptor/AbstractTraceInterceptor.java

+3
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ public enum Priority {
2222
DD_INTAKE(2),
2323
GIT_METADATA(3),
2424

25+
// trace custom sampling
26+
ROOT_SPAN_LATENCY(Integer.MAX_VALUE - 2),
27+
2528
// trace data collection
2629
CI_VISIBILITY_TELEMETRY(Integer.MAX_VALUE - 1),
2730
SERVICE_NAME_COLLECTING(Integer.MAX_VALUE);

dd-trace-core/src/main/java/datadog/trace/core/CoreTracer.java

+5
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@
8888
import datadog.trace.core.scopemanager.ContinuableScopeManager;
8989
import datadog.trace.core.taginterceptor.RuleFlags;
9090
import datadog.trace.core.taginterceptor.TagInterceptor;
91+
import datadog.trace.core.traceinterceptor.LatencyTraceInterceptor;
9192
import datadog.trace.lambda.LambdaHandler;
9293
import datadog.trace.relocate.api.RatelimitedLogger;
9394
import datadog.trace.util.AgentTaskScheduler;
@@ -745,6 +746,10 @@ private CoreTracer(
745746
addTraceInterceptor(GitMetadataTraceInterceptor.INSTANCE);
746747
}
747748

749+
if (config.isTraceKeepLatencyThresholdEnabled()) {
750+
addTraceInterceptor(LatencyTraceInterceptor.INSTANCE);
751+
}
752+
748753
this.instrumentationGateway = instrumentationGateway;
749754
callbackProviderAppSec = instrumentationGateway.getCallbackProvider(RequestContextSlot.APPSEC);
750755
callbackProviderIast = instrumentationGateway.getCallbackProvider(RequestContextSlot.IAST);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
package datadog.trace.core.traceinterceptor;
2+
3+
import datadog.trace.api.Config;
4+
import datadog.trace.api.DDTags;
5+
import datadog.trace.api.interceptor.AbstractTraceInterceptor;
6+
import datadog.trace.api.interceptor.MutableSpan;
7+
import datadog.trace.api.interceptor.TraceInterceptor;
8+
import java.util.Collection;
9+
import org.slf4j.Logger;
10+
import org.slf4j.LoggerFactory;
11+
12+
/**
13+
* This trace latency interceptor is disabled by default. It is activated by setting the
14+
* trace.experimental.keep.latency.threshold.ms configuration key to a positive value. The value is
15+
* in milliseconds, and the interceptor retains any local trace whose root span duration is greater
16+
* than this value. The activation of this interceptor is ignored if partial flush is enabled, in
17+
* order to avoid keeping incomplete local traces (incomplete chunks of a trace). Note that since
18+
* the sampling priority is changed at the end of the local trace, there is no guarantee of getting
19+
* complete traces, since the original sampling priority may have already been propagated.
20+
*/
21+
public class LatencyTraceInterceptor extends AbstractTraceInterceptor {
22+
// NOTE(review): this logger is not referenced anywhere else in the visible class body.
private static final Logger log = LoggerFactory.getLogger(LatencyTraceInterceptor.class);
23+
// Threshold is configured in milliseconds and converted to nanoseconds here so it can be compared
// directly with MutableSpan.getDurationNano(). Being a static final initializer, the value is read
// from Config once, when this class is initialized.
24+
private static final long LATENCY = Config.get().getTraceKeepLatencyThreshold() * 1000000L;
25+
26+
// Shared instance, created with the ROOT_SPAN_LATENCY priority.
public static final TraceInterceptor INSTANCE =
27+
new LatencyTraceInterceptor(Priority.ROOT_SPAN_LATENCY);
28+
29+
// Protected constructor: the priority is simply forwarded to AbstractTraceInterceptor.
protected LatencyTraceInterceptor(Priority priority) {
30+
super(priority);
31+
}
32+
33+
/**
 * Tags the local root span with DDTags.MANUAL_KEEP when its duration is strictly greater than the
 * configured threshold.
 *
 * @param latencyTrace the spans of the completed trace
 * @return the same collection that was passed in; no span is added or removed
 */
@Override
34+
public Collection<? extends MutableSpan> onTraceComplete(
35+
Collection<? extends MutableSpan> latencyTrace) {
36+
// Nothing to inspect: hand the empty collection back unchanged.
if (latencyTrace.isEmpty()) {
37+
return latencyTrace;
38+
}
39+
// The local root span is obtained through the first span returned by the iterator.
MutableSpan rootSpan = latencyTrace.iterator().next().getLocalRootSpan();
40+
// Strict comparison: a duration exactly equal to the threshold does not set the tag.
if (rootSpan != null && rootSpan.getDurationNano() > LATENCY) {
41+
rootSpan.setTag(DDTags.MANUAL_KEEP, true);
42+
}
43+
return latencyTrace;
44+
}
45+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
package datadog.trace.core.traceinterceptor
2+
3+
import datadog.trace.api.DDTags
4+
import datadog.trace.common.writer.ListWriter
5+
6+
import datadog.trace.core.test.DDCoreSpecification
7+
8+
import spock.lang.Timeout
9+
10+
// Spock specification checking the sampling priority of a single-span trace for combinations of
// partial flush setting, manual keep/drop tag and span duration.
@Timeout(10)
11+
class LatencyTraceInterceptorTest extends DDCoreSpecification {
12+
13+
14+
def "test set sampling priority according to latency"() {
15+
setup:
16+
17+
// Both settings are injected before the tracer is built in the when: block.
injectSysConfig("trace.partial.flush.enabled", partialFlushEnabled)
18+
injectSysConfig("trace.experimental.keep.latency.threshold.ms", latencyThreshold)
19+
20+
when:
21+
def writer = new ListWriter()
22+
def tracer = tracerBuilder().writer(writer).build()
23+
24+
// The span starts with either the manual keep or the manual drop tag set to true.
def spanSetup = tracer.buildSpan("test","my_operation_name").withTag(priorityTag, true).start()
25+
// Keep the span open for at least minDuration (presumably milliseconds, matching the threshold unit).
sleep(minDuration)
26+
spanSetup.finish()
27+
28+
then:
29+
def trace = writer.firstTrace()
30+
trace.size() == 1
31+
def span = trace[0]
32+
span.context().getSamplingPriority() == expected
33+
34+
cleanup:
35+
tracer.close()
36+
37+
where:
38+
// With partial flush "true", expected follows the manual tag (2 for keep, -1 for drop) for both
// durations. With partial flush "false", the 300 duration with the 200 threshold yields 2 even
// for MANUAL_DROP (last row), while the 10 duration still follows the manual tag.
partialFlushEnabled | latencyThreshold | priorityTag | minDuration | expected
39+
"true" | "200" | DDTags.MANUAL_KEEP | 10 | 2
40+
"true" | "200" | DDTags.MANUAL_DROP | 10 | -1
41+
"true" | "200" | DDTags.MANUAL_KEEP | 300 | 2
42+
"true" | "200" | DDTags.MANUAL_DROP | 300 | -1
43+
"false" | "200" | DDTags.MANUAL_KEEP | 10 | 2
44+
"false" | "200" | DDTags.MANUAL_DROP | 10 | -1
45+
"false" | "200" | DDTags.MANUAL_KEEP | 300 | 2
46+
"false" | "200" | DDTags.MANUAL_DROP | 300 | 2
47+
}
48+
}

internal-api/src/main/java/datadog/trace/api/Config.java

+20
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,8 @@ public static String getHostName() {
176176
private final boolean scopeStrictMode;
177177
private final int scopeIterationKeepAlive;
178178
private final int partialFlushMinSpans;
179+
private final int traceKeepLatencyThreshold;
180+
private final boolean traceKeepLatencyThresholdEnabled;
179181
private final boolean traceStrictWritesEnabled;
180182
private final boolean logExtractHeaderNames;
181183
private final Set<PropagationStyle> propagationStylesToExtract;
@@ -861,6 +863,12 @@ private Config(final ConfigProvider configProvider, final InstrumenterConfig ins
861863
? 0
862864
: configProvider.getInteger(PARTIAL_FLUSH_MIN_SPANS, DEFAULT_PARTIAL_FLUSH_MIN_SPANS);
863865

866+
traceKeepLatencyThreshold =
867+
configProvider.getInteger(
868+
TRACE_KEEP_LATENCY_THRESHOLD_MS, DEFAULT_TRACE_KEEP_LATENCY_THRESHOLD_MS);
869+
870+
traceKeepLatencyThresholdEnabled = !partialFlushEnabled && (traceKeepLatencyThreshold > 0);
871+
864872
traceStrictWritesEnabled = configProvider.getBoolean(TRACE_STRICT_WRITES_ENABLED, false);
865873

866874
logExtractHeaderNames =
@@ -2077,6 +2085,14 @@ public int getPartialFlushMinSpans() {
20772085
return partialFlushMinSpans;
20782086
}
20792087

2088+
/**
 * Returns the latency threshold read from the TRACE_KEEP_LATENCY_THRESHOLD_MS setting, in
 * milliseconds; it falls back to DEFAULT_TRACE_KEEP_LATENCY_THRESHOLD_MS when not configured.
 */
public int getTraceKeepLatencyThreshold() {
2089+
return traceKeepLatencyThreshold;
2090+
}
2091+
2092+
/**
 * Returns whether latency-based trace keeping is enabled. The flag is computed in the constructor
 * as: partial flush is not enabled and the latency threshold is greater than 0.
 */
public boolean isTraceKeepLatencyThresholdEnabled() {
2093+
return traceKeepLatencyThresholdEnabled;
2094+
}
2095+
20802096
public boolean isTraceStrictWritesEnabled() {
20812097
return traceStrictWritesEnabled;
20822098
}
@@ -4164,6 +4180,10 @@ public String toString() {
41644180
+ scopeIterationKeepAlive
41654181
+ ", partialFlushMinSpans="
41664182
+ partialFlushMinSpans
4183+
+ ", traceKeepLatencyThresholdEnabled="
4184+
+ traceKeepLatencyThresholdEnabled
4185+
+ ", traceKeepLatencyThreshold="
4186+
+ traceKeepLatencyThreshold
41674187
+ ", traceStrictWritesEnabled="
41684188
+ traceStrictWritesEnabled
41694189
+ ", tracePropagationStylesToExtract="

0 commit comments

Comments
 (0)