Skip to content

Commit 4c7de01

Browse files
committed
Fix APM trace start time
In #96205 we started recording the start time of the authn phase of REST request processing, because this happens too early to even start a span for the request. However we capture the threadpool's cached time which can be 200ms slow, yielding spans that appear much longer than they should. This commit moves to capturing the time using `System.currentTimeMillis` to avoid this problem. Backport of #98113 to 8.9
1 parent f43bfa2 commit 4c7de01

File tree

3 files changed

+24
-8
lines changed

3 files changed

+24
-8
lines changed

docs/changelog/98113.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 98113
2+
summary: Fix APM trace start time
3+
area: Infra/Core
4+
type: bug
5+
issues: []

server/src/main/java/org/elasticsearch/action/ActionModule.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -581,7 +581,7 @@ public ActionModule(
581581
public void copyRequestHeadersToThreadContext(HttpPreRequest request, ThreadContext threadContext) {
582582
// the request's thread-context must always be populated (by calling this method) before undergoing any request related processing
583583
// we use this opportunity to first record the request processing start time
584-
threadContext.putTransient(Task.TRACE_START_TIME, Instant.ofEpochMilli(threadPool.absoluteTimeInMillis()));
584+
threadContext.putTransient(Task.TRACE_START_TIME, Instant.ofEpochMilli(System.currentTimeMillis()));
585585
for (final RestHeaderDefinition restHeader : headersToCopy) {
586586
final String name = restHeader.getName();
587587
final List<String> headerValues = request.getHeaders().get(name);

server/src/test/java/org/elasticsearch/http/AbstractHttpServerTransportTests.java

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656

5757
import java.net.InetSocketAddress;
5858
import java.net.UnknownHostException;
59+
import java.time.Instant;
5960
import java.util.ArrayList;
6061
import java.util.Arrays;
6162
import java.util.Collection;
@@ -68,6 +69,7 @@
6869
import java.util.Set;
6970
import java.util.concurrent.CountDownLatch;
7071
import java.util.concurrent.TimeUnit;
72+
import java.util.concurrent.atomic.AtomicReference;
7173

7274
import static java.net.InetAddress.getByName;
7375
import static java.util.Arrays.asList;
@@ -82,9 +84,11 @@
8284
import static org.hamcrest.Matchers.containsInAnyOrder;
8385
import static org.hamcrest.Matchers.containsString;
8486
import static org.hamcrest.Matchers.equalTo;
87+
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
8588
import static org.hamcrest.Matchers.hasSize;
8689
import static org.hamcrest.Matchers.instanceOf;
8790
import static org.hamcrest.Matchers.is;
91+
import static org.hamcrest.Matchers.lessThanOrEqualTo;
8892
import static org.hamcrest.Matchers.notNullValue;
8993
import static org.hamcrest.Matchers.nullValue;
9094
import static org.mockito.Mockito.mock;
@@ -324,6 +328,7 @@ protected void populatePerRequestThreadContext(RestRequest restRequest, ThreadCo
324328
*/
325329
public void testTraceParentAndTraceId() {
326330
final String traceParentValue = "00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01";
331+
final AtomicReference<Instant> traceStartTimeRef = new AtomicReference<>();
327332
final HttpServerTransport.Dispatcher dispatcher = new HttpServerTransport.Dispatcher() {
328333

329334
@Override
@@ -332,7 +337,8 @@ public void dispatchRequest(final RestRequest request, final RestChannel channel
332337
assertThat(threadContext.getHeader(Task.TRACE_PARENT_HTTP_HEADER), nullValue());
333338
assertThat(threadContext.getTransient("parent_" + Task.TRACE_PARENT_HTTP_HEADER), equalTo(traceParentValue));
334339
// request trace start time is also set
335-
assertThat(threadContext.getTransient(Task.TRACE_START_TIME), notNullValue());
340+
assertTrue(traceStartTimeRef.compareAndSet(null, threadContext.getTransient(Task.TRACE_START_TIME)));
341+
assertNotNull(traceStartTimeRef.get());
336342
}
337343

338344
@Override
@@ -371,14 +377,10 @@ protected HttpServerChannel bind(InetSocketAddress hostAddress) {
371377
}
372378

373379
@Override
374-
protected void doStart() {
375-
376-
}
380+
protected void doStart() {}
377381

378382
@Override
379-
protected void stopInternal() {
380-
381-
}
383+
protected void stopInternal() {}
382384

383385
@Override
384386
public HttpStats stats() {
@@ -391,11 +393,20 @@ protected void populatePerRequestThreadContext(RestRequest restRequest, ThreadCo
391393
}
392394
}
393395
) {
396+
final var systemTimeBeforeRequest = System.currentTimeMillis();
394397
transport.dispatchRequest(fakeRequest, channel, null);
398+
final var systemTimeAfterRequest = System.currentTimeMillis();
395399
// headers are "null" here, aka not present, because the thread context changes containing them is to be confined to the request
396400
assertThat(threadPool.getThreadContext().getHeader(Task.TRACE_ID), nullValue());
397401
assertThat(threadPool.getThreadContext().getHeader(Task.TRACE_PARENT_HTTP_HEADER), nullValue());
398402
assertThat(threadPool.getThreadContext().getTransient("parent_" + Task.TRACE_PARENT_HTTP_HEADER), nullValue());
403+
404+
// system clock is not _technically_ monotonic but in practice it's very unlikely to see a discontinuity here
405+
assertThat(
406+
traceStartTimeRef.get().toEpochMilli(),
407+
allOf(greaterThanOrEqualTo(systemTimeBeforeRequest), lessThanOrEqualTo(systemTimeAfterRequest))
408+
);
409+
399410
transport.dispatchRequest(null, null, new Exception());
400411
// headers are "null" here, aka not present, because the thread context changes containing them is to be confined to the request
401412
assertThat(threadPool.getThreadContext().getHeader(Task.TRACE_ID), nullValue());

0 commit comments

Comments
 (0)