Skip to content

Commit 63fbd6d

Browse files
committed
Improve recorded detail of exception stacktraces
In many X-Ray SDK use cases, including the Django/Flask middleware and aws_xray_sdk.core.xray_recorder.capture and .capture_async cases, there is a broad try/except "catch-all" for exceptions which records useful information in an X-Ray trace in the event of an unexpected exception. However, the approach (prior to this patch) of using `traceback.extract_stack` in the `except` block to extract stack trace information isn't terribly useful when viewing X-Ray traces for debugging purposes. `traceback.extract_stack` only shows the stack state up to and including the `traceback.extract_stack` call in the `except` block; it does not include the stack trace information from the caught exception--which are arguably the most important details to include in the X-Ray trace! Similarly, including _only_ the exception's stack trace information can omit important code path context if the exception-producing code is wrapped in a decorator. This is a very subtle but important detail. There is a good discussion of exactly this problem--specifically in the context of decorators with a "catch-all" try/except--on Stack Overflow: https://stackoverflow.com/questions/14527819/traceback-shows-up-until-decorator. This patch introduces the `stacktrace` utility module and is an attempt to include more relevant info in recorded X-Ray traces while respecting the `max_trace_back` behavior expressed in many places to avoid bloated trace messages. To illustrate the difference this makes, consider a trivial Lambda function written in Python with a single module called `myfunction.py`:
```
from aws_xray_sdk.core import xray_recorder


@xray_recorder.capture('handler')
def handler(event, context):
    data = foo()
    return data


def foo():
    return bar()


def bar():
    raise Exception('Something went wrong!')
    return {'data': [1,2,3]}
```
There are multiple function calls here in order to create an interesting call stack for tracing purposes.
With the current implementation of aws_xray_sdk, here's the trace that gets recorded by X-Ray:

    Exception: Something went wrong!
        at <module> (bootstrap.py:538)
        at main (bootstrap.py:533)
        at handle_event_request (bootstrap.py:250)

With this patch applied, we get a lot more information about the call stack, as we would expect:

    Exception: Something went wrong!
        at <module> (bootstrap.py:538)
        at main (bootstrap.py:533)
        at handle_event_request (bootstrap.py:250)
        at handler (myfunction.py:6)
        at foo (myfunction.py:11)
        at bar (myfunction.py:15)

With this we can much more easily trace the origin of a runtime error.
1 parent 461d488 commit 63fbd6d

File tree

7 files changed

+63
-13
lines changed

7 files changed

+63
-13
lines changed

aws_xray_sdk/core/async_recorder.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
import time
2-
import traceback
32

43
import wrapt
54

65
from aws_xray_sdk.core.recorder import AWSXRayRecorder
6+
from aws_xray_sdk.core.utils import stacktrace
77

88

99
class AsyncAWSXRayRecorder(AWSXRayRecorder):
@@ -47,7 +47,7 @@ async def record_subsegment_async(self, wrapped, instance, args, kwargs, name,
4747
return return_value
4848
except Exception as e:
4949
exception = e
50-
stack = traceback.extract_stack(limit=self._max_trace_back)
50+
stack = stacktrace.get_stacktrace(limit=self._max_trace_back)
5151
raise
5252
finally:
5353
# No-op if subsegment is `None` due to `LOG_ERROR`.

aws_xray_sdk/core/recorder.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import os
55
import platform
66
import time
7-
import traceback
87

98
import wrapt
109

@@ -23,6 +22,7 @@
2322
from .lambda_launcher import check_in_lambda
2423
from .exceptions.exceptions import SegmentNameMissingException
2524
from .utils.compat import string_types
25+
from .utils import stacktrace
2626

2727
log = logging.getLogger(__name__)
2828

@@ -398,7 +398,7 @@ def record_subsegment(self, wrapped, instance, args, kwargs, name,
398398
return return_value
399399
except Exception as e:
400400
exception = e
401-
stack = traceback.extract_stack(limit=self.max_trace_back)
401+
stack = stacktrace.get_stacktrace(limit=self.max_trace_back)
402402
raise
403403
finally:
404404
# No-op if subsegment is `None` due to `LOG_ERROR`.

aws_xray_sdk/core/utils/stacktrace.py

+51
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import sys
2+
import traceback
3+
4+
5+
def get_stacktrace(limit=None):
    """
    Build a combined stack trace for the point of the call.

    The result starts with the current call stack (with the two innermost
    entries trimmed off, so this helper does not appear in the report).
    When an exception is currently being handled, the entries extracted
    from that exception's traceback are appended after the call stack.

    :param int limit:
        Optional cap on the number of entries returned. ``None`` means no
        cap and ``0`` yields an empty list. A positive value keeps the last
        ``limit`` entries of the combined trace; a negative value keeps the
        first ``abs(limit)`` entries.
    :returns:
        List of stack trace entries, in the same form as those produced by
        `traceback.extract_stack`.
    """
    if limit == 0:
        # Nothing was asked for; skip the stack capture entirely.
        return []

    # Capture the call stack and trim its two innermost entries so that the
    # bookkeeping of this helper is not reported.
    # NOTE(review): `traceback.extract_stack()` appears to end at its caller
    # (this function), which would make the second trimmed entry the frame
    # that called `get_stacktrace()` -- confirm that this is intended.
    frames = traceback.extract_stack()[:-2]

    # Only the traceback component of the active exception (if any) matters.
    active_tb = sys.exc_info()[2]
    if active_tb is not None:
        # An exception is being handled: append its traceback entries so the
        # trace continues down to the frame that raised.
        frames.extend(traceback.extract_tb(active_tb))

    if limit is None:
        return frames
    if limit > 0:
        # Positive limit: keep the last `limit` entries.
        return frames[-limit:]
    # Negative limit: keep the first `abs(limit)` entries.
    return frames[:-limit]

aws_xray_sdk/ext/aiohttp/client.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,12 @@
22
AioHttp Client tracing, only compatible with Aiohttp 3.X versions
33
"""
44
import aiohttp
5-
import traceback
65

76
from types import SimpleNamespace
87

98
from aws_xray_sdk.core import xray_recorder
109
from aws_xray_sdk.core.models import http
10+
from aws_xray_sdk.core.utils import stacktrace
1111
from aws_xray_sdk.ext.util import inject_trace_header, strip_url
1212

1313
# All aiohttp calls will entail outgoing HTTP requests, only in some ad-hoc
@@ -51,7 +51,7 @@ async def end_subsegment_with_exception(session, trace_config_ctx, params):
5151
subsegment = xray_recorder.current_subsegment()
5252
subsegment.add_exception(
5353
params.exception,
54-
traceback.extract_stack(limit=xray_recorder._max_trace_back)
54+
stacktrace.get_stacktrace(limit=xray_recorder._max_trace_back)
5555
)
5656

5757
if isinstance(params.exception, LOCAL_EXCEPTIONS):

aws_xray_sdk/ext/aiohttp/middleware.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
"""
22
AioHttp Middleware
33
"""
4-
import traceback
54
from aiohttp import web
65
from aiohttp.web_exceptions import HTTPException
76

87
from aws_xray_sdk.core import xray_recorder
98
from aws_xray_sdk.core.models import http
9+
from aws_xray_sdk.core.utils import stacktrace
1010
from aws_xray_sdk.ext.util import calculate_sampling_decision, \
1111
calculate_segment_name, construct_xray_header, prepare_response_header
1212

@@ -69,7 +69,7 @@ async def middleware(request, handler):
6969
# Store exception information including the stacktrace to the segment
7070
response = None
7171
segment.put_http_meta(http.STATUS, 500)
72-
stack = traceback.extract_stack(limit=xray_recorder.max_trace_back)
72+
stack = stacktrace.get_stacktrace(limit=xray_recorder.max_trace_back)
7373
segment.add_exception(err, stack)
7474
raise
7575
finally:

aws_xray_sdk/ext/django/middleware.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
import logging
2-
import traceback
32

43
from aws_xray_sdk.core import xray_recorder
54
from aws_xray_sdk.core.models import http
5+
from aws_xray_sdk.core.utils import stacktrace
66
from aws_xray_sdk.ext.util import calculate_sampling_decision, \
77
calculate_segment_name, construct_xray_header, prepare_response_header
88

@@ -87,5 +87,5 @@ def process_exception(self, request, exception):
8787
segment = xray_recorder.current_segment()
8888
segment.put_http_meta(http.STATUS, 500)
8989

90-
stack = traceback.extract_stack(limit=xray_recorder._max_trace_back)
90+
stack = stacktrace.get_stacktrace(limit=xray_recorder._max_trace_back)
9191
segment.add_exception(exception, stack)

aws_xray_sdk/ext/flask/middleware.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1-
import traceback
2-
31
import flask.templating
42
from flask import request
53

64
from aws_xray_sdk.core.models import http
5+
from aws_xray_sdk.core.utils import stacktrace
76
from aws_xray_sdk.ext.util import calculate_sampling_decision, \
87
calculate_segment_name, construct_xray_header, prepare_response_header
98

@@ -86,7 +85,7 @@ def _handle_exception(self, exception):
8685
return
8786

8887
segment.put_http_meta(http.STATUS, 500)
89-
stack = traceback.extract_stack(limit=self._recorder._max_trace_back)
88+
stack = stacktrace.get_stacktrace(limit=self._recorder._max_trace_back)
9089
segment.add_exception(exception, stack)
9190
self._recorder.end_segment()
9291

0 commit comments

Comments
 (0)