Skip to content

Commit f7fa041

Browse files
agocsmajorgreysZStriker19emmettbutler
authored
feat(botocore): patch stepfunctions (#7514)
Patch botocore stepfunctions start-execution and start-sync-execution. We want to be able to add trace context to the `input` so the stepfunction logs trace reducer can link stepfunction traces to upstream traces. ## Checklist - [x] Change(s) are motivated and described in the PR description. - [x] Testing strategy is described if automated tests are not included in the PR. - [x] Risk is outlined (performance impact, potential for breakage, maintainability, etc). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed. If no release note is required, add label `changelog/no-changelog`. - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)). - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Title is accurate. - [x] No unnecessary changes are introduced. - [x] Description motivates each change. - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary. - [x] Testing strategy adequately addresses listed risk(s). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] Release note makes sense to a user of the library. - [x] Reviewer has explicitly acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment. - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) - [x] If this PR touches code that signs or publishes builds or packages, or handles credentials of any kind, I've requested a review from `@DataDog/security-design-and-guidance`. - [x] This PR doesn't touch any of that. --------- Co-authored-by: Tahir H. Butt <[email protected]> Co-authored-by: Zachary Groves <[email protected]> Co-authored-by: Emmett Butler <[email protected]>
1 parent 7c75365 commit f7fa041

File tree

4 files changed

+185
-0
lines changed

4 files changed

+185
-0
lines changed

ddtrace/contrib/botocore/patch.py

+16
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@
3535
from .services.sqs import inject_trace_to_sqs_or_sns_batch_message
3636
from .services.sqs import inject_trace_to_sqs_or_sns_message
3737
from .services.sqs import patched_sqs_api_call
38+
from .services.stepfunctions import inject_trace_to_stepfunction_input
39+
from .services.stepfunctions import patched_stepfunction_api_call
3840
from .utils import inject_trace_to_client_context
3941
from .utils import inject_trace_to_eventbridge_detail
4042
from .utils import set_patched_api_call_span_tags
@@ -150,6 +152,10 @@ def patched_api_call(original_func, instance, args, kwargs):
150152
kwargs=kwargs,
151153
function_vars=function_vars,
152154
)
155+
elif endpoint_name == "states":
156+
return patched_stepfunction_api_call(
157+
original_func=original_func, instance=instance, args=args, kwargs=kwargs, function_vars=function_vars
158+
)
153159
else:
154160
# this is the default patched api call
155161
return patched_api_call_fallback(
@@ -220,6 +226,16 @@ def patched_api_call_fallback(original_func, instance, args, kwargs, function_va
220226
cloud_service="sns",
221227
direction=SpanDirection.OUTBOUND,
222228
)
229+
if endpoint_name == "states" and (
230+
operation == "StartExecution" or operation == "StartSyncExecution"
231+
):
232+
inject_trace_to_stepfunction_input(params, span)
233+
span.name = schematize_cloud_messaging_operation(
234+
trace_operation,
235+
cloud_provider="aws",
236+
cloud_service="stepfunctions",
237+
direction=SpanDirection.OUTBOUND,
238+
)
223239
except Exception:
224240
log.warning("Unable to inject trace context", exc_info=True)
225241

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
import json
2+
from typing import Any # noqa:F401
3+
from typing import Dict # noqa:F401
4+
5+
import botocore.exceptions
6+
7+
from ddtrace import Span # noqa:F401
8+
from ddtrace import config
9+
from ddtrace.ext import http
10+
from ddtrace.propagation.http import HTTPPropagator
11+
12+
from ....ext import SpanTypes
13+
from ....internal.logger import get_logger
14+
from ....internal.schema import SpanDirection
15+
from ....internal.schema import schematize_cloud_messaging_operation
16+
from ....internal.schema import schematize_service_name
17+
from ..utils import set_patched_api_call_span_tags
18+
from ..utils import set_response_metadata_tags
19+
20+
21+
log = get_logger(__name__)
22+
23+
24+
def inject_trace_to_stepfunction_input(params, span):
25+
# type: (Any, Span) -> None
26+
"""
27+
:params: contains the params for the current botocore action
28+
:span: the span which provides the trace context to be propagated
29+
30+
Inject the trace headers into the StepFunction input if the input is a JSON string
31+
"""
32+
if "input" not in params:
33+
log.warning("Unable to inject context. The StepFunction input had no input.")
34+
return
35+
36+
if params["input"] is None:
37+
log.warning("Unable to inject context. The StepFunction input was None.")
38+
return
39+
40+
elif isinstance(params["input"], dict):
41+
if "_datadog" in params["input"]:
42+
log.warning("Input already has trace context.")
43+
return
44+
params["input"]["_datadog"] = {}
45+
HTTPPropagator.inject(span.context, params["input"]["_datadog"])
46+
return
47+
48+
elif isinstance(params["input"], str):
49+
try:
50+
input_obj = json.loads(params["input"])
51+
except ValueError:
52+
log.warning("Input is not a valid JSON string")
53+
return
54+
55+
if isinstance(input_obj, dict):
56+
input_obj["_datadog"] = {}
57+
HTTPPropagator.inject(span.context, input_obj["_datadog"])
58+
input_json = json.dumps(input_obj)
59+
60+
params["input"] = input_json
61+
return
62+
else:
63+
log.warning("Unable to inject context. The StepFunction input was not a dict.")
64+
return
65+
66+
else:
67+
log.warning("Unable to inject context. The StepFunction input was not a dict or a JSON string.")
68+
69+
70+
def patched_stepfunction_api_call(original_func, instance, args, kwargs: Dict, function_vars: Dict):
71+
params = function_vars.get("params")
72+
trace_operation = function_vars.get("trace_operation")
73+
pin = function_vars.get("pin")
74+
endpoint_name = function_vars.get("endpoint_name")
75+
operation = function_vars.get("operation")
76+
77+
with pin.tracer.trace(
78+
trace_operation,
79+
service=schematize_service_name("{}.{}".format(pin.service, endpoint_name)),
80+
span_type=SpanTypes.HTTP,
81+
) as span:
82+
set_patched_api_call_span_tags(span, instance, args, params, endpoint_name, operation)
83+
84+
if args:
85+
if config.botocore["distributed_tracing"]:
86+
try:
87+
if endpoint_name == "states" and operation in {"StartExecution", "StartSyncExecution"}:
88+
inject_trace_to_stepfunction_input(params, span)
89+
span.name = schematize_cloud_messaging_operation(
90+
trace_operation,
91+
cloud_provider="aws",
92+
cloud_service="stepfunctions",
93+
direction=SpanDirection.OUTBOUND,
94+
)
95+
except Exception:
96+
log.warning("Unable to inject trace context", exc_info=True)
97+
98+
try:
99+
return original_func(*args, **kwargs)
100+
except botocore.exceptions.ClientError as e:
101+
set_response_metadata_tags(span, e.response)
102+
103+
# If we have a status code, and the status code is not an error,
104+
# then ignore the exception being raised
105+
status_code = span.get_tag(http.STATUS_CODE)
106+
if status_code and not config.botocore.operations[span.resource].is_error_code(int(status_code)):
107+
span._ignore_exception(botocore.exceptions.ClientError)
108+
raise
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
features:
3+
- |
4+
botocore: Add the ability to inject trace context into the input field of botocore stepfunction start_execution and
5+
start_sync_execution calls.

tests/contrib/botocore/test.py

+56
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from moto import mock_s3
1919
from moto import mock_sns
2020
from moto import mock_sqs
21+
from moto import mock_stepfunctions
2122
import pytest
2223

2324
from tests.utils import get_128_bit_trace_id_from_headers
@@ -885,6 +886,61 @@ def test_schematized_unspecified_service_sqs_client_v1(self):
885886
assert spans[2].service == DEFAULT_SPAN_SERVICE_NAME
886887
assert spans[2].name == "aws.sqs.receive"
887888

889+
@mock_stepfunctions
890+
def test_stepfunctions_send_start_execution_trace_injection(self):
891+
sf = self.session.create_client("stepfunctions", region_name="us-west-2", endpoint_url="http://localhost:4566")
892+
sf.create_state_machine(
893+
name="lincoln",
894+
definition='{"StartAt": "HelloWorld","States": {"HelloWorld": {"Type": "Pass","End": true}}}',
895+
roleArn="arn:aws:iam::012345678901:role/DummyRole",
896+
)
897+
Pin(service=self.TEST_SERVICE, tracer=self.tracer).onto(sf)
898+
sf.start_execution(
899+
stateMachineArn="arn:aws:states:us-west-2:000000000000:stateMachine:lincoln", input='{"baz":1}'
900+
)
901+
# I've tried to find a way to make Moto show me the input to the execution, but can't get that to work.
902+
spans = self.get_spans()
903+
assert spans
904+
span = spans[0]
905+
assert span.name == "states.command" # This confirms our patch is working
906+
sf.delete_state_machine(stateMachineArn="arn:aws:states:us-west-2:000000000000:stateMachine:lincoln")
907+
908+
@mock_stepfunctions
909+
def test_stepfunctions_send_start_execution_trace_injection_with_array_input(self):
910+
sf = self.session.create_client("stepfunctions", region_name="us-west-2", endpoint_url="http://localhost:4566")
911+
sf.create_state_machine(
912+
name="miller",
913+
definition='{"StartAt": "HelloWorld","States": {"HelloWorld": {"Type": "Pass","End": true}}}',
914+
roleArn="arn:aws:iam::012345678901:role/DummyRole",
915+
)
916+
Pin(service=self.TEST_SERVICE, tracer=self.tracer).onto(sf)
917+
sf.start_execution(
918+
stateMachineArn="arn:aws:states:us-west-2:000000000000:stateMachine:miller", input='["one", "two", "three"]'
919+
)
920+
# I've tried to find a way to make Moto show me the input to the execution, but can't get that to work.
921+
spans = self.get_spans()
922+
assert spans
923+
span = spans[0]
924+
assert span.name == "states.command" # This confirms our patch is working
925+
sf.delete_state_machine(stateMachineArn="arn:aws:states:us-west-2:000000000000:stateMachine:miller")
926+
927+
@mock_stepfunctions
928+
def test_stepfunctions_send_start_execution_trace_injection_with_true_input(self):
929+
sf = self.session.create_client("stepfunctions", region_name="us-west-2", endpoint_url="http://localhost:4566")
930+
sf.create_state_machine(
931+
name="hobart",
932+
definition='{"StartAt": "HelloWorld","States": {"HelloWorld": {"Type": "Pass","End": true}}}',
933+
roleArn="arn:aws:iam::012345678901:role/DummyRole",
934+
)
935+
Pin(service=self.TEST_SERVICE, tracer=self.tracer).onto(sf)
936+
sf.start_execution(stateMachineArn="arn:aws:states:us-west-2:000000000000:stateMachine:hobart", input="true")
937+
# I've tried to find a way to make Moto show me the input to the execution, but can't get that to work.
938+
spans = self.get_spans()
939+
assert spans
940+
span = spans[0]
941+
assert span.name == "states.command" # This confirms our patch is working
942+
sf.delete_state_machine(stateMachineArn="arn:aws:states:us-west-2:000000000000:stateMachine:hobart")
943+
888944
def _test_kinesis_client(self):
889945
client = self.session.create_client("kinesis", region_name="us-east-1")
890946
stream_name = "test"

0 commit comments

Comments
 (0)