Skip to content

Commit a6e9fd5

Browse files
authored
Merge branch 'main' into chris.agocs/patch_botocore_stepfunctions
2 parents 907c2ba + 40b31b7 commit a6e9fd5

40 files changed

+1187
-547
lines changed

.github/workflows/stale.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,9 @@ jobs:
1717
# DEV: GitHub Actions have an API rate limit of 1000 operations per hour per repository
1818
# This limit is shared across all actions
1919
operations-per-run: 200
20-
days-before-stale: 180
2120
days-before-close: 180
2221
exempt-issue-labels: 'proposal'
2322
exempt-pr-labels: 'proposal'
24-
remove-stale-when-updated: true
2523
close-issue-message: |
2624
This issue has been automatically closed after six months of inactivity. If it's a
2725
feature request, it has been added to the maintainers' internal backlog and will be

.github/workflows/testrunner.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@ name: Testrunner
33
on:
44
push:
55
branches:
6-
- '1.x'
7-
pull_request:
6+
- 'main'
87
paths:
98
- 'docker/**'
109

ddtrace/appsec/_asm_request_context.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -412,14 +412,14 @@ def _on_set_request_tags(request, span, flask_config):
412412
if _is_iast_enabled():
413413
from ddtrace.appsec._iast._metrics import _set_metric_iast_instrumented_source
414414
from ddtrace.appsec._iast._taint_tracking import OriginType
415-
from ddtrace.appsec._iast._taint_utils import LazyTaintDict
415+
from ddtrace.appsec._iast._taint_utils import taint_structure
416416

417417
_set_metric_iast_instrumented_source(OriginType.COOKIE_NAME)
418418
_set_metric_iast_instrumented_source(OriginType.COOKIE)
419-
420-
request.cookies = LazyTaintDict(
419+
request.cookies = taint_structure(
421420
request.cookies,
422-
origins=(OriginType.COOKIE_NAME, OriginType.COOKIE),
421+
OriginType.COOKIE_NAME,
422+
OriginType.COOKIE,
423423
override_pyobject_tainted=True,
424424
)
425425

ddtrace/appsec/_constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ class IAST(metaclass=Constant_Class):
7575
ENV = "DD_IAST_ENABLED"
7676
ENV_DEBUG = "_DD_IAST_DEBUG"
7777
TELEMETRY_REPORT_LVL = "DD_IAST_TELEMETRY_VERBOSITY"
78+
LAZY_TAINT = "_DD_IAST_LAZY_TAINT"
7879
JSON = "_dd.iast.json"
7980
ENABLED = "_dd.iast.enabled"
8081
CONTEXT_KEY = "_iast_data"

ddtrace/appsec/_handlers.py

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -210,16 +210,13 @@ def _on_django_func_wrapped(fn_args, fn_kwargs, first_arg_expected_type, *_):
210210
from ddtrace.appsec._iast._taint_tracking import OriginType # noqa: F401
211211
from ddtrace.appsec._iast._taint_tracking import is_pyobject_tainted
212212
from ddtrace.appsec._iast._taint_tracking import taint_pyobject
213-
from ddtrace.appsec._iast._taint_utils import LazyTaintDict
213+
from ddtrace.appsec._iast._taint_utils import taint_structure
214214

215215
http_req = fn_args[0]
216216

217-
if not isinstance(http_req.COOKIES, LazyTaintDict):
218-
http_req.COOKIES = LazyTaintDict(http_req.COOKIES, origins=(OriginType.COOKIE_NAME, OriginType.COOKIE))
219-
if not isinstance(http_req.GET, LazyTaintDict):
220-
http_req.GET = LazyTaintDict(http_req.GET, origins=(OriginType.PARAMETER_NAME, OriginType.PARAMETER))
221-
if not isinstance(http_req.POST, LazyTaintDict):
222-
http_req.POST = LazyTaintDict(http_req.POST, origins=(OriginType.BODY, OriginType.BODY))
217+
http_req.COOKIES = taint_structure(http_req.COOKIES, OriginType.COOKIE_NAME, OriginType.COOKIE)
218+
http_req.GET = taint_structure(http_req.GET, OriginType.PARAMETER_NAME, OriginType.PARAMETER)
219+
http_req.POST = taint_structure(http_req.POST, OriginType.BODY, OriginType.BODY)
223220
if not is_pyobject_tainted(getattr(http_req, "_body", None)):
224221
http_req._body = taint_pyobject(
225222
http_req.body,
@@ -228,10 +225,7 @@ def _on_django_func_wrapped(fn_args, fn_kwargs, first_arg_expected_type, *_):
228225
source_origin=OriginType.BODY,
229226
)
230227

231-
if not isinstance(http_req.META, LazyTaintDict):
232-
http_req.META = LazyTaintDict(http_req.META, origins=(OriginType.HEADER_NAME, OriginType.HEADER))
233-
if not isinstance(http_req.headers, LazyTaintDict):
234-
http_req.headers = LazyTaintDict(http_req.headers, origins=(OriginType.HEADER_NAME, OriginType.HEADER))
228+
http_req.headers = taint_structure(http_req.headers, OriginType.HEADER_NAME, OriginType.HEADER)
235229
http_req.path = taint_pyobject(
236230
http_req.path, source_name="path", source_value=http_req.path, source_origin=OriginType.PATH
237231
)
@@ -247,6 +241,7 @@ def _on_django_func_wrapped(fn_args, fn_kwargs, first_arg_expected_type, *_):
247241
source_value=http_req.path,
248242
source_origin=OriginType.PATH,
249243
)
244+
http_req.META = taint_structure(http_req.META, OriginType.HEADER_NAME, OriginType.HEADER)
250245
if fn_kwargs:
251246
try:
252247
for k, v in fn_kwargs.items():
@@ -264,7 +259,7 @@ def _on_wsgi_environ(wrapped, _instance, args, kwargs):
264259

265260
from ddtrace.appsec._iast._metrics import _set_metric_iast_instrumented_source
266261
from ddtrace.appsec._iast._taint_tracking import OriginType # noqa: F401
267-
from ddtrace.appsec._iast._taint_utils import LazyTaintDict
262+
from ddtrace.appsec._iast._taint_utils import taint_structure
268263

269264
_set_metric_iast_instrumented_source(OriginType.HEADER_NAME)
270265
_set_metric_iast_instrumented_source(OriginType.HEADER)
@@ -277,9 +272,7 @@ def _on_wsgi_environ(wrapped, _instance, args, kwargs):
277272
_set_metric_iast_instrumented_source(OriginType.PARAMETER_NAME)
278273
_set_metric_iast_instrumented_source(OriginType.BODY)
279274

280-
return wrapped(
281-
*((LazyTaintDict(args[0], origins=(OriginType.HEADER_NAME, OriginType.HEADER)),) + args[1:]), **kwargs
282-
)
275+
return wrapped(*((taint_structure(args[0], OriginType.HEADER_NAME, OriginType.HEADER),) + args[1:]), **kwargs)
283276

284277
return wrapped(*args, **kwargs)
285278

ddtrace/appsec/_iast/_patches/json_tainting.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from .._patch import try_wrap_function_wrapper
88
from .._taint_utils import LazyTaintDict
99
from .._taint_utils import LazyTaintList
10+
from .._taint_utils import taint_structure
1011

1112

1213
log = get_logger(__name__)
@@ -24,8 +25,9 @@ def unpatch_iast():
2425
# type: () -> None
2526
set_module_unpatched("json", default_attr=_DEFAULT_ATTR)
2627
try_unwrap("json", "loads")
27-
try_unwrap("json.encoder", "JSONEncoder.default")
28-
try_unwrap("simplejson.encoder", "JSONEncoder.default")
28+
if asm_config._iast_lazy_taint:
29+
try_unwrap("json.encoder", "JSONEncoder.default")
30+
try_unwrap("simplejson.encoder", "JSONEncoder.default")
2931

3032

3133
def patch():
@@ -34,8 +36,9 @@ def patch():
3436
if not set_and_check_module_is_patched("json", default_attr=_DEFAULT_ATTR):
3537
return
3638
try_wrap_function_wrapper("json", "loads", wrapped_loads)
37-
try_wrap_function_wrapper("json.encoder", "JSONEncoder.default", patched_json_encoder_default)
38-
try_wrap_function_wrapper("simplejson.encoder", "JSONEncoder.default", patched_json_encoder_default)
39+
if asm_config._iast_lazy_taint:
40+
try_wrap_function_wrapper("json.encoder", "JSONEncoder.default", patched_json_encoder_default)
41+
try_wrap_function_wrapper("simplejson.encoder", "JSONEncoder.default", patched_json_encoder_default)
3942

4043

4144
def wrapped_loads(wrapped, instance, args, kwargs):
@@ -54,9 +57,9 @@ def wrapped_loads(wrapped, instance, args, kwargs):
5457
# take the first source as main source
5558
source = ranges[0].source
5659
if isinstance(obj, dict):
57-
obj = LazyTaintDict(obj, origins=(source.origin, source.origin))
60+
obj = taint_structure(obj, source.origin, source.origin)
5861
elif isinstance(obj, list):
59-
obj = LazyTaintList(obj, origins=(source.origin, source.origin))
62+
obj = taint_structure(obj, source.origin, source.origin)
6063
elif isinstance(obj, (str, bytes, bytearray)):
6164
obj = taint_pyobject(obj, source.name, source.value, source.origin)
6265
pass

ddtrace/appsec/_iast/_taint_utils.py

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,13 @@
11
#!/usr/bin/env python3
22
from collections import abc
3+
import dataclasses
4+
from typing import Any
5+
from typing import List
6+
from typing import Optional
7+
from typing import Union
38

49
from ddtrace.internal.logger import get_logger
10+
from ddtrace.settings.asm import config as asm_config
511

612

713
DBAPI_INTEGRATIONS = ("sqlite", "psycopg", "mysql", "mariadb")
@@ -10,6 +16,124 @@
1016
log = get_logger(__name__)
1117

1218

19+
# Non Lazy Tainting
20+
21+
22+
@dataclasses.dataclass
class _DeepTaintCommand:
    """One unit of work for the iterative traversal done by ``taint_structure``.

    A command is processed twice for containers: once with ``pre=True`` (the
    object is inspected and its children are scheduled) and once with
    ``pre=False`` (the rebuilt container in ``struct`` is stored in its parent).
    """

    # True on the first visit of ``obj``; False for the follow-up created by ``post``.
    pre: bool
    # Value passed as ``source_name`` when ``obj`` is tainted.
    source_key: str
    # Object to taint (leaf) or to walk through (container).
    obj: Any
    # Parent container (list or dict) receiving the processed result.
    store_struct: Union[list, dict]
    # One-slot list holding the already processed key to use when
    # ``store_struct`` is a dict; it is filled by the command handling the key.
    key: Optional[List[Any]] = None
    # Rebuilt container, only set on ``pre=False`` commands.
    struct: Optional[Union[list, dict]] = None
    # True when ``obj`` is a mapping key rather than a value.
    is_key: bool = False

    def store(self, value):
        """Store ``value`` in the parent structure.

        Lists are appended to; dicts are assigned under the first element of
        ``key`` (or under ``None`` when no key is available).

        Raises:
            ValueError: if ``store_struct`` is neither a list nor a dict.
        """
        if isinstance(self.store_struct, list):
            self.store_struct.append(value)
        elif isinstance(self.store_struct, dict):
            key = self.key[0] if self.key else None
            self.store_struct[key] = value
        else:
            raise ValueError(f"store_struct of type {type(self.store_struct)}")

    def post(self, struct):
        """Return the follow-up (``pre=False``) command carrying ``struct``.

        ``dataclasses.replace`` copies every other field, so flags such as
        ``is_key`` are preserved instead of being reset to their defaults.
        """
        return dataclasses.replace(self, pre=False, struct=struct)
43+
44+
45+
def build_new_tainted_object_from_generic_object(initial_object, wanted_object):
    """Convert ``wanted_object`` back to the container type of ``initial_object``.

    ``wanted_object`` is the plain structure rebuilt with tainted content.
    When both objects already share the exact same class it is returned as is.
    A handful of known classes (tuple, Django ``HttpHeaders`` / ``QueryDict``,
    werkzeug ``ImmutableMultiDict``) are rebuilt explicitly. For any other
    class ``initial_object`` is returned untouched: its content may then stay
    untainted, but the behavior of the instrumented code is not altered.
    """
    origin_cls = initial_object.__class__
    if wanted_object.__class__ is origin_cls:
        return wanted_object

    # Classes are matched by qualified name so that no framework import is needed.
    qualified = (origin_cls.__module__, origin_cls.__name__)

    if qualified == ("builtins", "tuple"):
        return tuple(wanted_object)

    # Django
    if qualified == ("django.http.request", "HttpHeaders"):
        headers = origin_cls({})
        headers._store = {name.lower(): (name, value) for name, value in wanted_object.items()}
        return headers

    if qualified == ("django.http.request", "QueryDict"):
        query = origin_cls()
        for name, value in wanted_object.items():
            # Plain dict assignment, bypassing the class's own __setitem__.
            dict.__setitem__(query, name, value)
        return query

    # Flask 2+ and Flask 1 expose the same class under different modules.
    if qualified in (
        ("werkzeug.datastructures.structures", "ImmutableMultiDict"),
        ("werkzeug.datastructures", "ImmutableMultiDict"),
    ):
        return origin_cls(wanted_object)

    return initial_object
73+
74+
75+
def taint_structure(main_obj, source_key, source_value, override_pyobject_tainted=False):
    """Taint the string-like leaves of a possibly nested structure.

    The structure is walked iteratively with an explicit stack (no recursion).
    Mappings and sequences are rebuilt as plain dicts/lists with tainted
    content, then converted back to the original container type when
    ``build_new_tainted_object_from_generic_object`` knows how to do so.

    Args:
        main_obj: object to taint; falsy values are returned unchanged.
        source_key: origin reported for strings that are mapping keys. It is
            also used as source name for strings that are not located under a
            mapping key.
        source_value: origin reported for every other string.
        override_pyobject_tainted: when True, strings that are already tainted
            are tainted again.

    Returns:
        The tainted counterpart of ``main_obj``, or ``main_obj`` itself when
        nothing was produced (for instance after an error; this is best effort).
    """
    if not main_obj:
        return main_obj

    # Imported lazily, and only once there is something to taint.
    from ._taint_tracking import is_pyobject_tainted
    from ._taint_tracking import taint_pyobject

    main_res = []
    try:
        # LIFO stack of commands; children are pushed in reverse so that they
        # are processed in their original order, after which the parent's
        # "post" command (pushed first) stores the rebuilt container.
        stack = [_DeepTaintCommand(True, source_key, main_obj, main_res)]
        while stack:
            command = stack.pop()
            if not command.pre:
                # second visit: every child has been stored in command.struct
                command.store(build_new_tainted_object_from_generic_object(command.obj, command.struct))
            elif not command.obj:
                command.store(command.obj)
            elif isinstance(command.obj, (str, bytes, bytearray)):
                if override_pyobject_tainted or not is_pyobject_tainted(command.obj):
                    new_obj = taint_pyobject(
                        pyobject=command.obj,
                        source_name=command.source_key,
                        source_value=command.obj,
                        source_origin=source_key if command.is_key else source_value,
                    )
                    command.store(new_obj)
                else:
                    command.store(command.obj)
            elif isinstance(command.obj, abc.Mapping):
                res = {}
                stack.append(command.post(res))
                # use the fundamental dict enumeration if possible to bypass any override of custom classes
                iterable = dict.items(command.obj) if isinstance(command.obj, dict) else command.obj.items()
                todo = []
                for k, v in iterable:
                    # the key command fills key_store before the value command reads it
                    key_store = []
                    todo.append(_DeepTaintCommand(True, k, k, key_store, is_key=True))
                    todo.append(_DeepTaintCommand(True, k, v, res, key_store))
                stack.extend(reversed(todo))
            elif isinstance(command.obj, abc.Sequence):
                res = []
                stack.append(command.post(res))
                todo = [_DeepTaintCommand(True, command.source_key, v, res) for v in command.obj]
                stack.extend(reversed(todo))
            else:
                command.store(command.obj)
    except Exception:
        # Best effort: tainting must never break the instrumented application.
        # KeyboardInterrupt / SystemExit are deliberately left to propagate.
        log.debug("taint_structure error", exc_info=True)
    return main_res[0] if main_res else main_obj
132+
133+
134+
# Lazy Tainting
135+
136+
13137
def _is_tainted_struct(obj):
14138
return hasattr(obj, "_origins")
15139

@@ -402,3 +526,13 @@ def check_tainted_args(args, kwargs, tracer, integration_name, method):
402526
return len(args) and args[0] and is_pyobject_tainted(args[0])
403527

404528
return False
529+
530+
531+
if asm_config._iast_lazy_taint:
532+
# redefining taint_structure to use lazy object if required
533+
534+
def taint_structure(main_obj, source_key, source_value, override_pyobject_tainted=False): # noqa: F811
535+
if isinstance(main_obj, abc.Mapping):
536+
return LazyTaintDict(main_obj, source_key, source_value, override_pyobject_tainted)
537+
elif isinstance(main_obj, abc.Sequence):
538+
return LazyTaintList(main_obj, source_key, source_value, override_pyobject_tainted)

ddtrace/contrib/langchain/patch.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -555,7 +555,7 @@ def traced_embedding(langchain, pin, func, instance, args, kwargs):
555555
# langchain currently does not support token tracking for OpenAI embeddings:
556556
# https://github.com/hwchase17/langchain/issues/945
557557
embeddings = func(*args, **kwargs)
558-
if isinstance(embeddings, list) and isinstance(embeddings[0], list):
558+
if isinstance(embeddings, list) and embeddings and isinstance(embeddings[0], list):
559559
for idx, embedding in enumerate(embeddings):
560560
span.set_metric("langchain.response.outputs.%d.embedding_length" % idx, len(embedding))
561561
else:

0 commit comments

Comments
 (0)