Skip to content

Commit d44f714

Browse files
authored
fix(iast): avoid excessive filtering of stacktrace locations [2.21] (#13345)
Backport of #13272 [APPSEC-57414](https://datadoghq.atlassian.net/browse/APPSEC-57414)
1 parent 28fb540 commit d44f714

File tree

8 files changed

+193
-179
lines changed

8 files changed

+193
-179
lines changed

ddtrace/appsec/_iast/_stacktrace.c

Lines changed: 163 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include <Python.h>
22
#include <frameobject.h>
33
#include <patchlevel.h>
4+
#include <stdbool.h>
45

56
#ifdef _WIN32
67
#define DD_TRACE_INSTALLED_PREFIX "\\ddtrace\\"
@@ -17,6 +18,7 @@
1718
#define GET_LINENO(frame) PyFrame_GetLineNumber((PyFrameObject*)frame)
1819
#define GET_FRAME(tstate) PyThreadState_GetFrame(tstate)
1920
#define GET_PREVIOUS(frame) PyFrame_GetBack(frame)
21+
#define FRAME_INCREF(frame) Py_INCREF(frame)
2022
#define FRAME_DECREF(frame) Py_DecRef(frame)
2123
#define FRAME_XDECREF(frame) Py_XDECREF(frame)
2224
#define FILENAME_DECREF(filename) Py_DecRef(filename)
@@ -38,6 +40,7 @@ GET_FILENAME(PyFrameObject* frame)
3840
#define GET_FRAME(tstate) tstate->frame
3941
#define GET_PREVIOUS(frame) frame->f_back
4042
#define GET_FILENAME(frame) frame->f_code->co_filename
43+
#define FRAME_INCREF(frame)
4144
#define FRAME_DECREF(frame)
4245
#define FRAME_XDECREF(frame)
4346
#define FILENAME_DECREF(filename)
@@ -50,93 +53,188 @@ GET_FILENAME(PyFrameObject* frame)
5053
#endif
5154
#endif
5255

56+
// Python standard library path
57+
static char* STDLIB_PATH = NULL;
58+
static ssize_t STDLIB_PATH_LEN = 0;
59+
60+
// Python site-packages path
61+
static char* PURELIB_PATH = NULL;
62+
static ssize_t PURELIB_PATH_LEN = 0;
63+
5364
/**
54-
* get_file_and_line
55-
*
56-
* Get the filename (path + filename) and line number of the original wrapped
57-
*function to report it.
58-
*
59-
* @return Tuple, string and integer.
60-
**/
61-
static PyObject*
62-
get_file_and_line(PyObject* Py_UNUSED(module), PyObject* cwd_obj)
65+
* Checks if the filename is special.
66+
* For example, a frozen module (`<frozen 'os'>`), a template (`<template>`), etc.
67+
*/
68+
static inline bool
69+
_is_special_frame(const char* filename)
6370
{
64-
PyThreadState* tstate = PyThreadState_Get();
65-
if (!tstate) {
66-
goto exit_0;
67-
}
71+
return filename && strncmp(filename, "<", strlen("<")) == 0;
72+
}
6873

69-
int line;
70-
PyObject* filename_o = NULL;
71-
PyObject* result = NULL;
72-
PyObject* cwd_bytes = NULL;
73-
char* cwd = NULL;
74+
static inline bool
75+
_is_ddtrace_filename(const char* filename)
76+
{
77+
return filename && strstr(filename, DD_TRACE_INSTALLED_PREFIX) != NULL && strstr(filename, TESTS_PREFIX) == NULL;
78+
}
79+
80+
static inline bool
81+
_is_site_packages_filename(const char* filename)
82+
{
83+
const bool res = filename && PURELIB_PATH && strncmp(filename, PURELIB_PATH, PURELIB_PATH_LEN) == 0;
84+
return res;
85+
}
86+
87+
static inline bool
88+
_is_stdlib_filename(const char* filename)
89+
{
90+
// site-packages is often a subdirectory of stdlib directory, so stdlib
91+
// path is defined as prefixed by stdlib and not prefixed by purelib.
92+
// TODO: As of Python 3.10, we could use sys.stdlib_module_names.
93+
const bool res = filename && STDLIB_PATH && !_is_site_packages_filename(filename) &&
94+
strncmp(filename, STDLIB_PATH, STDLIB_PATH_LEN) == 0;
95+
return res;
96+
}
7497

75-
if (!PyUnicode_FSConverter(cwd_obj, &cwd_bytes)) {
76-
goto exit_0;
98+
static char*
99+
get_sysconfig_path(const char* name)
100+
{
101+
PyObject* sysconfig_mod = PyImport_ImportModule("sysconfig");
102+
if (!sysconfig_mod) {
103+
return NULL;
77104
}
78-
cwd = PyBytes_AsString(cwd_bytes);
79-
if (!cwd) {
80-
goto exit_0;
105+
106+
PyObject* path = PyObject_CallMethod(sysconfig_mod, "get_path", "s", name);
107+
if (!path) {
108+
Py_DECREF(sysconfig_mod);
109+
return NULL;
81110
}
82111

83-
PyFrameObject* frame = GET_FRAME(tstate);
84-
if (!frame) {
85-
goto exit_0;
112+
const char* path_str = PyUnicode_AsUTF8(path);
113+
char* res = NULL;
114+
if (path_str) {
115+
res = strdup(path_str);
86116
}
117+
Py_DECREF(path);
118+
Py_DECREF(sysconfig_mod);
119+
return res;
120+
}
87121

122+
/**
123+
* Gets a reference to a PyFrameObject and walks up the stack until a relevant frame is found.
124+
*
125+
* Returns a new reference to the PyFrameObject.
126+
*
127+
* The caller is not responsible for DECREF'ing the given PyFrameObject, but it is responsible for
128+
* DECREF'ing the returned PyFrameObject.
129+
*/
130+
static PyFrameObject*
131+
_find_relevant_frame(PyFrameObject* frame, bool allow_site_packages)
132+
{
88133
while (NULL != frame) {
89-
filename_o = GET_FILENAME(frame);
134+
PyObject* filename_o = GET_FILENAME(frame);
90135
if (!filename_o) {
91-
goto exit;
136+
FRAME_DECREF(frame);
137+
return NULL;
92138
}
93139
const char* filename = PyUnicode_AsUTF8(filename_o);
94-
if (((strstr(filename, DD_TRACE_INSTALLED_PREFIX) != NULL && strstr(filename, TESTS_PREFIX) == NULL)) ||
95-
(strstr(filename, SITE_PACKAGES_PREFIX) != NULL || strstr(filename, cwd) == NULL)) {
140+
if (_is_special_frame(filename) || _is_ddtrace_filename(filename) || _is_stdlib_filename(filename) ||
141+
(!allow_site_packages && _is_site_packages_filename(filename))) {
96142
PyFrameObject* prev_frame = GET_PREVIOUS(frame);
97143
FRAME_DECREF(frame);
98144
FILENAME_DECREF(filename_o);
99145
frame = prev_frame;
100146
continue;
101147
}
102-
/*
103-
frame->f_lineno will not always return the correct line number
104-
you need to call PyCode_Addr2Line().
105-
*/
106-
line = GET_LINENO(frame);
107-
PyObject* line_obj = Py_BuildValue("i", line);
108-
if (!line_obj) {
109-
goto exit;
110-
}
111-
result = PyTuple_Pack(2, filename_o, line_obj);
148+
FILENAME_DECREF(filename_o);
112149
break;
113150
}
114-
if (result == NULL) {
115-
goto exit_0;
151+
return frame;
152+
}
153+
154+
static PyObject*
155+
_get_result_tuple(PyFrameObject* frame)
156+
{
157+
PyObject* result = NULL;
158+
PyObject* filename_o = NULL;
159+
PyObject* line_o = NULL;
160+
PyObject* funcname_o = NULL;
161+
PyObject* classname_o = NULL;
162+
163+
filename_o = GET_FILENAME(frame);
164+
if (!filename_o) {
165+
goto error;
116166
}
117167

118-
exit:
119-
Py_DecRef(cwd_bytes);
120-
FRAME_XDECREF(frame);
168+
// frame->f_lineno will not always return the correct line number
169+
// you need to call PyCode_Addr2Line().
170+
int line = GET_LINENO(frame);
171+
line_o = Py_BuildValue("i", line);
172+
if (!line_o) {
173+
goto error;
174+
}
175+
result = PyTuple_Pack(2, filename_o, line_o);
176+
177+
error:
121178
FILENAME_XDECREF(filename_o);
179+
Py_XDECREF(line_o);
122180
return result;
181+
}
123182

124-
exit_0:; // fix: "a label can only be part of a statement and a declaration is not a statement" error
125-
// Return "", -1
126-
PyObject* line_obj = Py_BuildValue("i", -1);
127-
filename_o = PyUnicode_FromString("");
128-
result = PyTuple_Pack(2, filename_o, line_obj);
129-
Py_DecRef(cwd_bytes);
183+
/**
184+
* get_file_and_line
185+
*
186+
* Get the filename (path + filename) and line number of the original wrapped
187+
*function to report it.
188+
*
189+
* @return Tuple, string and integer.
190+
**/
191+
static PyObject*
192+
get_file_and_line(PyObject* Py_UNUSED(module), PyObject* Py_UNUSED(args))
193+
{
194+
PyFrameObject* frame = NULL;
195+
PyFrameObject* backup_frame = NULL;
196+
PyObject* result = NULL;
197+
PyThreadState* tstate = PyThreadState_Get();
198+
if (!tstate) {
199+
goto exit;
200+
}
201+
202+
frame = GET_FRAME(tstate);
203+
if (!frame) {
204+
goto exit;
205+
}
206+
207+
// Skip all frames until the first non-ddtrace and non-stdlib frame.
208+
// Store that frame as backup (if any). If there is no better frame, fallback to this.
209+
// This happens, for example, when the vulnerability is in a package installed in site-packages.
210+
frame = _find_relevant_frame(frame, true);
211+
if (NULL == frame) {
212+
goto exit;
213+
}
214+
backup_frame = frame;
215+
FRAME_INCREF(backup_frame);
216+
217+
// Continue skipping until we find a frame that is both non-ddtrace and non-site-packages.
218+
frame = _find_relevant_frame(frame, false);
219+
if (NULL == frame) {
220+
frame = backup_frame;
221+
backup_frame = NULL;
222+
} else {
223+
FRAME_DECREF(backup_frame);
224+
}
225+
226+
result = _get_result_tuple(frame);
227+
228+
exit:
130229
FRAME_XDECREF(frame);
131-
FILENAME_XDECREF(filename_o);
132-
Py_DecRef(line_obj);
133230
return result;
134231
}
135232

136-
static PyMethodDef StacktraceMethods[] = {
137-
{ "get_info_frame", (PyCFunction)get_file_and_line, METH_O, "stacktrace functions" },
138-
{ NULL, NULL, 0, NULL }
139-
};
233+
static PyMethodDef StacktraceMethods[] = { { "get_info_frame",
234+
(PyCFunction)get_file_and_line,
235+
METH_NOARGS,
236+
"Stacktrace function: returns (filename, line, method, class)" },
237+
{ NULL, NULL, 0, NULL } };
140238

141239
static struct PyModuleDef stacktrace = { PyModuleDef_HEAD_INIT,
142240
"ddtrace.appsec._iast._stacktrace",
@@ -150,5 +248,13 @@ PyInit__stacktrace(void)
150248
PyObject* m = PyModule_Create(&stacktrace);
151249
if (m == NULL)
152250
return NULL;
251+
STDLIB_PATH = get_sysconfig_path("stdlib");
252+
if (STDLIB_PATH) {
253+
STDLIB_PATH_LEN = strlen(STDLIB_PATH);
254+
}
255+
PURELIB_PATH = get_sysconfig_path("purelib");
256+
if (PURELIB_PATH) {
257+
PURELIB_PATH_LEN = strlen(PURELIB_PATH);
258+
}
153259
return m;
154260
}

ddtrace/appsec/_iast/_stacktrace.pyi

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
def get_info_frame(cwd_obj): ...
1+
def get_info_frame(): ...

ddtrace/appsec/_iast/taint_sinks/_base.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
import sysconfig
23
from typing import Any
34
from typing import Callable
45
from typing import Optional
@@ -25,6 +26,9 @@
2526

2627
CWD = os.path.abspath(os.getcwd())
2728

29+
PURELIB_PATH = sysconfig.get_path("purelib")
30+
STDLIB_PATH = sysconfig.get_path("stdlib")
31+
2832

2933
class taint_sink_deduplication(deduplication):
3034
def _check_deduplication(self):
@@ -120,15 +124,16 @@ def report(cls, evidence_value: Text = "", dialect: Optional[Text] = None) -> No
120124

121125
skip_location = getattr(cls, "skip_location", False)
122126
if not skip_location:
123-
frame_info = get_info_frame(CWD)
127+
frame_info = get_info_frame()
124128
if not frame_info or frame_info[0] == "" or frame_info[0] == -1:
125129
return None
126130

127131
file_name, line_number = frame_info
128132

129-
# Remove CWD prefix
130-
if file_name.startswith(CWD):
131-
file_name = os.path.relpath(file_name, start=CWD)
133+
file_name = cls._rel_path(file_name)
134+
if not file_name:
135+
log.debug("Could not relativize vulnerability location path: %s", frame_info[0])
136+
return
132137

133138
if not cls.is_not_reported(file_name, line_number):
134139
return
@@ -144,3 +149,13 @@ def report(cls, evidence_value: Text = "", dialect: Optional[Text] = None) -> No
144149
# we need to restore the quota
145150
if not result:
146151
cls.increment_quota()
152+
153+
@staticmethod
154+
def _rel_path(file_name: str) -> str:
155+
if file_name.startswith(PURELIB_PATH):
156+
return os.path.relpath(file_name, start=PURELIB_PATH)
157+
if file_name.startswith(STDLIB_PATH):
158+
return os.path.relpath(file_name, start=STDLIB_PATH)
159+
if file_name.startswith(CWD):
160+
return os.path.relpath(file_name, start=CWD)
161+
return ""
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
fixes:
3+
- |
4+
Code Security: IAST: Avoid excessive filtering of stacktrace locations when finding vulnerabilities. After this change, vulnerabilities that were previously discarded will now be reported. In particular, if they were found within code in site-packages or outside of the working directory.

tests/appsec/iast/taint_sinks/test_weak_hash.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from contextlib import contextmanager
2+
import os
23
import sys
34
from unittest import mock
45

@@ -95,9 +96,8 @@ def test_weak_hash_hashlib(iast_context_defaults, hash_func, method):
9596
],
9697
)
9798
def test_ensure_line_reported_is_minus_one_for_edge_cases(iast_context_defaults, hash_func, method, fake_line):
98-
with mock.patch(
99-
"ddtrace.appsec._iast.taint_sinks._base.get_info_frame", return_value=(WEAK_ALGOS_FIXTURES_PATH, fake_line)
100-
):
99+
absolute_path = os.path.abspath(WEAK_ALGOS_FIXTURES_PATH)
100+
with mock.patch("ddtrace.appsec._iast.taint_sinks._base.get_info_frame", return_value=(absolute_path, fake_line)):
101101
parametrized_weak_hash(hash_func, method)
102102

103103
_, hash_value = get_line_and_hash(

tests/appsec/iast_memcheck/_stacktrace_py.py

Lines changed: 0 additions & 38 deletions
This file was deleted.

0 commit comments

Comments
 (0)