Skip to content

Commit d7a94dd

Browse files
authored
Merge pull request #147 from bcaller/bc-arguments
Certain args, kwargs of sink functions are affected by taint
2 parents cbc17f6 + d68554c commit d7a94dd

19 files changed

+545
-195
lines changed

examples/vulnerable_code/sql/sqli.py

+8
Original file line numberDiff line numberDiff line change
@@ -38,5 +38,13 @@ def filtering():
3838
print(value.username, value.email)
3939
return 'Result is displayed in console.'
4040

41+
@app.route('/users/<name>', methods=['DELETE'])
42+
def delete_user_dangerously(name):
43+
query = "DELETE FROM user WHERE username = :name"
44+
db.engine.execute(query, name=name)
45+
print('Deleted')
46+
return 'Deleted'
47+
48+
4149
if __name__ == '__main__':
4250
app.run(debug=True)

pyt/cfg/stmt_visitor.py

+1
Original file line numberDiff line numberDiff line change
@@ -573,6 +573,7 @@ def add_blackbox_or_builtin_call(self, node, blackbox):
573573
call_node = BBorBInode(
574574
label='',
575575
left_hand_side=LHS,
576+
ast_node=node,
576577
right_hand_side_variables=[],
577578
line_number=node.lineno,
578579
path=self.filenames[-1],

pyt/core/node_types.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ def __init__(self, label, left_hand_side, right_hand_side_variables, *, line_num
202202
class BBorBInode(AssignmentNode):
203203
"""Node used for handling restore nodes returning from blackbox or builtin function calls."""
204204

205-
def __init__(self, label, left_hand_side, right_hand_side_variables, *, line_number, path, func_name):
205+
def __init__(self, label, left_hand_side, ast_node, right_hand_side_variables, *, line_number, path, func_name):
206206
"""Create a Restore node.
207207
208208
Args:
@@ -213,7 +213,7 @@ def __init__(self, label, left_hand_side, right_hand_side_variables, *, line_num
213213
path(string): Current filename.
214214
func_name(string): The string we will compare with the blackbox_mapping in vulnerabilities.py
215215
"""
216-
super().__init__(label, left_hand_side, None, right_hand_side_variables, line_number=line_number, path=path)
216+
super().__init__(label, left_hand_side, ast_node, right_hand_side_variables, line_number=line_number, path=path)
217217
self.args = list()
218218
self.inner_most_call = self
219219
self.func_name = func_name

pyt/helper_visitors/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
1+
from .call_visitor import CallVisitor
12
from .label_visitor import LabelVisitor
23
from .right_hand_side_visitor import RHSVisitor
34
from .vars_visitor import VarsVisitor
45

56

67
__all__ = [
8+
'CallVisitor',
79
'LabelVisitor',
810
'RHSVisitor',
911
'VarsVisitor'

pyt/helper_visitors/call_visitor.py

+71
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import ast
2+
import re
3+
from collections import defaultdict, namedtuple
4+
from itertools import count
5+
6+
from ..core.ast_helper import get_call_names_as_string
7+
from .right_hand_side_visitor import RHSVisitor
8+
9+
10+
class CallVisitorResults(
11+
namedtuple(
12+
"CallVisitorResults",
13+
("args", "kwargs", "unknown_args", "unknown_kwargs")
14+
)
15+
):
16+
__slots__ = ()
17+
18+
def all_results(self):
19+
for x in self.args:
20+
yield from x
21+
for x in self.kwargs.values():
22+
yield from x
23+
yield from self.unknown_args
24+
yield from self.unknown_kwargs
25+
26+
27+
class CallVisitor(ast.NodeVisitor):
28+
def __init__(self, trigger_str):
29+
self.unknown_arg_visitor = RHSVisitor()
30+
self.unknown_kwarg_visitor = RHSVisitor()
31+
self.argument_visitors = defaultdict(lambda: RHSVisitor())
32+
self._trigger_str = trigger_str
33+
34+
def visit_Call(self, call_node):
35+
func_name = get_call_names_as_string(call_node.func)
36+
trigger_re = r"(^|\.){}$".format(re.escape(self._trigger_str))
37+
if re.search(trigger_re, func_name):
38+
seen_starred = False
39+
for index, arg in enumerate(call_node.args):
40+
if isinstance(arg, ast.Starred):
41+
seen_starred = True
42+
if seen_starred:
43+
self.unknown_arg_visitor.visit(arg)
44+
else:
45+
self.argument_visitors[index].visit(arg)
46+
47+
for keyword in call_node.keywords:
48+
if keyword.arg is None:
49+
self.unknown_kwarg_visitor.visit(keyword.value)
50+
else:
51+
self.argument_visitors[keyword.arg].visit(keyword.value)
52+
self.generic_visit(call_node)
53+
54+
@classmethod
55+
def get_call_visit_results(cls, trigger_str, node):
56+
visitor = cls(trigger_str)
57+
visitor.visit(node)
58+
59+
arg_results = []
60+
for i in count():
61+
try:
62+
arg_results.append(set(visitor.argument_visitors.pop(i).result))
63+
except KeyError:
64+
break
65+
66+
return CallVisitorResults(
67+
arg_results,
68+
{k: set(v.result) for k, v in visitor.argument_visitors.items()},
69+
set(visitor.unknown_arg_visitor.result),
70+
set(visitor.unknown_kwarg_visitor.result),
71+
)

pyt/helper_visitors/right_hand_side_visitor.py

+6
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,9 @@ def visit_Call(self, node):
2121
if node.keywords:
2222
for keyword in node.keywords:
2323
self.visit(keyword)
24+
25+
@classmethod
26+
def result_for_node(cls, node):
27+
visitor = cls()
28+
visitor.visit(node)
29+
return visitor.result
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
1+
import json
12
from collections import namedtuple
23

34

4-
SANITISER_SEPARATOR = '->'
5-
SOURCES_KEYWORD = 'sources:'
6-
SINKS_KEYWORD = 'sinks:'
7-
85
Definitions = namedtuple(
96
'Definitions',
107
(
@@ -13,30 +10,55 @@
1310
)
1411
)
1512

13+
Source = namedtuple('Source', ('trigger_word'))
1614

17-
def parse_section(iterator):
18-
"""Parse a section of a file. Stops at empty line.
1915

20-
Args:
21-
iterator(File): file descriptor pointing at a definition file.
16+
class Sink:
17+
def __init__(
18+
self, trigger, *,
19+
unlisted_args_propagate=True, unlisted_kwargs_propagate=True,
20+
arg_list=None, kwarg_list=None,
21+
sanitisers=None
22+
):
23+
self._trigger = trigger
24+
self.sanitisers = sanitisers or []
25+
self.arg_list_propagates = not unlisted_args_propagate
26+
self.kwarg_list_propagates = not unlisted_kwargs_propagate
2227

23-
Returns:
24-
Iterator of all definitions in the section.
25-
"""
26-
try:
27-
line = next(iterator).rstrip()
28-
while line:
29-
if line.rstrip():
30-
if SANITISER_SEPARATOR in line:
31-
line = line.split(SANITISER_SEPARATOR)
32-
sink = line[0].rstrip()
33-
sanitisers = list(map(str.strip, line[1].split(',')))
34-
yield (sink, sanitisers)
35-
else:
36-
yield (line, list())
37-
line = next(iterator).rstrip()
38-
except StopIteration:
39-
return
28+
if trigger[-1] != '(':
29+
if self.arg_list_propagates or self.kwarg_list_propagates or arg_list or kwarg_list:
30+
raise ValueError("Propagation options specified, but trigger word isn't a function call")
31+
32+
self.arg_list = set(arg_list or ())
33+
self.kwarg_list = set(kwarg_list or ())
34+
35+
def arg_propagates(self, index):
36+
in_list = index in self.arg_list
37+
return self.arg_list_propagates == in_list
38+
39+
def kwarg_propagates(self, keyword):
40+
in_list = keyword in self.kwarg_list
41+
return self.kwarg_list_propagates == in_list
42+
43+
@property
44+
def all_arguments_propagate_taint(self):
45+
if self.arg_list or self.kwarg_list:
46+
return False
47+
return True
48+
49+
@property
50+
def call(self):
51+
if self._trigger[-1] == '(':
52+
return self._trigger[:-1]
53+
return None
54+
55+
@property
56+
def trigger_word(self):
57+
return self._trigger
58+
59+
@classmethod
60+
def from_json(cls, key, data):
61+
return cls(trigger=key, **data)
4062

4163

4264
def parse(trigger_word_file):
@@ -45,13 +67,11 @@ def parse(trigger_word_file):
4567
Returns:
4668
A definitions tuple with sources and sinks.
4769
"""
48-
sources = list()
49-
sinks = list()
50-
with open(trigger_word_file, 'r') as fd:
51-
for line in fd:
52-
line = line.rstrip()
53-
if line == SOURCES_KEYWORD:
54-
sources = list(parse_section(fd))
55-
elif line == SINKS_KEYWORD:
56-
sinks = list(parse_section(fd))
70+
with open(trigger_word_file) as fd:
71+
triggers_dict = json.load(fd)
72+
sources = [Source(s) for s in triggers_dict['sources']]
73+
sinks = [
74+
Sink.from_json(trigger, data)
75+
for trigger, data in triggers_dict['sinks'].items()
76+
]
5777
return Definitions(sources, sinks)

pyt/vulnerabilities/vulnerabilities.py

+46-13
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,11 @@
1313
TaintedNode
1414
)
1515
from ..helper_visitors import (
16+
CallVisitor,
1617
RHSVisitor,
1718
VarsVisitor
1819
)
19-
from .trigger_definitions_parser import parse
20+
from .trigger_definitions_parser import parse, Source
2021
from .vulnerability_helper import (
2122
Sanitiser,
2223
TriggerNode,
@@ -49,8 +50,7 @@ def identify_triggers(
4950
tainted_nodes = filter_cfg_nodes(cfg, TaintedNode)
5051
tainted_trigger_nodes = [
5152
TriggerNode(
52-
'Framework function URL parameter',
53-
sanitisers=None,
53+
Source('Framework function URL parameter'),
5454
cfg_node=node
5555
) for node in tainted_nodes
5656
]
@@ -142,7 +142,7 @@ def find_triggers(
142142
143143
Args:
144144
nodes(list[Node]): the nodes to find triggers in.
145-
trigger_word_list(list[string]): list of trigger words to look for.
145+
trigger_word_list(list[Union[Sink, Source]]): list of trigger words to look for.
146146
nosec_lines(set): lines with # nosec whitelisting
147147
148148
Returns:
@@ -157,23 +157,21 @@ def find_triggers(
157157

158158
def label_contains(
159159
node,
160-
trigger_words
160+
triggers
161161
):
162162
"""Determine if node contains any of the trigger_words provided.
163163
164164
Args:
165165
node(Node): CFG node to check.
166-
trigger_words(list[string]): list of trigger words to look for.
166+
trigger_words(list[Union[Sink, Source]]): list of trigger words to look for.
167167
168168
Returns:
169169
Iterable of TriggerNodes found. Can be multiple because multiple
170170
trigger_words can be in one node.
171171
"""
172-
for trigger_word_tuple in trigger_words:
173-
if trigger_word_tuple[0] in node.label:
174-
trigger_word = trigger_word_tuple[0]
175-
sanitisers = trigger_word_tuple[1]
176-
yield TriggerNode(trigger_word, sanitisers, node)
172+
for trigger in triggers:
173+
if trigger.trigger_word in node.label:
174+
yield TriggerNode(trigger, node)
177175

178176

179177
def build_sanitiser_node_dict(
@@ -243,6 +241,37 @@ def get_sink_args(cfg_node):
243241
return vv.result
244242

245243

244+
def get_sink_args_which_propagate(sink, ast_node):
245+
sink_args_with_positions = CallVisitor.get_call_visit_results(sink.trigger.call, ast_node)
246+
sink_args = []
247+
248+
for i, vars in enumerate(sink_args_with_positions.args):
249+
if sink.trigger.arg_propagates(i):
250+
sink_args.extend(vars)
251+
252+
if (
253+
# Either any unspecified arg propagates
254+
not sink.trigger.arg_list_propagates or
255+
# or there are some propagating args which weren't passed positionally
256+
any(1 for position in sink.trigger.arg_list if position >= len(sink_args_with_positions.args))
257+
):
258+
sink_args.extend(sink_args_with_positions.unknown_args)
259+
260+
for keyword, vars in sink_args_with_positions.kwargs.items():
261+
if sink.trigger.kwarg_propagates(keyword):
262+
sink_args.extend(vars)
263+
264+
if (
265+
# Either any unspecified kwarg propagates
266+
not sink.trigger.kwarg_list_propagates or
267+
# or there are some propagating kwargs which have not been passed by keyword
268+
sink.trigger.kwarg_list - set(sink_args_with_positions.kwargs.keys())
269+
):
270+
sink_args.extend(sink_args_with_positions.unknown_kwargs)
271+
272+
return sink_args
273+
274+
246275
def get_vulnerability_chains(
247276
current_node,
248277
sink,
@@ -377,10 +406,14 @@ def get_vulnerability(
377406
sink.cfg_node)]
378407
nodes_in_constaint.append(source.cfg_node)
379408

380-
sink_args = get_sink_args(sink.cfg_node)
409+
if sink.trigger.all_arguments_propagate_taint:
410+
sink_args = get_sink_args(sink.cfg_node)
411+
else:
412+
sink_args = get_sink_args_which_propagate(sink, sink.cfg_node.ast_node)
413+
381414
tainted_node_in_sink_arg = get_tainted_node_in_sink_args(
382415
sink_args,
383-
nodes_in_constaint
416+
nodes_in_constaint,
384417
)
385418

386419
if tainted_node_in_sink_arg:

pyt/vulnerabilities/vulnerability_helper.py

+10-4
Original file line numberDiff line numberDiff line change
@@ -164,16 +164,22 @@ def __str__(self):
164164
class TriggerNode():
165165
def __init__(
166166
self,
167-
trigger_word,
168-
sanitisers,
167+
trigger,
169168
cfg_node,
170169
secondary_nodes=[]
171170
):
172-
self.trigger_word = trigger_word
173-
self.sanitisers = sanitisers
171+
self.trigger = trigger
174172
self.cfg_node = cfg_node
175173
self.secondary_nodes = secondary_nodes
176174

175+
@property
176+
def trigger_word(self):
177+
return self.trigger.trigger_word
178+
179+
@property
180+
def sanitisers(self):
181+
return self.trigger.sanitisers if hasattr(self.trigger, 'sanitisers') else []
182+
177183
def append(self, cfg_node):
178184
if not cfg_node == self.cfg_node:
179185
if self.secondary_nodes and cfg_node not in self.secondary_nodes:

0 commit comments

Comments
 (0)