Skip to content

Commit 9fa20f5

Browse files
authored
Merge pull request #14799 from MathiasVP/solve-modify-copy-problem
DataFlow: Add language-specific predicate for ignoring steps in flow-through calculation
2 parents ff8b796 + db0d203 commit 9fa20f5

File tree

10 files changed

+182
-12
lines changed

10 files changed

+182
-12
lines changed

cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImplSpecific.qll

+2
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,6 @@ module CppDataFlow implements InputSig {
2020
Node exprNode(DataFlowExpr e) { result = Public::exprNode(e) }
2121

2222
predicate getAdditionalFlowIntoCallNodeTerm = Private::getAdditionalFlowIntoCallNodeTerm/2;
23+
24+
predicate validParameterAliasStep = Private::validParameterAliasStep/2;
2325
}

cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll

+52
Original file line numberDiff line numberDiff line change
@@ -1149,3 +1149,55 @@ private int countNumberOfBranchesUsingParameter(SwitchInstruction switch, Parame
11491149
)
11501150
)
11511151
}
1152+
1153+
/**
1154+
* Holds if the data-flow step from `node1` to `node2` can be used to
1155+
* determine where side-effects may return from a callable.
1156+
* For C/C++, this means that the step from `node1` to `node2` not only
1157+
* preserves the value, but also preserves the identity of the value.
1158+
* For example, the assignment to `x` that reads the value of `*p` in
1159+
* ```cpp
1160+
* int* p = ...
1161+
* int x = *p;
1162+
* ```
1163+
* does not preserve the identity of `*p`.
1164+
*/
1165+
bindingset[node1, node2]
1166+
pragma[inline_late]
1167+
predicate validParameterAliasStep(Node node1, Node node2) {
1168+
// When flow-through summaries are computed we track which parameters flow to out-going parameters.
1169+
// In an example such as:
1170+
// ```
1171+
// modify(int* px) { *px = source(); }
1172+
// void modify_copy(int* p) {
1173+
// int x = *p;
1174+
// modify(&x);
1175+
// }
1176+
// ```
1177+
// since dataflow tracks each indirection as a separate SSA variable dataflow
1178+
// sees the above roughly as
1179+
// ```
1180+
// modify(int* px, int deref_px) { deref_px = source(); }
1181+
// void modify_copy(int* p, int deref_p) {
1182+
// int x = deref_p;
1183+
// modify(&x, x);
1184+
// }
1185+
// ```
1186+
// and when dataflow computes flow from a parameter to a post-update node to
1187+
// conclude which parameters are "updated" by the call to `modify_copy` it
1188+
// finds flow from `x [post update]` to `deref_p [post update]`.
1189+
// To prevent this we exclude steps that don't preserve identity. We do this
1190+
// by excluding flow from the right-hand side of `StoreInstruction`s to the
1191+
// `StoreInstruction`. This is sufficient because, for flow-through summaries,
1192+
// we're only interested in indirect parameters such as `deref_p` in the
1193+
// exampe above (i.e., the parameters with a non-zero indirection index), and
1194+
// if that ever flows to the right-hand side of a `StoreInstruction` then
1195+
// there must have been a dereference to reduce its indirection index down to
1196+
// 0.
1197+
not exists(Operand operand |
1198+
node1.asOperand() = operand and
1199+
node2.asInstruction().(StoreInstruction).getSourceValueOperand() = operand
1200+
)
1201+
// TODO: Also block flow through models that don't preserve identity such
1202+
// as `strdup`.
1203+
}

cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-consistency.expected

+12
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ uniquePostUpdate
2323
postIsInSameCallable
2424
reverseRead
2525
argHasPostUpdate
26+
| flowOut.cpp:55:14:55:16 | * ... | ArgumentNode is missing PostUpdateNode. |
2627
| lambdas.cpp:18:7:18:7 | a | ArgumentNode is missing PostUpdateNode. |
2728
| lambdas.cpp:25:2:25:2 | b | ArgumentNode is missing PostUpdateNode. |
2829
| lambdas.cpp:32:2:32:2 | c | ArgumentNode is missing PostUpdateNode. |
@@ -51,7 +52,18 @@ postWithInFlow
5152
| example.c:28:23:28:25 | pos [inner post update] | PostUpdateNode should not be the target of local flow. |
5253
| flowOut.cpp:5:5:5:12 | * ... [post update] | PostUpdateNode should not be the target of local flow. |
5354
| flowOut.cpp:5:6:5:12 | toTaint [inner post update] | PostUpdateNode should not be the target of local flow. |
55+
| flowOut.cpp:8:5:8:12 | * ... [post update] | PostUpdateNode should not be the target of local flow. |
56+
| flowOut.cpp:8:6:8:12 | toTaint [inner post update] | PostUpdateNode should not be the target of local flow. |
5457
| flowOut.cpp:18:17:18:17 | x [inner post update] | PostUpdateNode should not be the target of local flow. |
58+
| flowOut.cpp:30:12:30:12 | x [inner post update] | PostUpdateNode should not be the target of local flow. |
59+
| flowOut.cpp:37:5:37:6 | p2 [inner post update] | PostUpdateNode should not be the target of local flow. |
60+
| flowOut.cpp:37:5:37:9 | access to array [post update] | PostUpdateNode should not be the target of local flow. |
61+
| flowOut.cpp:84:3:84:7 | call to deref [inner post update] | PostUpdateNode should not be the target of local flow. |
62+
| flowOut.cpp:84:3:84:14 | access to array [post update] | PostUpdateNode should not be the target of local flow. |
63+
| flowOut.cpp:84:10:84:10 | p [inner post update] | PostUpdateNode should not be the target of local flow. |
64+
| flowOut.cpp:90:3:90:4 | * ... [post update] | PostUpdateNode should not be the target of local flow. |
65+
| flowOut.cpp:90:4:90:4 | q [inner post update] | PostUpdateNode should not be the target of local flow. |
66+
| flowOut.cpp:101:14:101:14 | p [inner post update] | PostUpdateNode should not be the target of local flow. |
5567
| globals.cpp:13:5:13:19 | flowTestGlobal1 [post update] | PostUpdateNode should not be the target of local flow. |
5668
| globals.cpp:23:5:23:19 | flowTestGlobal2 [post update] | PostUpdateNode should not be the target of local flow. |
5769
| lambdas.cpp:23:3:23:14 | v [post update] | PostUpdateNode should not be the target of local flow. |

cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-ir-consistency.expected

+2
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,15 @@ compatibleTypesReflexive
1212
unreachableNodeCCtx
1313
localCallNodes
1414
postIsNotPre
15+
| flowOut.cpp:84:3:84:14 | access to array indirection | PostUpdateNode should not equal its pre-update node. |
1516
postHasUniquePre
1617
uniquePostUpdate
1718
| example.c:24:13:24:18 | coords indirection | Node has multiple PostUpdateNodes. |
1819
postIsInSameCallable
1920
reverseRead
2021
argHasPostUpdate
2122
postWithInFlow
23+
| flowOut.cpp:84:3:84:14 | access to array indirection | PostUpdateNode should not be the target of local flow. |
2224
| test.cpp:384:10:384:13 | memcpy output argument | PostUpdateNode should not be the target of local flow. |
2325
| test.cpp:391:10:391:13 | memcpy output argument | PostUpdateNode should not be the target of local flow. |
2426
| test.cpp:400:10:400:13 | memcpy output argument | PostUpdateNode should not be the target of local flow. |
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,103 @@
1-
int source();
2-
void sink(int);
1+
int source(); char source(bool);
2+
void sink(int); void sink(char);
33

44
void source_ref(int *toTaint) { // $ ir-def=*toTaint ast-def=toTaint
55
*toTaint = source();
66
}
7-
8-
9-
7+
void source_ref(char *toTaint) { // $ ir-def=*toTaint ast-def=toTaint
8+
*toTaint = source();
9+
}
1010
void modify_copy(int* ptr) { // $ ast-def=ptr
1111
int deref = *ptr;
1212
int* other = &deref;
1313
source_ref(other);
1414
}
1515

16-
void test_output() {
16+
void test_output_copy() {
1717
int x = 0;
1818
modify_copy(&x);
19-
sink(x); // $ SPURIOUS: ir
20-
}
19+
sink(x); // clean
20+
}
21+
22+
void modify(int* ptr) { // $ ast-def=ptr
23+
int* deref = ptr;
24+
int* other = &*deref;
25+
source_ref(other);
26+
}
27+
28+
void test_output() {
29+
int x = 0;
30+
modify(&x);
31+
sink(x); // $ ir MISSING: ast
32+
}
33+
34+
void modify_copy_of_pointer(int* p, unsigned len) { // $ ast-def=p
35+
int* p2 = new int[len];
36+
for(unsigned i = 0; i < len; ++i) {
37+
p2[i] = p[i];
38+
}
39+
40+
source_ref(p2);
41+
}
42+
43+
void test_modify_copy_of_pointer() {
44+
int x[10];
45+
modify_copy_of_pointer(x, 10);
46+
sink(x[0]); // $ SPURIOUS: ast // clean
47+
}
48+
49+
void modify_pointer(int* p, unsigned len) { // $ ast-def=p
50+
int** p2 = &p;
51+
for(unsigned i = 0; i < len; ++i) {
52+
*p2[i] = p[i];
53+
}
54+
55+
source_ref(*p2);
56+
}
57+
58+
void test_modify_of_pointer() {
59+
int x[10];
60+
modify_pointer(x, 10);
61+
sink(x[0]); // $ ast,ir
62+
}
63+
64+
char* strdup(const char* p);
65+
66+
void modify_copy_via_strdup(char* p) { // $ ast-def=p
67+
char* p2 = strdup(p);
68+
source_ref(p2);
69+
}
70+
71+
void test_modify_copy_via_strdup(char* p) { // $ ast-def=p
72+
modify_copy_via_strdup(p);
73+
sink(*p); // $ SPURIOUS: ir
74+
}
75+
76+
int* deref(int** p) { // $ ast-def=p
77+
int* q = *p;
78+
return q;
79+
}
80+
81+
void test1() {
82+
int x = 0;
83+
int* p = &x;
84+
deref(&p)[0] = source();
85+
sink(*p); // $ ir MISSING: ast
86+
}
87+
88+
89+
void addtaint1(int* q) { // $ ast-def=q ir-def=*q
90+
*q = source();
91+
}
92+
93+
void addtaint2(int** p) { // $ ast-def=p
94+
int* q = *p;
95+
addtaint1(q);
96+
}
97+
98+
void test2() {
99+
int x = 0;
100+
int* p = &x;
101+
addtaint2(&p);
102+
sink(*p); // $ ir MISSING: ast
103+
}
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
WARNING: Module DataFlow has been deprecated and may be removed in future (has-parameter-flow-out.ql:5,18-61)
2-
failures
32
testFailures
3+
failures

cpp/ql/test/library-tests/dataflow/dataflow-tests/test-source-sink.expected

+7-1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ astFlow
2828
| dispatch.cpp:10:37:10:42 | call to source | dispatch.cpp:44:15:44:24 | call to notSource2 |
2929
| dispatch.cpp:37:19:37:24 | call to source | dispatch.cpp:11:38:11:38 | x |
3030
| dispatch.cpp:45:18:45:23 | call to source | dispatch.cpp:11:38:11:38 | x |
31+
| flowOut.cpp:44:7:44:7 | x | flowOut.cpp:46:8:46:11 | access to array |
32+
| flowOut.cpp:59:7:59:7 | x | flowOut.cpp:61:8:61:11 | access to array |
3133
| globals.cpp:5:17:5:22 | call to source | globals.cpp:6:10:6:14 | local |
3234
| lambdas.cpp:8:10:8:15 | call to source | lambdas.cpp:14:3:14:6 | t |
3335
| lambdas.cpp:8:10:8:15 | call to source | lambdas.cpp:18:8:18:8 | call to operator() |
@@ -170,7 +172,11 @@ irFlow
170172
| dispatch.cpp:107:17:107:22 | call to source | dispatch.cpp:96:8:96:8 | x |
171173
| dispatch.cpp:140:8:140:13 | call to source | dispatch.cpp:96:8:96:8 | x |
172174
| dispatch.cpp:144:8:144:13 | call to source | dispatch.cpp:96:8:96:8 | x |
173-
| flowOut.cpp:5:16:5:21 | call to source | flowOut.cpp:19:9:19:9 | x |
175+
| flowOut.cpp:5:16:5:21 | call to source | flowOut.cpp:31:9:31:9 | x |
176+
| flowOut.cpp:5:16:5:21 | call to source | flowOut.cpp:61:8:61:11 | access to array |
177+
| flowOut.cpp:8:16:8:23 | call to source | flowOut.cpp:73:8:73:9 | * ... |
178+
| flowOut.cpp:84:18:84:23 | call to source | flowOut.cpp:85:8:85:9 | * ... |
179+
| flowOut.cpp:90:8:90:13 | call to source | flowOut.cpp:102:8:102:9 | * ... |
174180
| globals.cpp:5:17:5:22 | call to source | globals.cpp:6:10:6:14 | local |
175181
| globals.cpp:13:23:13:28 | call to source | globals.cpp:12:10:12:24 | flowTestGlobal1 |
176182
| globals.cpp:23:23:23:28 | call to source | globals.cpp:19:10:19:24 | flowTestGlobal2 |

cpp/ql/test/library-tests/dataflow/dataflow-tests/uninitialized.expected

+4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
| flowOut.cpp:44:7:44:7 | x | flowOut.cpp:45:26:45:26 | x |
2+
| flowOut.cpp:44:7:44:7 | x | flowOut.cpp:46:8:46:8 | x |
3+
| flowOut.cpp:59:7:59:7 | x | flowOut.cpp:60:18:60:18 | x |
4+
| flowOut.cpp:59:7:59:7 | x | flowOut.cpp:61:8:61:8 | x |
15
| ref.cpp:53:9:53:10 | x1 | ref.cpp:55:19:55:20 | x1 |
26
| ref.cpp:53:9:53:10 | x1 | ref.cpp:56:10:56:11 | x1 |
37
| ref.cpp:53:13:53:14 | x2 | ref.cpp:58:15:58:16 | x2 |

shared/dataflow/codeql/dataflow/DataFlow.qll

+7
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,13 @@ signature module InputSig {
170170

171171
predicate simpleLocalFlowStep(Node node1, Node node2);
172172

173+
/**
174+
* Holds if the data-flow step from `node1` to `node2` can be used to
175+
* determine where side-effects may return from a callable.
176+
*/
177+
bindingset[node1, node2]
178+
default predicate validParameterAliasStep(Node node1, Node node2) { any() }
179+
173180
/**
174181
* Holds if data can flow from `node1` to `node2` through a non-local step
175182
* that does not follow a call edge. For example, a step through a global

shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll

+4-2
Original file line numberDiff line numberDiff line change
@@ -551,7 +551,8 @@ module MakeImplCommon<InputSig Lang> {
551551
// local flow
552552
exists(Node mid |
553553
parameterValueFlowCand(p, mid, read) and
554-
simpleLocalFlowStep(mid, node)
554+
simpleLocalFlowStep(mid, node) and
555+
validParameterAliasStep(mid, node)
555556
)
556557
or
557558
// read
@@ -670,7 +671,8 @@ module MakeImplCommon<InputSig Lang> {
670671
// local flow
671672
exists(Node mid |
672673
parameterValueFlow(p, mid, read) and
673-
simpleLocalFlowStep(mid, node)
674+
simpleLocalFlowStep(mid, node) and
675+
validParameterAliasStep(mid, node)
674676
)
675677
or
676678
// read

0 commit comments

Comments
 (0)