-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[analyzer] Model overflow builtins #102602
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-static-analyzer-1 Author: Pavel Skripkin (pskrgag) ChangesAdd basic support for These helps a lot for checking custom calloc-like functions with inlinable body. Without such support code like #include <stddef.h>
#include <stdlib.h>
static void *myMalloc(size_t a1, size_t a2)
{
size_t res;
if (__builtin_smul_overflow(a1, a2, &res))
return NULL;
return malloc(res);
}
void test(void)
{
char *ptr = myMalloc(10, 1);
ptr[20] = 10;
} does not trigger any warnings. Full diff: https://github.com/llvm/llvm-project/pull/102602.diff 3 Files Affected:
diff --git a/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp
index b198b1c2ff4d11..0c8b9fa3d947b0 100644
--- a/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp
@@ -21,16 +21,67 @@
#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
using namespace clang;
using namespace ento;
namespace {
+QualType getOverflowBuiltinResultType(const CallEvent &Call) {
+ assert(Call.getNumArgs() == 3);
+
+ return Call.getArgExpr(2)->getType()->getPointeeType();
+}
+
+QualType getOverflowBuiltinResultType(const CallEvent &Call, CheckerContext &C,
+ unsigned BI) {
+ assert(Call.getNumArgs() == 3);
+
+ ASTContext &Ast = C.getASTContext();
+
+ switch (BI) {
+ case Builtin::BI__builtin_smul_overflow:
+ case Builtin::BI__builtin_ssub_overflow:
+ case Builtin::BI__builtin_sadd_overflow:
+ return Ast.IntTy;
+ case Builtin::BI__builtin_smull_overflow:
+ case Builtin::BI__builtin_ssubl_overflow:
+ case Builtin::BI__builtin_saddl_overflow:
+ return Ast.LongTy;
+ case Builtin::BI__builtin_smulll_overflow:
+ case Builtin::BI__builtin_ssubll_overflow:
+ case Builtin::BI__builtin_saddll_overflow:
+ return Ast.LongLongTy;
+ case Builtin::BI__builtin_umul_overflow:
+ case Builtin::BI__builtin_usub_overflow:
+ case Builtin::BI__builtin_uadd_overflow:
+ return Ast.UnsignedIntTy;
+ case Builtin::BI__builtin_umull_overflow:
+ case Builtin::BI__builtin_usubl_overflow:
+ case Builtin::BI__builtin_uaddl_overflow:
+ return Ast.UnsignedLongTy;
+ case Builtin::BI__builtin_umulll_overflow:
+ case Builtin::BI__builtin_usubll_overflow:
+ case Builtin::BI__builtin_uaddll_overflow:
+ return Ast.UnsignedLongLongTy;
+ case Builtin::BI__builtin_mul_overflow:
+ case Builtin::BI__builtin_sub_overflow:
+ case Builtin::BI__builtin_add_overflow:
+ return getOverflowBuiltinResultType(Call);
+ default:
+ assert(false && "Unknown overflow builtin");
+ }
+}
+
class BuiltinFunctionChecker : public Checker<eval::Call> {
public:
bool evalCall(const CallEvent &Call, CheckerContext &C) const;
+ void HandleOverflowBuiltin(const CallEvent &Call, CheckerContext &C,
+ BinaryOperator::Opcode Op,
+ QualType ResultType) const;
private:
// From: clang/include/clang/Basic/Builtins.def
@@ -50,6 +101,44 @@ class BuiltinFunctionChecker : public Checker<eval::Call> {
} // namespace
+void BuiltinFunctionChecker::HandleOverflowBuiltin(const CallEvent &Call,
+ CheckerContext &C,
+ BinaryOperator::Opcode Op,
+ QualType ResultType) const {
+ // All __builtin_*_overflow functions take 3 argumets.
+ assert(Call.getNumArgs() == 3);
+
+ ProgramStateRef State = C.getState();
+ SValBuilder &SVB = C.getSValBuilder();
+ const Expr *CE = Call.getOriginExpr();
+
+ SVal Arg1 = Call.getArgSVal(0);
+ SVal Arg2 = Call.getArgSVal(1);
+
+ SVal RetVal = SVB.evalBinOp(State, Op, Arg1, Arg2, ResultType);
+
+ // TODO: Handle overflows with values that known to overflow. Like INT_MAX + 1
+ // should not produce state for non-overflow case and threat it as
+ // unreachable.
+
+ // Handle non-overflow case.
+ {
+ ProgramStateRef StateSuccess =
+ State->BindExpr(CE, C.getLocationContext(), SVB.makeTruthVal(false));
+
+ if (auto L = Call.getArgSVal(2).getAs<Loc>())
+ StateSuccess = StateSuccess->bindLoc(*L, RetVal, C.getLocationContext());
+
+ C.addTransition(StateSuccess);
+ }
+
+ // Handle overflow case.
+ {
+ C.addTransition(
+ State->BindExpr(CE, C.getLocationContext(), SVB.makeTruthVal(true)));
+ }
+}
+
bool BuiltinFunctionChecker::isBuiltinLikeFunction(
const CallEvent &Call) const {
const auto *FD = llvm::dyn_cast_or_null<FunctionDecl>(Call.getDecl());
@@ -82,10 +171,41 @@ bool BuiltinFunctionChecker::evalCall(const CallEvent &Call,
return true;
}
- switch (FD->getBuiltinID()) {
+ unsigned BI = FD->getBuiltinID();
+
+ switch (BI) {
default:
return false;
-
+ case Builtin::BI__builtin_mul_overflow:
+ case Builtin::BI__builtin_smul_overflow:
+ case Builtin::BI__builtin_smull_overflow:
+ case Builtin::BI__builtin_smulll_overflow:
+ case Builtin::BI__builtin_umul_overflow:
+ case Builtin::BI__builtin_umull_overflow:
+ case Builtin::BI__builtin_umulll_overflow:
+ HandleOverflowBuiltin(Call, C, BO_Mul,
+ getOverflowBuiltinResultType(Call, C, BI));
+ return true;
+ case Builtin::BI__builtin_sub_overflow:
+ case Builtin::BI__builtin_ssub_overflow:
+ case Builtin::BI__builtin_ssubl_overflow:
+ case Builtin::BI__builtin_ssubll_overflow:
+ case Builtin::BI__builtin_usub_overflow:
+ case Builtin::BI__builtin_usubl_overflow:
+ case Builtin::BI__builtin_usubll_overflow:
+ HandleOverflowBuiltin(Call, C, BO_Sub,
+ getOverflowBuiltinResultType(Call, C, BI));
+ return true;
+ case Builtin::BI__builtin_add_overflow:
+ case Builtin::BI__builtin_sadd_overflow:
+ case Builtin::BI__builtin_saddl_overflow:
+ case Builtin::BI__builtin_saddll_overflow:
+ case Builtin::BI__builtin_uadd_overflow:
+ case Builtin::BI__builtin_uaddl_overflow:
+ case Builtin::BI__builtin_uaddll_overflow:
+ HandleOverflowBuiltin(Call, C, BO_Add,
+ getOverflowBuiltinResultType(Call, C, BI));
+ return true;
case Builtin::BI__builtin_assume:
case Builtin::BI__assume: {
assert (Call.getNumArgs() > 0);
diff --git a/clang/test/Analysis/builtin_overflow.c b/clang/test/Analysis/builtin_overflow.c
new file mode 100644
index 00000000000000..3b8639aa450046
--- /dev/null
+++ b/clang/test/Analysis/builtin_overflow.c
@@ -0,0 +1,65 @@
+// RUN: %clang_analyze_cc1 -triple x86_64-unknown-unknown -verify %s \
+// RUN: -analyzer-checker=core,debug.ExprInspection
+
+#define NULL ((void *)0)
+#define INT_MAX __INT_MAX__
+
+void clang_analyzer_dump_int(int);
+
+void test1(void)
+{
+ int res;
+
+ if (__builtin_add_overflow(10, 20, &res)) {
+ clang_analyzer_dump_int(res); //expected-warning{{1st function call argument is an uninitialized value}}
+ return;
+ }
+
+ clang_analyzer_dump_int(res); //expected-warning{{30}}
+}
+
+void test2(void)
+{
+ int res;
+
+ __builtin_add_overflow(10, 20, &res);
+ clang_analyzer_dump_int(res); //expected-warning{{1st function call argument is an uninitialized value}} expected-warning{{S32b}}
+}
+
+void test3(void)
+{
+ int res;
+
+ if (__builtin_sub_overflow(10, 20, &res)) {
+ clang_analyzer_dump_int(res); //expected-warning{{1st function call argument is an uninitialized value}}
+ return;
+ }
+
+ clang_analyzer_dump_int(res); //expected-warning{{-10}}
+}
+
+void test4(void)
+{
+ int res;
+
+ if (__builtin_sub_overflow(10, 20, &res)) {
+ clang_analyzer_dump_int(res); //expected-warning{{1st function call argument is an uninitialized value}}
+ return;
+ }
+
+ if (res != -10) {
+ *(volatile char *)NULL; //no warning
+ }
+}
+
+void test5(void)
+{
+ int res;
+
+ if (__builtin_mul_overflow(10, 20, &res)) {
+ clang_analyzer_dump_int(res); //expected-warning{{1st function call argument is an uninitialized value}}
+ return;
+ }
+
+ clang_analyzer_dump_int(res); //expected-warning{{200}}
+}
diff --git a/clang/test/Analysis/out-of-bounds-diagnostics.c b/clang/test/Analysis/out-of-bounds-diagnostics.c
index de70e483add1c0..73b56e7a8ba4db 100644
--- a/clang/test/Analysis/out-of-bounds-diagnostics.c
+++ b/clang/test/Analysis/out-of-bounds-diagnostics.c
@@ -278,6 +278,23 @@ int *mallocRegion(void) {
return mem;
}
+int *custom_calloc(size_t a, size_t b) {
+ size_t res;
+ if (__builtin_mul_overflow(a, b, &res))
+ return 0;
+
+ return malloc(res);
+}
+
+int *mallocRegionOverflow(void) {
+ int *mem = (int*)custom_calloc(4, 10);
+
+ mem[20] = 10;
+ // expected-warning@-1 {{Out of bound access to memory after the end of the heap area}}
+ // expected-note@-2 {{Access of the heap area at index 20, while it holds only 10 'int' elements}}
+ return mem;
+}
+
int *mallocRegionDeref(void) {
int *mem = (int*)malloc(2*sizeof(int));
|
SVal RetVal = SVB.evalBinOp(State, Op, Arg1, Arg2, ResultType); | ||
|
||
// TODO: Handle overflows with values that known to overflow. Like INT_MAX + 1 | ||
// should not produce state for non-overflow case and threat it as |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I didn't find a way to check this. We could do old-style thing like res < 0 ? lhs > rhs : lhs < rhs
, but I guess, SVB
should take care of it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I thought about this for some time and checked the implementation of evalBinOp
(more precisely SValBuilder:evalBinOpNN
, the case that's relevant here), but unfortunately I don't see a clear solution.
You are right that SVB
should take care of this, but unfortunately that does not happen currently.
Currently the analyzer (and SValBuilder
in particular) has its own rigid opinion about overflow handling: it allows overflow on all numeric operations (including signed ones, where it's undefined behavior according to the standard) and models it without splitting the state, so e.g. if x
and y
are signed char
values within the range 50 <= x, y <= 100
, then it calculates that the possible values of x+y
are
It would be good to enhance this behavior by tagging the ranges within the RangeSet
with "did overflow happen?" bits that would let us perform a state split. This is theoretically doable (I don't see anything that would block this), but would be a complex refactoring effort (we'd need to pass along this "did we overflow?" bit through all the spaghetti code).
Without this refactoring, you can implement a workaround that calls SValBuilder::getMinValue()
and SValBuilder::getMaxValue()
to get the minimal and maximal values of the operands (as llvm::APSInt
values, that is, arbitrary precision signed integers) and then uses these to determine which of the two branches (overflow, no-overflow) are possible.
This workaround solution would be significantly better than the current "always act as if both branches were possible", but it is not sufficient for deducing the set of possible results on the no-overflow branch. (That is, if x
and y
are signed char
values within the range 50 <= x, y <= 100
, then this workaround would create two branches: one where __builtin_add_overflow
returns true, and and one where it returns false
and stores a symbol with range
(I know that this is a very complicated area, feel free to ask me if you need help with it!)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
evalBinOp models wrapping semantics - even for signed arithmetic (where it would be UB).
We don't have a similar evaluation model (operating on symbolic values, aka. SVals) which operates with mathematical (non-wrapping) semantics. This is unfortunate.
Initially I wanted to recommend SVB.getKnownValue(State, Val)
, but I believe it's not too likely that one would use malloc
with two concrete int params. So this wouldn't really enable many use-cases.
I can agree with @NagyDonat, about using getMinValue
and friends. That would enable us using the constraints we have in the State, and unlock symbolic cases.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So.... I've pushed what I came up with for handling overflow, but during test writing I found smth I don't understand. I've decided to push current state, since it's easier to show code than describe it =)
My current problem is following code:
if (a > 10)
return;
if (b > 10)
return;
// clang_analyzer_eval(a + b < 30); <--- Prints 1 and 0, but why ???
For some reason constraints do not work as expected. And because of that my overflow checker splits state where it shouldn't.... I'd really appreciate tips
My approach to check is to promote result type to x2 width, perform operation on it and then check if it overflows real result type. I've done 2 evalBinOp
, since I didn't find a way to cast between 2 results.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
clang_analyzer_eval(a + b < 30); <--- Prints 1 and 0, but why ???
Assuming that a
and b
are signed integers, they can be very negative, and then their sum can be a positive value above 30 (after an underflow). This means that both boolean values are possible for the expression a + b < 30
, and the analyzer represents this by printing both 1 and 0.
(If I understand this correctly, we get two definite numbers instead of one range because the on-by-default eagerlyAssume
mode causes a state split when it sees the comparison operator in a + b < 30
, despite the fact that this is not in a conditional expression.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Assuming that a and b are signed integers
Oh, sorry, I forgot to mention that a
and b
are unsigned. I've showed this in test_uadd_overflow_contraints
test in this pr
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I did some digging in RangedConstraintManager
and it seems that algebraic expressions that involve two symbolic values are handled as black boxes by the analyzer: in your example it creates a SymSymExpr
that is known to be the sum of a
and b
, but we don't have any logic which would constrain the potential values of this SymSymExpr
based on the constraints known about the operands. (The analyzer can propagate information in the easier case when it knows the exact value of one of the operands.)
I don't know why do we have this shameful limitation... I'd guess that implementing the necessary RangeSet
operation wouldn't be prohibitively difficult, but there may be performance implications, and perhaps well-constrained (but not exactly known) values are not common enough to make this a valuable investment.
(It's also possible that we do have this logic, but it's tucked away somewhere else where I didn't find it; although your experimental results show that this is unlikely.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I thought the ConstraintAssignor does the decomposition of the SymSymExpr and tracks the potential value range. It's probably somewhat limited though.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for adding support for these functions! The code LGTM overall, except for one mostly theoretical issue (about the use of assert
) which I described in an inline comment.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks pretty good!
However, for this to land, we need to try harder to prevent state-splits.
We must prove in more cases (at least in simple symbolic cases) that no overflow can happen.
But in principle, I like that we do a split - as the user expects this to overflow sometimes - given the use of the builtin.
Speaking of that, do we even propagate taint across such builtin calls? I think it would make sense to taint the result if any of the inputs are tainted. I'd expect such builtins used around user inputs, and critical code - so probably it worth propagating taint.
SVal RetVal = SVB.evalBinOp(State, Op, Arg1, Arg2, ResultType); | ||
|
||
// TODO: Handle overflows with values that known to overflow. Like INT_MAX + 1 | ||
// should not produce state for non-overflow case and threat it as |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
evalBinOp models wrapping semantics - even for signed arithmetic (where it would be UB).
We don't have a similar evaluation model (operating on symbolic values, aka. SVals) which operates with mathematical (non-wrapping) semantics. This is unfortunate.
Initially I wanted to recommend SVB.getKnownValue(State, Val)
, but I believe it's not too likely that one would use malloc
with two concrete int params. So this wouldn't really enable many use-cases.
I can agree with @NagyDonat, about using getMinValue
and friends. That would enable us using the constraints we have in the State, and unlock symbolic cases.
✅ With the latest revision this PR passed the C/C++ code formatter. |
Try using the Github merge workflow to avoid doing force-pushes. Those are destructive for inline comments done for the PR. On force-push, GH can't follow to which line it should migrate the existing inline comments, thus drops them. You should just do a fetch main, merge main, push branch (without force). |
Sorry again for fp =(. I am getting used to rebase+fp workflow, since I've never worked with gh flow. I am trying to switch my brain while developing llvm, but sometimes I mess up. |
Could you, please, check the lattest vestion, if you have time? |
I'm on vacation for some time now. |
Oh, sorry for ping then. PR is not urgent at all. Have a fun vacation! =) |
gentle ping |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks really good. I only had some notes w.r.t. note tags.
|
||
if (a != 10) | ||
return; | ||
if (b != 10) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Such constraints we call "perfect constraints" as they narrow down the value domain to a single value. After this, the engine will fold all uses of this into a concreteInt, eliminating the symbolic value.
Consequently, perfect constraints are different to common constraints. They should have separate tests.
That said, two symbolic values with half side constraints are missing. x>=(int max -2, y>= 10 for instance.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've tried to add smth like this, but it fails even for more simple constraint like if (a > 10 || b > 10) return
; for unsigned case. See #102602 (comment).
Hello! Is there anything else that stops this PR? Thanks! |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I re-reviewed the commit and added two very minor remarks, otherwise LGTM.
Co-authored-by: Donát Nagy <[email protected]>
gentle ping |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks fantastic! I have no functional remarks. I only sprinkled it with some nits, but other than those it's perfect.
It's already good, I'll let you decide if you want to do the final touches or not as those may be more subjective suggestions.
SVal RetVal = SVB.evalBinOp(State, Op, Arg1, Arg2, ResultType); | ||
|
||
// TODO: Handle overflows with values that known to overflow. Like INT_MAX + 1 | ||
// should not produce state for non-overflow case and threat it as |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I thought the ConstraintAssignor does the decomposition of the SymSymExpr and tracks the potential value range. It's probably somewhat limited though.
{ | ||
int res; | ||
|
||
if (__builtin_add_overflow(a, b, &res)) // expected-note {{Assuming overflow does not happen}} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe it's just me, but I find this phrasing a bit weird.
To me it would read a lot more natural if it was Assuming overflow
or Assuming no overflow
. Maybe suffixed with ... happens
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree that the phrasing suggested by @steakhal would be better, but I think that the original is also acceptable.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed wording. I definitely like your suggestion
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also LGTM, sorry for not responding earlier. I agree with the minor suggestions of @steakhal .
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Still LGTM
Ah, this dangling reference to rvalue... I do remember why I bind |
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/11/builds/6049 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/18/builds/4864 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/73/builds/6544 Here is the relevant piece of the build log for the reference
|
FYI this PR likely caused a crash, reported in #111147. |
Add basic support for
builtin_*_overflow
primitives.These helps a lot for checking custom calloc-like functions with inlinable body. Without such support code like
does not trigger any warnings.