Skip to content

Commit 639b29b

Browse files
[INLINER] allow inlining of blockaddresses if sole uses are callbrs
Summary: It was supposed that Ref LazyCallGraph::Edge's were being inserted by inlining, but that doesn't seem to be the case. Instead, it seems that there was no test for a blockaddress Constant in an instruction that referenced the function that contained the instruction. Ex: ``` define void @f() { %1 = alloca i8*, align 8 2: store i8* blockaddress(@f, %2), i8** %1, align 8 ret void } ``` When iterating blockaddresses, do not add the function they refer to back to the worklist if the blockaddress is referring to the contained function (as opposed to an external function). Because blockaddress has sligtly different semantics than GNU C's address of labels, there are 3 cases that can occur with blockaddress, where only 1 can happen in GNU C due to C's scoping rules: * blockaddress is within the function it refers to (possible in GNU C). * blockaddress is within a different function than the one it refers to (not possible in GNU C). * blockaddress is used in to declare a global (not possible in GNU C). The second case is tested in: ``` $ ./llvm/build/unittests/Analysis/AnalysisTests \ --gtest_filter=LazyCallGraphTest.HandleBlockAddress ``` This patch adjusts the iteration of blockaddresses in LazyCallGraph::visitReferences to not revisit the blockaddresses function in the first case. The Linux kernel contains code that's not semantically valid at -O0; specifically code passed to asm goto. It requires that asm goto be inline-able. This patch conservatively does not attempt to handle the more general case of inlining blockaddresses that have non-callbr users (pr/39560). https://bugs.llvm.org/show_bug.cgi?id=39560 https://bugs.llvm.org/show_bug.cgi?id=40722 ClangBuiltLinux/linux#6 https://reviews.llvm.org/rL212077 Reviewers: jyknight, eli.friedman, chandlerc Reviewed By: chandlerc Subscribers: george.burgess.iv, nathanchance, mgorny, craig.topper, mengxu.gatech, void, mehdi_amini, E5ten, chandlerc, efriedma, eraman, hiraditya, haicheng, pirama, llvm-commits, srhines Tags: #llvm Differential Revision: https://reviews.llvm.org/D58260 llvm-svn: 361173
1 parent 4d05a97 commit 639b29b

File tree

5 files changed

+199
-15
lines changed

5 files changed

+199
-15
lines changed

llvm/include/llvm/Analysis/LazyCallGraph.h

+20-5
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#include "llvm/ADT/DenseMap.h"
3939
#include "llvm/ADT/Optional.h"
4040
#include "llvm/ADT/PointerIntPair.h"
41+
#include "llvm/ADT/STLExtras.h"
4142
#include "llvm/ADT/SetVector.h"
4243
#include "llvm/ADT/SmallPtrSet.h"
4344
#include "llvm/ADT/SmallVector.h"
@@ -1082,12 +1083,26 @@ class LazyCallGraph {
10821083
continue;
10831084
}
10841085

1086+
// The blockaddress constant expression is a weird special case, we can't
1087+
// generically walk its operands the way we do for all other constants.
10851088
if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
1086-
// The blockaddress constant expression is a weird special case, we
1087-
// can't generically walk its operands the way we do for all other
1088-
// constants.
1089-
if (Visited.insert(BA->getFunction()).second)
1090-
Worklist.push_back(BA->getFunction());
1089+
// If we've already visited the function referred to by the block
1090+
// address, we don't need to revisit it.
1091+
if (Visited.count(BA->getFunction()))
1092+
continue;
1093+
1094+
// If all of the blockaddress' users are instructions within the
1095+
// referred to function, we don't need to insert a cycle.
1096+
if (llvm::all_of(BA->users(), [&](User *U) {
1097+
if (Instruction *I = dyn_cast<Instruction>(U))
1098+
return I->getFunction() == BA->getFunction();
1099+
return false;
1100+
}))
1101+
continue;
1102+
1103+
// Otherwise we should go visit the referred to function.
1104+
Visited.insert(BA->getFunction());
1105+
Worklist.push_back(BA->getFunction());
10911106
continue;
10921107
}
10931108

llvm/lib/Analysis/InlineCost.cpp

+17-10
Original file line numberDiff line numberDiff line change
@@ -1830,14 +1830,18 @@ InlineResult CallAnalyzer::analyzeCall(CallBase &Call) {
18301830
if (BB->empty())
18311831
continue;
18321832

1833-
// Disallow inlining a blockaddress. A blockaddress only has defined
1834-
// behavior for an indirect branch in the same function, and we do not
1835-
// currently support inlining indirect branches. But, the inliner may not
1836-
// see an indirect branch that ends up being dead code at a particular call
1837-
// site. If the blockaddress escapes the function, e.g., via a global
1838-
// variable, inlining may lead to an invalid cross-function reference.
1833+
// Disallow inlining a blockaddress with uses other than strictly callbr.
1834+
// A blockaddress only has defined behavior for an indirect branch in the
1835+
// same function, and we do not currently support inlining indirect
1836+
// branches. But, the inliner may not see an indirect branch that ends up
1837+
// being dead code at a particular call site. If the blockaddress escapes
1838+
// the function, e.g., via a global variable, inlining may lead to an
1839+
// invalid cross-function reference.
1840+
// FIXME: pr/39560: continue relaxing this overt restriction.
18391841
if (BB->hasAddressTaken())
1840-
return "blockaddress";
1842+
for (User *U : BlockAddress::get(&*BB)->users())
1843+
if (!isa<CallBrInst>(*U))
1844+
return "blockaddress used outside of callbr";
18411845

18421846
// Analyze the cost of this block. If we blow through the threshold, this
18431847
// returns false, and we can bail on out.
@@ -2081,13 +2085,16 @@ InlineCost llvm::getInlineCost(
20812085
InlineResult llvm::isInlineViable(Function &F) {
20822086
bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
20832087
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
2084-
// Disallow inlining of functions which contain indirect branches or
2085-
// blockaddresses.
2088+
// Disallow inlining of functions which contain indirect branches.
20862089
if (isa<IndirectBrInst>(BI->getTerminator()))
20872090
return "contains indirect branches";
20882091

2092+
// Disallow inlining of blockaddresses which are used by non-callbr
2093+
// instructions.
20892094
if (BI->hasAddressTaken())
2090-
return "uses block address";
2095+
for (User *U : BlockAddress::get(&*BI)->users())
2096+
if (!isa<CallBrInst>(*U))
2097+
return "blockaddress used outside of callbr";
20912098

20922099
for (auto &II : *BI) {
20932100
CallBase *Call = dyn_cast<CallBase>(&II);

llvm/test/Transforms/Inline/blockaddress.ll

+79
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,82 @@ bb:
4949
}
5050

5151
@run.bb = global [1 x i8*] zeroinitializer
52+
53+
; Check that a function referenced by a global blockaddress wont be inlined,
54+
; even if it contains a callbr. We might be able to relax this in the future
55+
; as long as the global blockaddress is updated correctly.
56+
@ba = internal global i8* blockaddress(@foo, %7), align 8
57+
define internal i32 @foo(i32) {
58+
%2 = alloca i32, align 4
59+
%3 = alloca i32, align 4
60+
store i32 %0, i32* %3, align 4
61+
%4 = load i32, i32* %3, align 4
62+
callbr void asm sideeffect "testl $0, $0; jne ${1:l};", "r,X,X,~{dirflag},~{fpsr},~{flags}"(i32 %4, i8* blockaddress(@foo, %7), i8* blockaddress(@foo, %6)) #1
63+
to label %5 [label %7, label %6]
64+
65+
; <label>:5: ; preds = %1
66+
store i32 0, i32* %2, align 4
67+
br label %8
68+
69+
; <label>:6: ; preds = %1
70+
store i32 1, i32* %2, align 4
71+
br label %8
72+
73+
; <label>:7: ; preds = %1
74+
store i32 2, i32* %2, align 4
75+
br label %8
76+
77+
; <label>:8: ; preds = %7, %6, %5
78+
%9 = load i32, i32* %2, align 4
79+
ret i32 %9
80+
}
81+
define dso_local i32 @bar() {
82+
%1 = call i32 @foo(i32 0)
83+
ret i32 %1
84+
}
85+
86+
; CHECK: define dso_local i32 @bar() {
87+
; CHECK: %1 = call i32 @foo(i32 0)
88+
; CHECK: ret i32 %1
89+
; CHECK: }
90+
91+
; Triple check that even with a global aggregate whose member is a blockaddress,
92+
; we still don't inline referred to functions.
93+
94+
%struct.foo = type { i8* }
95+
96+
@my_foo = dso_local global %struct.foo { i8* blockaddress(@baz, %7) }
97+
98+
define internal i32 @baz(i32) {
99+
%2 = alloca i32, align 4
100+
%3 = alloca i32, align 4
101+
store i32 %0, i32* %3, align 4
102+
%4 = load i32, i32* %3, align 4
103+
callbr void asm sideeffect "testl $0, $0; jne ${1:l};", "r,X,X,~{dirflag},~{fpsr},~{flags}"(i32 %4, i8* blockaddress(@baz, %7), i8* blockaddress(@baz, %6)) #1
104+
to label %5 [label %7, label %6]
105+
106+
; <label>:5: ; preds = %1
107+
store i32 0, i32* %2, align 4
108+
br label %8
109+
110+
; <label>:6: ; preds = %1
111+
store i32 1, i32* %2, align 4
112+
br label %8
113+
114+
; <label>:7: ; preds = %1
115+
store i32 2, i32* %2, align 4
116+
br label %8
117+
118+
; <label>:8: ; preds = %7, %6, %5
119+
%9 = load i32, i32* %2, align 4
120+
ret i32 %9
121+
}
122+
define dso_local i32 @quux() {
123+
%1 = call i32 @baz(i32 0)
124+
ret i32 %1
125+
}
126+
127+
; CHECK: define dso_local i32 @quux() {
128+
; CHECK: %1 = call i32 @baz(i32 0)
129+
; CHECK: ret i32 %1
130+
; CHECK: }

llvm/test/Transforms/Inline/callbr.ll

+54
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
; RUN: opt -inline -S < %s | FileCheck %s
2+
; RUN: opt -passes='cgscc(inline)' -S < %s | FileCheck %s
3+
4+
define dso_local i32 @main() #0 {
5+
%1 = alloca i32, align 4
6+
store i32 0, i32* %1, align 4
7+
%2 = call i32 @t32(i32 0)
8+
ret i32 %2
9+
}
10+
11+
define internal i32 @t32(i32) #0 {
12+
%2 = alloca i32, align 4
13+
%3 = alloca i32, align 4
14+
store i32 %0, i32* %3, align 4
15+
%4 = load i32, i32* %3, align 4
16+
callbr void asm sideeffect "testl $0, $0; jne ${1:l};", "r,X,X,~{dirflag},~{fpsr},~{flags}"(i32 %4, i8* blockaddress(@t32, %7), i8* blockaddress(@t32, %6)) #1
17+
to label %5 [label %7, label %6]
18+
19+
; <label>:5: ; preds = %1
20+
store i32 0, i32* %2, align 4
21+
br label %8
22+
23+
; <label>:6: ; preds = %1
24+
store i32 1, i32* %2, align 4
25+
br label %8
26+
27+
; <label>:7: ; preds = %1
28+
store i32 2, i32* %2, align 4
29+
br label %8
30+
31+
; <label>:8: ; preds = %7, %6, %5
32+
%9 = load i32, i32* %2, align 4
33+
ret i32 %9
34+
}
35+
36+
; Check that @t32 no longer exists after inlining, as it has now been inlined
37+
; into @main.
38+
39+
; CHECK-NOT: @t32
40+
; CHECK: define dso_local i32 @main
41+
; CHECK: callbr void asm sideeffect "testl $0, $0; jne ${1:l};", "r,X,X,~{dirflag},~{fpsr},~{flags}"(i32 %6, i8* blockaddress(@main, %9), i8* blockaddress(@main, %8))
42+
; CHECK: to label %7 [label %9, label %8]
43+
; CHECK: 7:
44+
; CHECK-NEXT: store i32 0, i32* %1, align 4
45+
; CHECK-NEXT: br label %t32.exit
46+
; CHECK: 8:
47+
; CHECK-NEXT: store i32 1, i32* %1, align 4
48+
; CHECK-NEXT: br label %t32.exit
49+
; CHECK: 9:
50+
; CHECK-NEXT: store i32 2, i32* %1, align 4
51+
; CHECK-NEXT: br label %t32.exit
52+
; CHECK: t32.exit:
53+
; CHECK-NEXT: %10 = load i32, i32* %1, align 4
54+
; CHECK: ret i32 %10

llvm/unittests/Analysis/LazyCallGraphTest.cpp

+29
Original file line numberDiff line numberDiff line change
@@ -1977,6 +1977,35 @@ TEST(LazyCallGraphTest, HandleBlockAddress) {
19771977
EXPECT_TRUE(GRC.isParentOf(FRC));
19781978
}
19791979

1980+
// Test that a blockaddress that refers to itself creates no new RefSCC
1981+
// connections. https://bugs.llvm.org/show_bug.cgi?id=40722
1982+
TEST(LazyCallGraphTest, HandleBlockAddress2) {
1983+
LLVMContext Context;
1984+
std::unique_ptr<Module> M =
1985+
parseAssembly(Context, "define void @f() {\n"
1986+
" ret void\n"
1987+
"}\n"
1988+
"define void @g(i8** %ptr) {\n"
1989+
"bb:\n"
1990+
" store i8* blockaddress(@g, %bb), i8** %ptr\n"
1991+
" ret void\n"
1992+
"}\n");
1993+
LazyCallGraph CG = buildCG(*M);
1994+
1995+
CG.buildRefSCCs();
1996+
auto I = CG.postorder_ref_scc_begin();
1997+
LazyCallGraph::RefSCC &GRC = *I++;
1998+
LazyCallGraph::RefSCC &FRC = *I++;
1999+
EXPECT_EQ(CG.postorder_ref_scc_end(), I);
2000+
2001+
LazyCallGraph::Node &F = *CG.lookup(lookupFunction(*M, "f"));
2002+
LazyCallGraph::Node &G = *CG.lookup(lookupFunction(*M, "g"));
2003+
EXPECT_EQ(&FRC, CG.lookupRefSCC(F));
2004+
EXPECT_EQ(&GRC, CG.lookupRefSCC(G));
2005+
EXPECT_FALSE(GRC.isParentOf(FRC));
2006+
EXPECT_FALSE(FRC.isParentOf(GRC));
2007+
}
2008+
19802009
TEST(LazyCallGraphTest, ReplaceNodeFunction) {
19812010
LLVMContext Context;
19822011
// A graph with several different kinds of edges pointing at a particular

0 commit comments

Comments
 (0)