Skip to content

Commit cf8b959

Browse files
committed
BitcodeReader: Fix some BlockAddress forward reference corner cases
`BlockAddress`es are interesting in that they can reference basic blocks from *outside* the block's function. Since basic blocks are not global values, this presents particular challenges for lazy parsing. One corner case was found in PR11677 and fixed in r147425. In that case, a global variable references a block address. It's necessary to load the relevant function to resolve the forward reference before doing anything with the module. By inspection, I found (and have fixed here) two other cases: - An instruction from one function references a block address from another function, and only the first function is lazily loaded. I fixed this the same way as PR11677: by eagerly loading the referenced function. - A function whose block address is taken is dematerialized, leaving invalid references to it. I fixed this by refusing to dematerialize functions whose block addresses are taken (if you have to load it, you can't unload it). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@214559 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent fc15e8a commit cf8b959

File tree

4 files changed

+134
-9
lines changed

4 files changed

+134
-9
lines changed

include/llvm/Bitcode/ReaderWriter.h

+1
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ namespace llvm {
160160
InvalidMultipleBlocks, // We found multiple blocks of a kind that should
161161
// have only one
162162
NeverResolvedValueFoundInFunction,
163+
NeverResolvedFunctionFromBlockAddress,
163164
InvalidValue // Invalid version, inst number, attr number, etc
164165
};
165166
inline std::error_code make_error_code(BitcodeError E) {

lib/Bitcode/Reader/BitcodeReader.cpp

+53-6
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,31 @@ enum {
3131
SWITCH_INST_MAGIC = 0x4B5 // May 2012 => 1205 => Hex
3232
};
3333

34-
void BitcodeReader::materializeForwardReferencedFunctions() {
34+
std::error_code BitcodeReader::materializeForwardReferencedFunctions() {
35+
if (WillMaterializeAllForwardRefs)
36+
return std::error_code();
37+
38+
// Prevent recursion.
39+
WillMaterializeAllForwardRefs = true;
40+
3541
while (!BlockAddrFwdRefs.empty()) {
3642
Function *F = BlockAddrFwdRefs.begin()->first;
37-
F->Materialize();
43+
assert(F && "Expected valid function");
44+
// Check for a function that isn't materializable to prevent an infinite
45+
// loop. When parsing a blockaddress stored in a global variable, there
46+
// isn't a trivial way to check if a function will have a body without a
47+
// linear search through FunctionsWithBodies, so just check it here.
48+
if (!F->isMaterializable())
49+
return Error(BitcodeError::NeverResolvedFunctionFromBlockAddress);
50+
51+
// Try to materialize F.
52+
if (std::error_code EC = Materialize(F))
53+
return EC;
3854
}
55+
56+
// Reset state.
57+
WillMaterializeAllForwardRefs = false;
58+
return std::error_code();
3959
}
4060

4161
void BitcodeReader::FreeState() {
@@ -1587,6 +1607,9 @@ std::error_code BitcodeReader::ParseConstants() {
15871607
if (!Fn)
15881608
return Error(BitcodeError::InvalidRecord);
15891609

1610+
// Don't let Fn get dematerialized.
1611+
BlockAddressesTaken.insert(Fn);
1612+
15901613
// If the function is already parsed we can insert the block address right
15911614
// away.
15921615
if (!Fn->empty()) {
@@ -3274,13 +3297,21 @@ std::error_code BitcodeReader::Materialize(GlobalValue *GV) {
32743297
}
32753298
}
32763299

3277-
return std::error_code();
3300+
// Bring in any functions that this function forward-referenced via
3301+
// blockaddresses.
3302+
return materializeForwardReferencedFunctions();
32783303
}
32793304

32803305
bool BitcodeReader::isDematerializable(const GlobalValue *GV) const {
32813306
const Function *F = dyn_cast<Function>(GV);
32823307
if (!F || F->isDeclaration())
32833308
return false;
3309+
3310+
// Dematerializing F would leave dangling references that wouldn't be
3311+
// reconnected on re-materialization.
3312+
if (BlockAddressesTaken.count(F))
3313+
return false;
3314+
32843315
return DeferredFunctionInfo.count(const_cast<Function*>(F));
32853316
}
32863317

@@ -3299,6 +3330,10 @@ void BitcodeReader::Dematerialize(GlobalValue *GV) {
32993330
std::error_code BitcodeReader::MaterializeModule(Module *M) {
33003331
assert(M == TheModule &&
33013332
"Can only Materialize the Module this BitcodeReader is attached to.");
3333+
3334+
// Promise to materialize all forward references.
3335+
WillMaterializeAllForwardRefs = true;
3336+
33023337
// Iterate over the module, deserializing any functions that are still on
33033338
// disk.
33043339
for (Module::iterator F = TheModule->begin(), E = TheModule->end();
@@ -3314,6 +3349,11 @@ std::error_code BitcodeReader::MaterializeModule(Module *M) {
33143349
if (NextUnreadBit)
33153350
ParseModule(true);
33163351

3352+
// Check that all block address forward references got resolved (as we
3353+
// promised above).
3354+
if (!BlockAddrFwdRefs.empty())
3355+
return Error(BitcodeError::NeverResolvedFunctionFromBlockAddress);
3356+
33173357
// Upgrade any intrinsic calls that slipped through (should not happen!) and
33183358
// delete the old functions to clean up. We can't do this unless the entire
33193359
// module is materialized because there could always be another function body
@@ -3431,6 +3471,8 @@ class BitcodeErrorCategoryType : public std::error_category {
34313471
return "Invalid multiple blocks";
34323472
case BitcodeError::NeverResolvedValueFoundInFunction:
34333473
return "Never resolved value found in function";
3474+
case BitcodeError::NeverResolvedFunctionFromBlockAddress:
3475+
return "Never resolved function from blockaddress";
34343476
case BitcodeError::InvalidValue:
34353477
return "Invalid value";
34363478
}
@@ -3455,13 +3497,18 @@ ErrorOr<Module *> llvm::getLazyBitcodeModule(MemoryBuffer *Buffer,
34553497
Module *M = new Module(Buffer->getBufferIdentifier(), Context);
34563498
BitcodeReader *R = new BitcodeReader(Buffer, Context);
34573499
M->setMaterializer(R);
3458-
if (std::error_code EC = R->ParseBitcodeInto(M)) {
3500+
3501+
auto cleanupOnError = [&](std::error_code EC) {
34593502
R->releaseBuffer(); // Never take ownership on error.
34603503
delete M; // Also deletes R.
34613504
return EC;
3462-
}
3505+
};
3506+
3507+
if (std::error_code EC = R->ParseBitcodeInto(M))
3508+
return cleanupOnError(EC);
34633509

3464-
R->materializeForwardReferencedFunctions();
3510+
if (std::error_code EC = R->materializeForwardReferencedFunctions())
3511+
return cleanupOnError(EC);
34653512

34663513
return M;
34673514
}

lib/Bitcode/Reader/BitcodeReader.h

+12-3
Original file line numberDiff line numberDiff line change
@@ -193,20 +193,29 @@ class BitcodeReader : public GVMaterializer {
193193
/// not need this flag.
194194
bool UseRelativeIDs;
195195

196+
/// True if all functions will be materialized, negating the need to process
197+
/// (e.g.) blockaddress forward references.
198+
bool WillMaterializeAllForwardRefs;
199+
200+
/// Functions that have block addresses taken. This is usually empty.
201+
SmallPtrSet<const Function *, 4> BlockAddressesTaken;
202+
196203
public:
197204
std::error_code Error(BitcodeError E) { return make_error_code(E); }
198205

199206
explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C)
200207
: Context(C), TheModule(nullptr), Buffer(buffer), LazyStreamer(nullptr),
201208
NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C),
202-
MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false) {}
209+
MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false),
210+
WillMaterializeAllForwardRefs(false) {}
203211
explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C)
204212
: Context(C), TheModule(nullptr), Buffer(nullptr), LazyStreamer(streamer),
205213
NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C),
206-
MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false) {}
214+
MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false),
215+
WillMaterializeAllForwardRefs(false) {}
207216
~BitcodeReader() { FreeState(); }
208217

209-
void materializeForwardReferencedFunctions();
218+
std::error_code materializeForwardReferencedFunctions();
210219

211220
void FreeState();
212221

unittests/Bitcode/BitReaderTest.cpp

+68
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,74 @@ TEST(BitReaderTest, MaterializeFunctionsForBlockAddr) { // PR11677
7070
" unreachable\n"
7171
"}\n");
7272
EXPECT_FALSE(verifyModule(*M, &dbgs()));
73+
74+
// Try (and fail) to dematerialize @func.
75+
M->getFunction("func")->Dematerialize();
76+
EXPECT_FALSE(M->getFunction("func")->empty());
77+
}
78+
79+
TEST(BitReaderTest, MaterializeFunctionsForBlockAddrInFunctionBefore) {
80+
SmallString<1024> Mem;
81+
82+
LLVMContext Context;
83+
std::unique_ptr<Module> M = getLazyModuleFromAssembly(
84+
Context, Mem, "define i8* @before() {\n"
85+
" ret i8* blockaddress(@func, %bb)\n"
86+
"}\n"
87+
"define void @other() {\n"
88+
" unreachable\n"
89+
"}\n"
90+
"define void @func() {\n"
91+
" unreachable\n"
92+
"bb:\n"
93+
" unreachable\n"
94+
"}\n");
95+
EXPECT_TRUE(M->getFunction("before")->empty());
96+
EXPECT_TRUE(M->getFunction("func")->empty());
97+
EXPECT_FALSE(verifyModule(*M, &dbgs()));
98+
99+
// Materialize @before, pulling in @func.
100+
EXPECT_FALSE(M->getFunction("before")->Materialize());
101+
EXPECT_FALSE(M->getFunction("func")->empty());
102+
EXPECT_TRUE(M->getFunction("other")->empty());
103+
EXPECT_FALSE(verifyModule(*M, &dbgs()));
104+
105+
// Try (and fail) to dematerialize @func.
106+
M->getFunction("func")->Dematerialize();
107+
EXPECT_FALSE(M->getFunction("func")->empty());
108+
EXPECT_FALSE(verifyModule(*M, &dbgs()));
109+
}
110+
111+
TEST(BitReaderTest, MaterializeFunctionsForBlockAddrInFunctionAfter) {
112+
SmallString<1024> Mem;
113+
114+
LLVMContext Context;
115+
std::unique_ptr<Module> M = getLazyModuleFromAssembly(
116+
Context, Mem, "define void @func() {\n"
117+
" unreachable\n"
118+
"bb:\n"
119+
" unreachable\n"
120+
"}\n"
121+
"define void @other() {\n"
122+
" unreachable\n"
123+
"}\n"
124+
"define i8* @after() {\n"
125+
" ret i8* blockaddress(@func, %bb)\n"
126+
"}\n");
127+
EXPECT_TRUE(M->getFunction("after")->empty());
128+
EXPECT_TRUE(M->getFunction("func")->empty());
129+
EXPECT_FALSE(verifyModule(*M, &dbgs()));
130+
131+
// Materialize @after, pulling in @func.
132+
EXPECT_FALSE(M->getFunction("after")->Materialize());
133+
EXPECT_FALSE(M->getFunction("func")->empty());
134+
EXPECT_TRUE(M->getFunction("other")->empty());
135+
EXPECT_FALSE(verifyModule(*M, &dbgs()));
136+
137+
// Try (and fail) to dematerialize @func.
138+
M->getFunction("func")->Dematerialize();
139+
EXPECT_FALSE(M->getFunction("func")->empty());
140+
EXPECT_FALSE(verifyModule(*M, &dbgs()));
73141
}
74142

75143
} // end namespace

0 commit comments

Comments
 (0)