Skip to content

Commit df1a74a

Browse files
committed
[IR] Support importing modules with invalid data layouts.
Use the existing mechanism to change the data layout using callbacks. Before this patch, we had a callback type DataLayoutCallbackTy that receives a single StringRef specifying the target triple, and optionally returns the data layout string to be used. Module loaders (both IR and BC) then apply the callback to potentially override the module's data layout, after first having imported and parsed the data layout from the file. We can't do the same to fix invalid data layouts, because the import will already have failed before the callback has a chance to fix it. Instead, module loaders now tentatively parse the data layout into a string, wait until the target triple has been parsed, apply the override callback to the imported string, and only then parse the tentative string as a data layout. Moreover, add the old data layout string S as the second argument to the callback, in addition to the already existing target triple argument. S is either the default data layout string in case none is specified, or the data layout string specified in the module, possibly after auto-upgrades (for the BitcodeReader). This allows callbacks to inspect the old data layout string and fix it, instead of setting a fixed data layout. Also allow data layout override callbacks to be passed to the lazy bitcode module loader functions. Differential Revision: https://reviews.llvm.org/D140985
1 parent 0285656 commit df1a74a

File tree

16 files changed

+167
-79
lines changed

16 files changed

+167
-79
lines changed

llvm/include/llvm/AsmParser/LLParser.h

+6-4
Original file line numberDiff line numberDiff line change
@@ -179,8 +179,10 @@ namespace llvm {
179179
Lex(F, SM, Err, Context), M(M), Index(Index), Slots(Slots),
180180
BlockAddressPFS(nullptr) {}
181181
bool Run(
182-
bool UpgradeDebugInfo, DataLayoutCallbackTy DataLayoutCallback =
183-
[](StringRef) { return std::nullopt; });
182+
bool UpgradeDebugInfo,
183+
DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) {
184+
return std::nullopt;
185+
});
184186

185187
bool parseStandaloneConstantValue(Constant *&C, const SlotMapping *Slots);
186188

@@ -318,8 +320,8 @@ namespace llvm {
318320
bool parseTopLevelEntities();
319321
bool validateEndOfModule(bool UpgradeDebugInfo);
320322
bool validateEndOfIndex();
321-
bool parseTargetDefinitions();
322-
bool parseTargetDefinition();
323+
bool parseTargetDefinitions(DataLayoutCallbackTy DataLayoutCallback);
324+
bool parseTargetDefinition(std::string &TentativeDLStr, LocTy &DLStrLoc);
323325
bool parseModuleAsm();
324326
bool parseSourceFileName();
325327
bool parseUnnamedType();

llvm/include/llvm/AsmParser/Parser.h

+4-4
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ struct SlotMapping;
2929
class SMDiagnostic;
3030
class Type;
3131

32-
typedef llvm::function_ref<std::optional<std::string>(StringRef)>
32+
typedef llvm::function_ref<std::optional<std::string>(StringRef, StringRef)>
3333
DataLayoutCallbackTy;
3434

3535
/// This function is a main interface to the LLVM Assembly Parser. It parses
@@ -86,7 +86,7 @@ struct ParsedModuleAndIndex {
8686
ParsedModuleAndIndex parseAssemblyFileWithIndex(
8787
StringRef Filename, SMDiagnostic &Err, LLVMContext &Context,
8888
SlotMapping *Slots = nullptr,
89-
DataLayoutCallbackTy DataLayoutCallback = [](StringRef) {
89+
DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) {
9090
return std::nullopt;
9191
});
9292

@@ -127,7 +127,7 @@ parseSummaryIndexAssemblyString(StringRef AsmString, SMDiagnostic &Err);
127127
std::unique_ptr<Module> parseAssembly(
128128
MemoryBufferRef F, SMDiagnostic &Err, LLVMContext &Context,
129129
SlotMapping *Slots = nullptr,
130-
DataLayoutCallbackTy DataLayoutCallback = [](StringRef) {
130+
DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) {
131131
return std::nullopt;
132132
});
133133

@@ -169,7 +169,7 @@ parseSummaryIndexAssembly(MemoryBufferRef F, SMDiagnostic &Err);
169169
bool parseAssemblyInto(
170170
MemoryBufferRef F, Module *M, ModuleSummaryIndex *Index, SMDiagnostic &Err,
171171
SlotMapping *Slots = nullptr,
172-
DataLayoutCallbackTy DataLayoutCallback = [](StringRef) {
172+
DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) {
173173
return std::nullopt;
174174
});
175175

llvm/include/llvm/Bitcode/BitcodeReader.h

+21-11
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,11 @@ class Module;
3434
class MemoryBuffer;
3535
class ModuleSummaryIndex;
3636

37-
typedef llvm::function_ref<std::optional<std::string>(StringRef)>
37+
// Callback to override the data layout string of an imported bitcode module.
38+
// The first argument is the target triple, the second argument the data layout
39+
// string from the input, or a default string. That string is used if the callback
40+
// returns std::nullopt.
41+
typedef llvm::function_ref<std::optional<std::string>(StringRef, StringRef)>
3842
DataLayoutCallbackTy;
3943

4044
// These functions are for converting Expected/Error values to
@@ -101,14 +105,18 @@ typedef llvm::function_ref<std::optional<std::string>(StringRef)>
101105
/// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well.
102106
/// If IsImporting is true, this module is being parsed for ThinLTO
103107
/// importing into another module.
104-
Expected<std::unique_ptr<Module>> getLazyModule(LLVMContext &Context,
105-
bool ShouldLazyLoadMetadata,
106-
bool IsImporting);
108+
Expected<std::unique_ptr<Module>> getLazyModule(
109+
LLVMContext &Context, bool ShouldLazyLoadMetadata, bool IsImporting,
110+
DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) {
111+
return std::nullopt;
112+
});
107113

108114
/// Read the entire bitcode module and return it.
109115
Expected<std::unique_ptr<Module>> parseModule(
110-
LLVMContext &Context, DataLayoutCallbackTy DataLayoutCallback =
111-
[](StringRef) { return std::nullopt; });
116+
LLVMContext &Context,
117+
DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) {
118+
return std::nullopt;
119+
});
112120

113121
/// Returns information about the module to be used for LTO: whether to
114122
/// compile with ThinLTO, and whether it has a summary.
@@ -145,10 +153,12 @@ typedef llvm::function_ref<std::optional<std::string>(StringRef)>
145153
/// deserialization of function bodies. If ShouldLazyLoadMetadata is true,
146154
/// lazily load metadata as well. If IsImporting is true, this module is
147155
/// being parsed for ThinLTO importing into another module.
148-
Expected<std::unique_ptr<Module>>
149-
getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context,
150-
bool ShouldLazyLoadMetadata = false,
151-
bool IsImporting = false);
156+
Expected<std::unique_ptr<Module>> getLazyBitcodeModule(
157+
MemoryBufferRef Buffer, LLVMContext &Context,
158+
bool ShouldLazyLoadMetadata = false, bool IsImporting = false,
159+
DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) {
160+
return std::nullopt;
161+
});
152162

153163
/// Like getLazyBitcodeModule, except that the module takes ownership of
154164
/// the memory buffer if successful. If successful, this moves Buffer. On
@@ -175,7 +185,7 @@ typedef llvm::function_ref<std::optional<std::string>(StringRef)>
175185
/// Read the specified bitcode file, returning the module.
176186
Expected<std::unique_ptr<Module>> parseBitcodeFile(
177187
MemoryBufferRef Buffer, LLVMContext &Context,
178-
DataLayoutCallbackTy DataLayoutCallback = [](StringRef) {
188+
DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) {
179189
return std::nullopt;
180190
});
181191

llvm/include/llvm/CodeGen/MIRParser/MIRParser.h

+3-4
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ class MachineModuleInfo;
3434
class SMDiagnostic;
3535
class StringRef;
3636

37-
typedef llvm::function_ref<std::optional<std::string>(StringRef)>
37+
typedef llvm::function_ref<std::optional<std::string>(StringRef, StringRef)>
3838
DataLayoutCallbackTy;
3939

4040
/// This class initializes machine functions by applying the state loaded from
@@ -52,9 +52,8 @@ class MIRParser {
5252
/// A new, empty module is created if the LLVM IR isn't present.
5353
/// \returns nullptr if a parsing error occurred.
5454
std::unique_ptr<Module>
55-
parseIRModule(DataLayoutCallbackTy DataLayoutCallback = [](StringRef) {
56-
return std::nullopt;
57-
});
55+
parseIRModule(DataLayoutCallbackTy DataLayoutCallback =
56+
[](StringRef, StringRef) { return std::nullopt; });
5857

5958
/// Parses MachineFunctions in the MIR file and add them to the given
6059
/// MachineModuleInfo \p MMI.

llvm/include/llvm/IRReader/IRReader.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ class Module;
2727
class SMDiagnostic;
2828
class LLVMContext;
2929

30-
typedef llvm::function_ref<std::optional<std::string>(StringRef)>
30+
typedef llvm::function_ref<std::optional<std::string>(StringRef, StringRef)>
3131
DataLayoutCallbackTy;
3232

3333
/// If the given MemoryBuffer holds a bitcode image, return a Module
@@ -55,7 +55,7 @@ getLazyIRFileModule(StringRef Filename, SMDiagnostic &Err, LLVMContext &Context,
5555
/// \param DataLayoutCallback Override datalayout in the llvm assembly.
5656
std::unique_ptr<Module> parseIR(
5757
MemoryBufferRef Buffer, SMDiagnostic &Err, LLVMContext &Context,
58-
DataLayoutCallbackTy DataLayoutCallback = [](StringRef) {
58+
DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) {
5959
return std::nullopt;
6060
});
6161

@@ -65,7 +65,7 @@ std::unique_ptr<Module> parseIR(
6565
/// \param DataLayoutCallback Override datalayout in the llvm assembly.
6666
std::unique_ptr<Module> parseIRFile(
6767
StringRef Filename, SMDiagnostic &Err, LLVMContext &Context,
68-
DataLayoutCallbackTy DataLayoutCallback = [](StringRef) {
68+
DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) {
6969
return std::nullopt;
7070
});
7171
}

llvm/lib/AsmParser/LLParser.cpp

+29-15
Original file line numberDiff line numberDiff line change
@@ -95,11 +95,8 @@ bool LLParser::Run(bool UpgradeDebugInfo,
9595
"Can't read textual IR with a Context that discards named Values");
9696

9797
if (M) {
98-
if (parseTargetDefinitions())
98+
if (parseTargetDefinitions(DataLayoutCallback))
9999
return true;
100-
101-
if (auto LayoutOverride = DataLayoutCallback(M->getTargetTriple()))
102-
M->setDataLayout(*LayoutOverride);
103100
}
104101

105102
return parseTopLevelEntities() || validateEndOfModule(UpgradeDebugInfo) ||
@@ -353,21 +350,41 @@ bool LLParser::validateEndOfIndex() {
353350
// Top-Level Entities
354351
//===----------------------------------------------------------------------===//
355352

356-
bool LLParser::parseTargetDefinitions() {
357-
while (true) {
353+
bool LLParser::parseTargetDefinitions(DataLayoutCallbackTy DataLayoutCallback) {
354+
// Delay parsing of the data layout string until the target triple is known.
355+
// Then, pass both the target triple and the tentative data layout string
356+
// to DataLayoutCallback, allowing it to override the DL string.
357+
// This enables importing modules with invalid DL strings.
358+
std::string TentativeDLStr = M->getDataLayoutStr();
359+
LocTy DLStrLoc;
360+
361+
bool Done = false;
362+
while (!Done) {
358363
switch (Lex.getKind()) {
359364
case lltok::kw_target:
360-
if (parseTargetDefinition())
365+
if (parseTargetDefinition(TentativeDLStr, DLStrLoc))
361366
return true;
362367
break;
363368
case lltok::kw_source_filename:
364369
if (parseSourceFileName())
365370
return true;
366371
break;
367372
default:
368-
return false;
373+
Done = true;
369374
}
370375
}
376+
// Run the override callback to potentially change the data layout string, and
377+
// parse the data layout string.
378+
if (auto LayoutOverride =
379+
DataLayoutCallback(M->getTargetTriple(), TentativeDLStr)) {
380+
TentativeDLStr = *LayoutOverride;
381+
DLStrLoc = {};
382+
}
383+
Expected<DataLayout> MaybeDL = DataLayout::parse(TentativeDLStr);
384+
if (!MaybeDL)
385+
return error(DLStrLoc, toString(MaybeDL.takeError()));
386+
M->setDataLayout(MaybeDL.get());
387+
return false;
371388
}
372389

373390
bool LLParser::parseTopLevelEntities() {
@@ -471,7 +488,8 @@ bool LLParser::parseModuleAsm() {
471488
/// toplevelentity
472489
/// ::= 'target' 'triple' '=' STRINGCONSTANT
473490
/// ::= 'target' 'datalayout' '=' STRINGCONSTANT
474-
bool LLParser::parseTargetDefinition() {
491+
bool LLParser::parseTargetDefinition(std::string &TentativeDLStr,
492+
LocTy &DLStrLoc) {
475493
assert(Lex.getKind() == lltok::kw_target);
476494
std::string Str;
477495
switch (Lex.Lex()) {
@@ -488,13 +506,9 @@ bool LLParser::parseTargetDefinition() {
488506
Lex.Lex();
489507
if (parseToken(lltok::equal, "expected '=' after target datalayout"))
490508
return true;
491-
LocTy Loc = Lex.getLoc();
492-
if (parseStringConstant(Str))
509+
DLStrLoc = Lex.getLoc();
510+
if (parseStringConstant(TentativeDLStr))
493511
return true;
494-
Expected<DataLayout> MaybeDL = DataLayout::parse(Str);
495-
if (!MaybeDL)
496-
return error(Loc, toString(MaybeDL.takeError()));
497-
M->setDataLayout(MaybeDL.get());
498512
return false;
499513
}
500514
}

llvm/lib/AsmParser/Parser.cpp

+5-4
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,10 @@ ParsedModuleAndIndex llvm::parseAssemblyWithIndex(MemoryBufferRef F,
9191
SMDiagnostic &Err,
9292
LLVMContext &Context,
9393
SlotMapping *Slots) {
94-
return ::parseAssemblyWithIndex(F, Err, Context, Slots,
95-
/*UpgradeDebugInfo*/ true,
96-
[](StringRef) { return std::nullopt; });
94+
return ::parseAssemblyWithIndex(
95+
F, Err, Context, Slots,
96+
/*UpgradeDebugInfo*/ true,
97+
[](StringRef, StringRef) { return std::nullopt; });
9798
}
9899

99100
static ParsedModuleAndIndex
@@ -150,7 +151,7 @@ static bool parseSummaryIndexAssemblyInto(MemoryBufferRef F,
150151
// index, but we need to initialize it.
151152
LLVMContext unusedContext;
152153
return LLParser(F.getBuffer(), SM, Err, nullptr, &Index, unusedContext)
153-
.Run(true, [](StringRef) { return std::nullopt; });
154+
.Run(true, [](StringRef, StringRef) { return std::nullopt; });
154155
}
155156

156157
std::unique_ptr<ModuleSummaryIndex>

0 commit comments

Comments
 (0)