Skip to content

Commit bd24fe8

Browse files
committed
Bitcode: Serialize (and recover) use-list order
Predict and serialize use-list order in bitcode. This makes the option `-preserve-bc-use-list-order` work *most* of the time, but this is still experimental.

- Builds a full value-table up front in the writer, sets up a list of use-list orders to write out, and discards the table. This is a simpler first step than determining the order from the various overlapping IDs of values on-the-fly.
- The shuffles stored in the use-list order list have an unnecessarily large memory footprint.
- `blockaddress` expressions cause functions to be materialized out-of-order. For now I've ignored this problem, so use-list orders will be wrong for constants used by functions that have block addresses taken. There are a couple of ways to fix this, but I don't have a concrete plan yet.
- When materializing functions lazily, the use-lists for constants will not be correct. This use case is out of scope: what should the use-list order be, if it's incomplete?

This is part of PR5680.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@214125 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 0aed6e7 commit bd24fe8

File tree

9 files changed

+288
-104
lines changed

9 files changed

+288
-104
lines changed

include/llvm/Bitcode/LLVMBitCodes.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,8 @@ namespace bitc {
330330
};
331331

332332
enum UseListCodes {
333-
USELIST_CODE_ENTRY = 1 // USELIST_CODE_ENTRY: TBD.
333+
USELIST_CODE_DEFAULT = 1, // DEFAULT: [index..., value-id]
334+
USELIST_CODE_BB = 2 // BB: [index..., bb-id]
334335
};
335336

336337
enum AttributeKindCodes {

include/llvm/IR/UseListOrder.h

+13
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,23 @@
1616
#define LLVM_IR_USELISTORDER_H
1717

1818
#include "llvm/ADT/ArrayRef.h"
19+
#include "llvm/ADT/SmallVector.h"
20+
#include <vector>
1921

2022
namespace llvm {
2123

2224
class Module;
25+
class Function;
26+
class Value;
27+
28+
/// \brief Structure to hold a use-list order.
///
/// The bitcode writer in this change emits one use-list record per instance:
/// the indexes in Shuffle, in order, followed by the ID of V.
29+
struct UseListOrder {
30+
/// Function this order is grouped under. The writer compares it against the
/// function whose block is being written, and passes nullptr when writing
/// the module-level block.
const Function *F;
31+
/// Value the order applies to. The writer emits its ID as the last element
/// of the record and picks the record code based on whether it is a
/// BasicBlock.
const Value *V;
32+
/// Indexes emitted ahead of the value ID. The writer asserts that there are
/// at least two of them.
SmallVector<unsigned, 8> Shuffle;
33+
};
34+
35+
typedef std::vector<UseListOrder> UseListOrderStack;
2336

2437
/// \brief Whether to preserve use-list ordering.
2538
bool shouldPreserveBitcodeUseListOrder();

lib/Bitcode/Reader/BitcodeReader.cpp

+37-6
Original file line numberDiff line numberDiff line change
@@ -1620,9 +1620,8 @@ std::error_code BitcodeReader::ParseUseLists() {
16201620
if (Stream.EnterSubBlock(bitc::USELIST_BLOCK_ID))
16211621
return Error(InvalidRecord);
16221622

1623-
SmallVector<uint64_t, 64> Record;
1624-
16251623
// Read all the records.
1624+
SmallVector<uint64_t, 64> Record;
16261625
while (1) {
16271626
BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
16281627

@@ -1639,14 +1638,42 @@ std::error_code BitcodeReader::ParseUseLists() {
16391638

16401639
// Read a use list record.
16411640
Record.clear();
1641+
bool IsBB = false;
16421642
switch (Stream.readRecord(Entry.ID, Record)) {
16431643
default: // Default behavior: unknown type.
16441644
break;
1645-
case bitc::USELIST_CODE_ENTRY: { // USELIST_CODE_ENTRY: TBD.
1645+
case bitc::USELIST_CODE_BB:
1646+
IsBB = true;
1647+
// fallthrough
1648+
case bitc::USELIST_CODE_DEFAULT: {
16461649
unsigned RecordLength = Record.size();
1647-
if (RecordLength < 1)
1648-
return Error(InvalidRecord);
1649-
UseListRecords.push_back(Record);
1650+
if (RecordLength < 3)
1651+
// Records should have at least an ID and two indexes.
1652+
return Error(InvalidRecord);
1653+
unsigned ID = Record.back();
1654+
Record.pop_back();
1655+
1656+
Value *V;
1657+
if (IsBB) {
1658+
assert(ID < FunctionBBs.size() && "Basic block not found");
1659+
V = FunctionBBs[ID];
1660+
} else
1661+
V = ValueList[ID];
1662+
unsigned NumUses = 0;
1663+
SmallDenseMap<const Use *, unsigned, 16> Order;
1664+
for (const Use &U : V->uses()) {
1665+
if (NumUses > Record.size())
1666+
break;
1667+
Order[&U] = Record[NumUses++];
1668+
}
1669+
if (Order.size() != Record.size() || NumUses > Record.size())
1670+
// Mismatches can happen if the functions are being materialized lazily
1671+
// (out-of-order), or a value has been upgraded.
1672+
break;
1673+
1674+
V->sortUseList([&](const Use &L, const Use &R) {
1675+
return Order.lookup(&L) < Order.lookup(&R);
1676+
});
16501677
break;
16511678
}
16521679
}
@@ -2298,6 +2325,10 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
22982325
if (std::error_code EC = ParseMetadata())
22992326
return EC;
23002327
break;
2328+
case bitc::USELIST_BLOCK_ID:
2329+
if (std::error_code EC = ParseUseLists())
2330+
return EC;
2331+
break;
23012332
}
23022333
continue;
23032334

lib/Bitcode/Reader/BitcodeReader.h

-1
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,6 @@ class BitcodeReader : public GVMaterializer {
138138
BitcodeReaderMDValueList MDValueList;
139139
std::vector<Comdat *> ComdatList;
140140
SmallVector<Instruction *, 64> InstructionList;
141-
SmallVector<SmallVector<uint64_t, 64>, 64> UseListRecords;
142141

143142
std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits;
144143
std::vector<std::pair<GlobalAlias*, unsigned> > AliasInits;

lib/Bitcode/Writer/BitcodeWriter.cpp

+37-94
Original file line numberDiff line numberDiff line change
@@ -1602,6 +1602,39 @@ static void WriteValueSymbolTable(const ValueSymbolTable &VST,
16021602
Stream.ExitBlock();
16031603
}
16041604

1605+
/// Emit a single use-list order record to \p Stream.
///
/// The record holds every index in Order.Shuffle, in order, followed by the
/// ID that \p VE reports for Order.V. It is tagged USELIST_CODE_BB when
/// Order.V is a BasicBlock and USELIST_CODE_DEFAULT otherwise.
///
/// \p Order is taken by rvalue reference, but nothing in this body moves from
/// it; its members are only read.
static void WriteUseList(ValueEnumerator &VE, UseListOrder &&Order,
1606+
BitstreamWriter &Stream) {
1607+
// The reader side of this change rejects records that do not carry an ID
// plus at least two indexes, so a shorter shuffle must never be written.
assert(Order.Shuffle.size() >= 2 && "Shuffle too small");
1608+
unsigned Code;
1609+
// Basic blocks get their own record code; the reader resolves their IDs
// through a different table than other values.
if (isa<BasicBlock>(Order.V))
1610+
Code = bitc::USELIST_CODE_BB;
1611+
else
1612+
Code = bitc::USELIST_CODE_DEFAULT;
1613+
1614+
// Record layout: [index..., value-id].
SmallVector<uint64_t, 64> Record;
1615+
for (unsigned I : Order.Shuffle)
1616+
Record.push_back(I);
1617+
// The ID goes last; the reader pops it off the back of the record.
Record.push_back(VE.getValueID(Order.V));
1618+
Stream.EmitRecord(Code, Record);
1619+
}
1620+
1621+
/// Emit a USELIST_BLOCK_ID sub-block holding the pending use-list orders that
/// belong to \p F.
///
/// Orders are consumed from the back of VE.UseListOrders for as long as the
/// back entry's F equals \p F: each one is written with WriteUseList and then
/// popped. If the list is empty or its back entry does not match \p F, no
/// block is emitted at all. WriteModule passes nullptr as \p F to write the
/// module-level block.
static void WriteUseListBlock(const Function *F, ValueEnumerator &VE,
1622+
BitstreamWriter &Stream) {
1623+
// True while the entry at the back of the list belongs to F.
auto hasMore = [&]() {
1624+
return !VE.UseListOrders.empty() && VE.UseListOrders.back().F == F;
1625+
};
1626+
if (!hasMore())
1627+
// Nothing to do.
1628+
return;
1629+
1630+
// NOTE(review): the second argument (3) is presumably the abbreviation ID
// width for the sub-block -- confirm against BitstreamWriter.
Stream.EnterSubblock(bitc::USELIST_BLOCK_ID, 3);
1631+
while (hasMore()) {
1632+
// The entry is popped immediately afterwards, so it is handed over as an
// rvalue.
WriteUseList(VE, std::move(VE.UseListOrders.back()), Stream);
1633+
VE.UseListOrders.pop_back();
1634+
}
1635+
Stream.ExitBlock();
1636+
}
1637+
16051638
/// WriteFunction - Emit a function body to the module stream.
16061639
static void WriteFunction(const Function &F, ValueEnumerator &VE,
16071640
BitstreamWriter &Stream) {
@@ -1670,6 +1703,8 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE,
16701703

16711704
if (NeedsMetadataAttachment)
16721705
WriteMetadataAttachment(F, VE, Stream);
1706+
if (shouldPreserveBitcodeUseListOrder())
1707+
WriteUseListBlock(&F, VE, Stream);
16731708
VE.purgeFunction();
16741709
Stream.ExitBlock();
16751710
}
@@ -1835,98 +1870,6 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
18351870
Stream.ExitBlock();
18361871
}
18371872

1838-
// Sort the Users based on the order in which the reader parses the bitcode
1839-
// file.
1840-
static bool bitcodereader_order(const User *lhs, const User *rhs) {
1841-
// TODO: Implement.
1842-
return true;
1843-
}
1844-
1845-
static void WriteUseList(const Value *V, const ValueEnumerator &VE,
1846-
BitstreamWriter &Stream) {
1847-
1848-
// One or zero uses can't get out of order.
1849-
if (V->use_empty() || V->hasNUses(1))
1850-
return;
1851-
1852-
// Make a copy of the in-memory use-list for sorting.
1853-
SmallVector<const User*, 8> UserList(V->user_begin(), V->user_end());
1854-
1855-
// Sort the copy based on the order read by the BitcodeReader.
1856-
std::sort(UserList.begin(), UserList.end(), bitcodereader_order);
1857-
1858-
// TODO: Generate a diff between the BitcodeWriter in-memory use-list and the
1859-
// sorted list (i.e., the expected BitcodeReader in-memory use-list).
1860-
1861-
// TODO: Emit the USELIST_CODE_ENTRYs.
1862-
}
1863-
1864-
static void WriteFunctionUseList(const Function *F, ValueEnumerator &VE,
1865-
BitstreamWriter &Stream) {
1866-
VE.incorporateFunction(*F);
1867-
1868-
for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
1869-
AI != AE; ++AI)
1870-
WriteUseList(AI, VE, Stream);
1871-
for (Function::const_iterator BB = F->begin(), FE = F->end(); BB != FE;
1872-
++BB) {
1873-
WriteUseList(BB, VE, Stream);
1874-
for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
1875-
++II) {
1876-
WriteUseList(II, VE, Stream);
1877-
for (User::const_op_iterator OI = II->op_begin(), E = II->op_end();
1878-
OI != E; ++OI) {
1879-
if ((isa<Constant>(*OI) && !isa<GlobalValue>(*OI)) ||
1880-
isa<InlineAsm>(*OI))
1881-
WriteUseList(*OI, VE, Stream);
1882-
}
1883-
}
1884-
}
1885-
VE.purgeFunction();
1886-
}
1887-
1888-
// Emit use-lists.
1889-
static void WriteModuleUseLists(const Module *M, ValueEnumerator &VE,
1890-
BitstreamWriter &Stream) {
1891-
Stream.EnterSubblock(bitc::USELIST_BLOCK_ID, 3);
1892-
1893-
// XXX: this modifies the module, but in a way that should never change the
1894-
// behavior of any pass or codegen in LLVM. The problem is that GVs may
1895-
// contain entries in the use_list that do not exist in the Module and are
1896-
// not stored in the .bc file.
1897-
for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
1898-
I != E; ++I)
1899-
I->removeDeadConstantUsers();
1900-
1901-
// Write the global variables.
1902-
for (Module::const_global_iterator GI = M->global_begin(),
1903-
GE = M->global_end(); GI != GE; ++GI) {
1904-
WriteUseList(GI, VE, Stream);
1905-
1906-
// Write the global variable initializers.
1907-
if (GI->hasInitializer())
1908-
WriteUseList(GI->getInitializer(), VE, Stream);
1909-
}
1910-
1911-
// Write the functions.
1912-
for (Module::const_iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) {
1913-
WriteUseList(FI, VE, Stream);
1914-
if (!FI->isDeclaration())
1915-
WriteFunctionUseList(FI, VE, Stream);
1916-
if (FI->hasPrefixData())
1917-
WriteUseList(FI->getPrefixData(), VE, Stream);
1918-
}
1919-
1920-
// Write the aliases.
1921-
for (Module::const_alias_iterator AI = M->alias_begin(), AE = M->alias_end();
1922-
AI != AE; ++AI) {
1923-
WriteUseList(AI, VE, Stream);
1924-
WriteUseList(AI->getAliasee(), VE, Stream);
1925-
}
1926-
1927-
Stream.ExitBlock();
1928-
}
1929-
19301873
/// WriteModule - Emit the specified module to the bitstream.
19311874
static void WriteModule(const Module *M, BitstreamWriter &Stream) {
19321875
Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
@@ -1969,9 +1912,9 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) {
19691912
// Emit names for globals/functions etc.
19701913
WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream);
19711914

1972-
// Emit use-lists.
1915+
// Emit module-level use-lists.
19731916
if (shouldPreserveBitcodeUseListOrder())
1974-
WriteModuleUseLists(M, VE, Stream);
1917+
WriteUseListBlock(nullptr, VE, Stream);
19751918

19761919
// Emit function bodies.
19771920
for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F)

0 commit comments

Comments
 (0)