|
| 1 | +//===- BlockDisambiguate.cpp - Unambiguous block mapping for yk ----===// |
| 2 | +// |
| 3 | +// This pass ensures that yk is able to unambiguously map machine blocks back |
| 4 | +// to LLVM IR blocks. |
| 5 | +// |
| 6 | +// In the JIT runtime, the mapping stage converts the *machine* basic blocks of |
| 7 | +// a trace back to high-level basic blocks (the ones in LLVM IR). A problem |
| 8 | +// arises when the mapper encounters two consecutive machine basic block |
| 9 | +// entries that map back to the same high-level block. If after mapping machine |
| 10 | +// blocks, the high-level trace contains the sequence `[bbA, bbA]`, then it may |
| 11 | +// not be clear if: |
| 12 | +// |
| 13 | +// - the program executed the block `bbA` twice, or |
| 14 | +// - `bbA` executed only once, but is composed of more than one machine block. |
| 15 | +// |
| 16 | +// This pass disambiguates the two cases by adding extra "disambiguation |
| 17 | +// blocks" for high-level IR blocks that have an edge straight back to |
| 18 | +// themselves. The new blocks make it clear when execution has left and |
| 19 | +// re-entered the same high-level block. |
| 20 | +// |
| 21 | +// For a concrete example, suppose that we have a high-level LLVM IR basic |
| 22 | +// block, `bbA`, as shown in Fig. 1a. |
| 23 | +// |
| 24 | +// ┌──►│ |
| 25 | +// │ ▼ |
| 26 | +// ┌──►│ │ ┌────┐ ┌────┐ |
| 27 | +// │ ▼ │ │bb.0│──►│bb.1│ |
| 28 | +// │ ┌───┐ │ └────┘ └────┘ |
| 29 | +// │ │bbA│ │ │ │ |
| 30 | +// │ └───┘ │ ▼ ▼ |
| 31 | +// └───┤ │ ┌────┐ ┌────┐ |
| 32 | +// ▼ │ │bb.3│◄──│bb.2│ |
| 33 | +// │ └────┘ └────┘ |
| 34 | +// └───┤ |
| 35 | +// ▼ |
| 36 | +// |
| 37 | +// (Fig. 1a) (Fig. 1b) |
| 38 | +// High-level LLVM IR block. Lowered machine blocks. |
| 39 | +// |
| 40 | +// Now suppose that during code-gen `bbA` lowers to the four machine blocks, as |
| 41 | +// shown in Fig. 1b. This is entirely possible: any given LLVM instruction can |
| 42 | +// lower to any number of machine blocks and there can be arbitrary control flow |
| 43 | +// between them [0]. |
| 44 | +// |
| 45 | +// Let's look at two ways that execution can enter the machine blocks of `bbA` |
| 46 | +// and flow through them before exiting to the machine blocks elsewhere: |
| 47 | +// |
| 48 | +// - `[bb.0, bb.3, bb.0, bb.3]` |
| 49 | +// i.e. `bbA` is executed twice. |
| 50 | +// |
| 51 | +// - `[bb.0, bb.1, bb.2, bb.3]` |
| 52 | +// i.e. `bbA` is executed once, taking a longer path. |
| 53 | +// |
| 54 | +// Since `bb.0` through `bb.3` all belong to the high-level block `bbA`, a |
| 55 | +// naive mapping back to high-level IR would give a trace of `[bbA, bbA, bbA, |
| 56 | +// bbA]` for both of the above paths, and we have no way of knowing whether |
| 57 | +// `bbA` executed once or twice. |
| 58 | +// |
| 59 | +// The pass implemented in this file resolves this ambiguity by ensuring that |
| 60 | +// no high-level IR block can branch straight back to itself. With this |
| 61 | +// property in-place the mapper can safely assume that repeated consecutive |
| 62 | +// entries for the same high-level block mean that execution is within the |
| 63 | +// confines of the same high-level block, and that the block is not being |
| 64 | +// re-executed. |
| 65 | +// |
| 66 | +// For our worked example, this pass would change the high-level IR as shown in |
| 67 | +// Fig. 2a. |
| 68 | +// |
| 69 | +// ``` |
| 70 | +// ┌──►│ |
| 71 | +// │ ▼ |
| 72 | +// │ ┌────┐ ┌────┐ |
| 73 | +// ┌──►│ │ │bb.0│──►│bb.1│ |
| 74 | +// │ ▼ │ └────┘ └────┘ |
| 75 | +// │ ┌───┐ │ │ │ |
| 76 | +// │ │bbA│ │ ▼ ▼ |
| 77 | +// │ └───┘ │ ┌────┐ ┌────┐ |
| 78 | +// │ │ │ │bb.3│◄──│bb.2│ |
| 79 | +// │ ▼ │ └────┘ └────┘ |
| 80 | +// │ ┌───┐ │ │ │ |
| 81 | +// └─│bbB│ disambiguation │ ▼ └───┐ |
| 82 | +// └───┘ block │ ┌────┐ │ |
| 83 | +// │ └─│bb.4│ │ |
| 84 | +// ▼ └────┘ │ |
| 85 | +// ▼ |
| 86 | +// |
| 87 | +// (Fig. 2a) (Fig. 2b) |
| 88 | +// High-level blocks after Machine blocks after |
| 89 | +// disambiguation pass. disambiguation pass. |
| 90 | +// ``` |
| 91 | +// |
| 92 | +// Now if `bbA` is re-executed control flow must go via the "disambiguation |
| 93 | +// block" `bbB` and our example paths would now be: |
| 94 | +// |
| 95 | +// - `[bb.0, bb.3, bb.4, bb.0, bb.3, bb.4]` |
| 96 | +// - `[bb.0, bb.1, bb.2, bb.3]` |
| 97 | +// |
| 98 | +// And their initial mappings are: |
| 99 | +// |
| 100 | +// - `[bbA, bbA, bbB, bbA, bbA, bbB]` |
| 101 | +// - `[bbA, bbA, bbA, bbA]` |
| 102 | +// |
| 103 | +// And consecutive repeated entries can be collapsed giving: |
| 104 | +// |
| 105 | +// - `[bbA, bbB, bbA, bbB]` |
| 106 | +// - `[bbA]` |
| 107 | +// |
| 108 | +// The former unambiguously expresses that `bbA` was executed twice. The latter |
| 109 | +// unambiguously expresses that `bbA` was executed only once. |
| 110 | +// |
| 111 | +// The pass runs after high-level IR optimisations (and requires some backend |
| 112 | +// optimisations disabled) to ensure that LLVM doesn't undo our work, by |
| 113 | +// folding the machine block for `bbB` back into its predecessor in `bbA`. |
| 114 | +// |
| 115 | +// Alternative approaches that we dismissed, and why: |
| 116 | +// |
| 117 | +// - Consider branches back to the entry machine block of a high-level block |
| 118 | +// as a re-execution of the high-level block. Even assuming that we can |
| 119 | +// identify the entry machine block for a high-level block, this is flawed. |
| 120 | +// As can be seen in the example above, both internal and non-internal |
| 121 | +// control flow can branch back to the entry block. Additionally, there may |
| 122 | +// not be a unique entry machine basic block. |
| 123 | +// |
| 124 | +// - Mark (in the machine IR) which branches are exits to the high-level IR |
| 125 | +// block and encode this in the basic block map somehow. This is more |
| 126 | +// complicated, but may work. We may revisit this approach later: |
| 127 | +// https://github.com/ykjit/yk/issues/435 |
| 128 | +// |
| 129 | +// - Try to make it so that high-level IR blocks lower to exactly one machine |
| 130 | +// block. It will be difficult to find all of the (platform specific) cases |
| 131 | +// where a high-level block can lower to many machine blocks, and it's |
| 132 | +// likely that some LLVM IR constructs require internal control flow for |
| 133 | +// correct semantics. |
| 134 | +// |
| 135 | +// Footnotes: |
| 136 | +// |
| 137 | +// [0]: For some targets, a single high-level LLVM IR instruction can even |
| 138 | +// lower to a machine-IR-level loop, for example `cmpxchg` on some ARM |
| 139 | +// targets, and integer division on targets which have no dedicated |
| 140 | +// division instructions (e.g. AVR). A high-level instruction lowered to |
| 141 | +// a machine-level loop presents a worst-case scenario for ambiguity, as |
| 142 | +// a potentially unbounded number of machine blocks can be executed |
| 143 | +// within the confines of a single high-level basic block. |
| 144 | +// |
| 145 | +//===----------------------------------------------------------------------===// |
| 146 | + |
| 147 | +#include "llvm/Transforms/Yk/BlockDisambiguate.h" |
| 148 | +#include "llvm/IR/BasicBlock.h" |
| 149 | +#include "llvm/IR/Function.h" |
| 150 | +#include "llvm/IR/Instructions.h" |
| 151 | +#include "llvm/IR/Module.h" |
| 152 | +#include "llvm/InitializePasses.h" |
| 153 | +#include "llvm/Pass.h" |
| 154 | +#include <llvm/IR/Dominators.h> |
| 155 | +#include <llvm/IR/IRBuilder.h> |
| 156 | +#include <llvm/IR/Verifier.h> |
| 157 | + |
| 158 | +using namespace llvm; |
| 159 | + |
| 160 | +#define DEBUG_TYPE "yk-block-disambiguate" |
| 161 | + |
| 162 | +namespace llvm { |
| 163 | +void initializeYkBlockDisambiguatePass(PassRegistry &); |
| 164 | +} |
| 165 | + |
| 166 | +namespace { |
| 167 | +class YkBlockDisambiguate : public ModulePass { |
| 168 | +public: |
| 169 | + static char ID; |
| 170 | + YkBlockDisambiguate() : ModulePass(ID) { |
| 171 | + initializeYkBlockDisambiguatePass(*PassRegistry::getPassRegistry()); |
| 172 | + } |
| 173 | + bool runOnModule(Module &M) override { |
| 174 | + LLVMContext &Context = M.getContext(); |
| 175 | + for (Function &F : M) |
| 176 | + processFunction(Context, F); |
| 177 | + return true; |
| 178 | + } |
| 179 | + |
| 180 | +private: |
| 181 | + BasicBlock *makeDisambiguationBB(LLVMContext &Context, BasicBlock *BB, |
| 182 | + std::vector<BasicBlock *> &NewBBs) { |
| 183 | + BasicBlock *DBB = BasicBlock::Create(Context, ""); |
| 184 | + NewBBs.push_back(DBB); |
| 185 | + IRBuilder<> Builder(DBB); |
| 186 | + Builder.CreateBr(BB); |
| 187 | + return DBB; |
| 188 | + } |
| 189 | + |
| 190 | + void processFunction(LLVMContext &Context, Function &F) { |
| 191 | + std::vector<BasicBlock *> NewBBs; |
| 192 | + for (BasicBlock &BB : F) { |
| 193 | + Instruction *TI = BB.getTerminator(); |
| 194 | + assert(!isa<IndirectBrInst>(TI)); // YKFIXME: not implemented. |
| 195 | + if (isa<BranchInst>(TI)) { |
| 196 | + BranchInst *BI = cast<BranchInst>(TI); |
| 197 | + for (unsigned SuccIdx = 0; SuccIdx < BI->getNumSuccessors(); |
| 198 | + SuccIdx++) { |
| 199 | + BasicBlock *SuccBB = BI->getSuccessor(SuccIdx); |
| 200 | + if (SuccBB == &BB) { |
| 201 | + BasicBlock *DBB = makeDisambiguationBB(Context, &BB, NewBBs); |
| 202 | + BI->setSuccessor(SuccIdx, DBB); |
| 203 | + } |
| 204 | + } |
| 205 | + } else if (isa<SwitchInst>(TI)) { |
| 206 | + SwitchInst *SI = cast<SwitchInst>(TI); |
| 207 | + for (unsigned SuccIdx = 0; SuccIdx < SI->getNumSuccessors(); |
| 208 | + SuccIdx++) { |
| 209 | + BasicBlock *SuccBB = SI->getSuccessor(SuccIdx); |
| 210 | + if (SuccBB == &BB) { |
| 211 | + BasicBlock *DBB = makeDisambiguationBB(Context, &BB, NewBBs); |
| 212 | + SI->setSuccessor(SuccIdx, DBB); |
| 213 | + } |
| 214 | + } |
| 215 | + } |
| 216 | + } |
| 217 | + |
| 218 | + // Insert new blocks at the end, so as to not iterate and mutate the |
| 219 | + // function's basic block list simultaneously. |
| 220 | + for (BasicBlock *BB : NewBBs) |
| 221 | + BB->insertInto(&F); |
| 222 | + } |
| 223 | +}; |
| 224 | +} // namespace |
| 225 | + |
| 226 | +char YkBlockDisambiguate::ID = 0; |
| 227 | +INITIALIZE_PASS(YkBlockDisambiguate, DEBUG_TYPE, "yk block disambiguation", |
| 228 | + false, false) |
| 229 | +ModulePass *llvm::createYkBlockDisambiguatePass() { |
| 230 | + return new YkBlockDisambiguate(); |
| 231 | +} |
0 commit comments