-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[SPIR-V] Add SPIR-V structurizer #107408
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPIR-V] Add SPIR-V structurizer #107408
Conversation
@llvm/pr-subscribers-backend-spir-v @llvm/pr-subscribers-clang Author: Nathan Gauër (Keenuts) ChangesThis commit adds an initial SPIR-V structurizer. The first part does a branch cleanup (simplifying switches, and legalizing them), then merge instructions are added to cycles, convergent and later divergent blocks.
This PR leverages the merged SPIR-V simulator for testing, as long as spirv-val. For now, most DXC structurization tests are passing. The unsupported ones are either caused by unsupported features like switches on boolean types, or switches in region exits, because the MergeExit pass doesn't support those yet (there is a FIXME). This PR is quite large, and the addition not trivial, so I tried to keep it simple. E.G: as soon as the CFG changes, I recompute the dominator trees and other structures instead of updating them. Patch is 195.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/107408.diff 53 Files Affected:
diff --git a/clang/test/CodeGenHLSL/convergence/cf.for.plain.hlsl b/clang/test/CodeGenHLSL/convergence/cf.for.plain.hlsl
new file mode 100644
index 00000000000000..2f08854f84d955
--- /dev/null
+++ b/clang/test/CodeGenHLSL/convergence/cf.for.plain.hlsl
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN: spirv-pc-vulkan-library %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+int process() {
+// CHECK: entry:
+// CHECK: %[[#entry_token:]] = call token @llvm.experimental.convergence.entry()
+ int val = 0;
+
+// CHECK: for.cond:
+// CHECK-NEXT: %[[#]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %[[#entry_token]]) ]
+// CHECK: br i1 {{.*}}, label %for.body, label %for.end
+ for (int i = 0; i < 10; ++i) {
+
+// CHECK: for.body:
+// CHECK: br label %for.inc
+ val = i;
+
+// CHECK: for.inc:
+// CHECK: br label %for.cond
+ }
+
+// CHECK: for.end:
+// CHECK: br label %for.cond1
+
+ // Infinite loop
+ for ( ; ; ) {
+// CHECK: for.cond1:
+// CHECK-NEXT: %[[#]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %[[#entry_token]]) ]
+// CHECK: br label %for.cond1
+ val = 0;
+ }
+
+// CHECK-NEXT: }
+// This loop in unreachable. Not generated.
+ // Null body
+ for (int j = 0; j < 10; ++j)
+ ;
+ return val;
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+ process();
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index cbf6e04f2844d6..21b3d2ec8b9649 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -31,6 +31,8 @@ let TargetPrefix = "spv" in {
def int_spv_bitcast : Intrinsic<[llvm_any_ty], [llvm_any_ty]>;
def int_spv_ptrcast : Intrinsic<[llvm_any_ty], [llvm_any_ty, llvm_metadata_ty, llvm_i32_ty], [ImmArg<ArgIndex<2>>]>;
def int_spv_switch : Intrinsic<[], [llvm_any_ty, llvm_vararg_ty]>;
+ def int_spv_loop_merge : Intrinsic<[], [llvm_vararg_ty]>;
+ def int_spv_selection_merge : Intrinsic<[], [llvm_vararg_ty]>;
def int_spv_cmpxchg : Intrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_vararg_ty]>;
def int_spv_unreachable : Intrinsic<[], []>;
def int_spv_alloca : Intrinsic<[llvm_any_ty], []>;
diff --git a/llvm/lib/Target/SPIRV/Analysis/SPIRVConvergenceRegionAnalysis.cpp b/llvm/lib/Target/SPIRV/Analysis/SPIRVConvergenceRegionAnalysis.cpp
index 25e285e35f9336..cc6daf7ef34426 100644
--- a/llvm/lib/Target/SPIRV/Analysis/SPIRVConvergenceRegionAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/Analysis/SPIRVConvergenceRegionAnalysis.cpp
@@ -203,7 +203,8 @@ class ConvergenceRegionAnalyzer {
private:
bool isBackEdge(const BasicBlock *From, const BasicBlock *To) const {
- assert(From != To && "From == To. This is awkward.");
+ if (From == To)
+ return true;
// We only handle loop in the simplified form. This means:
// - a single back-edge, a single latch.
@@ -230,6 +231,7 @@ class ConvergenceRegionAnalyzer {
auto *Terminator = From->getTerminator();
for (unsigned i = 0; i < Terminator->getNumSuccessors(); ++i) {
auto *To = Terminator->getSuccessor(i);
+ // Ignore back edges.
if (isBackEdge(From, To))
continue;
@@ -276,7 +278,6 @@ class ConvergenceRegionAnalyzer {
while (ToProcess.size() != 0) {
auto *L = ToProcess.front();
ToProcess.pop();
- assert(L->isLoopSimplifyForm());
auto CT = getConvergenceToken(L->getHeader());
SmallPtrSet<BasicBlock *, 8> RegionBlocks(L->block_begin(),
diff --git a/llvm/lib/Target/SPIRV/Analysis/SPIRVConvergenceRegionAnalysis.h b/llvm/lib/Target/SPIRV/Analysis/SPIRVConvergenceRegionAnalysis.h
index f9e30e4effa1d9..e435c88c919c9c 100644
--- a/llvm/lib/Target/SPIRV/Analysis/SPIRVConvergenceRegionAnalysis.h
+++ b/llvm/lib/Target/SPIRV/Analysis/SPIRVConvergenceRegionAnalysis.h
@@ -130,6 +130,9 @@ class ConvergenceRegionInfo {
}
const ConvergenceRegion *getTopLevelRegion() const { return TopLevelRegion; }
+ ConvergenceRegion *getWritableTopLevelRegion() const {
+ return TopLevelRegion;
+ }
};
} // namespace SPIRV
diff --git a/llvm/lib/Target/SPIRV/CMakeLists.txt b/llvm/lib/Target/SPIRV/CMakeLists.txt
index 5f8aea5fc8d84d..198483e03a46d7 100644
--- a/llvm/lib/Target/SPIRV/CMakeLists.txt
+++ b/llvm/lib/Target/SPIRV/CMakeLists.txt
@@ -31,6 +31,7 @@ add_llvm_target(SPIRVCodeGen
SPIRVMCInstLower.cpp
SPIRVMetadata.cpp
SPIRVModuleAnalysis.cpp
+ SPIRVStructurizer.cpp
SPIRVPreLegalizer.cpp
SPIRVPostLegalizer.cpp
SPIRVPrepareFunctions.cpp
diff --git a/llvm/lib/Target/SPIRV/SPIRV.h b/llvm/lib/Target/SPIRV/SPIRV.h
index 6c35a467f53bef..384133e7b4bd18 100644
--- a/llvm/lib/Target/SPIRV/SPIRV.h
+++ b/llvm/lib/Target/SPIRV/SPIRV.h
@@ -20,6 +20,7 @@ class InstructionSelector;
class RegisterBankInfo;
ModulePass *createSPIRVPrepareFunctionsPass(const SPIRVTargetMachine &TM);
+FunctionPass *createSPIRVStructurizerPass();
FunctionPass *createSPIRVMergeRegionExitTargetsPass();
FunctionPass *createSPIRVStripConvergenceIntrinsicsPass();
FunctionPass *createSPIRVRegularizerPass();
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index fed82b904af4f7..44f2da11edb051 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -2296,6 +2296,19 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
}
return MIB.constrainAllUses(TII, TRI, RBI);
}
+ case Intrinsic::spv_loop_merge:
+ case Intrinsic::spv_selection_merge: {
+ const auto Opcode = IID == Intrinsic::spv_selection_merge
+ ? SPIRV::OpSelectionMerge
+ : SPIRV::OpLoopMerge;
+ auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(Opcode));
+ for (unsigned i = 1; i < I.getNumExplicitOperands(); ++i) {
+ assert(I.getOperand(i).isMBB());
+ MIB.addMBB(I.getOperand(i).getMBB());
+ }
+ MIB.addImm(SPIRV::SelectionControl::None);
+ return MIB.constrainAllUses(TII, TRI, RBI);
+ }
case Intrinsic::spv_cmpxchg:
return selectAtomicCmpXchg(ResVReg, ResType, I);
case Intrinsic::spv_unreachable:
diff --git a/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp b/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp
index 0747dd1bbaf40a..9930d067173df7 100644
--- a/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp
@@ -133,7 +133,7 @@ class SPIRVMergeRegionExitTargets : public FunctionPass {
// Run the pass on the given convergence region, ignoring the sub-regions.
// Returns true if the CFG changed, false otherwise.
bool runOnConvergenceRegionNoRecurse(LoopInfo &LI,
- const SPIRV::ConvergenceRegion *CR) {
+ SPIRV::ConvergenceRegion *CR) {
// Gather all the exit targets for this region.
SmallPtrSet<BasicBlock *, 4> ExitTargets;
for (BasicBlock *Exit : CR->Exits) {
@@ -198,14 +198,19 @@ class SPIRVMergeRegionExitTargets : public FunctionPass {
for (auto Exit : CR->Exits)
replaceBranchTargets(Exit, ExitTargets, NewExitTarget);
+ CR = CR->Parent;
+ while (CR) {
+ CR->Blocks.insert(NewExitTarget);
+ CR = CR->Parent;
+ }
+
return true;
}
/// Run the pass on the given convergence region and sub-regions (DFS).
/// Returns true if a region/sub-region was modified, false otherwise.
/// This returns as soon as one region/sub-region has been modified.
- bool runOnConvergenceRegion(LoopInfo &LI,
- const SPIRV::ConvergenceRegion *CR) {
+ bool runOnConvergenceRegion(LoopInfo &LI, SPIRV::ConvergenceRegion *CR) {
for (auto *Child : CR->Children)
if (runOnConvergenceRegion(LI, Child))
return true;
@@ -235,10 +240,10 @@ class SPIRVMergeRegionExitTargets : public FunctionPass {
virtual bool runOnFunction(Function &F) override {
LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- const auto *TopLevelRegion =
+ auto *TopLevelRegion =
getAnalysis<SPIRVConvergenceRegionAnalysisWrapperPass>()
.getRegionInfo()
- .getTopLevelRegion();
+ .getWritableTopLevelRegion();
// FIXME: very inefficient method: each time a region is modified, we bubble
// back up, and recompute the whole convergence region tree. Once the
@@ -246,9 +251,6 @@ class SPIRVMergeRegionExitTargets : public FunctionPass {
// to be efficient instead of simple.
bool modified = false;
while (runOnConvergenceRegion(LI, TopLevelRegion)) {
- TopLevelRegion = getAnalysis<SPIRVConvergenceRegionAnalysisWrapperPass>()
- .getRegionInfo()
- .getTopLevelRegion();
modified = true;
}
@@ -262,6 +264,8 @@ class SPIRVMergeRegionExitTargets : public FunctionPass {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<SPIRVConvergenceRegionAnalysisWrapperPass>();
+
+ AU.addPreserved<SPIRVConvergenceRegionAnalysisWrapperPass>();
FunctionPass::getAnalysisUsage(AU);
}
};
diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
index df1b75bc1cb9eb..1784f00be600dd 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
@@ -744,79 +744,139 @@ static void insertSpirvDecorations(MachineFunction &MF, MachineIRBuilder MIB) {
MI->eraseFromParent();
}
-// Find basic blocks of the switch and replace registers in spv_switch() by its
-// MBB equivalent.
-static void processSwitches(MachineFunction &MF, SPIRVGlobalRegistry *GR,
- MachineIRBuilder MIB) {
- DenseMap<const BasicBlock *, MachineBasicBlock *> BB2MBB;
- SmallVector<std::pair<MachineInstr *, SmallVector<MachineInstr *, 8>>>
- Switches;
+// LLVM allows the switches to use registers as cases, while SPIR-V required
+// those to be immediate values. This function replaces such operands with the
+// equivalent immediate constant.
+static void processSwitchesConstants(MachineFunction &MF,
+ SPIRVGlobalRegistry *GR,
+ MachineIRBuilder MIB) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
for (MachineBasicBlock &MBB : MF) {
- MachineRegisterInfo &MRI = MF.getRegInfo();
- BB2MBB[MBB.getBasicBlock()] = &MBB;
for (MachineInstr &MI : MBB) {
if (!isSpvIntrinsic(MI, Intrinsic::spv_switch))
continue;
- // Calls to spv_switch intrinsics representing IR switches.
- SmallVector<MachineInstr *, 8> NewOps;
- for (unsigned i = 2; i < MI.getNumOperands(); ++i) {
+
+ SmallVector<MachineOperand, 8> NewOperands;
+ NewOperands.push_back(MI.getOperand(0)); // Opcode
+ NewOperands.push_back(MI.getOperand(1)); // Condition
+ NewOperands.push_back(MI.getOperand(2)); // Default
+ for (unsigned i = 3; i < MI.getNumOperands(); i += 2) {
Register Reg = MI.getOperand(i).getReg();
- if (i % 2 == 1) {
- MachineInstr *ConstInstr = getDefInstrMaybeConstant(Reg, &MRI);
- NewOps.push_back(ConstInstr);
- } else {
- MachineInstr *BuildMBB = MRI.getVRegDef(Reg);
- assert(BuildMBB &&
- BuildMBB->getOpcode() == TargetOpcode::G_BLOCK_ADDR &&
- BuildMBB->getOperand(1).isBlockAddress() &&
- BuildMBB->getOperand(1).getBlockAddress());
- NewOps.push_back(BuildMBB);
- }
+ MachineInstr *ConstInstr = getDefInstrMaybeConstant(Reg, &MRI);
+ NewOperands.push_back(
+ MachineOperand::CreateCImm(ConstInstr->getOperand(1).getCImm()));
+
+ NewOperands.push_back(MI.getOperand(i + 1));
}
- Switches.push_back(std::make_pair(&MI, NewOps));
+
+ assert(MI.getNumOperands() == NewOperands.size());
+ while (MI.getNumOperands() > 0)
+ MI.removeOperand(0);
+ for (auto &MO : NewOperands)
+ MI.addOperand(MO);
}
}
+}
+// Some instructions are used during CodeGen but should never be emitted.
+// Cleaning up those.
+static void cleanupHelperInstructions(MachineFunction &MF) {
SmallPtrSet<MachineInstr *, 8> ToEraseMI;
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (isSpvIntrinsic(MI, Intrinsic::spv_track_constant) ||
+ MI.getOpcode() == TargetOpcode::G_BRINDIRECT)
+ ToEraseMI.insert(&MI);
+ }
+ }
+
+ for (MachineInstr *MI : ToEraseMI)
+ MI->eraseFromParent();
+}
+
+// Find all usages of G_BLOCK_ADDR in our intrinsics and replace those
+// operands/registers by the actual MBB it references.
+static void processBlockAddr(MachineFunction &MF, SPIRVGlobalRegistry *GR,
+ MachineIRBuilder MIB) {
+ // Gather the reverse-mapping BB -> MBB.
+ DenseMap<const BasicBlock *, MachineBasicBlock *> BB2MBB;
+ for (MachineBasicBlock &MBB : MF)
+ BB2MBB[MBB.getBasicBlock()] = &MBB;
+
+ // Gather instructions requiring patching. For now, only those can use
+ // G_BLOCK_ADDR.
+ SmallVector<MachineInstr *, 8> InstructionsToPatch;
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (isSpvIntrinsic(MI, Intrinsic::spv_switch) ||
+ isSpvIntrinsic(MI, Intrinsic::spv_loop_merge) ||
+ isSpvIntrinsic(MI, Intrinsic::spv_selection_merge))
+ InstructionsToPatch.push_back(&MI);
+ }
+ }
+
+ // For each instruction to fix, we replace all the G_BLOCK_ADDR operands by
+ // the actual MBB it references. Once those references updated, we can cleanup
+ // remaining G_BLOCK_ADDR references.
SmallPtrSet<MachineBasicBlock *, 8> ClearAddressTaken;
- for (auto &SwIt : Switches) {
- MachineInstr &MI = *SwIt.first;
- MachineBasicBlock *MBB = MI.getParent();
- SmallVector<MachineInstr *, 8> &Ins = SwIt.second;
+ SmallPtrSet<MachineInstr *, 8> ToEraseMI;
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (MachineInstr *MI : InstructionsToPatch) {
SmallVector<MachineOperand, 8> NewOps;
- for (unsigned i = 0; i < Ins.size(); ++i) {
- if (Ins[i]->getOpcode() == TargetOpcode::G_BLOCK_ADDR) {
- BasicBlock *CaseBB =
- Ins[i]->getOperand(1).getBlockAddress()->getBasicBlock();
- auto It = BB2MBB.find(CaseBB);
- if (It == BB2MBB.end())
- report_fatal_error("cannot find a machine basic block by a basic "
- "block in a switch statement");
- MachineBasicBlock *Succ = It->second;
- ClearAddressTaken.insert(Succ);
- NewOps.push_back(MachineOperand::CreateMBB(Succ));
- if (!llvm::is_contained(MBB->successors(), Succ))
- MBB->addSuccessor(Succ);
- ToEraseMI.insert(Ins[i]);
- } else {
- NewOps.push_back(
- MachineOperand::CreateCImm(Ins[i]->getOperand(1).getCImm()));
+ for (unsigned i = 0; i < MI->getNumOperands(); ++i) {
+ // The operand is not a register, keep as-is.
+ if (!MI->getOperand(i).isReg()) {
+ NewOps.push_back(MI->getOperand(i));
+ continue;
+ }
+
+ Register Reg = MI->getOperand(i).getReg();
+ MachineInstr *BuildMBB = MRI.getVRegDef(Reg);
+ // The register is not the result of G_BLOCK_ADDR, keep as-is.
+ if (!BuildMBB || BuildMBB->getOpcode() != TargetOpcode::G_BLOCK_ADDR) {
+ NewOps.push_back(MI->getOperand(i));
+ continue;
}
+
+ assert(BuildMBB && BuildMBB->getOpcode() == TargetOpcode::G_BLOCK_ADDR &&
+ BuildMBB->getOperand(1).isBlockAddress() &&
+ BuildMBB->getOperand(1).getBlockAddress());
+ BasicBlock *BB =
+ BuildMBB->getOperand(1).getBlockAddress()->getBasicBlock();
+ auto It = BB2MBB.find(BB);
+ if (It == BB2MBB.end())
+ report_fatal_error("cannot find a machine basic block by a basic block "
+ "in a switch statement");
+ MachineBasicBlock *ReferencedBlock = It->second;
+ NewOps.push_back(MachineOperand::CreateMBB(ReferencedBlock));
+
+ ClearAddressTaken.insert(ReferencedBlock);
+ ToEraseMI.insert(BuildMBB);
}
- for (unsigned i = MI.getNumOperands() - 1; i > 1; --i)
- MI.removeOperand(i);
+
+ // Replace the operands.
+ assert(MI->getNumOperands() == NewOps.size());
+ while (MI->getNumOperands() > 0)
+ MI->removeOperand(0);
for (auto &MO : NewOps)
- MI.addOperand(MO);
- if (MachineInstr *Next = MI.getNextNode()) {
+ MI->addOperand(MO);
+
+ if (MachineInstr *Next = MI->getNextNode()) {
if (isSpvIntrinsic(*Next, Intrinsic::spv_track_constant)) {
ToEraseMI.insert(Next);
- Next = MI.getNextNode();
+ Next = MI->getNextNode();
}
if (Next && Next->getOpcode() == TargetOpcode::G_BRINDIRECT)
ToEraseMI.insert(Next);
}
}
+ // BlockAddress operands were used to keep information between passes,
+ // let's undo the "address taken" status to reflect that Succ doesn't
+ // actually correspond to an IR-level basic block.
+ for (MachineBasicBlock *Succ : ClearAddressTaken)
+ Succ->setAddressTakenIRBlock(nullptr);
+
// If we just delete G_BLOCK_ADDR instructions with BlockAddress operands,
// this leaves their BasicBlock counterparts in a "address taken" status. This
// would make AsmPrinter to generate a series of unneeded labels of a "Address
@@ -835,12 +895,6 @@ static void processSwitches(MachineFunction &MF, SPIRVGlobalRegistry *GR,
}
BlockAddrI->eraseFromParent();
}
-
- // BlockAddress operands were used to keep information between passes,
- // let's undo the "address taken" status to reflect that Succ doesn't
- // actually correspond to an IR-level basic block.
- for (MachineBasicBlock *Succ : ClearAddressTaken)
- Succ->setAddressTakenIRBlock(nullptr);
}
static bool isImplicitFallthrough(MachineBasicBlock &MBB) {
@@ -891,7 +945,11 @@ bool SPIRVPreLegalizer::runOnMachineFunction(MachineFunction &MF) {
foldConstantsIntoIntrinsics(MF, TrackedConstRegs);
insertBitcasts(MF, GR, MIB);
generateAssignInstrs(MF, GR, MIB, TargetExtConstTypes);
- processSwitches(MF, GR, MIB);
+
+ processSwitchesConstants(MF, GR, MIB);
+ processBlockAddr(MF, GR, MIB);
+ cleanupHelperInstructions(MF);
+
processInstrsWithTypeFolding(MF, GR, MIB);
removeImplicitFallthroughs(MF, MIB);
insertSpirvDecorations(MF, MIB);
diff --git a/llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp b/llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp
new file mode 100644
index 00000000000000..f663b7f427e235
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp
@@ -0,0 +1,1410 @@
+//===-- SPIRVStructurizer.cpp ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "Analysis/SPIRVConvergenceRegionAnalysis.h"
+#include "SPIRV.h"
+#include "SPIRVSubtarget.h"
+#include "SPIRVTargetMachine.h"
+#include "SPIRVUtils.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsSPIRV.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LoopSimplify.h"
+#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
+#include <queue>
+#include <stack>
+
+using namespace llvm;
+using namespace SPIRV;
+
+namespace llvm {
+
+void initializeSPIRVStructurizerPass(PassRegistry &);
+
+namespace {
+
+using BlockSet = std::unordered_set<BasicBlock *>;
+using Edge = std::pair<BasicBlock *, BasicBlock *>;
+
+// This class implements a partial ordering visitor, which visits a cyclic graph
+// in natural topological-like ordering. Topological or...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks good. No major issue. There are a few typos, and a couple smaller issues that we can follow up on in another PR.
edd878b
to
c913cfb
Compare
7570721
to
949e0c5
Compare
Given the ongoing discussion around spirv-sim, I updated all the hlsl tests to be llvm-ir -> SPIR-V tests. All tests now use both FileCheck and spirv-sim. |
I didn't dive deeply into the subject, but I'd be eager to assist in a somewhat indirect manner. I've tried the PR locally to check this with SYCL and machine verifier (expensive checks: on). The former luckily didn't notice any changes, but the latter gives some insights. Probably I see one potential issue to address with respect to the validity of code between passes, namely after SPIRV pre legalizer. 37 of 63 test cases in the 'structurizer' folder are failing when running with 'llc -verify-machineinstrs', and it looks like the repeat offender is "Virtual register defs don't dominate all uses", like in
No existing tests with '-verify-machineinstrs' start to fail, so I guess there are no problems with code modifications wrt. the general/existing branching logic, but rather with the new use case introduced by the PR/added instructions. Another weird thing is that llvm specifically built with expensive checks on does hang now (or maybe hits a very long timeout, I haven't yet a patience to check) while running our test suite, so I'm not quite sure if this PR introduce new issues for Machine Verifier in general, in the part of existing test cases where we haven't yet inserted '-verify-machineinstrs'. |
To be absolutely sure I've just cleanly rebuilt llvm with |
The culprit is |
Thanks for looking into this.
Need to investigate this. |
Ok, so I've figured out 3 issues:
Fixing those 3 allowed me to get later in the verification process. I now have 2 more types of issues:
I'd be tempted to fix the issue 1 (the actual iterator issue), but leave the rest unfixed in this PR (which is already huge). |
That's really great, thank you.
In lib/Target/SPIRV/SPIRVInstrInfo.td lines 625-632 we actually set isTerminator=1 for both of those, but let's double check then.
Sure! |
Hopefully, the fix may be as simple as to change lib/Target/SPIRV/SPIRVInstrInfo.td line 620-621 |
Oh thanks! |
df2a2a0
to
b360f92
Compare
Rebased on main.
To satisfy both the SPIR-V spec and the MIR requirements, I had to be stricter on the block-ordering. The initial sorting I had was incomplete, and I had to use the partial topological sorting. But this sorting had a slight issue: when 2 BB had the same order, their order was not defined (same rank). I added the traversal index in addition to consistently compare those blocks. @VyacheslavLevytskyy There is one test I disabled: |
Thank you @Keenuts! I'll check it today against my test suites and get back to you. I'd like to to check what's wrong with llvm/test/CodeGen/SPIRV/instructions/ret-type.ll very briefly. Meanwhile one more question, just out of curiosity. Between the pass of SPIRVPrepareFunctions.cpp with its new |
Thanks!
I'm not sure either. I'd be tempted to say we should put the |
All looks good! After this is merged I plan to have a look into:
|
@michalpaszkowski let me know when I can merge this (I see no warning on my local build, neither on the CI) |
// adding an unreachable merge block. | ||
if (Merge == nullptr) { | ||
BranchInst *Br = cast<BranchInst>(BB.getTerminator()); | ||
assert(cast<BranchInst>(BB.getTerminator())->isUnconditional()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
After loop unrolling or unswitching, we may have loops with SwitchInst
(instead of BranchInst
) in the loop header. Should we expect just BranchInst
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's a good point. And to be honest I'm not entirely sure right now.
I added an assert, explaining I made this assumption, but it might be something to revisit.
In addition to that, switches are a bit of a specific case I need to iterate on. Some scenarios are not completely covered.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, that's okay, thank you for clarifying!
@Keenuts Apologies for the delay! The patch looks good to me. I added a couple of minor comments (above), but if the pass would be extended in subsequent commits, please feel free to address these in the next pull request or reply later. Thank you for working on this, this is really exciting work! |
Thanks! No worries! |
This commit adds an initial SPIR-V structurizer. It leverages the previously merged passes, and the convergence region analysis to determine the correct merge and continue blocks for SPIR-V. The first part does a branch cleanup (simplifying switches, and legalizing them), then merge instructions are added to cycles, convergent and later divergent blocks. Then comes the important part: splitting critical edges, and making sure the divergent construct boundaries don't cross. - we split blocks with multiple headers into 2 blocks. - we split blocks that are a merge blocks for 2 or more constructs: SPIR-V spec disallow a merge block to be shared by 2 loop/switch/condition construct. - we split merge & continue blocks: SPIR-V spec disallow a basic block to be both a continue block, and a merge block. - we remove superfluous headers: when a header doesn't bring more info than the parent on the divergence state, it must be removed. This PR leverages the merged SPIR-V simulator for testing, as long as spirv-val. For now, most DXC structurization tests are passing. The unsupported ones are either caused by unsupported features like switches on boolean types, or switches in region exits, because the MergeExit pass doesn't support those yet (there is a FIXME). This PR is quite large, and the addition not trivial, so I tried to keep it simple. E.G: as soon as the CFG changes, I recompute the dominator trees and other structures instead of updating them.
Signed-off-by: Nathan Gauër <[email protected]>
Signed-off-by: Nathan Gauër <[email protected]>
Signed-off-by: Nathan Gauër <[email protected]>
Signed-off-by: Nathan Gauër <[email protected]>
Signed-off-by: Nathan Gauër <[email protected]>
This reverts commit 35efc27.
Signed-off-by: Nathan Gauër <[email protected]>
Basic block ordering was not completely correct, as it could end up placing a post-dominating block before the block it post-dominates. Using the partial ordering is more correct, but this was a bit unstable due to the unspecified order when 2 blocks shared the same rank. Fixed the iterator to be stable. Signed-off-by: Nathan Gauër <[email protected]>
The partial ordering visitor is used for block sorting and structurizing, and the sorting is required for both structurizing and normal SPIR-V emission. The reason why we sort after structurizing, then all target in prepare functions is that LLVM requires the MIR to be correct after each pass. So if we don't sort after the structurizer, then we fail this requirement. But structurizing is only required for vulkan env, meaning we also need a path to sort non-vulkan SPIR-V, as the sorting is also required in the general SPIR-V spec. Signed-off-by: Nathan Gauër <[email protected]>
All OpBranch should be marked as IsBarrier so MIR knows it can be used as the last instruction of a BB, and Op*Merge operands should be marked as "unknown" as the backend transforms those to spirv labels, which meaning the MIR verified doesn't interpret as BB labels. Signed-off-by: Nathan Gauër <[email protected]>
Signed-off-by: Nathan Gauër <[email protected]>
017395e
to
e00a2ba
Compare
rebased. Will merge tomorrow morning if CI is green. |
The uses of spirv-sim aren't gated by LLVM_INCLUDE_SPIRV_TOOLS_TESTS, so the substitution shouldn't be gated either. Fixes tests after llvm#107408
The uses of spirv-sim aren't gated by LLVM_INCLUDE_SPIRV_TOOLS_TESTS, so the substitution shouldn't be gated either. Fixes tests after #107408
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/146/builds/1200 Here is the relevant piece of the build log for the reference
|
This commit adds an initial SPIR-V structurizer.
It leverages the previously merged passes, and the convergence region analysis to determine the correct merge and continue blocks for SPIR-V.
The first part does a branch cleanup (simplifying switches, and legalizing them), then merge instructions are added to cycles, convergent and later divergent blocks.
Then comes the important part: splitting critical edges, and making sure the divergent construct boundaries don't cross.
This PR leverages the merged SPIR-V simulator for testing, as long as spirv-val. For now, most DXC structurization tests are passing. The unsupported ones are either caused by unsupported features like switches on boolean types, or switches in region exits, because the MergeExit pass doesn't support those yet (there is a FIXME).
This PR is quite large, and the addition not trivial, so I tried to keep it simple. E.G: as soon as the CFG changes, I recompute the dominator trees and other structures instead of updating them.