diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 99f37aa5c286b..99bf6171c3d15 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -33,6 +33,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" @@ -63,6 +64,12 @@ static cl::opt UseShortPointersOpt( "Use 32-bit pointers for accessing const/local/shared address spaces."), cl::init(false), cl::Hidden); +static cl::opt + UseIPSCCPO0("use-ipsccp-nvptx-O0", + cl::desc("Use IPSCCP pass at O0 as a temp solution for " + "nvvm-reflect dead-code errors."), + cl::init(true), cl::Hidden); + namespace llvm { void initializeLocalAccessorToSharedMemoryPass(PassRegistry &); @@ -327,6 +334,10 @@ void NVPTXPassConfig::addIRPasses() { const NVPTXSubtarget &ST = *getTM().getSubtargetImpl(); addPass(createNVVMReflectPass(ST.getSmVersion())); + if (getOptLevel() == CodeGenOpt::None && UseIPSCCPO0) { + addPass(createIPSCCPPass()); + } + // FIXME: should the target triple check be done by the pass itself? // See createNVPTXLowerArgsPass as an example if (getTM().getTargetTriple().getOS() == Triple::CUDA) { diff --git a/llvm/test/CodeGen/NVPTX/param-load-store.ll b/llvm/test/CodeGen/NVPTX/param-load-store.ll index 099a26afb940b..c04dd8a5eb54a 100644 --- a/llvm/test/CodeGen/NVPTX/param-load-store.ll +++ b/llvm/test/CodeGen/NVPTX/param-load-store.ll @@ -1,5 +1,5 @@ ; Verifies correctness of load/store of parameters and return values. 
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs -use-ipsccp-nvptx-O0=false | FileCheck -allow-deprecated-dag-overlap %s %s_i1 = type { i1 } %s_i8 = type { i8 } diff --git a/sycl/doc/GetStartedGuide.md b/sycl/doc/GetStartedGuide.md index 1f53b9f4697c8..1c1ad8087e302 100644 --- a/sycl/doc/GetStartedGuide.md +++ b/sycl/doc/GetStartedGuide.md @@ -825,6 +825,18 @@ which contains all the symbols required. significantly slower but matches the default precision used by `nvcc`, and this `clang++` flag is equivalent to the `nvcc` `-prec-sqrt` flag, except that it defaults to `false`. +* No Opt (O0) uses the IPSCCP compiler pass by default, although the IPSCCP pass + can be switched off at O0 using the `-mllvm -use-ipsccp-nvptx-O0=false` flag at + the user's discretion. + The reason that the IPSCCP pass is used by default even at O0 is that there is + currently an unresolved issue with the nvvm-reflect compiler pass: this pass is + used to pick the correct branches depending on the SM version, which can be + optionally specified by the `--cuda-gpu-arch` flag. + If the arch flag is not specified by the user, the default value, SM 50, is used. + Without the execution of the IPSCCP pass at -O0 when using a low SM version, + dead instructions which require a higher SM version can remain. Since + corresponding issues occur in other backends, future work will aim for a + universal solution to these issues. ### HIP back-end limitations