diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h index 00b7b696eb4f9..246b58dbede5f 100644 --- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h +++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h @@ -19,6 +19,7 @@ #include "flang/Runtime/iostat-consts.h" #include "mlir/Dialect/Complex/IR/Complex.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/LLVMIR/NVVMDialect.h" #include "mlir/Dialect/Math/IR/Math.h" #include @@ -448,9 +449,8 @@ struct IntrinsicLibrary { llvm::ArrayRef args); fir::ExtendedValue genUnpack(mlir::Type, llvm::ArrayRef); fir::ExtendedValue genVerify(mlir::Type, llvm::ArrayRef); - mlir::Value genVoteAllSync(mlir::Type, llvm::ArrayRef); - mlir::Value genVoteAnySync(mlir::Type, llvm::ArrayRef); - mlir::Value genVoteBallotSync(mlir::Type, llvm::ArrayRef); + template + mlir::Value genVoteSync(mlir::Type, llvm::ArrayRef); /// Implement all conversion functions like DBLE, the first argument is /// the value to convert. There may be an additional KIND arguments that diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 702a55a49c953..eca88044501cd 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -48,7 +48,6 @@ #include "mlir/Dialect/Complex/IR/Complex.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/LLVMIR/LLVMTypes.h" -#include "mlir/Dialect/LLVMIR/NVVMDialect.h" #include "mlir/Dialect/Math/IR/Math.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" #include "llvm/Support/CommandLine.h" @@ -262,7 +261,7 @@ static constexpr IntrinsicHandler handlers[]{ {{{"mask", asAddr}, {"dim", asValue}}}, /*isElemental=*/false}, {"all_sync", - &I::genVoteAllSync, + &I::genVoteSync, {{{"mask", asValue}, {"pred", asValue}}}, /*isElemental=*/false}, {"allocated", @@ -275,7 +274,7 @@ static constexpr IntrinsicHandler handlers[]{ {{{"mask", asAddr}, {"dim", asValue}}}, /*isElemental=*/false}, {"any_sync", - &I::genVoteAnySync, + &I::genVoteSync, {{{"mask", asValue}, {"pred", asValue}}}, /*isElemental=*/false}, {"asind", &I::genAsind}, @@ -341,7 +340,7 @@ static constexpr IntrinsicHandler handlers[]{ {"atomicsubl", &I::genAtomicSub, {{{"a", asAddr}, {"v", asValue}}}, false}, {"atomicxori", &I::genAtomicXor, {{{"a", asAddr}, {"v", asValue}}}, false}, {"ballot_sync", - &I::genVoteBallotSync, + &I::genVoteSync, {{{"mask", asValue}, {"pred", asValue}}}, /*isElemental=*/false}, {"bessel_jn", @@ -6579,46 +6578,20 @@ IntrinsicLibrary::genMatchAllSync(mlir::Type resultType, return value; } -static mlir::Value genVoteSync(fir::FirOpBuilder &builder, mlir::Location loc, - llvm::StringRef funcName, mlir::Type resTy, - llvm::ArrayRef args) { - mlir::MLIRContext *context = builder.getContext(); - mlir::Type i32Ty = builder.getI32Type(); - mlir::Type i1Ty = builder.getI1Type(); - mlir::FunctionType ftype = - mlir::FunctionType::get(context, {i32Ty, i1Ty}, {resTy}); - auto funcOp = builder.createFunction(loc, funcName, ftype); - llvm::SmallVector filteredArgs; - return builder.create(loc, funcOp, args).getResult(0); -} - -// ALL_SYNC -mlir::Value IntrinsicLibrary::genVoteAllSync(mlir::Type resultType, - llvm::ArrayRef args) { - assert(args.size() == 2); - return genVoteSync(builder, loc, "llvm.nvvm.vote.all.sync", - builder.getI1Type(), args); -} - -// ANY_SYNC -mlir::Value IntrinsicLibrary::genVoteAnySync(mlir::Type resultType, - llvm::ArrayRef args) { - assert(args.size() == 2); - return genVoteSync(builder, loc, "llvm.nvvm.vote.any.sync", - builder.getI1Type(), args); -} - -// BALLOT_SYNC -mlir::Value -IntrinsicLibrary::genVoteBallotSync(mlir::Type resultType, - llvm::ArrayRef args) { +// ALL_SYNC, ANY_SYNC, BALLOT_SYNC +template +mlir::Value IntrinsicLibrary::genVoteSync(mlir::Type resultType, + llvm::ArrayRef args) { assert(args.size() == 2); mlir::Value arg1 = builder.create(loc, builder.getI1Type(), args[1]); - return builder - .create(loc, resultType, args[0], arg1, - mlir::NVVM::VoteSyncKind::ballot) - .getResult(); + mlir::Type resTy = kind == mlir::NVVM::VoteSyncKind::ballot + ? builder.getI32Type() + : builder.getI1Type(); + auto voteRes = + builder.create(loc, resTy, args[0], arg1, kind) + .getResult(); + return builder.create(loc, resultType, voteRes); } // MATCH_ANY_SYNC diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf index 7d6d920dfb2e8..8f5e6dd36da4e 100644 --- a/flang/test/Lower/CUDA/cuda-device-proc.cuf +++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf @@ -301,8 +301,8 @@ attributes(device) subroutine testVote() end subroutine ! CHECK-LABEL: func.func @_QPtestvote() -! CHECK: fir.call @llvm.nvvm.vote.all.sync -! CHECK: fir.call @llvm.nvvm.vote.any.sync +! CHECK: %{{.*}} = nvvm.vote.sync all %{{.*}}, %{{.*}} -> i1 +! CHECK: %{{.*}} = nvvm.vote.sync any %{{.*}}, %{{.*}} -> i1 ! CHECK: %{{.*}} = nvvm.vote.sync ballot %{{.*}}, %{{.*}} -> i32 ! CHECK-DAG: func.func private @__ldca_i4x4_(!fir.ref>, !fir.ref>)