[flang][cuda] Use the nvvm.vote.sync op for all and any #134433

clementval · 2025-04-04T18:54:31Z

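NVVM operations are now available for the `all` and `any` vote kinds as well. Use the `nvvm.vote.sync` op for them, and consolidate the generation functions into a single templated function that handles all three vote sync kinds (`all`, `any`, and `ballot`).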

llvmbot · 2025-04-04T18:55:09Z

@llvm/pr-subscribers-flang-fir-hlfir

Author: Valentin Clement (バレンタインクレメン) (clementval)

Changes

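NVVM operations are now available for the `all` and `any` vote kinds as well. Use the `nvvm.vote.sync` op for them, and consolidate the generation functions into a single templated function that handles all three vote sync kinds (`all`, `any`, and `ballot`).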

Full diff: https://github.com/llvm/llvm-project/pull/134433.diff

3 Files Affected:

(modified) flang/include/flang/Optimizer/Builder/IntrinsicCall.h (+3-3)
(modified) flang/lib/Optimizer/Builder/IntrinsicCall.cpp (+14-41)
(modified) flang/test/Lower/CUDA/cuda-device-proc.cuf (+2-2)

diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index 00b7b696eb4f9..246b58dbede5f 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -19,6 +19,7 @@
 #include "flang/Runtime/iostat-consts.h"
 #include "mlir/Dialect/Complex/IR/Complex.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
 #include "mlir/Dialect/Math/IR/Math.h"
 #include <optional>
 
@@ -448,9 +449,8 @@ struct IntrinsicLibrary {
                                llvm::ArrayRef<fir::ExtendedValue> args);
   fir::ExtendedValue genUnpack(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
   fir::ExtendedValue genVerify(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
-  mlir::Value genVoteAllSync(mlir::Type, llvm::ArrayRef<mlir::Value>);
-  mlir::Value genVoteAnySync(mlir::Type, llvm::ArrayRef<mlir::Value>);
-  mlir::Value genVoteBallotSync(mlir::Type, llvm::ArrayRef<mlir::Value>);
+  template <mlir::NVVM::VoteSyncKind kind>
+  mlir::Value genVoteSync(mlir::Type, llvm::ArrayRef<mlir::Value>);
 
   /// Implement all conversion functions like DBLE, the first argument is
   /// the value to convert. There may be an additional KIND arguments that
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 702a55a49c953..eca88044501cd 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -48,7 +48,6 @@
 #include "mlir/Dialect/Complex/IR/Complex.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/LLVMIR/LLVMTypes.h"
-#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
 #include "mlir/Dialect/Math/IR/Math.h"
 #include "mlir/Dialect/Vector/IR/VectorOps.h"
 #include "llvm/Support/CommandLine.h"
@@ -262,7 +261,7 @@ static constexpr IntrinsicHandler handlers[]{
      {{{"mask", asAddr}, {"dim", asValue}}},
      /*isElemental=*/false},
     {"all_sync",
-     &I::genVoteAllSync,
+     &I::genVoteSync<mlir::NVVM::VoteSyncKind::all>,
      {{{"mask", asValue}, {"pred", asValue}}},
      /*isElemental=*/false},
     {"allocated",
@@ -275,7 +274,7 @@ static constexpr IntrinsicHandler handlers[]{
      {{{"mask", asAddr}, {"dim", asValue}}},
      /*isElemental=*/false},
     {"any_sync",
-     &I::genVoteAnySync,
+     &I::genVoteSync<mlir::NVVM::VoteSyncKind::any>,
      {{{"mask", asValue}, {"pred", asValue}}},
      /*isElemental=*/false},
     {"asind", &I::genAsind},
@@ -341,7 +340,7 @@ static constexpr IntrinsicHandler handlers[]{
     {"atomicsubl", &I::genAtomicSub, {{{"a", asAddr}, {"v", asValue}}}, false},
     {"atomicxori", &I::genAtomicXor, {{{"a", asAddr}, {"v", asValue}}}, false},
     {"ballot_sync",
-     &I::genVoteBallotSync,
+     &I::genVoteSync<mlir::NVVM::VoteSyncKind::ballot>,
      {{{"mask", asValue}, {"pred", asValue}}},
      /*isElemental=*/false},
     {"bessel_jn",
@@ -6579,46 +6578,20 @@ IntrinsicLibrary::genMatchAllSync(mlir::Type resultType,
   return value;
 }
 
-static mlir::Value genVoteSync(fir::FirOpBuilder &builder, mlir::Location loc,
-                               llvm::StringRef funcName, mlir::Type resTy,
-                               llvm::ArrayRef<mlir::Value> args) {
-  mlir::MLIRContext *context = builder.getContext();
-  mlir::Type i32Ty = builder.getI32Type();
-  mlir::Type i1Ty = builder.getI1Type();
-  mlir::FunctionType ftype =
-      mlir::FunctionType::get(context, {i32Ty, i1Ty}, {resTy});
-  auto funcOp = builder.createFunction(loc, funcName, ftype);
-  llvm::SmallVector<mlir::Value> filteredArgs;
-  return builder.create<fir::CallOp>(loc, funcOp, args).getResult(0);
-}
-
-// ALL_SYNC
-mlir::Value IntrinsicLibrary::genVoteAllSync(mlir::Type resultType,
-                                             llvm::ArrayRef<mlir::Value> args) {
-  assert(args.size() == 2);
-  return genVoteSync(builder, loc, "llvm.nvvm.vote.all.sync",
-                     builder.getI1Type(), args);
-}
-
-// ANY_SYNC
-mlir::Value IntrinsicLibrary::genVoteAnySync(mlir::Type resultType,
-                                             llvm::ArrayRef<mlir::Value> args) {
-  assert(args.size() == 2);
-  return genVoteSync(builder, loc, "llvm.nvvm.vote.any.sync",
-                     builder.getI1Type(), args);
-}
-
-// BALLOT_SYNC
-mlir::Value
-IntrinsicLibrary::genVoteBallotSync(mlir::Type resultType,
-                                    llvm::ArrayRef<mlir::Value> args) {
+// ALL_SYNC, ANY_SYNC, BALLOT_SYNC
+template <mlir::NVVM::VoteSyncKind kind>
+mlir::Value IntrinsicLibrary::genVoteSync(mlir::Type resultType,
+                                          llvm::ArrayRef<mlir::Value> args) {
   assert(args.size() == 2);
   mlir::Value arg1 =
       builder.create<fir::ConvertOp>(loc, builder.getI1Type(), args[1]);
-  return builder
-      .create<mlir::NVVM::VoteSyncOp>(loc, resultType, args[0], arg1,
-                                      mlir::NVVM::VoteSyncKind::ballot)
-      .getResult();
+  mlir::Type resTy = kind == mlir::NVVM::VoteSyncKind::ballot
+                         ? builder.getI32Type()
+                         : builder.getI1Type();
+  auto voteRes =
+      builder.create<mlir::NVVM::VoteSyncOp>(loc, resTy, args[0], arg1, kind)
+          .getResult();
+  return builder.create<fir::ConvertOp>(loc, resultType, voteRes);
 }
 
 // MATCH_ANY_SYNC
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index 7d6d920dfb2e8..8f5e6dd36da4e 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -301,8 +301,8 @@ attributes(device) subroutine testVote()
 end subroutine
 
 ! CHECK-LABEL: func.func @_QPtestvote()
-! CHECK: fir.call @llvm.nvvm.vote.all.sync
-! CHECK: fir.call @llvm.nvvm.vote.any.sync
+! CHECK: %{{.*}} = nvvm.vote.sync all %{{.*}}, %{{.*}} -> i1
+! CHECK: %{{.*}} = nvvm.vote.sync any %{{.*}}, %{{.*}} -> i1
 ! CHECK: %{{.*}} = nvvm.vote.sync ballot %{{.*}}, %{{.*}} -> i32
 
 ! CHECK-DAG: func.func private @__ldca_i4x4_(!fir.ref<!fir.array<4xi32>>, !fir.ref<!fir.array<4xi32>>)

[flang][cuda] Use the nvvm.vote.sync op for all and any

2367c0a

clementval requested a review from wangzpgi April 4, 2025 18:54

llvmbot added the `flang` (Flang issues not falling into any other category) and `flang:fir-hlfir` labels on Apr 4, 2025

wangzpgi approved these changes Apr 4, 2025

View reviewed changes

clementval merged commit 24dfcc0 into llvm:main Apr 4, 2025
14 checks passed

clementval deleted the cuf_use_vote_op branch April 4, 2025 20:45

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[flang][cuda] Use the nvvm.vote.sync op for all and any #134433

[flang][cuda] Use the nvvm.vote.sync op for all and any #134433

clementval commented Apr 4, 2025

llvmbot commented Apr 4, 2025

[flang][cuda] Use the nvvm.vote.sync op for all and any #134433

[flang][cuda] Use the nvvm.vote.sync op for all and any #134433

Conversation

clementval commented Apr 4, 2025

llvmbot commented Apr 4, 2025