Skip to content

Commit d7f59c8

Browse files
authored
[mlir] Lower math dialect later in gpu-lower-to-nvvm-pipeline (#81489)
This PR moves the lowering of the math dialect later in the pipeline, because the math dialect is lowered correctly by createConvertGpuOpsToNVVMOps for the GPU target, and that pass needs to run first. Reland #78556
1 parent 785eddd commit d7f59c8

File tree

2 files changed

+31
-1
lines changed

2 files changed

+31
-1
lines changed

mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,6 @@ void buildCommonPassPipeline(
5151
pm.addPass(createConvertVectorToSCFPass());
5252
pm.addPass(createConvertSCFToCFPass());
5353
pm.addPass(createConvertNVVMToLLVMPass());
54-
pm.addPass(createConvertMathToLLVMPass());
5554
pm.addPass(createConvertFuncToLLVMPass());
5655
pm.addPass(memref::createExpandStridedMetadataPass());
5756

@@ -98,6 +97,7 @@ void buildHostPostPipeline(OpPassManager &pm,
9897
GpuModuleToBinaryPassOptions gpuModuleToBinaryPassOptions;
9998
gpuModuleToBinaryPassOptions.compilationTarget = options.cubinFormat;
10099
pm.addPass(createGpuModuleToBinaryPass(gpuModuleToBinaryPassOptions));
100+
pm.addPass(createConvertMathToLLVMPass());
101101
pm.addPass(createCanonicalizerPass());
102102
pm.addPass(createCSEPass());
103103
pm.addPass(createReconcileUnrealizedCastsPass());
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// REQUIRES: host-supports-nvptx
2+
// RUN: mlir-opt %s \
3+
// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=isa" \
4+
// RUN: | FileCheck %s
5+
6+
// RUN: mlir-opt %s \
7+
// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=isa" \
8+
// RUN: --mlir-print-ir-after=convert-gpu-to-nvvm 2>&1 \
9+
// RUN: | FileCheck %s --check-prefixes=CHECK-NVVM
10+
11+
// This test checks whether the GPU region is compiled correctly to PTX by
12+
// pipeline. It doesn't test IR for GPU side, but it can test Host IR and
13+
// generated PTX.
14+
15+
// CHECK-LABEL: llvm.func @test_math(%arg0: f32) {
16+
func.func @test_math(%arg0 : f32) {
17+
%c2 = arith.constant 2 : index
18+
%c1 = arith.constant 1 : index
19+
// CHECK: gpu.launch_func @test_math_kernel::@test_math_kernel
20+
// CHECK: gpu.binary @test_math_kernel [#gpu.object<#nvvm.target
21+
gpu.launch
22+
blocks(%0, %1, %2) in (%3 = %c1, %4 = %c1, %5 = %c1)
23+
threads(%6, %7, %8) in (%9 = %c2, %10 = %c1, %11 = %c1) {
24+
// CHECK-NVVM: __nv_expf
25+
%s1 = math.exp %arg0 : f32
26+
gpu.printf "%f" %s1 : f32
27+
gpu.terminator
28+
}
29+
return
30+
}

0 commit comments

Comments
 (0)