Skip to content

Commit d7f59c8

Browse files
authored
[mlir] Lower math dialect later in gpu-lower-to-nvvm-pipeline (#81489)
This PR moves the lowering of the math dialect later in the pipeline, because the math dialect is lowered correctly by createConvertGpuOpsToNVVMOps for the GPU target, and that pass needs to run first. Reland #78556
1 parent 785eddd commit d7f59c8

File tree

2 files changed

+31
-1
lines changed

2 files changed

+31
-1
lines changed

mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,6 @@ void buildCommonPassPipeline(
5151
pm.addPass(createConvertVectorToSCFPass());
5252
pm.addPass(createConvertSCFToCFPass());
5353
pm.addPass(createConvertNVVMToLLVMPass());
54-
pm.addPass(createConvertMathToLLVMPass());
5554
pm.addPass(createConvertFuncToLLVMPass());
5655
pm.addPass(memref::createExpandStridedMetadataPass());
5756

@@ -98,6 +97,7 @@ void buildHostPostPipeline(OpPassManager &pm,
9897
GpuModuleToBinaryPassOptions gpuModuleToBinaryPassOptions;
9998
gpuModuleToBinaryPassOptions.compilationTarget = options.cubinFormat;
10099
pm.addPass(createGpuModuleToBinaryPass(gpuModuleToBinaryPassOptions));
100+
pm.addPass(createConvertMathToLLVMPass());
101101
pm.addPass(createCanonicalizerPass());
102102
pm.addPass(createCSEPass());
103103
pm.addPass(createReconcileUnrealizedCastsPass());
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// REQUIRES: host-supports-nvptx
2+
// RUN: mlir-opt %s \
3+
// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=isa" \
4+
// RUN: | FileCheck %s
5+
6+
// RUN: mlir-opt %s \
7+
// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=isa" \
8+
// RUN: --mlir-print-ir-after=convert-gpu-to-nvvm 2>&1 \
9+
// RUN: | FileCheck %s --check-prefixes=CHECK-NVVM
10+
11+
// This test checks whether the GPU region is compiled correctly to PTX by
12+
// pipeline. It doesn't test IR for GPU side, but it can test Host IR and
13+
// generated PTX.
14+
15+
// CHECK-LABEL: llvm.func @test_math(%arg0: f32) {
16+
func.func @test_math(%arg0 : f32) {
17+
%c2 = arith.constant 2 : index
18+
%c1 = arith.constant 1 : index
19+
// CHECK: gpu.launch_func @test_math_kernel::@test_math_kernel
20+
// CHECK: gpu.binary @test_math_kernel [#gpu.object<#nvvm.target
21+
gpu.launch
22+
blocks(%0, %1, %2) in (%3 = %c1, %4 = %c1, %5 = %c1)
23+
threads(%6, %7, %8) in (%9 = %c2, %10 = %c1, %11 = %c1) {
24+
// CHECK-NVVM: __nv_expf
25+
%s1 = math.exp %arg0 : f32
26+
gpu.printf "%f" %s1 : f32
27+
gpu.terminator
28+
}
29+
return
30+
}

0 commit comments

Comments
 (0)