Skip to content

Commit 46a6600

Browse files
authored
[SYCL][CUDA][HIP] Propagate -foffload-fp32-prec-sqrt (#17044)
The -foffload-fp32-prec-sqrt and -fsycl-fp32-prec-sqrt options serve the same purpose and should be merged together. This patch makes -foffload-fp32-prec-sqrt pass the appropriate options to the CUDA and HIP compilers, as -fsycl-fp32-prec-sqrt already does, so that the two options can be merged in the future. --------- Signed-off-by: Sidorov, Dmitry <[email protected]>
1 parent 928ed3e commit 46a6600

File tree

5 files changed

+28
-3
lines changed

5 files changed

+28
-3
lines changed

clang/lib/Driver/ToolChains/AMDGPU.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1056,7 +1056,8 @@ llvm::SmallVector<std::string, 12> ROCMToolChain::getCommonDeviceLibNames(
10561056
bool CorrectSqrt = false;
10571057
if (DeviceOffloadingKind == Action::OFK_SYCL) {
10581058
// When using SYCL, sqrt is only correctly rounded if the flag is specified
1059-
CorrectSqrt = DriverArgs.hasArg(options::OPT_fsycl_fp32_prec_sqrt);
1059+
CorrectSqrt = DriverArgs.hasArg(options::OPT_fsycl_fp32_prec_sqrt) ||
1060+
DriverArgs.hasArg(options::OPT_foffload_fp32_prec_sqrt);
10601061
} else
10611062
CorrectSqrt = DriverArgs.hasFlag(
10621063
options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,

clang/lib/Driver/ToolChains/Cuda.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -965,7 +965,8 @@ void CudaToolChain::addClangTargetOptions(
965965
if (DeviceOffloadingKind == Action::OFK_SYCL) {
966966
SYCLInstallation.addSYCLIncludeArgs(DriverArgs, CC1Args);
967967

968-
if (DriverArgs.hasArg(options::OPT_fsycl_fp32_prec_sqrt))
968+
if (DriverArgs.hasArg(options::OPT_fsycl_fp32_prec_sqrt) ||
969+
DriverArgs.hasArg(options::OPT_foffload_fp32_prec_sqrt))
969970
CC1Args.push_back("-fcuda-prec-sqrt");
970971

971972
bool FastRelaxedMath = DriverArgs.hasFlag(

clang/test/Driver/sycl-amdgcn-sqrt.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,14 @@
99
// RUN: %s \
1010
// RUN: 2>&1 | FileCheck --check-prefix=CHECK-CORRECT %s
1111

12+
// RUN: %clang -### \
13+
// RUN: -fsycl -fsycl-targets=amdgcn-amd-amdhsa -fno-sycl-libspirv \
14+
// RUN: -Xsycl-target-backend --offload-arch=gfx900 \
15+
// RUN: -foffload-fp32-prec-sqrt \
16+
// RUN: --rocm-path=%S/Inputs/rocm \
17+
// RUN: %s \
18+
// RUN: 2>&1 | FileCheck --check-prefix=CHECK-CORRECT %s
19+
1220
// CHECK-CORRECT: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/oclc_correctly_rounded_sqrt_on.bc"
1321

1422
// RUN: %clang -### \
@@ -28,6 +36,14 @@
2836
// RUN: %s \
2937
// RUN: 2>&1 | FileCheck --check-prefix=CHECK-CONFLICT %s
3038

39+
// RUN: %clang -### \
40+
// RUN: -fsycl -fsycl-targets=amdgcn-amd-amdhsa -fno-sycl-libspirv \
41+
// RUN: -Xsycl-target-backend --offload-arch=gfx900 \
42+
// RUN: -foffload-fp32-prec-sqrt -fno-hip-fp32-correctly-rounded-divide-sqrt \
43+
// RUN: --rocm-path=%S/Inputs/rocm \
44+
// RUN: %s \
45+
// RUN: 2>&1 | FileCheck --check-prefix=CHECK-CONFLICT %s
46+
3147
// CHECK-CONFLICT: warning: argument unused during compilation: '-fno-hip-fp32-correctly-rounded-divide-sqrt'
3248
// CHECK-CONFLICT: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/oclc_correctly_rounded_sqrt_on.bc"
3349

clang/test/Driver/sycl-nvptx-sqrt.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,12 @@
66
// RUN: %s \
77
// RUN: 2>&1 | FileCheck --check-prefix=CHECK-CORRECT %s
88

9+
// RUN: %clang -### -nocudalib \
10+
// RUN: -fsycl -fsycl-targets=nvptx64-nvidia-cuda \
11+
// RUN: -foffload-fp32-prec-sqrt \
12+
// RUN: %s \
13+
// RUN: 2>&1 | FileCheck --check-prefix=CHECK-CORRECT %s
14+
915
// CHECK-CORRECT: "-fcuda-prec-sqrt"
1016

1117
// RUN: %clang -### -nocudalib \

llvm/docs/NVPTXUsage.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1049,8 +1049,9 @@ The following sets the ftz flag to 1, and the precise sqrt flag to 1.
10491049

10501050
.. code-block:: llvm
10511051
1052-
!llvm.module.flags = !{!0}
1052+
!llvm.module.flags = !{!0, !1}
10531053
!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}
1054+
!1 = !{i32 4, !"nvvm-reflect-prec-sqrt", i32 1}
10541055
10551056
(``i32 4`` indicates that the value set here overrides the value in another
10561057
module we link with. See the `LangRef <LangRef.html#module-flags-metadata>`

0 commit comments

Comments
 (0)