Skip to content

Commit 1d84c99

Browse files
authored
[SYCL][CUDA][libclc] Add support for atomic fp exchange and compare exchange (#5937)
Adds support for float and double exchange and compare exchange atomic operations in CUDA libclc. Currently these are used only in `sycl::atomic`, which has no tests in the test suite, so I am not adding any for this change either. Closes #5096.
1 parent 66e207e commit 1d84c99

File tree

8 files changed

+1048
-6
lines changed

8 files changed

+1048
-6
lines changed

clang/include/clang/Basic/BuiltinsNVPTX.def

Lines changed: 144 additions & 0 deletions
Large diffs are not rendered by default.

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 218 additions & 0 deletions
Large diffs are not rendered by default.

clang/test/CodeGen/builtins-nvptx.c

Lines changed: 236 additions & 0 deletions
Large diffs are not rendered by default.

libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ __CLC_NVVM_ATOMIC_CAS(int, i, int, i, cas, CompareExchange)
8787
__CLC_NVVM_ATOMIC_CAS(long, l, long, l, cas, CompareExchange)
8888
__CLC_NVVM_ATOMIC_CAS(unsigned int, j, int, i, cas, CompareExchange)
8989
__CLC_NVVM_ATOMIC_CAS(unsigned long, m, long, l, cas, CompareExchange)
90+
__CLC_NVVM_ATOMIC_CAS(float, f, float, f, cas, CompareExchange)
91+
__CLC_NVVM_ATOMIC_CAS(double, d, double, d, cas, CompareExchange)
9092

9193
#undef __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER
9294
#undef __CLC_NVVM_ATOMIC_CAS

libclc/ptx-nvidiacl/libspirv/atomic/atomic_xchg.cl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ __CLC_NVVM_ATOMIC(int, i, int, i, xchg, _Z22__spirv_AtomicExchange)
1414
__CLC_NVVM_ATOMIC(long, l, long, l, xchg, _Z22__spirv_AtomicExchange)
1515
__CLC_NVVM_ATOMIC(unsigned int, j, int, i, xchg, _Z22__spirv_AtomicExchange)
1616
__CLC_NVVM_ATOMIC(unsigned long, m, long, l, xchg, _Z22__spirv_AtomicExchange)
17+
__CLC_NVVM_ATOMIC(float, f, float, f, xchg, _Z22__spirv_AtomicExchange)
18+
__CLC_NVVM_ATOMIC(double, d, double, d, xchg, _Z22__spirv_AtomicExchange)
1719

1820
#undef __CLC_NVVM_ATOMIC_TYPES
1921
#undef __CLC_NVVM_ATOMIC

llvm/include/llvm/IR/IntrinsicsNVVM.td

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1375,50 +1375,54 @@ let TargetPrefix = "nvvm" in {
13751375
defm _f: PTXAtomicWithScope2_sem<llvm_anyfloat_ty>;
13761376
defm _i: PTXAtomicWithScope2_sem<llvm_anyint_ty>;
13771377
}
1378+
multiclass PTXAtomicWithScope3_fi {
1379+
defm _f: PTXAtomicWithScope3_sem<llvm_anyfloat_ty>;
1380+
defm _i: PTXAtomicWithScope3_sem<llvm_anyint_ty>;
1381+
}
13781382
defm int_nvvm_ld_gen : PTXLdWithScope_fi;
13791383
defm int_nvvm_st_gen : PTXStWithScope_fi;
13801384
defm int_nvvm_atomic_add_gen : PTXAtomicWithScope2_fi;
13811385
defm int_nvvm_atomic_inc_gen_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
13821386
defm int_nvvm_atomic_dec_gen_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
1383-
defm int_nvvm_atomic_exch_gen_i: PTXAtomicWithScope2_sem<llvm_anyint_ty>;
1387+
defm int_nvvm_atomic_exch_gen: PTXAtomicWithScope2_fi;
13841388
defm int_nvvm_atomic_xor_gen_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
13851389
defm int_nvvm_atomic_max_gen_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
13861390
defm int_nvvm_atomic_max_gen_ui: PTXAtomicWithScope2_sem<llvm_anyint_ty>;
13871391
defm int_nvvm_atomic_min_gen_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
13881392
defm int_nvvm_atomic_min_gen_ui: PTXAtomicWithScope2_sem<llvm_anyint_ty>;
13891393
defm int_nvvm_atomic_or_gen_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
13901394
defm int_nvvm_atomic_and_gen_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
1391-
defm int_nvvm_atomic_cas_gen_i : PTXAtomicWithScope3_sem<llvm_anyint_ty>;
1395+
defm int_nvvm_atomic_cas_gen : PTXAtomicWithScope3_fi;
13921396

13931397
defm int_nvvm_ld_shared : PTXLdWithScope_fi;
13941398
defm int_nvvm_st_shared : PTXStWithScope_fi;
13951399
defm int_nvvm_atomic_add_shared : PTXAtomicWithScope2_fi;
13961400
defm int_nvvm_atomic_inc_shared_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
13971401
defm int_nvvm_atomic_dec_shared_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
1398-
defm int_nvvm_atomic_exch_shared_i: PTXAtomicWithScope2_sem<llvm_anyint_ty>;
1402+
defm int_nvvm_atomic_exch_shared: PTXAtomicWithScope2_fi;
13991403
defm int_nvvm_atomic_xor_shared_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
14001404
defm int_nvvm_atomic_max_shared_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
14011405
defm int_nvvm_atomic_max_shared_ui: PTXAtomicWithScope2_sem<llvm_anyint_ty>;
14021406
defm int_nvvm_atomic_min_shared_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
14031407
defm int_nvvm_atomic_min_shared_ui: PTXAtomicWithScope2_sem<llvm_anyint_ty>;
14041408
defm int_nvvm_atomic_or_shared_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
14051409
defm int_nvvm_atomic_and_shared_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
1406-
defm int_nvvm_atomic_cas_shared_i : PTXAtomicWithScope3_sem<llvm_anyint_ty>;
1410+
defm int_nvvm_atomic_cas_shared : PTXAtomicWithScope3_fi;
14071411

14081412
defm int_nvvm_ld_global : PTXLdWithScope_fi;
14091413
defm int_nvvm_st_global : PTXStWithScope_fi;
14101414
defm int_nvvm_atomic_add_global : PTXAtomicWithScope2_fi;
14111415
defm int_nvvm_atomic_inc_global_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
14121416
defm int_nvvm_atomic_dec_global_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
1413-
defm int_nvvm_atomic_exch_global_i: PTXAtomicWithScope2_sem<llvm_anyint_ty>;
1417+
defm int_nvvm_atomic_exch_global: PTXAtomicWithScope2_fi;
14141418
defm int_nvvm_atomic_xor_global_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
14151419
defm int_nvvm_atomic_max_global_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
14161420
defm int_nvvm_atomic_max_global_ui: PTXAtomicWithScope2_sem<llvm_anyint_ty>;
14171421
defm int_nvvm_atomic_min_global_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
14181422
defm int_nvvm_atomic_min_global_ui: PTXAtomicWithScope2_sem<llvm_anyint_ty>;
14191423
defm int_nvvm_atomic_or_global_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
14201424
defm int_nvvm_atomic_and_global_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
1421-
defm int_nvvm_atomic_cas_global_i : PTXAtomicWithScope3_sem<llvm_anyint_ty>;
1425+
defm int_nvvm_atomic_cas_global : PTXAtomicWithScope3_fi;
14221426

14231427
// Bar.Sync
14241428

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2264,6 +2264,8 @@ multiclass ATOM2_bitwise_impl<string OpStr> {
22642264
multiclass ATOM2_exch_impl<string OpStr> {
22652265
defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
22662266
defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
2267+
defm _f32 : ATOM2S_impl<OpStr, "f", "b32", Float32Regs, f32imm, fpimm, f32, []>;
2268+
defm _f64 : ATOM2S_impl<OpStr, "f", "b64", Float64Regs, f64imm, fpimm, f64, []>;
22672269
}
22682270

22692271
// atom.{min,max}
@@ -2286,6 +2288,8 @@ multiclass ATOM2_incdec_impl<string OpStr> {
22862288
multiclass ATOM3_cas_impl<string OpStr> {
22872289
defm _b32 : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
22882290
defm _b64 : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
2291+
defm _f32 : ATOM3S_impl<OpStr, "f", "b32", Float32Regs, f32imm, fpimm, f32, []>;
2292+
defm _f64 : ATOM3S_impl<OpStr, "f", "b64", Float64Regs, f64imm, fpimm, f64, []>;
22892293
}
22902294

22912295
defm INT_PTX_LD : ATOM_ld_impl;

0 commit comments

Comments
 (0)