Skip to content

[SYCL][CUDA][libclc] Add support for atomic fp exchange and compare exchange #5937

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 144 additions & 0 deletions clang/include/clang/Basic/BuiltinsNVPTX.def

Large diffs are not rendered by default.

218 changes: 218 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp

Large diffs are not rendered by default.

236 changes: 236 additions & 0 deletions clang/test/CodeGen/builtins-nvptx.c

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ __CLC_NVVM_ATOMIC_CAS(int, i, int, i, cas, CompareExchange)
// One __CLC_NVVM_ATOMIC_CAS instantiation per operand type of CompareExchange.
// The unsigned rows pass the signed type/letter (int, i / long, l) as the
// second pair, while the float and double rows pass the same type and letter
// for both pairs.
// NOTE(review): the macro is defined outside this excerpt; the first pair
// presumably names the public type and its mangling letter, the second pair
// the type and letter used for the underlying builtin -- confirm against the
// macro definition.
__CLC_NVVM_ATOMIC_CAS(long, l, long, l, cas, CompareExchange)
__CLC_NVVM_ATOMIC_CAS(unsigned int, j, int, i, cas, CompareExchange)
__CLC_NVVM_ATOMIC_CAS(unsigned long, m, long, l, cas, CompareExchange)
__CLC_NVVM_ATOMIC_CAS(float, f, float, f, cas, CompareExchange)
__CLC_NVVM_ATOMIC_CAS(double, d, double, d, cas, CompareExchange)

#undef __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER
#undef __CLC_NVVM_ATOMIC_CAS
Expand Down
2 changes: 2 additions & 0 deletions libclc/ptx-nvidiacl/libspirv/atomic/atomic_xchg.cl
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ __CLC_NVVM_ATOMIC(int, i, int, i, xchg, _Z22__spirv_AtomicExchange)
// One __CLC_NVVM_ATOMIC instantiation per operand type, all using the `xchg`
// operation and the `_Z22__spirv_AtomicExchange` name prefix.
// The unsigned rows pass the signed type/letter (int, i / long, l) as the
// second pair, while the float and double rows pass the same type and letter
// for both pairs.
// NOTE(review): the macro is defined outside this excerpt; the meaning of each
// argument position should be confirmed against the macro definition.
__CLC_NVVM_ATOMIC(long, l, long, l, xchg, _Z22__spirv_AtomicExchange)
__CLC_NVVM_ATOMIC(unsigned int, j, int, i, xchg, _Z22__spirv_AtomicExchange)
__CLC_NVVM_ATOMIC(unsigned long, m, long, l, xchg, _Z22__spirv_AtomicExchange)
__CLC_NVVM_ATOMIC(float, f, float, f, xchg, _Z22__spirv_AtomicExchange)
__CLC_NVVM_ATOMIC(double, d, double, d, xchg, _Z22__spirv_AtomicExchange)

#undef __CLC_NVVM_ATOMIC_TYPES
#undef __CLC_NVVM_ATOMIC
Expand Down
16 changes: 10 additions & 6 deletions llvm/include/llvm/IR/IntrinsicsNVVM.td
Original file line number Diff line number Diff line change
Expand Up @@ -1375,50 +1375,54 @@ let TargetPrefix = "nvvm" in {
defm _f: PTXAtomicWithScope2_sem<llvm_anyfloat_ty>;
defm _i: PTXAtomicWithScope2_sem<llvm_anyint_ty>;
}
// Float-and-integer wrapper around PTXAtomicWithScope3_sem: instantiates it
// once with llvm_anyfloat_ty under the `_f` suffix and once with
// llvm_anyint_ty under the `_i` suffix, so a single `defm X : ...3_fi`
// yields both the X_f... and X_i... record families.
multiclass PTXAtomicWithScope3_fi {
defm _f: PTXAtomicWithScope3_sem<llvm_anyfloat_ty>;
defm _i: PTXAtomicWithScope3_sem<llvm_anyint_ty>;
}
// Load, store and atomic intrinsic families for the `gen` variants.
// add, exch and cas are declared through *_fi multiclasses; every other
// atomic here is declared only with llvm_anyint_ty.
// NOTE(review): this listing comes from a diff view whose +/- markers were
// stripped. The `exch_gen_i` and `cas_gen_i` lines look like the pre-change
// definitions that the `exch_gen` / `cas_gen` *_fi lines directly after them
// replace. PTXAtomicWithScope3_fi already emits an `_i` family, so keeping
// both would presumably define the `_i` records twice -- confirm against the
// actual file.
defm int_nvvm_ld_gen : PTXLdWithScope_fi;
defm int_nvvm_st_gen : PTXStWithScope_fi;
defm int_nvvm_atomic_add_gen : PTXAtomicWithScope2_fi;
defm int_nvvm_atomic_inc_gen_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_dec_gen_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_exch_gen_i: PTXAtomicWithScope2_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_exch_gen: PTXAtomicWithScope2_fi;
defm int_nvvm_atomic_xor_gen_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_max_gen_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_max_gen_ui: PTXAtomicWithScope2_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_min_gen_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_min_gen_ui: PTXAtomicWithScope2_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_or_gen_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_and_gen_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_cas_gen_i : PTXAtomicWithScope3_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_cas_gen : PTXAtomicWithScope3_fi;

// Load, store and atomic intrinsic families for the `shared` variants.
// add, exch and cas are declared through *_fi multiclasses; every other
// atomic here is declared only with llvm_anyint_ty.
// NOTE(review): this listing comes from a diff view whose +/- markers were
// stripped. The `exch_shared_i` and `cas_shared_i` lines look like the
// pre-change definitions that the `exch_shared` / `cas_shared` *_fi lines
// directly after them replace; keeping both would presumably define the `_i`
// records twice -- confirm against the actual file.
defm int_nvvm_ld_shared : PTXLdWithScope_fi;
defm int_nvvm_st_shared : PTXStWithScope_fi;
defm int_nvvm_atomic_add_shared : PTXAtomicWithScope2_fi;
defm int_nvvm_atomic_inc_shared_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_dec_shared_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_exch_shared_i: PTXAtomicWithScope2_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_exch_shared: PTXAtomicWithScope2_fi;
defm int_nvvm_atomic_xor_shared_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_max_shared_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_max_shared_ui: PTXAtomicWithScope2_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_min_shared_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_min_shared_ui: PTXAtomicWithScope2_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_or_shared_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_and_shared_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_cas_shared_i : PTXAtomicWithScope3_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_cas_shared : PTXAtomicWithScope3_fi;

// Load, store and atomic intrinsic families for the `global` variants.
// add, exch and cas are declared through *_fi multiclasses; every other
// atomic here is declared only with llvm_anyint_ty.
// NOTE(review): this listing comes from a diff view whose +/- markers were
// stripped. The `exch_global_i` and `cas_global_i` lines look like the
// pre-change definitions that the `exch_global` / `cas_global` *_fi lines
// directly after them replace; keeping both would presumably define the `_i`
// records twice -- confirm against the actual file.
defm int_nvvm_ld_global : PTXLdWithScope_fi;
defm int_nvvm_st_global : PTXStWithScope_fi;
defm int_nvvm_atomic_add_global : PTXAtomicWithScope2_fi;
defm int_nvvm_atomic_inc_global_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_dec_global_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_exch_global_i: PTXAtomicWithScope2_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_exch_global: PTXAtomicWithScope2_fi;
defm int_nvvm_atomic_xor_global_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_max_global_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_max_global_ui: PTXAtomicWithScope2_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_min_global_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_min_global_ui: PTXAtomicWithScope2_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_or_global_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_and_global_i : PTXAtomicWithScope2_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_cas_global_i : PTXAtomicWithScope3_sem<llvm_anyint_ty>;
defm int_nvvm_atomic_cas_global : PTXAtomicWithScope3_fi;

// Bar.Sync

Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
Original file line number Diff line number Diff line change
Expand Up @@ -2264,6 +2264,8 @@ multiclass ATOM2_bitwise_impl<string OpStr> {
// Exchange variants built on ATOM2S_impl, parameterized by the operation
// string OpStr. Four sub-families are produced:
//   _b32 / _b64 : Int32Regs / Int64Regs with integer immediates, tagged "i".
//   _f32 / _f64 : Float32Regs / Float64Regs with fpimm immediates, tagged "f".
// The float rows still pass "b32" / "b64" as the third argument, i.e. the
// same strings as the integer rows of matching width.
// NOTE(review): presumably the second argument selects the `_i` / `_f`
// intrinsic family and the third is the PTX type suffix (atom.exch being
// defined on bit-sized types) -- confirm against ATOM2S_impl and the PTX ISA.
multiclass ATOM2_exch_impl<string OpStr> {
defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
defm _f32 : ATOM2S_impl<OpStr, "f", "b32", Float32Regs, f32imm, fpimm, f32, []>;
defm _f64 : ATOM2S_impl<OpStr, "f", "b64", Float64Regs, f64imm, fpimm, f64, []>;
}

// atom.{min,max}
Expand All @@ -2286,6 +2288,8 @@ multiclass ATOM2_incdec_impl<string OpStr> {
// Compare-and-swap variants built on ATOM3S_impl, parameterized by the
// operation string OpStr. Four sub-families are produced:
//   _b32 / _b64 : Int32Regs / Int64Regs with integer immediates, tagged "i".
//   _f32 / _f64 : Float32Regs / Float64Regs with fpimm immediates, tagged "f".
// The float rows still pass "b32" / "b64" as the third argument, i.e. the
// same strings as the integer rows of matching width.
// NOTE(review): presumably the second argument selects the `_i` / `_f`
// intrinsic family and the third is the PTX type suffix, which would make the
// float compare bitwise rather than a floating-point equality test --
// confirm against ATOM3S_impl and the PTX ISA.
multiclass ATOM3_cas_impl<string OpStr> {
defm _b32 : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
defm _b64 : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
defm _f32 : ATOM3S_impl<OpStr, "f", "b32", Float32Regs, f32imm, fpimm, f32, []>;
defm _f64 : ATOM3S_impl<OpStr, "f", "b64", Float64Regs, f64imm, fpimm, f64, []>;
}

defm INT_PTX_LD : ATOM_ld_impl;
Expand Down
Loading