Skip to content

Commit 0ce40f4

Browse files
authored
[SYCL] [NATIVECPU] Implement generic atomic store for generic target (#13428)
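Implements `__spirv_AtomicStore` for the generic address space, similarly to #13249. Note that the `IMPL` macro in `atomic_store.cl` has been extended to take a `SUB` parameter, similarly to what happens for [amdgcn](https://github.com/intel/llvm/blob/a5a0e1296269195de90949537597b2788bb5e836/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_store.cl#L13) and [ptx](https://github.com/intel/llvm/blob/a5a0e1296269195de90949537597b2788bb5e836/libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl#L39). `SUB` is the substitution index pasted into the mangled function name (`NS##SUB##_19MemorySemanticsMask`): it is `1` for the `global` (`U3AS1`) and `local` (`U3AS3`) overloads and `0` for the generic overload, which has no address-space qualifier in its mangled name. In both `atomic_load.cl` and `atomic_store.cl` the generic overloads are instantiated through a new `IMPL_GENERIC` macro that is only compiled when `_CLC_GENERIC_AS_SUPPORTED` is set; for `atomic_load.cl` this means the unqualified overload is moved out of `IMPL_AS`.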
1 parent 642e8d0 commit 0ce40f4

File tree

2 files changed

+49
-20
lines changed

2 files changed

+49
-20
lines changed

libclc/generic/libspirv/atomic/atomic_load.cl

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,7 @@ TYPE __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_##MEM_ORDER(volatile AS c
3232

3333
#define IMPL_AS(TYPE, TYPE_MANGLED, PREFIX, BYTE_SIZE) \
3434
IMPL(TYPE, TYPE_MANGLED, global, U3AS1, PREFIX, BYTE_SIZE) \
35-
IMPL(TYPE, TYPE_MANGLED, local, U3AS3, PREFIX, BYTE_SIZE) \
36-
IMPL(TYPE, TYPE_MANGLED, , , PREFIX, BYTE_SIZE)
35+
IMPL(TYPE, TYPE_MANGLED, local, U3AS3, PREFIX, BYTE_SIZE)
3736

3837
IMPL_AS(int, i, , 4)
3938
IMPL_AS(unsigned int, j, u, 4)
@@ -43,6 +42,21 @@ IMPL_AS(long, l, , 8)
4342
IMPL_AS(unsigned long, m, u, 8)
4443
#endif
4544

45+
#if _CLC_GENERIC_AS_SUPPORTED
46+
47+
#define IMPL_GENERIC(TYPE, TYPE_MANGLED, PREFIX, BYTE_SIZE) \
48+
IMPL(TYPE, TYPE_MANGLED, , , PREFIX, BYTE_SIZE)
49+
50+
IMPL_GENERIC(int, i, , 4)
51+
IMPL_GENERIC(unsigned int, j, u, 4)
52+
53+
#ifdef cl_khr_int64_base_atomics
54+
IMPL_GENERIC(long, l, , 8)
55+
IMPL_GENERIC(unsigned long, m, u, 8)
56+
#endif
57+
58+
#endif //_CLC_GENERIC_AS_SUPPORTED
59+
4660
#undef FDECL
4761
#undef IMPL_AS
4862
#undef IMPL

libclc/generic/libspirv/atomic/atomic_store.cl

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -29,26 +29,26 @@ _Z19__spirv_AtomicStorePU3AS3fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE
2929
#define FDECL(TYPE, PREFIX, AS, BYTE_SIZE, MEM_ORDER) \
3030
TYPE __clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_##MEM_ORDER(volatile AS const TYPE *, TYPE);
3131

32-
#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, PREFIX, BYTE_SIZE) \
33-
FDECL(TYPE, PREFIX, AS, BYTE_SIZE, unordered) \
34-
FDECL(TYPE, PREFIX, AS, BYTE_SIZE, release) \
35-
FDECL(TYPE, PREFIX, AS, BYTE_SIZE, seq_cst) \
36-
_CLC_DEF void \
37-
_Z19__spirv_AtomicStorePU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
38-
volatile AS TYPE *p, enum Scope scope, \
39-
enum MemorySemanticsMask semantics, TYPE val) { \
40-
if (semantics == Release) { \
41-
__clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_release(p, val); \
42-
} else if (semantics == SequentiallyConsistent) { \
43-
__clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_seq_cst(p, val); \
44-
} else { \
45-
__clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_unordered(p, val); \
46-
} \
32+
#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB, PREFIX, BYTE_SIZE) \
33+
FDECL(TYPE, PREFIX, AS, BYTE_SIZE, unordered) \
34+
FDECL(TYPE, PREFIX, AS, BYTE_SIZE, release) \
35+
FDECL(TYPE, PREFIX, AS, BYTE_SIZE, seq_cst) \
36+
_CLC_DEF void \
37+
_Z19__spirv_AtomicStoreP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
38+
volatile AS TYPE *p, enum Scope scope, \
39+
enum MemorySemanticsMask semantics, TYPE val) { \
40+
if (semantics == Release) { \
41+
__clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_release(p, val); \
42+
} else if (semantics == SequentiallyConsistent) { \
43+
__clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_seq_cst(p, val); \
44+
} else { \
45+
__clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_unordered(p, val); \
46+
} \
4747
}
4848

49-
#define IMPL_AS(TYPE, TYPE_MANGLED, PREFIX, BYTE_SIZE) \
50-
IMPL(TYPE, TYPE_MANGLED, global, AS1, PREFIX, BYTE_SIZE) \
51-
IMPL(TYPE, TYPE_MANGLED, local, AS3, PREFIX, BYTE_SIZE)
49+
#define IMPL_AS(TYPE, TYPE_MANGLED, PREFIX, BYTE_SIZE) \
50+
IMPL(TYPE, TYPE_MANGLED, global, U3AS1, 1, PREFIX, BYTE_SIZE) \
51+
IMPL(TYPE, TYPE_MANGLED, local, U3AS3, 1, PREFIX, BYTE_SIZE)
5252

5353
IMPL_AS(int, i, , 4)
5454
IMPL_AS(unsigned int, j, u, 4)
@@ -58,6 +58,21 @@ IMPL_AS(long, l, , 8)
5858
IMPL_AS(unsigned long, m, u, 8)
5959
#endif
6060

61+
#if _CLC_GENERIC_AS_SUPPORTED
62+
63+
#define IMPL_GENERIC(TYPE, TYPE_MANGLED, PREFIX, BYTE_SIZE) \
64+
IMPL(TYPE, TYPE_MANGLED, , , 0, PREFIX, BYTE_SIZE)
65+
66+
IMPL_GENERIC(int, i, , 4)
67+
IMPL_GENERIC(unsigned int, j, u, 4)
68+
69+
#ifdef cl_khr_int64_base_atomics
70+
IMPL_GENERIC(long, l, , 8)
71+
IMPL_GENERIC(unsigned long, m, u, 8)
72+
#endif
73+
74+
#endif //_CLC_GENERIC_AS_SUPPORTED
75+
6176
#undef FDECL
6277
#undef IMPL_AS
6378
#undef IMPL

0 commit comments

Comments
 (0)