@@ -104,6 +104,47 @@ void add_fetch_test(queue q, size_t N) {
104
104
assert (std::unique (output.begin (), output.end ()) == output.end ());
105
105
}
106
106
107
+ template <template <typename , memory_order, memory_scope, access::address_space>
108
+ class AtomicRef ,
109
+ access::address_space space, typename T, typename Difference = T,
110
+ memory_order order = memory_order::relaxed,
111
+ memory_scope scope = memory_scope::device>
112
+ void add_fetch_test_usm_shared (queue q, size_t N) {
113
+ T *sum = malloc_shared<T>(1 , q);
114
+ T *output = malloc_shared<T>(N, q);
115
+ T *output_begin = &output[0 ], *output_end = &output[N];
116
+ sum[0 ] = T (0 );
117
+ std::fill (output_begin, output_end, T (0 ));
118
+ {
119
+ q.submit ([&](handler &cgh) {
120
+ cgh.parallel_for (range<1 >(N), [=](item<1 > it) {
121
+ int gid = it.get_id (0 );
122
+ auto atm = AtomicRef < T,
123
+ (order == memory_order::acquire || order == memory_order::release)
124
+ ? memory_order::relaxed
125
+ : order,
126
+ scope, space > (sum[0 ]);
127
+ output[gid] = atm.fetch_add (Difference (1 ), order);
128
+ });
129
+ }).wait_and_throw ();
130
+ }
131
+
132
+ // All work-items increment by 1, so final value should be equal to N.
133
+ assert (sum[0 ] == T (N));
134
+
135
+ // Fetch returns original value: will be in [0, N-1].
136
+ auto min_e = std::min_element (output_begin, output_end);
137
+ auto max_e = std::max_element (output_begin, output_end);
138
+ assert (*min_e == T (0 ) && *max_e == T (N - 1 ));
139
+
140
+ // Intermediate values should be unique.
141
+ std::sort (output_begin, output_end);
142
+ assert (std::unique (output_begin, output_end) == output_end);
143
+
144
+ free (sum, q);
145
+ free (output, q);
146
+ }
147
+
107
148
template <template <typename , memory_order, memory_scope, access::address_space>
108
149
class AtomicRef ,
109
150
access::address_space space, typename T, typename Difference = T,
@@ -146,6 +187,47 @@ void add_plus_equal_test(queue q, size_t N) {
146
187
assert (std::unique (output.begin (), output.end ()) == output.end ());
147
188
}
148
189
190
+ template <template <typename , memory_order, memory_scope, access::address_space>
191
+ class AtomicRef ,
192
+ access::address_space space, typename T, typename Difference = T,
193
+ memory_order order = memory_order::relaxed,
194
+ memory_scope scope = memory_scope::device>
195
+ void add_plus_equal_test_usm_shared (queue q, size_t N) {
196
+ T *sum = malloc_shared<T>(1 , q);
197
+ T *output = malloc_shared<T>(N, q);
198
+ T *output_begin = &output[0 ], *output_end = &output[N];
199
+ sum[0 ] = T (0 );
200
+ std::fill (output_begin, output_end, T (0 ));
201
+ {
202
+ q.submit ([&](handler &cgh) {
203
+ cgh.parallel_for (range<1 >(N), [=](item<1 > it) {
204
+ int gid = it.get_id (0 );
205
+ auto atm = AtomicRef < T,
206
+ (order == memory_order::acquire || order == memory_order::release)
207
+ ? memory_order::relaxed
208
+ : order,
209
+ scope, space > (sum[0 ]);
210
+ output[gid] = atm += Difference (1 );
211
+ });
212
+ }).wait_and_throw ();
213
+ }
214
+
215
+ // All work-items increment by 1, so final value should be equal to N.
216
+ assert (sum[0 ] == T (N));
217
+
218
+ // += returns updated value: will be in [1, N].
219
+ auto min_e = std::min_element (output_begin, output_end);
220
+ auto max_e = std::max_element (output_begin, output_end);
221
+ assert (*min_e == T (1 ) && *max_e == T (N));
222
+
223
+ // Intermediate values should be unique.
224
+ std::sort (output_begin, output_end);
225
+ assert (std::unique (output_begin, output_end) == output_end);
226
+
227
+ free (sum, q);
228
+ free (output, q);
229
+ }
230
+
149
231
template <template <typename , memory_order, memory_scope, access::address_space>
150
232
class AtomicRef ,
151
233
access::address_space space, typename T, typename Difference = T,
@@ -188,6 +270,46 @@ void add_pre_inc_test(queue q, size_t N) {
188
270
assert (std::unique (output.begin (), output.end ()) == output.end ());
189
271
}
190
272
273
+ template <template <typename , memory_order, memory_scope, access::address_space>
274
+ class AtomicRef ,
275
+ access::address_space space, typename T, typename Difference = T,
276
+ memory_order order = memory_order::relaxed,
277
+ memory_scope scope = memory_scope::device>
278
+ void add_pre_inc_test_usm_shared (queue q, size_t N) {
279
+ T *sum = malloc_shared<T>(1 , q);
280
+ T *output = malloc_shared<T>(N, q);
281
+ T *output_begin = &output[0 ], *output_end = &output[N];
282
+ sum[0 ] = T (0 );
283
+ {
284
+ q.submit ([&](handler &cgh) {
285
+ cgh.parallel_for (range<1 >(N), [=](item<1 > it) {
286
+ int gid = it.get_id (0 );
287
+ auto atm = AtomicRef < T,
288
+ (order == memory_order::acquire || order == memory_order::release)
289
+ ? memory_order::relaxed
290
+ : order,
291
+ scope, space > (sum[0 ]);
292
+ output[gid] = ++atm;
293
+ });
294
+ }).wait_and_throw ();
295
+ }
296
+
297
+ // All work-items increment by 1, so final value should be equal to N.
298
+ assert (sum[0 ] == T (N));
299
+
300
+ // Pre-increment returns updated value: will be in [1, N].
301
+ auto min_e = std::min_element (output_begin, output_end);
302
+ auto max_e = std::max_element (output_begin, output_end);
303
+ assert (*min_e == T (1 ) && *max_e == T (N));
304
+
305
+ // Intermediate values should be unique.
306
+ std::sort (output_begin, output_end);
307
+ assert (std::unique (output_begin, output_end) == output_end);
308
+
309
+ free (sum, q);
310
+ free (output, q);
311
+ }
312
+
191
313
template <template <typename , memory_order, memory_scope, access::address_space>
192
314
class AtomicRef ,
193
315
access::address_space space, typename T, typename Difference = T,
@@ -230,6 +352,46 @@ void add_post_inc_test(queue q, size_t N) {
230
352
assert (std::unique (output.begin (), output.end ()) == output.end ());
231
353
}
232
354
355
+ template <template <typename , memory_order, memory_scope, access::address_space>
356
+ class AtomicRef ,
357
+ access::address_space space, typename T, typename Difference = T,
358
+ memory_order order = memory_order::relaxed,
359
+ memory_scope scope = memory_scope::device>
360
+ void add_post_inc_test_usm_shared (queue q, size_t N) {
361
+ T *sum = malloc_shared<T>(1 , q);
362
+ T *output = malloc_shared<T>(N, q);
363
+ T *output_begin = &output[0 ], *output_end = &output[N];
364
+ sum[0 ] = T (0 );
365
+ {
366
+ q.submit ([&](handler &cgh) {
367
+ cgh.parallel_for (range<1 >(N), [=](item<1 > it) {
368
+ int gid = it.get_id (0 );
369
+ auto atm = AtomicRef < T,
370
+ (order == memory_order::acquire || order == memory_order::release)
371
+ ? memory_order::relaxed
372
+ : order,
373
+ scope, space > (sum[0 ]);
374
+ output[gid] = atm++;
375
+ });
376
+ }).wait_and_throw ();
377
+ }
378
+
379
+ // All work-items increment by 1, so final value should be equal to N.
380
+ assert (sum[0 ] == T (N));
381
+
382
+ // Post-increment returns original value: will be in [0, N-1].
383
+ auto min_e = std::min_element (output_begin, output_end);
384
+ auto max_e = std::max_element (output_begin, output_end);
385
+ assert (*min_e == T (0 ) && *max_e == T (N - 1 ));
386
+
387
+ // Intermediate values should be unique.
388
+ std::sort (output_begin, output_end);
389
+ assert (std::unique (output_begin, output_end) == output_end);
390
+
391
+ free (sum, q);
392
+ free (output, q);
393
+ }
394
+
233
395
template <access::address_space space, typename T, typename Difference = T,
234
396
memory_order order = memory_order::relaxed,
235
397
memory_scope scope = memory_scope::device>
@@ -257,25 +419,41 @@ void add_test(queue q, size_t N) {
257
419
if constexpr (do_ext_tests) {
258
420
add_fetch_test<::sycl::ext::oneapi::atomic_ref, space, T, Difference,
259
421
order, scope>(q, N);
422
+ add_fetch_test_usm_shared<::sycl::ext::oneapi::atomic_ref, space, T,
423
+ Difference, order, scope>(q, N);
260
424
add_plus_equal_test<::sycl::ext::oneapi::atomic_ref, space, T, Difference,
261
425
order, scope>(q, N);
426
+ add_plus_equal_test_usm_shared<::sycl::ext::oneapi::atomic_ref, space, T,
427
+ Difference, order, scope>(q, N);
262
428
if constexpr (!std::is_floating_point_v<T>) {
263
429
add_pre_inc_test<::sycl::ext::oneapi::atomic_ref, space, T, Difference,
264
430
order, scope>(q, N);
431
+ add_pre_inc_test_usm_shared<::sycl::ext::oneapi::atomic_ref, space, T,
432
+ Difference, order, scope>(q, N);
265
433
add_post_inc_test<::sycl::ext::oneapi::atomic_ref, space, T, Difference,
266
434
order, scope>(q, N);
435
+ add_post_inc_test_usm_shared<::sycl::ext::oneapi::atomic_ref, space, T,
436
+ Difference, order, scope>(q, N);
267
437
}
268
438
}
269
439
#else
270
440
add_fetch_test<::sycl::atomic_ref, space, T, Difference, order, scope>(q,
271
441
N);
442
+ add_fetch_test_usm_shared<::sycl::atomic_ref, space, T, Difference, order,
443
+ scope>(q, N);
272
444
add_plus_equal_test<::sycl::atomic_ref, space, T, Difference, order, scope>(
273
445
q, N);
446
+ add_plus_equal_test_usm_shared<::sycl::atomic_ref, space, T, Difference,
447
+ order, scope>(q, N);
274
448
if constexpr (!std::is_floating_point_v<T>) {
275
449
add_pre_inc_test<::sycl::atomic_ref, space, T, Difference, order, scope>(
276
450
q, N);
451
+ add_pre_inc_test_usm_shared<::sycl::atomic_ref, space, T, Difference,
452
+ order, scope>(q, N);
277
453
add_post_inc_test<::sycl::atomic_ref, space, T, Difference, order, scope>(
278
454
q, N);
455
+ add_post_inc_test_usm_shared<::sycl::atomic_ref, space, T, Difference,
456
+ order, scope>(q, N);
279
457
}
280
458
#endif
281
459
}
0 commit comments