@@ -104,6 +104,47 @@ void add_fetch_test(queue q, size_t N) {
104
104
assert (std::unique (output.begin (), output.end ()) == output.end ());
105
105
}
106
106
107
+ template <template <typename , memory_order, memory_scope, access::address_space>
108
+ class AtomicRef ,
109
+ access::address_space space, typename T, typename Difference = T,
110
+ memory_order order = memory_order::relaxed,
111
+ memory_scope scope = memory_scope::device>
112
+ void add_fetch_test_usm_shared (queue q, size_t N) {
113
+ T *sum = malloc_shared<T>(1 , q);
114
+ T *output = malloc_shared<T>(N, q);
115
+ T *output_begin = &output[0 ], *output_end = &output[N];
116
+ sum[0 ] = T (0 );
117
+ std::fill (output_begin, output_end, T (0 ));
118
+ {
119
+ q.submit ([&](handler &cgh) {
120
+ cgh.parallel_for (range<1 >(N), [=](item<1 > it) {
121
+ int gid = it.get_id (0 );
122
+ auto atm = AtomicRef < T,
123
+ (order == memory_order::acquire || order == memory_order::release)
124
+ ? memory_order::relaxed
125
+ : order,
126
+ scope, space > (sum[0 ]);
127
+ output[gid] = atm.fetch_add (Difference (1 ), order);
128
+ });
129
+ }).wait_and_throw ();
130
+ }
131
+
132
+ // All work-items increment by 1, so final value should be equal to N.
133
+ assert (sum[0 ] == T (N));
134
+
135
+ // Fetch returns original value: will be in [0, N-1].
136
+ auto min_e = std::min_element (output_begin, output_end);
137
+ auto max_e = std::max_element (output_begin, output_end);
138
+ assert (*min_e == T (0 ) && *max_e == T (N - 1 ));
139
+
140
+ // Intermediate values should be unique.
141
+ std::sort (output_begin, output_end);
142
+ assert (std::unique (output_begin, output_end) == output_end);
143
+
144
+ free (sum, q);
145
+ free (output, q);
146
+ }
147
+
107
148
template <template <typename , memory_order, memory_scope, access::address_space>
108
149
class AtomicRef ,
109
150
access::address_space space, typename T, typename Difference = T,
@@ -146,6 +187,47 @@ void add_plus_equal_test(queue q, size_t N) {
146
187
assert (std::unique (output.begin (), output.end ()) == output.end ());
147
188
}
148
189
190
+ template <template <typename , memory_order, memory_scope, access::address_space>
191
+ class AtomicRef ,
192
+ access::address_space space, typename T, typename Difference = T,
193
+ memory_order order = memory_order::relaxed,
194
+ memory_scope scope = memory_scope::device>
195
+ void add_plus_equal_test_usm_shared (queue q, size_t N) {
196
+ T *sum = malloc_shared<T>(1 , q);
197
+ T *output = malloc_shared<T>(N, q);
198
+ T *output_begin = &output[0 ], *output_end = &output[N];
199
+ sum[0 ] = T (0 );
200
+ std::fill (output_begin, output_end, T (0 ));
201
+ {
202
+ q.submit ([&](handler &cgh) {
203
+ cgh.parallel_for (range<1 >(N), [=](item<1 > it) {
204
+ int gid = it.get_id (0 );
205
+ auto atm = AtomicRef < T,
206
+ (order == memory_order::acquire || order == memory_order::release)
207
+ ? memory_order::relaxed
208
+ : order,
209
+ scope, space > (sum[0 ]);
210
+ output[gid] = atm += Difference (1 );
211
+ });
212
+ }).wait_and_throw ();
213
+ }
214
+
215
+ // All work-items increment by 1, so final value should be equal to N.
216
+ assert (sum[0 ] == T (N));
217
+
218
+ // += returns updated value: will be in [1, N].
219
+ auto min_e = std::min_element (output_begin, output_end);
220
+ auto max_e = std::max_element (output_begin, output_end);
221
+ assert (*min_e == T (1 ) && *max_e == T (N));
222
+
223
+ // Intermediate values should be unique.
224
+ std::sort (output_begin, output_end);
225
+ assert (std::unique (output_begin, output_end) == output_end);
226
+
227
+ free (sum, q);
228
+ free (output, q);
229
+ }
230
+
149
231
template <template <typename , memory_order, memory_scope, access::address_space>
150
232
class AtomicRef ,
151
233
access::address_space space, typename T, typename Difference = T,
@@ -188,6 +270,46 @@ void add_pre_inc_test(queue q, size_t N) {
188
270
assert (std::unique (output.begin (), output.end ()) == output.end ());
189
271
}
190
272
273
+ template <template <typename , memory_order, memory_scope, access::address_space>
274
+ class AtomicRef ,
275
+ access::address_space space, typename T, typename Difference = T,
276
+ memory_order order = memory_order::relaxed,
277
+ memory_scope scope = memory_scope::device>
278
+ void add_pre_inc_test_usm_shared (queue q, size_t N) {
279
+ T *sum = malloc_shared<T>(1 , q);
280
+ T *output = malloc_shared<T>(N, q);
281
+ T *output_begin = &output[0 ], *output_end = &output[N];
282
+ sum[0 ] = T (0 );
283
+ {
284
+ q.submit ([&](handler &cgh) {
285
+ cgh.parallel_for (range<1 >(N), [=](item<1 > it) {
286
+ int gid = it.get_id (0 );
287
+ auto atm = AtomicRef < T,
288
+ (order == memory_order::acquire || order == memory_order::release)
289
+ ? memory_order::relaxed
290
+ : order,
291
+ scope, space > (sum[0 ]);
292
+ output[gid] = ++atm;
293
+ });
294
+ }).wait_and_throw ();
295
+ }
296
+
297
+ // All work-items increment by 1, so final value should be equal to N.
298
+ assert (sum[0 ] == T (N));
299
+
300
+ // Pre-increment returns updated value: will be in [1, N].
301
+ auto min_e = std::min_element (output_begin, output_end);
302
+ auto max_e = std::max_element (output_begin, output_end);
303
+ assert (*min_e == T (1 ) && *max_e == T (N));
304
+
305
+ // Intermediate values should be unique.
306
+ std::sort (output_begin, output_end);
307
+ assert (std::unique (output_begin, output_end) == output_end);
308
+
309
+ free (sum, q);
310
+ free (output, q);
311
+ }
312
+
191
313
template <template <typename , memory_order, memory_scope, access::address_space>
192
314
class AtomicRef ,
193
315
access::address_space space, typename T, typename Difference = T,
@@ -230,6 +352,46 @@ void add_post_inc_test(queue q, size_t N) {
230
352
assert (std::unique (output.begin (), output.end ()) == output.end ());
231
353
}
232
354
355
+ template <template <typename , memory_order, memory_scope, access::address_space>
356
+ class AtomicRef ,
357
+ access::address_space space, typename T, typename Difference = T,
358
+ memory_order order = memory_order::relaxed,
359
+ memory_scope scope = memory_scope::device>
360
+ void add_post_inc_test_usm_shared (queue q, size_t N) {
361
+ T *sum = malloc_shared<T>(1 , q);
362
+ T *output = malloc_shared<T>(N, q);
363
+ T *output_begin = &output[0 ], *output_end = &output[N];
364
+ sum[0 ] = T (0 );
365
+ {
366
+ q.submit ([&](handler &cgh) {
367
+ cgh.parallel_for (range<1 >(N), [=](item<1 > it) {
368
+ int gid = it.get_id (0 );
369
+ auto atm = AtomicRef < T,
370
+ (order == memory_order::acquire || order == memory_order::release)
371
+ ? memory_order::relaxed
372
+ : order,
373
+ scope, space > (sum[0 ]);
374
+ output[gid] = atm++;
375
+ });
376
+ }).wait_and_throw ();
377
+ }
378
+
379
+ // All work-items increment by 1, so final value should be equal to N.
380
+ assert (sum[0 ] == T (N));
381
+
382
+ // Post-increment returns original value: will be in [0, N-1].
383
+ auto min_e = std::min_element (output_begin, output_end);
384
+ auto max_e = std::max_element (output_begin, output_end);
385
+ assert (*min_e == T (0 ) && *max_e == T (N - 1 ));
386
+
387
+ // Intermediate values should be unique.
388
+ std::sort (output_begin, output_end);
389
+ assert (std::unique (output_begin, output_end) == output_end);
390
+
391
+ free (sum, q);
392
+ free (output, q);
393
+ }
394
+
233
395
template <access::address_space space, typename T, typename Difference = T,
234
396
memory_order order = memory_order::relaxed,
235
397
memory_scope scope = memory_scope::device>
@@ -241,6 +403,7 @@ void add_test(queue q, size_t N) {
241
403
space == access ::address_space::global_space ||
242
404
(space == access ::address_space::generic_space && !TEST_GENERIC_IN_LOCAL);
243
405
constexpr bool do_ext_tests = space != access ::address_space::generic_space;
406
+ bool do_usm_tests = q.get_device ().has (aspect::usm_shared_allocations);
244
407
if constexpr (do_local_tests) {
245
408
#ifdef RUN_DEPRECATED
246
409
if constexpr (do_ext_tests) {
@@ -259,23 +422,47 @@ void add_test(queue q, size_t N) {
259
422
order, scope>(q, N);
260
423
add_plus_equal_test<::sycl::ext::oneapi::atomic_ref, space, T, Difference,
261
424
order, scope>(q, N);
425
+ if (do_usm_tests) {
426
+ add_fetch_test_usm_shared<::sycl::ext::oneapi::atomic_ref, space, T,
427
+ Difference, order, scope>(q, N);
428
+ add_plus_equal_test_usm_shared<::sycl::ext::oneapi::atomic_ref, space,
429
+ T, Difference, order, scope>(q, N);
430
+ }
262
431
if constexpr (!std::is_floating_point_v<T>) {
263
432
add_pre_inc_test<::sycl::ext::oneapi::atomic_ref, space, T, Difference,
264
433
order, scope>(q, N);
265
434
add_post_inc_test<::sycl::ext::oneapi::atomic_ref, space, T, Difference,
266
435
order, scope>(q, N);
436
+ if (do_usm_tests) {
437
+ add_pre_inc_test_usm_shared<::sycl::ext::oneapi::atomic_ref, space, T,
438
+ Difference, order, scope>(q, N);
439
+ add_post_inc_test_usm_shared<::sycl::ext::oneapi::atomic_ref, space,
440
+ T, Difference, order, scope>(q, N);
441
+ }
267
442
}
268
443
}
269
444
#else
270
445
add_fetch_test<::sycl::atomic_ref, space, T, Difference, order, scope>(q,
271
446
N);
272
447
add_plus_equal_test<::sycl::atomic_ref, space, T, Difference, order, scope>(
273
448
q, N);
449
+ if (do_usm_tests) {
450
+ add_fetch_test_usm_shared<::sycl::atomic_ref, space, T, Difference, order,
451
+ scope>(q, N);
452
+ add_plus_equal_test_usm_shared<::sycl::atomic_ref, space, T, Difference,
453
+ order, scope>(q, N);
454
+ }
274
455
if constexpr (!std::is_floating_point_v<T>) {
275
456
add_pre_inc_test<::sycl::atomic_ref, space, T, Difference, order, scope>(
276
457
q, N);
277
458
add_post_inc_test<::sycl::atomic_ref, space, T, Difference, order, scope>(
278
459
q, N);
460
+ if (do_usm_tests) {
461
+ add_pre_inc_test_usm_shared<::sycl::atomic_ref, space, T, Difference,
462
+ order, scope>(q, N);
463
+ add_post_inc_test_usm_shared<::sycl::atomic_ref, space, T, Difference,
464
+ order, scope>(q, N);
465
+ }
279
466
}
280
467
#endif
281
468
}
0 commit comments