Skip to content

[SYCL] Add tests for atomic operations on malloc_shared-allocated memory #12480

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
187 changes: 187 additions & 0 deletions sycl/test-e2e/AtomicRef/add.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,47 @@ void add_fetch_test(queue q, size_t N) {
assert(std::unique(output.begin(), output.end()) == output.end());
}

template <template <typename, memory_order, memory_scope, access::address_space>
class AtomicRef,
access::address_space space, typename T, typename Difference = T,
memory_order order = memory_order::relaxed,
memory_scope scope = memory_scope::device>
void add_fetch_test_usm_shared(queue q, size_t N) {
T *sum = malloc_shared<T>(1, q);
T *output = malloc_shared<T>(N, q);
T *output_begin = &output[0], *output_end = &output[N];
sum[0] = T(0);
std::fill(output_begin, output_end, T(0));
{
q.submit([&](handler &cgh) {
cgh.parallel_for(range<1>(N), [=](item<1> it) {
int gid = it.get_id(0);
auto atm = AtomicRef < T,
(order == memory_order::acquire || order == memory_order::release)
? memory_order::relaxed
: order,
scope, space > (sum[0]);
output[gid] = atm.fetch_add(Difference(1), order);
});
}).wait_and_throw();
}

// All work-items increment by 1, so final value should be equal to N.
assert(sum[0] == T(N));

// Fetch returns original value: will be in [0, N-1].
auto min_e = std::min_element(output_begin, output_end);
auto max_e = std::max_element(output_begin, output_end);
assert(*min_e == T(0) && *max_e == T(N - 1));

// Intermediate values should be unique.
std::sort(output_begin, output_end);
assert(std::unique(output_begin, output_end) == output_end);

free(sum, q);
free(output, q);
}

template <template <typename, memory_order, memory_scope, access::address_space>
class AtomicRef,
access::address_space space, typename T, typename Difference = T,
Expand Down Expand Up @@ -146,6 +187,47 @@ void add_plus_equal_test(queue q, size_t N) {
assert(std::unique(output.begin(), output.end()) == output.end());
}

template <template <typename, memory_order, memory_scope, access::address_space>
class AtomicRef,
access::address_space space, typename T, typename Difference = T,
memory_order order = memory_order::relaxed,
memory_scope scope = memory_scope::device>
void add_plus_equal_test_usm_shared(queue q, size_t N) {
T *sum = malloc_shared<T>(1, q);
T *output = malloc_shared<T>(N, q);
T *output_begin = &output[0], *output_end = &output[N];
sum[0] = T(0);
std::fill(output_begin, output_end, T(0));
{
q.submit([&](handler &cgh) {
cgh.parallel_for(range<1>(N), [=](item<1> it) {
int gid = it.get_id(0);
auto atm = AtomicRef < T,
(order == memory_order::acquire || order == memory_order::release)
? memory_order::relaxed
: order,
scope, space > (sum[0]);
output[gid] = atm += Difference(1);
});
}).wait_and_throw();
}

// All work-items increment by 1, so final value should be equal to N.
assert(sum[0] == T(N));

// += returns updated value: will be in [1, N].
auto min_e = std::min_element(output_begin, output_end);
auto max_e = std::max_element(output_begin, output_end);
assert(*min_e == T(1) && *max_e == T(N));

// Intermediate values should be unique.
std::sort(output_begin, output_end);
assert(std::unique(output_begin, output_end) == output_end);

free(sum, q);
free(output, q);
}

template <template <typename, memory_order, memory_scope, access::address_space>
class AtomicRef,
access::address_space space, typename T, typename Difference = T,
Expand Down Expand Up @@ -188,6 +270,46 @@ void add_pre_inc_test(queue q, size_t N) {
assert(std::unique(output.begin(), output.end()) == output.end());
}

template <template <typename, memory_order, memory_scope, access::address_space>
class AtomicRef,
access::address_space space, typename T, typename Difference = T,
memory_order order = memory_order::relaxed,
memory_scope scope = memory_scope::device>
void add_pre_inc_test_usm_shared(queue q, size_t N) {
T *sum = malloc_shared<T>(1, q);
T *output = malloc_shared<T>(N, q);
T *output_begin = &output[0], *output_end = &output[N];
sum[0] = T(0);
{
q.submit([&](handler &cgh) {
cgh.parallel_for(range<1>(N), [=](item<1> it) {
int gid = it.get_id(0);
auto atm = AtomicRef < T,
(order == memory_order::acquire || order == memory_order::release)
? memory_order::relaxed
: order,
scope, space > (sum[0]);
output[gid] = ++atm;
});
}).wait_and_throw();
}

// All work-items increment by 1, so final value should be equal to N.
assert(sum[0] == T(N));

// Pre-increment returns updated value: will be in [1, N].
auto min_e = std::min_element(output_begin, output_end);
auto max_e = std::max_element(output_begin, output_end);
assert(*min_e == T(1) && *max_e == T(N));

// Intermediate values should be unique.
std::sort(output_begin, output_end);
assert(std::unique(output_begin, output_end) == output_end);

free(sum, q);
free(output, q);
}

template <template <typename, memory_order, memory_scope, access::address_space>
class AtomicRef,
access::address_space space, typename T, typename Difference = T,
Expand Down Expand Up @@ -230,6 +352,46 @@ void add_post_inc_test(queue q, size_t N) {
assert(std::unique(output.begin(), output.end()) == output.end());
}

template <template <typename, memory_order, memory_scope, access::address_space>
class AtomicRef,
access::address_space space, typename T, typename Difference = T,
memory_order order = memory_order::relaxed,
memory_scope scope = memory_scope::device>
void add_post_inc_test_usm_shared(queue q, size_t N) {
T *sum = malloc_shared<T>(1, q);
T *output = malloc_shared<T>(N, q);
T *output_begin = &output[0], *output_end = &output[N];
sum[0] = T(0);
{
q.submit([&](handler &cgh) {
cgh.parallel_for(range<1>(N), [=](item<1> it) {
int gid = it.get_id(0);
auto atm = AtomicRef < T,
(order == memory_order::acquire || order == memory_order::release)
? memory_order::relaxed
: order,
scope, space > (sum[0]);
output[gid] = atm++;
});
}).wait_and_throw();
}

// All work-items increment by 1, so final value should be equal to N.
assert(sum[0] == T(N));

// Post-increment returns original value: will be in [0, N-1].
auto min_e = std::min_element(output_begin, output_end);
auto max_e = std::max_element(output_begin, output_end);
assert(*min_e == T(0) && *max_e == T(N - 1));

// Intermediate values should be unique.
std::sort(output_begin, output_end);
assert(std::unique(output_begin, output_end) == output_end);

free(sum, q);
free(output, q);
}

template <access::address_space space, typename T, typename Difference = T,
memory_order order = memory_order::relaxed,
memory_scope scope = memory_scope::device>
Expand All @@ -241,6 +403,7 @@ void add_test(queue q, size_t N) {
space == access::address_space::global_space ||
(space == access::address_space::generic_space && !TEST_GENERIC_IN_LOCAL);
constexpr bool do_ext_tests = space != access::address_space::generic_space;
bool do_usm_tests = q.get_device().has(aspect::usm_shared_allocations);
if constexpr (do_local_tests) {
#ifdef RUN_DEPRECATED
if constexpr (do_ext_tests) {
Expand All @@ -259,23 +422,47 @@ void add_test(queue q, size_t N) {
order, scope>(q, N);
add_plus_equal_test<::sycl::ext::oneapi::atomic_ref, space, T, Difference,
order, scope>(q, N);
if (do_usm_tests) {
add_fetch_test_usm_shared<::sycl::ext::oneapi::atomic_ref, space, T,
Difference, order, scope>(q, N);
add_plus_equal_test_usm_shared<::sycl::ext::oneapi::atomic_ref, space,
T, Difference, order, scope>(q, N);
}
if constexpr (!std::is_floating_point_v<T>) {
add_pre_inc_test<::sycl::ext::oneapi::atomic_ref, space, T, Difference,
order, scope>(q, N);
add_post_inc_test<::sycl::ext::oneapi::atomic_ref, space, T, Difference,
order, scope>(q, N);
if (do_usm_tests) {
add_pre_inc_test_usm_shared<::sycl::ext::oneapi::atomic_ref, space, T,
Difference, order, scope>(q, N);
add_post_inc_test_usm_shared<::sycl::ext::oneapi::atomic_ref, space,
T, Difference, order, scope>(q, N);
}
}
}
#else
add_fetch_test<::sycl::atomic_ref, space, T, Difference, order, scope>(q,
N);
add_plus_equal_test<::sycl::atomic_ref, space, T, Difference, order, scope>(
q, N);
if (do_usm_tests) {
add_fetch_test_usm_shared<::sycl::atomic_ref, space, T, Difference, order,
scope>(q, N);
add_plus_equal_test_usm_shared<::sycl::atomic_ref, space, T, Difference,
order, scope>(q, N);
}
if constexpr (!std::is_floating_point_v<T>) {
add_pre_inc_test<::sycl::atomic_ref, space, T, Difference, order, scope>(
q, N);
add_post_inc_test<::sycl::atomic_ref, space, T, Difference, order, scope>(
q, N);
if (do_usm_tests) {
add_pre_inc_test_usm_shared<::sycl::atomic_ref, space, T, Difference,
order, scope>(q, N);
add_post_inc_test_usm_shared<::sycl::atomic_ref, space, T, Difference,
order, scope>(q, N);
}
}
#endif
}
Expand Down
46 changes: 46 additions & 0 deletions sycl/test-e2e/AtomicRef/and.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,44 @@ void and_global_test(queue q) {
assert(std::unique(output.begin(), output.end()) == output.end());
}

template <template <typename, memory_order, memory_scope, access::address_space>
class AtomicRef,
access::address_space space, typename T,
memory_order order = memory_order::relaxed,
memory_scope scope = memory_scope::device>
void and_global_test_usm_shared(queue q) {
const size_t N = 32;
const T initial = T((1ll << N) - 1);
T *cum = malloc_shared<T>(1, q);
cum[0] = initial;
T *output = malloc_shared<T>(N, q);
T *output_begin = &output[0], *output_end = &output[N];
std::fill(output_begin, output_end, T(0));
{
q.submit([&](handler &cgh) {
cgh.parallel_for(range<1>(N), [=](item<1> it) {
size_t gid = it.get_id(0);
auto atm = AtomicRef < T,
(order == memory_order::acquire || order == memory_order::release)
? memory_order::relaxed
: order,
scope, space > (cum[0]);
output[gid] = atm.fetch_and(~T(1ll << gid), order);
});
}).wait_and_throw();
}

// Final value should be equal to 0.
assert(cum[0] == 0);

// All other values should be unique; each work-item sets one bit to 0.
std::sort(output_begin, output_end);
assert(std::unique(output_begin, output_end) == output_end);

free(cum, q);
free(output, q);
}

template <access::address_space space, typename T,
memory_order order = memory_order::relaxed,
memory_scope scope = memory_scope::device>
Expand All @@ -108,6 +146,7 @@ void and_test(queue q) {
space == access::address_space::global_space ||
(space == access::address_space::generic_space && !TEST_GENERIC_IN_LOCAL);
constexpr bool do_ext_tests = space != access::address_space::generic_space;
bool do_usm_tests = q.get_device().has(aspect::usm_shared_allocations);
if constexpr (do_local_tests) {
#ifdef RUN_DEPRECATED
if constexpr (do_ext_tests) {
Expand All @@ -123,9 +162,16 @@ void and_test(queue q) {
if constexpr (do_ext_tests) {
and_global_test<::sycl::ext::oneapi::atomic_ref, space, T, order, scope>(
q);
if (do_usm_tests) {
and_global_test_usm_shared<::sycl::ext::oneapi::atomic_ref, space, T,
order, scope>(q);
}
}
#else
and_global_test<::sycl::atomic_ref, space, T, order, scope>(q);
if (do_usm_tests) {
and_global_test_usm_shared<::sycl::atomic_ref, space, T, order, scope>(q);
}
#endif
}
}
Expand Down
46 changes: 46 additions & 0 deletions sycl/test-e2e/AtomicRef/assignment.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,17 +40,63 @@ void assignment_test(queue q, size_t N) {
assert(assignment >= T(0) && assignment <= T(N - 1));
}

template <template <typename, memory_order, memory_scope, access::address_space>
class AtomicRef,
access::address_space address_space, typename T>
class assignment_usm_kernel;

template <template <typename, memory_order, memory_scope, access::address_space>
class AtomicRef,
access::address_space address_space, typename T>
void assignment_test_usm_shared(queue q, size_t N) {
T initial = T(N);
T *st = malloc_shared<T>(1, q);
st[0] = initial;
{
q.submit([&](handler &cgh) {
cgh.parallel_for<assignment_usm_kernel<AtomicRef, address_space, T>>(
range<1>(N), [=](item<1> it) {
size_t gid = it.get_id(0);
auto atm = AtomicRef<T, memory_order::relaxed,
memory_scope::device, address_space>(st[0]);
atm = T(gid);
});
}).wait_and_throw();
}

// The initial value should have been overwritten by a work-item ID.
// Atomicity isn't tested here, but support for assignment() is.
assert(st[0] != initial);
assert(st[0] >= T(0) && st[0] <= T(N - 1));

free(st, q);
}

template <typename T> void assignment_test(queue q, size_t N) {
bool do_usm_tests = q.get_device().has(aspect::usm_shared_allocations);
#ifdef RUN_DEPRECATED
assignment_test<::sycl::ext::oneapi::atomic_ref,
access::address_space::global_space, T>(q, N);
if (do_usm_tests) {
assignment_test_usm_shared<::sycl::ext::oneapi::atomic_ref,
access::address_space::global_space, T>(q, N);
}
#else
assignment_test<::sycl::atomic_ref, access::address_space::global_space, T>(
q, N);
if (do_usm_tests) {
assignment_test_usm_shared<::sycl::atomic_ref,
access::address_space::global_space, T>(q, N);
}
#endif
}

template <typename T> void assignment_generic_test(queue q, size_t N) {
bool do_usm_tests = q.get_device().has(aspect::usm_shared_allocations);
assignment_test<::sycl::atomic_ref, access::address_space::generic_space, T>(
q, N);
if (do_usm_tests) {
assignment_test_usm_shared<::sycl::atomic_ref,
access::address_space::generic_space, T>(q, N);
}
}
Loading