Skip to content

Commit 5e5d564

Browse files
authored
[SYCL] Enable 2 reduction tests for ACC and level_zero (intel/llvm-test-suite#410)
This patch has minor fixes to the test to increase their stability. It also changes the input data initialization to make the test-checks more meaningful/useful. Signed-off-by: Vyacheslav N Klochkov <[email protected]>
1 parent 8ddfb7f commit 5e5d564

File tree

4 files changed

+50
-23
lines changed

4 files changed

+50
-23
lines changed

SYCL/Reduction/reduction_nd_range_scalar.hpp

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,21 @@ int test(queue &Q, T Identity, T Init, BinaryOperation BOp,
1414
const nd_range<Dims> &Range) {
1515
printTestLabel<T, BinaryOperation>(IsSYCL2020, Range);
1616

17-
// Skip the test for such big arrays now.
18-
constexpr size_t TwoGB = 2LL * 1024 * 1024 * 1024;
17+
// It is a known problem with passing data that is close to 4Gb in size
18+
// to device. Such data breaks the execution pretty badly.
19+
// Some of test cases calling this function try to verify the correctness
20+
// of reduction with the global range bigger than the maximal work-group size
21+
// for the device. Maximal WG size for device may be very big, e.g. it is
22+
// 67108864 for ACC emulator. Multiplying that by some factor
23+
// (to exceed max WG-Size) and multiplying it by the element size may exceed
24+
// the safe size of data passed to device.
25+
// Let's set it to 1 GB for now, and just skip the test if it exceeds 1Gb.
26+
constexpr size_t OneGB = 1LL * 1024 * 1024 * 1024;
1927
range<Dims> GlobalRange = Range.get_global_range();
20-
if (GlobalRange.size() > TwoGB)
28+
if (GlobalRange.size() * sizeof(T) > OneGB) {
29+
std::cout << " SKIPPED due to too big data size" << std::endl;
2130
return 0;
31+
}
2232

2333
buffer<T, Dims> InBuf(GlobalRange);
2434
buffer<T, 1> OutBuf(1);

SYCL/Reduction/reduction_range_1d_s1_dw.cpp

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
11
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
22
// RUN: %CPU_RUN_PLACEHOLDER %t.out
33
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4-
5-
// TODO: accelerator may not suport atomics required by the current
6-
// implementation. Enable testing when implementation is fixed.
7-
// RUNx: %ACC_RUN_PLACEHOLDER %t.out
4+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
85

96
#include "reduction_range_scalar.hpp"
107

@@ -30,15 +27,15 @@ int main() {
3027

3128
// Fast-reduce and Fast-atomics. Try various range types/sizes.
3229
tests<class A1, int>(Q, 0, 99, std::plus<int>{}, 1);
33-
tests<class A2, int64_t>(Q, 0, 99, std::plus<>{}, 7);
34-
tests<class A3, int64_t>(Q, 0, 99, std::plus<>{}, 64);
30+
tests<class A2, int>(Q, 0, 99, std::plus<>{}, 7);
31+
tests<class A3, int>(Q, 0, 99, std::plus<>{}, 64);
3532
tests<class A4, int>(Q, 0, 99, std::plus<>{}, MaxWGSize * 2);
3633
tests<class A5, int>(Q, 0, 99, std::plus<>{}, MaxWGSize * 2 + 5);
3734

3835
// Try various types & ranges.
39-
tests<class B1, int>(Q, ~0, 99, std::bit_and<>{}, 7);
40-
tests<class B2, int>(Q, 0, 0xff99, std::bit_xor<>{}, MaxWGSize);
41-
tests<class B3, int>(Q, 0, 0xff99, std::bit_or<>{}, 3);
36+
tests<class B1, int>(Q, ~0, 0xfefefefe, std::bit_and<>{}, 7);
37+
tests<class B2, int>(Q, 0, 0xfedcff99, std::bit_xor<>{}, MaxWGSize);
38+
tests<class B3, int>(Q, 0, 0xfedcff99, std::bit_or<>{}, 3);
4239
tests<class B4, short>(Q, 1, 2, std::multiplies<>{}, 7);
4340
tests<class B5, int>(Q, (std::numeric_limits<int>::max)(), -99,
4441
ext::oneapi::minimum<>{}, MaxWGSize * 2);

SYCL/Reduction/reduction_range_1d_s1_rw.cpp

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,7 @@
11
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
22
// RUN: %CPU_RUN_PLACEHOLDER %t.out
33
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4-
5-
// TODO: test disabled due to Jenkins testing failure on unrelated commit
6-
// Sporadic failure
7-
// UNSUPPORTED: linux && level_zero
8-
9-
// TODO: accelerator may not suport atomics required by the current
10-
// implementation. Enable testing when implementation is fixed.
11-
// RUNx: %ACC_RUN_PLACEHOLDER %t.out
4+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
125

136
// This test performs basic checks of parallel_for(range<1>, reduction, func)
147
// with reductions initialized with 1-dimensional read_write accessor
@@ -34,15 +27,15 @@ int main() {
3427

3528
// Fast-reduce and Fast-atomics. Try various range types/sizes.
3629
tests<class A1, int>(Q, 0, 99, std::plus<int>{}, 1);
37-
tests<class A2, int64_t>(Q, 0, 99, std::plus<>{}, 7);
38-
tests<class A3, int64_t>(Q, 0, 99, std::plus<>{}, 64);
30+
tests<class A2, int>(Q, 0, 99, std::plus<>{}, 7);
31+
tests<class A3, int>(Q, 0, 99, std::plus<>{}, 64);
3932
tests<class A4, int>(Q, 0, 99, std::plus<>{}, MaxWGSize * 2);
4033
tests<class A5, int>(Q, 0, 99, std::plus<>{}, MaxWGSize * 2 + 5);
4134

4235
// Try various types & ranges.
4336
tests<class B1, int>(Q, ~0, ~0, std::bit_and<>{}, 8);
4437
tests<class B2, int>(Q, 0, 0x12340000, std::bit_xor<>{}, 16);
45-
tests<class B3, int>(Q, 0, 0x3400, std::bit_or<>{}, MaxWGSize * 4);
38+
tests<class B3, int>(Q, 0, 0x3400, std::bit_or<>{}, MaxWGSize * 3);
4639
tests<class B4, uint64_t>(Q, 1, 2, std::multiplies<>{}, 16);
4740
tests<class B5, float>(Q, 1, 3, std::multiplies<>{}, 11);
4841
tests<class B6, int>(Q, (std::numeric_limits<int>::max)(), -99,

SYCL/Reduction/reduction_utils.hpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,15 @@ void initInputData(buffer<T, 1> &InBuf, T &ExpectedOut, T Identity,
1414
if (std::is_same_v<BinaryOperation, std::multiplies<T>> ||
1515
std::is_same_v<BinaryOperation, std::multiplies<>>)
1616
In[I] = 1.1 + (((I % 11) == 0) ? 1 : 0);
17+
else if (std::is_same_v<BinaryOperation, std::bit_and<T>> ||
18+
std::is_same_v<BinaryOperation, std::bit_and<>>)
19+
In[I] = (I + 1) | 0x10203040;
20+
else if (std::is_same_v<BinaryOperation, sycl::minimum<T>> ||
21+
std::is_same_v<BinaryOperation, sycl::minimum<>>)
22+
In[I] = Range[0] - I;
23+
else if (std::is_same_v<BinaryOperation, sycl::maximum<T>> ||
24+
std::is_same_v<BinaryOperation, sycl::maximum<>>)
25+
In[I] = I;
1726
else
1827
In[I] = ((I + 1) % 5) + 1.1;
1928
ExpectedOut = BOp(ExpectedOut, In[I]);
@@ -32,6 +41,15 @@ void initInputData(buffer<T, 2> &InBuf, T &ExpectedOut, T Identity,
3241
if (std::is_same_v<BinaryOperation, std::multiplies<T>> ||
3342
std::is_same_v<BinaryOperation, std::multiplies<>>)
3443
In[J][I] = 1.1 + ((((I + J * 3) % 11) == 0) ? 1 : 0);
44+
else if (std::is_same_v<BinaryOperation, std::bit_and<T>> ||
45+
std::is_same_v<BinaryOperation, std::bit_and<>>)
46+
In[J][I] = (I + J + 1) | 0x10203040;
47+
else if (std::is_same_v<BinaryOperation, sycl::minimum<T>> ||
48+
std::is_same_v<BinaryOperation, sycl::minimum<>>)
49+
In[J][I] = Range[0] + Range[1] - I - J;
50+
else if (std::is_same_v<BinaryOperation, sycl::maximum<T>> ||
51+
std::is_same_v<BinaryOperation, sycl::maximum<>>)
52+
In[J][I] = I + J;
3553
else
3654
In[J][I] = ((I + 1 + J) % 5) + 1.1;
3755
ExpectedOut = BOp(ExpectedOut, In[J][I]);
@@ -52,6 +70,15 @@ void initInputData(buffer<T, 3> &InBuf, T &ExpectedOut, T Identity,
5270
if (std::is_same_v<BinaryOperation, std::multiplies<T>> ||
5371
std::is_same_v<BinaryOperation, std::multiplies<>>)
5472
In[K][J][I] = 1.1 + ((((I + J * 3 + K) % 11) == 0) ? 1 : 0);
73+
else if (std::is_same_v<BinaryOperation, std::bit_and<T>> ||
74+
std::is_same_v<BinaryOperation, std::bit_and<>>)
75+
In[K][J][I] = (I + J + K + 1) | 0x10203040;
76+
else if (std::is_same_v<BinaryOperation, sycl::minimum<T>> ||
77+
std::is_same_v<BinaryOperation, sycl::minimum<>>)
78+
In[K][J][I] = Range[0] + Range[1] + Range[2] - I - J - K;
79+
else if (std::is_same_v<BinaryOperation, sycl::maximum<T>> ||
80+
std::is_same_v<BinaryOperation, sycl::maximum<>>)
81+
In[K][J][I] = I + J + K;
5582
else
5683
In[K][J][I] = ((I + 1 + J + K * 3) % 5) + 1.1;
5784
ExpectedOut = BOp(ExpectedOut, In[K][J][I]);

0 commit comments

Comments
 (0)