Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit c38e874

Browse files
authored
[SYCL] Add test cases for SYCL2020 reductions (#194)
* [SYCL] Add test cases for SYCL2020 reductions + initialize_to_identity property The corresponding changes in SYCL RT: intel/llvm#3410 intel/llvm#3481 This patch - adds checks for SYCL-2020 reductions with initialize_to_identity property (#3410) - enables the test reduction_nd_N_vars.cpp for level-zero (#3481) - reduces some of tests by eliminating unneeded/duplicated checks. - removes the test reduction_transparent.cpp as useless because transparent operators already checked in many other tests. - reduces runtime of reduction tests by about 200x in average by re-using queue created once instead of creating it in each of test-cases Signed-off-by: Vyacheslav N Klochkov <[email protected]>
1 parent 9df8e9b commit c38e874

16 files changed

+488
-524
lines changed

SYCL/Reduction/reduction_big_data.cpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,7 @@ size_t getSafeMaxWGSize(size_t MaxWGSize, size_t MemSize, size_t OneElemSize) {
2828
}
2929

3030
template <typename KernelName, typename T, int Dim, class BinaryOperation>
31-
void test(T Identity) {
32-
queue Q;
31+
void test(queue &Q, T Identity) {
3332
device Device = Q.get_device();
3433

3534
std::size_t MaxWGSize = Device.get_info<info::device::max_work_group_size>();
@@ -99,10 +98,11 @@ template <class T> struct BigCustomVecPlus {
9998
};
10099

101100
int main() {
102-
test<class Test1, float, 0, ONEAPI::maximum<>>(getMinimumFPValue<float>());
101+
queue Q;
102+
test<class Test1, float, 0, ONEAPI::maximum<>>(Q, getMinimumFPValue<float>());
103103

104104
using BCV = BigCustomVec<long long>;
105-
test<class Test2, BCV, 1, BigCustomVecPlus<long long>>(BCV(0));
105+
test<class Test2, BCV, 1, BigCustomVecPlus<long long>>(Q, BCV(0));
106106

107107
std::cout << "Test passed\n";
108108
return 0;

SYCL/Reduction/reduction_nd_N_vars.cpp

+48-50
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,11 @@
1-
// TODO: level_zero reports an internal error for this test.
2-
// UNSUPPORTED: level_zero
3-
4-
// TODO: Windows implementation of std::tuple is not trivially copiable and
5-
// thus cannot be passed from HOST to DEVICE. Enable the test on Windows when
6-
// SYCL RT gets new type traits having less strict requirements for objects
7-
// being passed to DEVICE.
8-
// UNSUPPORTED: windows
9-
101
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
112
// RUN: %CPU_RUN_PLACEHOLDER %t.out
123
// RUN: %GPU_RUN_PLACEHOLDER %t.out
134
// RUN: %ACC_RUN_PLACEHOLDER %t.out
145

6+
// TODO: The test irregularly reports incorrect results on CPU.
7+
// UNSUPPORTED: cpu
8+
159
// This test checks handling of parallel_for() accepting nd_range and
1610
// two or more reductions.
1711

@@ -36,26 +30,30 @@ constexpr access::mode RW = access::mode::read_write;
3630
constexpr access::mode DW = access::mode::discard_write;
3731

3832
template <typename T>
39-
bool cherkResultIsExpected(int TestCaseNum, T Expected, T Computed) {
33+
bool cherkResultIsExpected(int TestCaseNum, T Expected, T Computed,
34+
bool IsSYCL2020) {
4035
bool Success;
4136
if (!std::is_floating_point<T>::value)
4237
Success = (Expected == Computed);
4338
else
4439
Success = std::abs((Expected / Computed) - 1) < 0.5;
4540

46-
if (!Success)
47-
std::cout << TestCaseNum << ": Expected value = " << Expected
41+
if (!Success) {
42+
std::cerr << "Is SYCL2020 mode: " << IsSYCL2020 << std::endl;
43+
std::cerr << TestCaseNum << ": Expected value = " << Expected
4844
<< ", Computed value = " << Computed << "\n";
45+
}
4946

5047
return Success;
5148
}
5249

5350
// Returns 0 if the test case passed. Otherwise, some non-zero value.
54-
template <class Name, bool IsSYCL2020Mode, typename T1, access::mode Mode1,
51+
template <class Name, bool IsSYCL2020, typename T1, access::mode Mode1,
5552
typename T2, access::mode Mode2, typename T3, access::mode Mode3,
56-
typename T4, class BinaryOperation1, class BinaryOperation2,
57-
class BinaryOperation3, class BinaryOperation4>
58-
int testOne(T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,
53+
typename T4, access::mode Mode4, class BinaryOperation1,
54+
class BinaryOperation2, class BinaryOperation3,
55+
class BinaryOperation4>
56+
int testOne(queue &Q, T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,
5957
T2 IdentityVal2, T2 InitVal2, BinaryOperation2 BOp2,
6058
T3 IdentityVal3, T3 InitVal3, BinaryOperation3 BOp3,
6159
T4 IdentityVal4, T3 InitVal4, BinaryOperation4 BOp4,
@@ -68,7 +66,6 @@ int testOne(T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,
6866
buffer<T2, 1> OutBuf2(1);
6967
buffer<T3, 1> OutBuf3(1);
7068

71-
queue Q;
7269
auto Dev = Q.get_device();
7370
if (AllocType4 == usm::alloc::shared &&
7471
!Dev.get_info<info::device::usm_shared_allocations>())
@@ -100,8 +97,9 @@ int testOne(T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,
10097
CorrectOut2 = BOp2(CorrectOut2, InitVal2);
10198
if (Mode3 == access::mode::read_write)
10299
CorrectOut3 = BOp3(CorrectOut3, InitVal3);
103-
// 4th reduction is USM and this is read_write.
104-
CorrectOut4 = BOp4(CorrectOut4, InitVal4);
100+
// discard_write mode for USM reductions is available only SYCL2020.
101+
if (Mode4 == access::mode::read_write || !IsSYCL2020)
102+
CorrectOut4 = BOp4(CorrectOut4, InitVal4);
105103

106104
// Inititialize data.
107105
{
@@ -123,17 +121,21 @@ int testOne(T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,
123121
}
124122

125123
auto NDR = nd_range<1>{range<1>(NWorkItems), range<1>{WGSize}};
126-
if constexpr (IsSYCL2020Mode) {
124+
if constexpr (IsSYCL2020) {
127125
Q.submit([&](handler &CGH) {
128126
auto In1 = InBuf1.template get_access<access::mode::read>(CGH);
129127
auto In2 = InBuf2.template get_access<access::mode::read>(CGH);
130128
auto In3 = InBuf3.template get_access<access::mode::read>(CGH);
131129
auto In4 = InBuf4.template get_access<access::mode::read>(CGH);
132130

133-
auto Redu1 = sycl::reduction(OutBuf1, CGH, IdentityVal1, BOp1);
134-
auto Redu2 = sycl::reduction(OutBuf2, CGH, IdentityVal2, BOp2);
135-
auto Redu3 = sycl::reduction(OutBuf3, CGH, IdentityVal3, BOp3);
136-
auto Redu4 = sycl::reduction(Out4, IdentityVal4, BOp4);
131+
auto Redu1 = sycl::reduction(OutBuf1, CGH, IdentityVal1, BOp1,
132+
getPropertyList<Mode1>());
133+
auto Redu2 = sycl::reduction(OutBuf2, CGH, IdentityVal2, BOp2,
134+
getPropertyList<Mode2>());
135+
auto Redu3 = sycl::reduction(OutBuf3, CGH, IdentityVal3, BOp3,
136+
getPropertyList<Mode3>());
137+
auto Redu4 =
138+
sycl::reduction(Out4, IdentityVal4, BOp4, getPropertyList<Mode4>());
137139

138140
auto Lambda = [=](nd_item<1> NDIt, auto &Sum1, auto &Sum2, auto &Sum3,
139141
auto &Sum4) {
@@ -193,10 +195,10 @@ int testOne(T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,
193195
Out4Val = *Out4;
194196
}
195197

196-
Error += cherkResultIsExpected(1, CorrectOut1, Out1[0]) ? 0 : 1;
197-
Error += cherkResultIsExpected(2, CorrectOut2, Out2[0]) ? 0 : 1;
198-
Error += cherkResultIsExpected(3, CorrectOut3, Out3[0]) ? 0 : 1;
199-
Error += cherkResultIsExpected(4, CorrectOut4, Out4Val) ? 0 : 1;
198+
Error += cherkResultIsExpected(1, CorrectOut1, Out1[0], IsSYCL2020) ? 0 : 1;
199+
Error += cherkResultIsExpected(2, CorrectOut2, Out2[0], IsSYCL2020) ? 0 : 1;
200+
Error += cherkResultIsExpected(3, CorrectOut3, Out3[0], IsSYCL2020) ? 0 : 1;
201+
Error += cherkResultIsExpected(4, CorrectOut4, Out4Val, IsSYCL2020) ? 0 : 1;
200202
free(Out4, Q.get_context());
201203
}
202204

@@ -211,45 +213,41 @@ int testOne(T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,
211213
// sycl::reduction and sycl::ONEAPI::reduction
212214
template <class Name, typename T1, access::mode Mode1, typename T2,
213215
access::mode Mode2, typename T3, access::mode Mode3, typename T4,
214-
class BinaryOperation1, class BinaryOperation2,
216+
access::mode Mode4, class BinaryOperation1, class BinaryOperation2,
215217
class BinaryOperation3, class BinaryOperation4>
216-
int testBoth(T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,
218+
int testBoth(queue &Q, T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,
217219
T2 IdentityVal2, T2 InitVal2, BinaryOperation2 BOp2,
218220
T3 IdentityVal3, T3 InitVal3, BinaryOperation3 BOp3,
219221
T4 IdentityVal4, T3 InitVal4, BinaryOperation4 BOp4,
220222
usm::alloc AllocType4, size_t NWorkItems, size_t WGSize) {
221223
int Error =
222-
testOne<KName<Name, false>, false, T1, Mode1, T2, Mode2, T3, Mode3, T4>(
223-
IdentityVal1, InitVal1, BOp1, IdentityVal2, InitVal2, BOp2,
224-
IdentityVal3, InitVal3, BOp3, IdentityVal4, InitVal4, BOp4,
225-
AllocType4, NWorkItems, WGSize);
226-
227-
// TODO: property::reduction::initialize_to_identity is not supported yet.
228-
// Thus only read_write mode is tested now.
229-
constexpr access::mode _Mode1 = (Mode1 == DW) ? RW : Mode1;
230-
constexpr access::mode _Mode2 = (Mode2 == DW) ? RW : Mode2;
231-
constexpr access::mode _Mode3 = (Mode3 == DW) ? RW : Mode3;
224+
testOne<KName<Name, false>, false, T1, Mode1, T2, Mode2, T3, Mode3, T4,
225+
Mode4>(Q, IdentityVal1, InitVal1, BOp1, IdentityVal2, InitVal2,
226+
BOp2, IdentityVal3, InitVal3, BOp3, IdentityVal4, InitVal4,
227+
BOp4, AllocType4, NWorkItems, WGSize);
228+
232229
Error +=
233-
testOne<KName<Name, true>, true, T1, _Mode1, T2, _Mode2, T3, _Mode3, T4>(
234-
IdentityVal1, InitVal1, BOp1, IdentityVal2, InitVal2, BOp2,
235-
IdentityVal3, InitVal3, BOp3, IdentityVal4, InitVal4, BOp4,
236-
AllocType4, NWorkItems, WGSize);
230+
testOne<KName<Name, true>, true, T1, Mode1, T2, Mode2, T3, Mode3, T4,
231+
Mode4>(Q, IdentityVal1, InitVal1, BOp1, IdentityVal2, InitVal2,
232+
BOp2, IdentityVal3, InitVal3, BOp3, IdentityVal4, InitVal4,
233+
BOp4, AllocType4, NWorkItems, WGSize);
237234
return Error;
238235
}
239236

240237
int main() {
241-
int Error = testBoth<class FP32Plus16x16, float, DW, int, RW, short, RW, int>(
242-
0, 1000, std::plus<float>{}, 0, 2000, std::plus<>{}, 0, 4000,
238+
queue Q;
239+
int Error = testBoth<class Case1, float, DW, int, RW, short, RW, int, RW>(
240+
Q, 0, 1000, std::plus<float>{}, 0, 2000, std::plus<>{}, 0, 4000,
243241
std::bit_or<>{}, 0, 8000, std::bit_xor<>{}, usm::alloc::shared, 16, 16);
244242

245243
auto Add = [](auto x, auto y) { return (x + y); };
246-
Error += testBoth<class FP32Plus5x257, float, RW, int, RW, short, DW, int>(
247-
0, 1000, std::plus<float>{}, 0, 2000, std::plus<>{}, 0, 4000, Add, 0,
248-
8000, std::bit_xor<int>{}, usm::alloc::device, 5 * (256 + 1), 5);
244+
Error += testBoth<class Case2, float, RW, int, RW, short, DW, int, DW>(
245+
Q, 0, 1000, std::plus<float>{}, 0, 2000, std::plus<>{}, 0, 4000, Add, 0,
246+
8000, std::plus<>{}, usm::alloc::device, 5 * (256 + 1), 5);
249247

250248
if (!Error)
251249
std::cout << "Test passed\n";
252250
else
253-
std::cout << Error << " test-cases failed\n";
251+
std::cerr << Error << " test-cases failed\n";
254252
return Error;
255253
}

SYCL/Reduction/reduction_nd_conditional.cpp

+6-7
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ template <class T> struct VecPlus {
5656

5757
template <typename SpecializationKernelName, typename T, int Dim,
5858
class BinaryOperation>
59-
void test(T Identity, size_t WGSize, size_t NWItems) {
59+
void test(queue &Q, T Identity, size_t WGSize, size_t NWItems) {
6060
buffer<T, 1> InBuf(NWItems);
6161
buffer<T, 1> OutBuf(1);
6262

@@ -66,7 +66,6 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
6666
initInputData(InBuf, CorrectOut, Identity, BOp, NWItems);
6767

6868
// Compute.
69-
queue Q;
7069
Q.submit([&](handler &CGH) {
7170
auto In = InBuf.template get_access<access::mode::read>(CGH);
7271
accessor<T, Dim, access::mode::discard_write, access::target::global_buffer>
@@ -100,11 +99,11 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
10099
}
101100

102101
int main() {
103-
test<class KernelName_lAx, int, 0, ONEAPI::plus<int>>(0, 2, 2);
104-
test<class KernelName_eVBkBK, int, 1, ONEAPI::plus<int>>(0, 7, 7);
105-
test<class KernelName_vMSyszeYKJbaXATnPL, int, 0, ONEAPI::plus<int>>(0, 2,
106-
64);
107-
test<class KernelName_UPKnfG, short, 1, ONEAPI::plus<short>>(0, 16, 256);
102+
queue Q;
103+
test<class A, int, 0, ONEAPI::plus<int>>(Q, 0, 2, 2);
104+
test<class B, int, 1, ONEAPI::plus<int>>(Q, 0, 7, 7);
105+
test<class C, int, 0, ONEAPI::plus<int>>(Q, 0, 2, 64);
106+
test<class D, short, 1, ONEAPI::plus<short>>(Q, 0, 16, 256);
108107

109108
std::cout << "Test passed\n";
110109
return 0;

SYCL/Reduction/reduction_nd_ext_double.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,4 @@
1111

1212
#include "reduction_nd_ext_type.hpp"
1313

14-
int main() { return runTests<double>("cl_khr_double"); }
14+
int main() { return runTests<double>("cl_khr_fp64"); }

SYCL/Reduction/reduction_nd_ext_type.hpp

+27-23
Original file line numberDiff line numberDiff line change
@@ -14,26 +14,26 @@ constexpr access::mode DW = access::mode::discard_write;
1414

1515
template <typename Name, bool IsSYCL2020Mode, typename T, int Dim,
1616
access::mode Mode, class BinaryOperation>
17-
void test(T Identity, size_t WGSize, size_t NWItems) {
17+
void test(queue &Q, T Identity, T Init, size_t WGSize, size_t NWItems) {
1818
buffer<T, 1> InBuf(NWItems);
1919
buffer<T, 1> OutBuf(1);
2020

2121
// Initialize.
2222
BinaryOperation BOp;
2323
T CorrectOut;
2424
initInputData(InBuf, CorrectOut, Identity, BOp, NWItems);
25-
2625
if (Mode == access::mode::read_write)
27-
(OutBuf.template get_access<access::mode::write>())[0] = Identity;
26+
CorrectOut = BOp(CorrectOut, Init);
27+
28+
(OutBuf.template get_access<access::mode::write>())[0] = Init;
2829

2930
// Compute.
30-
queue Q;
3131
nd_range<1> NDRange(range<1>{NWItems}, range<1>{WGSize});
3232
if constexpr (IsSYCL2020Mode) {
3333
Q.submit([&](handler &CGH) {
3434
auto In = InBuf.template get_access<access::mode::read>(CGH);
35-
auto Redu = sycl::reduction(OutBuf, CGH, Identity, BOp);
36-
35+
auto Redu =
36+
sycl::reduction(OutBuf, CGH, Identity, BOp, getPropertyList<Mode>());
3737
CGH.parallel_for<Name>(NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) {
3838
Sum.combine(In[NDIt.get_global_linear_id()]);
3939
});
@@ -60,42 +60,46 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
6060
std::cout << "Computed value: " << ComputedOut
6161
<< ", Expected value: " << CorrectOut << ", MaxDiff = " << MaxDiff
6262
<< "\n";
63+
if (IsSYCL2020Mode)
64+
std::cout << std::endl;
6365
assert(0 && "Wrong value.");
6466
}
6567
}
6668

6769
template <typename Name, typename T, int Dim, access::mode Mode,
6870
class BinaryOperation>
69-
void testBoth(T Identity, size_t WGSize, size_t NWItems) {
71+
void testBoth(queue &Q, T Identity, T Init, size_t WGSize, size_t NWItems) {
7072
test<KName<Name, false>, false, T, Dim, Mode, BinaryOperation>(
71-
Identity, WGSize, NWItems);
73+
Q, Identity, Init, WGSize, NWItems);
7274

73-
// TODO: property::reduction::initialize_to_identity is not supported yet.
74-
// Thus only read_write mode is tested now.
75-
constexpr access::mode _Mode = (Mode == DW) ? RW : Mode;
76-
test<KName<Name, true>, true, T, Dim, _Mode, BinaryOperation>(
77-
Identity, WGSize, NWItems);
75+
test<KName<Name, true>, true, T, Dim, Mode, BinaryOperation>(
76+
Q, Identity, Init, WGSize, NWItems);
7877
}
7978

8079
template <typename T> int runTests(const string_class &ExtensionName) {
81-
device D = default_selector().select_device();
80+
queue Q;
81+
device D = Q.get_device();
8282
if (!D.is_host() && !D.has_extension(ExtensionName)) {
8383
std::cout << "Test skipped\n";
8484
return 0;
8585
}
8686

87-
// Check some less standards WG sizes and corner cases first.
88-
testBoth<class A, T, 1, RW, std::multiplies<T>>(0, 4, 4);
89-
testBoth<class B, T, 0, DW, ONEAPI::plus<T>>(0, 4, 64);
87+
testBoth<class A, T, 1, RW, std::multiplies<T>>(Q, 1, 77, 4, 4);
9088

91-
testBoth<class C, T, 0, RW, ONEAPI::minimum<T>>(getMaximumFPValue<T>(), 7, 7);
92-
testBoth<class D, T, 1, access::mode::discard_write, ONEAPI::maximum<T>>(
93-
getMinimumFPValue<T>(), 7, 7 * 5);
89+
testBoth<class B1, T, 0, DW, ONEAPI::plus<T>>(Q, 0, 77, 4, 64);
90+
testBoth<class B2, T, 1, RW, ONEAPI::plus<>>(Q, 0, 33, 3, 3 * 5);
9491

95-
testBoth<class E, T, 1, RW, ONEAPI::plus<>>(1, 3, 3 * 5);
96-
testBoth<class F, T, 1, DW, ONEAPI::minimum<>>(getMaximumFPValue<T>(), 3, 3);
97-
testBoth<class G, T, 0, DW, ONEAPI::maximum<>>(getMinimumFPValue<T>(), 3, 3);
92+
testBoth<class C1, T, 0, RW, ONEAPI::minimum<T>>(Q, getMaximumFPValue<T>(),
93+
-10.0, 7, 7);
94+
testBoth<class C2, T, 0, RW, ONEAPI::minimum<T>>(Q, getMaximumFPValue<T>(),
95+
99.0, 7, 7);
96+
testBoth<class C3, T, 1, DW, ONEAPI::minimum<>>(Q, getMaximumFPValue<T>(),
97+
-99.0, 3, 3);
9898

99+
testBoth<class D1, T, 0, DW, ONEAPI::maximum<>>(Q, getMinimumFPValue<T>(),
100+
99.0, 3, 3);
101+
testBoth<class D2, T, 1, RW, ONEAPI::maximum<T>>(Q, getMinimumFPValue<T>(),
102+
99.0, 7, 7 * 5);
99103
std::cout << "Test passed\n";
100104
return 0;
101105
}

0 commit comments

Comments
 (0)