Skip to content

Commit 7cb9d8e

Browse files
committed
[SYCL] E2E Test fix for last update intel#2
Signed-off-by: Hu, Peisen <[email protected]>
1 parent ce067bb commit 7cb9d8e

File tree

7 files changed

+103
-105
lines changed

7 files changed

+103
-105
lines changed

sycl/test-e2e/ClusterLaunch/enqueueLaunchCustom_check_event_deps.cpp

+18-7
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,22 @@ template <typename T> void dummy_kernel(T *Input, int N, sycl::nd_item<1> It) {
2424
#endif
2525
}
2626

27+
template <typename T> struct KernelFunctor {
28+
using namespace sycl::ext::oneapi::experimental;
29+
30+
T mAcc;
31+
properties mClusterLaunchProperty;
32+
KernelFunctor(properties ClusterLaunchProperty, T Acc)
33+
: mClusterLaunchProperty(ClusterLaunchProperty), mAcc(Acc) {}
34+
35+
void operator()(sycl::nd_item<1> It) const {
36+
dummy_kernel(
37+
mAcc.template get_multi_ptr<sycl::access::decorated::yes>().get(), 4096,
38+
It);
39+
}
40+
auto get(properties_tag) const { return mClusterLaunchProperty; }
41+
};
42+
2743
int main() {
2844

2945
std::vector<int> HostArray(4096, -20);
@@ -46,13 +62,8 @@ int main() {
4662
cuda::cluster_size ClusterDims(sycl::range{2});
4763
properties ClusterLaunchProperty{ClusterDims};
4864
auto Acc = Buff.template get_access<sycl::access::mode::read_write>(CGH);
49-
CGH.parallel_for(
50-
sycl::nd_range({4096}, {32}), ClusterLaunchProperty,
51-
[=](sycl::nd_item<1> It) {
52-
dummy_kernel(
53-
Acc.get_multi_ptr<sycl::access::decorated::yes>().get(), 4096,
54-
It);
55-
});
65+
CGH.parallel_for(sycl::nd_range({4096}, {32}),
66+
KernelFunctor(ClusterLaunchProperty, Acc));
5667
});
5768
Queue.submit([&](sycl::handler &CGH) {
5869
auto Acc = Buff.template get_access<sycl::access::mode::read_write>(CGH);

sycl/test-e2e/DeviceCodeSplit/grf.cpp

+11-2
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,15 @@ bool checkResult(const std::vector<float> &A, int Inc) {
6767
return true;
6868
}
6969

70+
template <typename T> struct KernelFunctor {
71+
T mPA;
72+
properties mProp;
73+
KernelFunctor(properties Prop, T PA) : mProp(Prop), mPA(PA) {}
74+
75+
void operator()(id<1> i) const { PA[i] += 2; }
76+
auto get(properties_tag) const { return mProp; }
77+
};
78+
7079
int main(void) {
7180
constexpr unsigned Size = 32;
7281
constexpr unsigned VL = 16;
@@ -122,8 +131,8 @@ int main(void) {
122131

123132
auto e = q.submit([&](handler &cgh) {
124133
auto PA = bufa.get_access<access::mode::read_write>(cgh);
125-
cgh.parallel_for<class SYCLKernelSpecifiedGRF>(
126-
Size, prop, [=](id<1> i) { PA[i] += 2; });
134+
cgh.parallel_for<class SYCLKernelSpecifiedGRF>(Size,
135+
KernelFunctor(prop, PA));
127136
});
128137
e.wait();
129138
} catch (sycl::exception const &e) {

sycl/test-e2e/Graph/Inputs/sub_group_prop.cpp

-68
Original file line numberDiff line numberDiff line change
@@ -39,49 +39,13 @@ void test(queue &Queue, const std::vector<size_t> SupportedSGSizes) {
3939
return;
4040
}
4141

42-
auto Props = ext::oneapi::experimental::properties{
43-
ext::oneapi::experimental::sub_group_size<SGSize>};
44-
4542
nd_range<1> NdRange(SGSize * 4, SGSize * 2);
4643

4744
size_t ReadSubGroupSize = 0;
4845
{
4946
buffer ReadSubGroupSizeBuf(&ReadSubGroupSize, range(1));
5047
ReadSubGroupSizeBuf.set_write_back(false);
5148

52-
{
53-
exp_ext::command_graph Graph{
54-
Queue.get_context(),
55-
Queue.get_device(),
56-
{exp_ext::property::graph::assume_buffer_outlives_graph{}}};
57-
58-
add_node(Graph, Queue, [&](handler &CGH) {
59-
accessor ReadSubGroupSizeBufAcc{ReadSubGroupSizeBuf, CGH,
60-
sycl::write_only, sycl::no_init};
61-
62-
CGH.parallel_for<SubGroupKernel<Variant::Function, SGSize>>(
63-
NdRange, Props, [=](nd_item<1> NdItem) {
64-
auto SG = NdItem.get_sub_group();
65-
if (NdItem.get_global_linear_id() == 0)
66-
ReadSubGroupSizeBufAcc[0] = SG.get_local_linear_range();
67-
});
68-
});
69-
70-
auto ExecGraph = Graph.finalize();
71-
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); });
72-
Queue.wait_and_throw();
73-
}
74-
75-
host_accessor HostAcc(ReadSubGroupSizeBuf);
76-
ReadSubGroupSize = HostAcc[0];
77-
}
78-
assert(ReadSubGroupSize == SGSize && "Failed check for function.");
79-
80-
ReadSubGroupSize = 0;
81-
{
82-
buffer ReadSubGroupSizeBuf(&ReadSubGroupSize, range(1));
83-
ReadSubGroupSizeBuf.set_write_back(false);
84-
8549
{
8650
exp_ext::command_graph Graph{
8751
Queue.get_context(),
@@ -107,38 +71,6 @@ void test(queue &Queue, const std::vector<size_t> SupportedSGSizes) {
10771
ReadSubGroupSize = HostAcc[0];
10872
}
10973
assert(ReadSubGroupSize == SGSize && "Failed check for functor.");
110-
111-
ReadSubGroupSize = 0;
112-
{
113-
buffer ReadSubGroupSizeBuf(&ReadSubGroupSize, range(1));
114-
ReadSubGroupSizeBuf.set_write_back(false);
115-
116-
{
117-
exp_ext::command_graph Graph{
118-
Queue.get_context(),
119-
Queue.get_device(),
120-
{exp_ext::property::graph::assume_buffer_outlives_graph{}}};
121-
122-
add_node(Graph, Queue, [&](handler &CGH) {
123-
accessor ReadSubGroupSizeBufAcc{ReadSubGroupSizeBuf, CGH,
124-
sycl::write_only, sycl::no_init};
125-
KernelFunctorWithSGSizeProp<SGSize> KernelFunctor{
126-
ReadSubGroupSizeBufAcc};
127-
128-
CGH.parallel_for<SubGroupKernel<Variant::Functor, SGSize>>(
129-
NdRange, Props, KernelFunctor);
130-
});
131-
132-
auto ExecGraph = Graph.finalize();
133-
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); });
134-
Queue.wait_and_throw();
135-
}
136-
137-
host_accessor HostAcc(ReadSubGroupSizeBuf);
138-
ReadSubGroupSize = HostAcc[0];
139-
}
140-
assert(ReadSubGroupSize == SGSize &&
141-
"Failed check for functor and properties.");
14274
}
14375

14476
int main() {

sycl/test-e2e/VirtualFunctions/misc/group-barrier.cpp

+20-7
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,24 @@ class MultiplyOp : public BaseOp {
101101
}
102102
};
103103

104+
template <typename T1, typename T2, typename T3> struct KernelFunctor {
105+
T1 mDeviceStorage;
106+
T2 mDataAcc;
107+
T3 mLocalAcc;
108+
KernelFunctor(T1 DeviceStorage, T2 DataAcc, T3 LocalAcc)
109+
: mDeviceStorage(DeviceStorage), mDataAcc(DataAcc), mLocalAcc(LocalAcc) {}
110+
111+
void operator()(sycl::nd_item<1> It) const {
112+
auto *Ptr = mDeviceStorage->template getAs<BaseOp>();
113+
mDataAcc[It.get_global_id()] = Ptr->apply(
114+
mLocalAcc.template get_multi_ptr<sycl::access::decorated::no>().get(),
115+
It.get_group());
116+
}
117+
auto get(oneapi::properties_tag) const {
118+
return oneapi::properties{oneapi::assume_indirect_calls};
119+
}
120+
};
121+
104122
int main() try {
105123
using storage_t = obj_storage_t<SumOp, MultiplyOp>;
106124

@@ -113,7 +131,6 @@ int main() try {
113131
sycl::range G{16};
114132
sycl::range L{4};
115133

116-
constexpr oneapi::properties props{oneapi::assume_indirect_calls};
117134
for (unsigned TestCase = 0; TestCase < 2; ++TestCase) {
118135
sycl::buffer<int> DataStorage(G);
119136

@@ -126,12 +143,8 @@ int main() try {
126143
q.submit([&](sycl::handler &CGH) {
127144
sycl::accessor DataAcc(DataStorage, CGH, sycl::read_write);
128145
sycl::local_accessor<int> LocalAcc(L, CGH);
129-
CGH.parallel_for(sycl::nd_range{G, L}, props, [=](auto It) {
130-
auto *Ptr = DeviceStorage->getAs<BaseOp>();
131-
DataAcc[It.get_global_id()] = Ptr->apply(
132-
LocalAcc.get_multi_ptr<sycl::access::decorated::no>().get(),
133-
It.get_group());
134-
});
146+
CGH.parallel_for(sycl::nd_range{G, L},
147+
KernelFunctor(DeviceStorage, DataAcc, LocalAcc));
135148
}).wait_and_throw();
136149

137150
auto *Ptr = HostStorage.construct</* ret type = */ BaseOp>(TestCase);

sycl/test-e2e/VirtualFunctions/misc/range-non-uniform-vf-2.cpp

+20-9
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,25 @@ class OpB : public BaseOp {
4444
virtual int bar(int V) { return V / 2; }
4545
};
4646

47+
template <typename T1, typename T2> struct KernelFunctor {
48+
T1 mDeviceStorage;
49+
T2 mDataAcc;
50+
KernelFunctor(T1 DeviceStorage, T2 DataAcc)
51+
: mDeviceStorage(DeviceStorage), mDataAcc(DataAcc) {}
52+
53+
void operator()(sycl::id<1> It) const {
54+
// Select method that corresponds to this work-item
55+
auto *Ptr = mDeviceStorage->template getAs<BaseOp>();
56+
if (It % 2)
57+
mDataAcc[It] = Ptr->foo(mDataAcc[It]);
58+
else
59+
mDataAcc[It] = Ptr->bar(mDataAcc[It]);
60+
}
61+
auto get(oneapi::properties_tag) const {
62+
return oneapi::properties{oneapi::assume_indirect_calls};
63+
}
64+
};
65+
4766
int main() try {
4867
using storage_t = obj_storage_t<OpA, OpB>;
4968

@@ -54,7 +73,6 @@ int main() try {
5473
auto *DeviceStorage = sycl::malloc_shared<storage_t>(1, q);
5574
sycl::range R{1024};
5675

57-
constexpr oneapi::properties props{oneapi::assume_indirect_calls};
5876
for (size_t TestCase = 0; TestCase < 2; ++TestCase) {
5977
std::vector<int> HostData(R.size());
6078
std::iota(HostData.begin(), HostData.end(), 0);
@@ -69,14 +87,7 @@ int main() try {
6987

7088
q.submit([&](sycl::handler &CGH) {
7189
sycl::accessor DataAcc(DataStorage, CGH, sycl::read_write);
72-
CGH.parallel_for(R, props, [=](auto It) {
73-
// Select method that corresponds to this work-item
74-
auto *Ptr = DeviceStorage->template getAs<BaseOp>();
75-
if (It % 2)
76-
DataAcc[It] = Ptr->foo(DataAcc[It]);
77-
else
78-
DataAcc[It] = Ptr->bar(DataAcc[It]);
79-
});
90+
CGH.parallel_for(R, KernelFunctor(DeviceStorage, DataAcc));
8091
});
8192

8293
BaseOp *Ptr = HostStorage.construct</* ret type = */ BaseOp>(TestCase);

sycl/test-e2e/VirtualFunctions/misc/range-non-uniform-vf.cpp

+18-7
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,23 @@ class RoundOp : public BaseOp {
4141
virtual float apply(float V) { return sycl::round(V); }
4242
};
4343

44+
template <typename T1, typename T2> struct KernelFunctor {
45+
T1 mDeviceStorage;
46+
T2 mDataAcc;
47+
KernelFunctor(T1 DeviceStorage, T2 DataAcc)
48+
: mDeviceStorage(DeviceStorage), mDataAcc(DataAcc) {}
49+
50+
void operator()(sycl::id<1> It) const {
51+
// Select an object that corresponds to this work-item
52+
auto Ind = It % 3;
53+
auto *Ptr = mDeviceStorage[Ind].template getAs<BaseOp>();
54+
mDataAcc[It] = Ptr->apply(mDataAcc[It]);
55+
}
56+
auto get(oneapi::properties_tag) const {
57+
return oneapi::properties{oneapi::assume_indirect_calls};
58+
}
59+
};
60+
4461
int main() try {
4562
using storage_t = obj_storage_t<FloorOp, CeilOp, RoundOp>;
4663

@@ -51,7 +68,6 @@ int main() try {
5168
auto *DeviceStorage = sycl::malloc_shared<storage_t>(3, q);
5269
sycl::range R{1024};
5370

54-
constexpr oneapi::properties props{oneapi::assume_indirect_calls};
5571
{
5672
std::vector<float> HostData(R.size());
5773
for (size_t I = 1; I < HostData.size(); ++I)
@@ -69,12 +85,7 @@ int main() try {
6985

7086
q.submit([&](sycl::handler &CGH) {
7187
sycl::accessor DataAcc(DataStorage, CGH, sycl::read_write);
72-
CGH.parallel_for(R, props, [=](auto it) {
73-
// Select an object that corresponds to this work-item
74-
auto Ind = it % 3;
75-
auto *Ptr = DeviceStorage[Ind].template getAs<BaseOp>();
76-
DataAcc[it] = Ptr->apply(DataAcc[it]);
77-
});
88+
CGH.parallel_for(R, KernelFunctor(DeviceStorage, DataAcc));
7889
});
7990

8091
BaseOp *Ptr[] = {HostStorage[0].construct</* ret type = */ BaseOp>(0),

sycl/test-e2e/VirtualFunctions/misc/range-uniform-vf.cpp

+16-5
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,21 @@ class RoundOp : public BaseOp {
4141
virtual float apply(float V) { return sycl::round(V); }
4242
};
4343

44+
template <typename T1, typename T2> struct KernelFunctor {
45+
T1 mDeviceStorage;
46+
T2 mDataAcc;
47+
KernelFunctor(T1 DeviceStorage, T2 DataAcc)
48+
: mDeviceStorage(DeviceStorage), mDataAcc(DataAcc) {}
49+
50+
void operator()(sycl::id<1> It) const {
51+
auto *Ptr = mDeviceStorage->getAs<BaseOp>();
52+
mDataAcc[It] = Ptr->apply(mDataAcc[It]);
53+
}
54+
auto get(oneapi::properties_tag) const {
55+
return oneapi::properties{oneapi::assume_indirect_calls};
56+
}
57+
};
58+
4459
int main() try {
4560
using storage_t = obj_storage_t<FloorOp, CeilOp, RoundOp>;
4661

@@ -51,7 +66,6 @@ int main() try {
5166
auto *DeviceStorage = sycl::malloc_shared<storage_t>(1, q);
5267
sycl::range R{1024};
5368

54-
constexpr oneapi::properties props{oneapi::assume_indirect_calls};
5569
for (unsigned TestCase = 0; TestCase < 3; ++TestCase) {
5670
std::vector<float> HostData(R.size());
5771
for (size_t I = 1; I < HostData.size(); ++I)
@@ -67,10 +81,7 @@ int main() try {
6781

6882
q.submit([&](sycl::handler &CGH) {
6983
sycl::accessor DataAcc(DataStorage, CGH, sycl::read_write);
70-
CGH.parallel_for(R, props, [=](auto it) {
71-
auto *Ptr = DeviceStorage->getAs<BaseOp>();
72-
DataAcc[it] = Ptr->apply(DataAcc[it]);
73-
});
84+
CGH.parallel_for(R, KernelFunctor(DeviceStorage, DataAcc));
7485
});
7586

7687
auto *Ptr = HostStorage.construct</* ret type = */ BaseOp>(TestCase);

0 commit comments

Comments
 (0)