Skip to content

Commit 2aa315f

Browse files
authored
[SYCL][Fusion] Test if kernel fusion API compiles (intel#1404)
Two simple tests to check that code using the kernel fusion extension API compiles correctly. The tests currently do not yet execute the compiled application, as the necessary functionality will only be added to the implementation in a later PR. Spec: intel/llvm#7098 Implementation: intel/llvm#7416 Signed-off-by: Lukas Sommer <[email protected]>
1 parent 83e7ada commit 2aa315f

File tree

2 files changed

+128
-0
lines changed

2 files changed

+128
-0
lines changed

SYCL/KernelFusion/cancel_fusion.cpp

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// UNSUPPORTED: cuda || hip
3+
4+
// Test cancel fusion
5+
6+
#include <sycl/sycl.hpp>
7+
8+
using namespace sycl;
9+
10+
int main() {
11+
constexpr size_t dataSize = 512;
12+
int in1[dataSize], in2[dataSize], in3[dataSize], tmp[dataSize], out[dataSize];
13+
14+
for (size_t i = 0; i < dataSize; ++i) {
15+
in1[i] = i * 2;
16+
in2[i] = i * 3;
17+
in3[i] = i * 4;
18+
tmp[i] = -1;
19+
out[i] = -1;
20+
}
21+
22+
queue q{ext::codeplay::experimental::property::queue::enable_fusion{}};
23+
24+
{
25+
buffer<int> bIn1{in1, range{dataSize}};
26+
buffer<int> bIn2{in2, range{dataSize}};
27+
buffer<int> bIn3{in3, range{dataSize}};
28+
buffer<int> bTmp{tmp, range{dataSize}};
29+
buffer<int> bOut{out, range{dataSize}};
30+
31+
ext::codeplay::experimental::fusion_wrapper fw{q};
32+
fw.start_fusion();
33+
34+
assert(fw.is_in_fusion_mode() && "Queue should be in fusion mode");
35+
36+
q.submit([&](handler &cgh) {
37+
auto accIn1 = bIn1.get_access(cgh);
38+
auto accIn2 = bIn2.get_access(cgh);
39+
auto accTmp = bTmp.get_access(cgh);
40+
cgh.parallel_for<class KernelOne>(
41+
dataSize, [=](id<1> i) { accTmp[i] = accIn1[i] + accIn2[i]; });
42+
});
43+
44+
q.submit([&](handler &cgh) {
45+
auto accTmp = bTmp.get_access(cgh);
46+
auto accIn3 = bIn3.get_access(cgh);
47+
auto accOut = bOut.get_access(cgh);
48+
cgh.parallel_for<class KernelTwo>(
49+
dataSize, [=](id<1> i) { accOut[i] = accTmp[i] * accIn3[i]; });
50+
});
51+
52+
fw.cancel_fusion();
53+
54+
assert(!fw.is_in_fusion_mode() &&
55+
"Queue should not be in fusion mode anymore");
56+
}
57+
58+
// Check the results
59+
for (size_t i = 0; i < dataSize; ++i) {
60+
assert(out[i] == (20 * i * i) && "Computation error");
61+
}
62+
63+
return 0;
64+
}

SYCL/KernelFusion/complete_fusion.cpp

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// UNSUPPORTED: cuda || hip
3+
4+
// Test complete fusion without any internalization
5+
6+
#include <sycl/sycl.hpp>
7+
8+
using namespace sycl;
9+
10+
int main() {
11+
constexpr size_t dataSize = 512;
12+
int in1[dataSize], in2[dataSize], in3[dataSize], tmp[dataSize], out[dataSize];
13+
14+
for (size_t i = 0; i < dataSize; ++i) {
15+
in1[i] = i * 2;
16+
in2[i] = i * 3;
17+
in3[i] = i * 4;
18+
tmp[i] = -1;
19+
out[i] = -1;
20+
}
21+
22+
queue q{ext::codeplay::experimental::property::queue::enable_fusion{}};
23+
24+
{
25+
buffer<int> bIn1{in1, range{dataSize}};
26+
buffer<int> bIn2{in2, range{dataSize}};
27+
buffer<int> bIn3{in3, range{dataSize}};
28+
buffer<int> bTmp{tmp, range{dataSize}};
29+
buffer<int> bOut{out, range{dataSize}};
30+
31+
ext::codeplay::experimental::fusion_wrapper fw{q};
32+
fw.start_fusion();
33+
34+
assert(fw.is_in_fusion_mode() && "Queue should be in fusion mode");
35+
36+
q.submit([&](handler &cgh) {
37+
auto accIn1 = bIn1.get_access(cgh);
38+
auto accIn2 = bIn2.get_access(cgh);
39+
auto accTmp = bTmp.get_access(cgh);
40+
cgh.parallel_for<class KernelOne>(
41+
dataSize, [=](id<1> i) { accTmp[i] = accIn1[i] + accIn2[i]; });
42+
});
43+
44+
q.submit([&](handler &cgh) {
45+
auto accTmp = bTmp.get_access(cgh);
46+
auto accIn3 = bIn3.get_access(cgh);
47+
auto accOut = bOut.get_access(cgh);
48+
cgh.parallel_for<class KernelTwo>(
49+
dataSize, [=](id<1> i) { accOut[i] = accTmp[i] * accIn3[i]; });
50+
});
51+
52+
fw.complete_fusion({ext::codeplay::experimental::property::no_barriers{}});
53+
54+
assert(!fw.is_in_fusion_mode() &&
55+
"Queue should not be in fusion mode anymore");
56+
}
57+
58+
// Check the results
59+
for (size_t i = 0; i < dataSize; ++i) {
60+
assert(out[i] == (20 * i * i) && "Computation error");
61+
}
62+
63+
return 0;
64+
}

0 commit comments

Comments
 (0)