Skip to content

Commit 0a06787

Browse files
[SYCL] sycl_khr_free_function_commands extension (#16770)
This PR is implementation of `sycl_khr_free_function_commands` KHR extension. KhronosGroup/SYCL-Docs#644 --------- Co-authored-by: Steffen Larsen <[email protected]>
1 parent 410bd24 commit 0a06787

File tree

15 files changed

+1256
-1
lines changed

15 files changed

+1256
-1
lines changed

sycl/include/sycl/ext/khr/free_function_commands.hpp

+515
Large diffs are not rendered by default.

sycl/include/sycl/sycl.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474
#include <sycl/ext/intel/experimental/task_sequence_properties.hpp>
7575
#include <sycl/ext/intel/experimental/usm_properties.hpp>
7676
#include <sycl/ext/intel/usm_pointers.hpp>
77+
#include <sycl/ext/khr/free_function_commands.hpp>
7778
#include <sycl/ext/oneapi/bfloat16.hpp>
7879
#include <sycl/ext/oneapi/bindless_images.hpp>
7980
#include <sycl/ext/oneapi/device_global/device_global.hpp>

sycl/source/feature_test.hpp.in

+2-1
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,10 @@ inline namespace _V1 {
111111
#define SYCL_EXT_ONEAPI_GET_KERNEL_INFO 1
112112
#define SYCL_EXT_ONEAPI_WORK_GROUP_MEMORY 1
113113
#define SYCL_EXT_ONEAPI_WORK_GROUP_SCRATCH_MEMORY 1
114-
#define SYCL_EXT_ONEAPI_WORK_GROUP_STATIC 1
114+
#define SYCL_EXT_ONEAPI_WORK_GROUP_STATIC 1
115115
#define SYCL_EXT_ONEAPI_NUM_COMPUTE_UNITS 1
116116
#define SYCL_EXT_ONEAPI_DEVICE_IMAGE_BACKEND_CONTENT 1
117+
#define SYCL_KHR_FREE_FUNCTION_COMMANDS 1
117118
// In progress yet
118119
#define SYCL_EXT_ONEAPI_ATOMIC16 0
119120

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#include <iostream>
2+
#include <string>
3+
4+
static int Check(int *Data, int Expected, size_t Index,
5+
const std::string &TestName) {
6+
if (Data[Index] == Expected)
7+
return 0;
8+
std::cout << "Failed " << TestName << " at index " << Index << " : "
9+
<< Data[Index] << " != " << Expected << std::endl;
10+
return 1;
11+
}
12+
13+
static int Check(int &Data, int Expected, const std::string &TestName) {
14+
if (Data == Expected)
15+
return 0;
16+
std::cout << "Failed " << TestName << " : " << Data << " != " << Expected
17+
<< std::endl;
18+
return 1;
19+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
// REQUIRES: aspect-usm_shared_allocations
2+
// RUN: %{build} -o %t.out
3+
// RUN: %{run} %t.out
4+
5+
// This test checks whether the free function command launch is valid.
6+
#define __DPCPP_ENABLE_UNFINISHED_KHR_EXTENSIONS
7+
8+
#include <sycl/detail/core.hpp>
9+
#include <sycl/ext/khr/free_function_commands.hpp>
10+
#include <sycl/usm.hpp>
11+
12+
#include <array>
13+
14+
#include "helpers.hpp"
15+
16+
int main() {
17+
18+
int Failed = 0;
19+
20+
sycl::queue Queue;
21+
constexpr size_t Dim = 8;
22+
23+
{
24+
constexpr size_t N = Dim * Dim * Dim;
25+
26+
std::array<int, N> Numbers = {0};
27+
{
28+
sycl::buffer<int, 1> MemBuffer{Numbers.data(), sycl::range<1>{N}};
29+
sycl::khr::submit(Queue, [&](sycl::handler &Handler) {
30+
sycl::accessor MemAcc{MemBuffer, Handler, sycl::write_only};
31+
sycl::khr::launch(Handler, sycl::range<3>(Dim, Dim, Dim),
32+
[=](sycl::item<3> Item) {
33+
size_t Index = Item.get_linear_id();
34+
MemAcc[Index] = 901;
35+
});
36+
});
37+
}
38+
for (size_t i = 0; i < N; ++i)
39+
Failed += Check(Numbers.data(), 901, i, "launch_with_buffer");
40+
}
41+
42+
{
43+
constexpr size_t N = Dim * Dim;
44+
int *Numbers = sycl::malloc_shared<int>(N, Queue);
45+
46+
sycl::event Event =
47+
sycl::khr::submit_tracked(Queue, [&](sycl::handler &Handler) {
48+
sycl::khr::launch(Handler, sycl::range<1>(N),
49+
[=](sycl::item<1> Item) { Numbers[Item] = 2; });
50+
});
51+
52+
sycl::khr::submit(Queue, [&](sycl::handler &Handler) {
53+
Handler.depends_on(Event);
54+
sycl::khr::launch(Handler, sycl::range<3>(Dim / 4, Dim / 2, Dim),
55+
[=](sycl::item<3> Item) {
56+
size_t Index = Item.get_linear_id();
57+
Numbers[Index] = Numbers[Index] + 900;
58+
});
59+
});
60+
61+
Queue.wait();
62+
for (size_t i = 0; i < N; ++i)
63+
Failed += Check(Numbers, 902, i, "launch_with_usm");
64+
65+
sycl::khr::launch(Queue, sycl::range<2>(Dim, Dim), [=](sycl::item<2> Item) {
66+
size_t Index = Item.get_linear_id();
67+
Numbers[Index] = 903;
68+
});
69+
70+
Queue.wait();
71+
for (size_t i = 0; i < N; ++i)
72+
Failed += Check(Numbers, 903, i, "launch_shortcut_with_usm");
73+
74+
sycl::free(Numbers, Queue);
75+
}
76+
77+
return Failed;
78+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
// REQUIRES: aspect-usm_shared_allocations
2+
// RUN: %{build} -o %t.out
3+
// RUN: %{run} %t.out
4+
5+
// This test checks whether the free function command launch grouped is valid.
6+
7+
#define __DPCPP_ENABLE_UNFINISHED_KHR_EXTENSIONS
8+
9+
#include <sycl/detail/core.hpp>
10+
#include <sycl/ext/khr/free_function_commands.hpp>
11+
#include <sycl/usm.hpp>
12+
13+
#include "helpers.hpp"
14+
15+
int main() {
16+
17+
int Failed = 0;
18+
sycl::queue Queue;
19+
constexpr size_t N = 1024;
20+
21+
{
22+
std::array<int, N> Numbers = {0};
23+
{
24+
sycl::buffer<int> MemBuffer{Numbers.data(), N};
25+
Queue.submit([&](sycl::handler &Handler) {
26+
sycl::accessor MemAcc{MemBuffer, Handler, sycl::write_only};
27+
sycl::khr::launch_grouped(Handler, sycl::range<3>(8, 8, N / 64),
28+
sycl::range<3>(4, 4, 4),
29+
[=](sycl::nd_item<3> Item) {
30+
size_t Index = Item.get_global_linear_id();
31+
MemAcc[Index] = 301;
32+
});
33+
});
34+
}
35+
for (size_t i = 0; i < N; ++i)
36+
Failed +=
37+
Check(Numbers.data(), 301, i, "launch_grouped_with_sycl_buffer");
38+
}
39+
40+
{
41+
int *Numbers = sycl::malloc_shared<int>(N, Queue);
42+
43+
sycl::event Event =
44+
sycl::khr::submit_tracked(Queue, [&](sycl::handler &Handler) {
45+
sycl::khr::launch_grouped(Handler, sycl::range<3>(4, 4, N / 16),
46+
sycl::range<3>(4, 4, 4),
47+
[=](sycl::nd_item<3> Item) {
48+
Numbers[Item.get_global_linear_id()] = 2;
49+
});
50+
});
51+
52+
sycl::khr::submit(Queue, [&](sycl::handler &Handler) {
53+
Handler.depends_on(Event);
54+
sycl::khr::launch_grouped(Handler, sycl::range<1>(N), sycl::range<1>(8),
55+
[=](sycl::nd_item<1> Item) {
56+
size_t Index = Item.get_global_linear_id();
57+
Numbers[Index] = Numbers[Index] + 300;
58+
});
59+
});
60+
Queue.wait();
61+
for (size_t i = 0; i < N; ++i)
62+
Failed += Check(Numbers, 302, i, "launch_grouped_with_usm");
63+
64+
sycl::khr::launch_grouped(Queue, sycl::range<2>(8, N / 8),
65+
sycl::range<2>(8, 8), [=](sycl::nd_item<2> Item) {
66+
Numbers[Item.get_global_linear_id()] = 303;
67+
});
68+
Queue.wait();
69+
for (size_t i = 0; i < N; ++i)
70+
Failed += Check(Numbers, 303, i, "launch_grouped_shortcut_with_usm");
71+
sycl::free(Numbers, Queue);
72+
}
73+
74+
return Failed;
75+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
// REQUIRES: aspect-usm_shared_allocations
2+
// RUN: %{build} -o %t.out
3+
// RUN: %{run} %t.out
4+
5+
// This test checks whether the free function command launch grouped reduce is
6+
// valid.
7+
8+
#define __DPCPP_ENABLE_UNFINISHED_KHR_EXTENSIONS
9+
10+
#include <sycl/detail/core.hpp>
11+
#include <sycl/ext/khr/free_function_commands.hpp>
12+
#include <sycl/reduction.hpp>
13+
#include <sycl/usm.hpp>
14+
15+
#include "helpers.hpp"
16+
17+
int main() {
18+
19+
int Failed = 0;
20+
sycl::queue Queue;
21+
22+
{
23+
int Result = 0;
24+
constexpr size_t Dim = 8;
25+
constexpr size_t N = Dim * Dim * Dim - 1;
26+
constexpr int ExpectedResult = N * (N + 1) / 2;
27+
{
28+
sycl::buffer<int> sumBuf{&Result, 1};
29+
Queue.submit([&](sycl::handler &Handler) {
30+
sycl::khr::launch_grouped_reduce(
31+
Handler, sycl::range<3>(Dim, Dim, Dim), sycl::range<3>(8, 8, 8),
32+
[=](sycl::nd_item<3> Item, auto &Sum) {
33+
Sum += Item.get_local_linear_id();
34+
},
35+
sycl::reduction(sumBuf, Handler, sycl::plus<>()));
36+
});
37+
}
38+
Failed +=
39+
Check(Result, ExpectedResult, "launch_grouped_reduce_with_buffer");
40+
}
41+
42+
{
43+
int *Result = sycl::malloc_shared<int>(1, Queue);
44+
Result[0] = 0;
45+
constexpr size_t N = 1024;
46+
constexpr int ExpectedResult = ((N - 1) * N) / 2;
47+
48+
sycl::event Event =
49+
sycl::khr::submit_tracked(Queue, [&](sycl::handler &Handler) {
50+
sycl::khr::launch_grouped_reduce(
51+
Handler, sycl::range<3>(8, 8, N / 64), sycl::range<3>(8, 8, 8),
52+
[=](sycl::nd_item<3> Item, auto &Sum) { Sum += 1; },
53+
sycl::reduction(Result, sycl::plus<>()));
54+
});
55+
56+
sycl::khr::submit(Queue, [&](sycl::handler &Handler) {
57+
Handler.depends_on(Event);
58+
sycl::khr::launch_grouped_reduce(
59+
Handler, sycl::range<2>(16, N / 16), sycl::range<2>(8, 8),
60+
[=](sycl::nd_item<2> Item, auto &Sum) { Sum += 1; },
61+
sycl::reduction(Result, sycl::plus<>()));
62+
});
63+
Queue.wait();
64+
Failed += Check(Result[0], N * 2, "launch_grouped_reduce_with_usm");
65+
66+
Result[0] = 0;
67+
sycl::khr::launch_grouped_reduce(
68+
Queue, sycl::range<1>(N), sycl::range<1>(8),
69+
[=](sycl::nd_item<1> Item, auto &Sum) {
70+
Sum += Item.get_global_linear_id();
71+
},
72+
sycl::reduction(Result, sycl::plus<>()));
73+
74+
Queue.wait();
75+
Failed += Check(Result[0], ExpectedResult,
76+
"launch_grouped_reduce_shortcut_with_usm");
77+
sycl::free(Result, Queue);
78+
}
79+
80+
return Failed;
81+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
// REQUIRES: aspect-usm_shared_allocations
2+
// RUN: %{build} -o %t.out
3+
// RUN: %{run} %t.out
4+
5+
// This test checks whether the free function command launch reduce is valid.
6+
#define __DPCPP_ENABLE_UNFINISHED_KHR_EXTENSIONS
7+
8+
#include <sycl/detail/core.hpp>
9+
#include <sycl/ext/khr/free_function_commands.hpp>
10+
#include <sycl/reduction.hpp>
11+
#include <sycl/usm.hpp>
12+
13+
#include "helpers.hpp"
14+
15+
int main() {
16+
17+
int Failed = 0;
18+
sycl::queue Queue;
19+
20+
{
21+
int Result = 0;
22+
constexpr size_t Dim = 8;
23+
constexpr size_t N = Dim * Dim * Dim - 1;
24+
constexpr int ExpectedResult = N * (N + 1) / 2;
25+
{
26+
sycl::buffer<int> sumBuf{&Result, 1};
27+
Queue.submit([&](sycl::handler &Handler) {
28+
sycl::khr::launch_reduce(
29+
Handler, sycl::range<3>(Dim, Dim, Dim),
30+
[=](sycl::item<3> Item, auto &Sum) { Sum += Item.get_linear_id(); },
31+
sycl::reduction(sumBuf, Handler, sycl::plus<>()));
32+
});
33+
}
34+
Failed += Check(Result, ExpectedResult, "launch_reduce_with_buffer");
35+
}
36+
37+
{
38+
int *Result = sycl::malloc_shared<int>(1, Queue);
39+
Result[0] = 0;
40+
constexpr size_t N = 1024;
41+
constexpr int ExpectedResult = ((N - 1) * N) / 2;
42+
43+
sycl::event Event =
44+
sycl::khr::submit_tracked(Queue, [&](sycl::handler &Handler) {
45+
sycl::khr::launch_reduce(
46+
Handler, sycl::range<1>(N),
47+
[=](sycl::id<1> Id, auto &Sum) {
48+
int NegativeId = -(int)Id;
49+
Sum += NegativeId;
50+
},
51+
sycl::reduction(Result, sycl::plus<>()));
52+
});
53+
54+
Queue.submit([&](sycl::handler &Handler) {
55+
Handler.depends_on(Event);
56+
sycl::khr::launch_reduce(
57+
Handler, sycl::range<1>(N),
58+
[=](sycl::item<1> Item, auto &Sum) { Sum += Item.get_linear_id(); },
59+
sycl::reduction(Result, sycl::plus<>()));
60+
});
61+
62+
Queue.wait();
63+
Failed += Check(Result[0], 0, "launch_reduce_with_usm");
64+
65+
sycl::khr::launch_reduce(
66+
Queue, sycl::range<1>(N), [=](sycl::id<1> Id, auto &Sum) { Sum += Id; },
67+
sycl::reduction(Result, sycl::plus<>()));
68+
69+
Queue.wait();
70+
Failed +=
71+
Check(Result[0], ExpectedResult, "launch_reduce_shortcut_with_usm");
72+
sycl::free(Result, Queue);
73+
}
74+
75+
return Failed;
76+
}

0 commit comments

Comments
 (0)