Skip to content

Commit 90b7353

Browse files
[Cherry-pick][SYCL][XPTI] Added test covering kernel info (intel#949)
2 parents 076b09c + cf0e45d commit 90b7353

16 files changed

+324
-23
lines changed

SYCL/XPTI/Inputs/buffer_info_collector.cpp

+146-3
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
11
#include "xpti/xpti_trace_framework.hpp"
22

3+
#include <algorithm>
34
#include <iostream>
45
#include <mutex>
56
#include <string_view>
67

78
std::mutex GMutex;
89

10+
XPTI_CALLBACK_API void syclCallback(uint16_t, xpti::trace_event_data_t *,
11+
xpti::trace_event_data_t *, uint64_t,
12+
const void *);
13+
914
XPTI_CALLBACK_API void memCallback(uint16_t, xpti::trace_event_data_t *,
1015
xpti::trace_event_data_t *, uint64_t,
1116
const void *);
@@ -64,6 +69,43 @@ XPTI_CALLBACK_API void xptiTraceInit(unsigned int MajorVersion,
6469
static_cast<uint16_t>(xpti::trace_point_type_t::offload_alloc_accessor),
6570
syclBufferCallback);
6671
}
72+
if (NameView == "sycl") {
73+
uint8_t StreamID = xptiRegisterStream(StreamName);
74+
xptiRegisterCallback(
75+
StreamID, static_cast<uint16_t>(xpti::trace_point_type_t::graph_create),
76+
syclCallback);
77+
xptiRegisterCallback(
78+
StreamID, static_cast<uint16_t>(xpti::trace_point_type_t::node_create),
79+
syclCallback);
80+
xptiRegisterCallback(
81+
StreamID, static_cast<uint16_t>(xpti::trace_point_type_t::edge_create),
82+
syclCallback);
83+
xptiRegisterCallback(
84+
StreamID, static_cast<uint16_t>(xpti::trace_point_type_t::task_begin),
85+
syclCallback);
86+
xptiRegisterCallback(
87+
StreamID, static_cast<uint16_t>(xpti::trace_point_type_t::task_end),
88+
syclCallback);
89+
xptiRegisterCallback(
90+
StreamID, static_cast<uint16_t>(xpti::trace_point_type_t::signal),
91+
syclCallback);
92+
xptiRegisterCallback(
93+
StreamID,
94+
static_cast<uint16_t>(xpti::trace_point_type_t::barrier_begin),
95+
syclCallback);
96+
xptiRegisterCallback(
97+
StreamID, static_cast<uint16_t>(xpti::trace_point_type_t::barrier_end),
98+
syclCallback);
99+
xptiRegisterCallback(
100+
StreamID, static_cast<uint16_t>(xpti::trace_point_type_t::wait_begin),
101+
syclCallback);
102+
xptiRegisterCallback(
103+
StreamID, static_cast<uint16_t>(xpti::trace_point_type_t::wait_end),
104+
syclCallback);
105+
xptiRegisterCallback(
106+
StreamID, static_cast<uint16_t>(xpti::trace_point_type_t::signal),
107+
syclCallback);
108+
}
67109
}
68110

69111
XPTI_CALLBACK_API void xptiTraceFinish(const char *streamName) {
@@ -86,9 +128,9 @@ XPTI_CALLBACK_API void memCallback(uint16_t TraceType,
86128
} else if (Type == xpti::trace_point_type_t::mem_release_end) {
87129
std::cout << "Mem Release End : ";
88130
}
89-
std::cout << " mem_obj_handle: " << Data->mem_object_handle << "\n";
90-
std::cout << " alloc_pointer : " << Data->alloc_pointer << "\n";
91-
std::cout << " alloc_size : " << Data->alloc_size << "\n";
131+
std::cout << "mem_obj_handle:0x" << std::hex << Data->mem_object_handle;
132+
std::cout << "|alloc_pointer:0x" << Data->alloc_pointer;
133+
std::cout << "|alloc_size:" << std::dec << Data->alloc_size << std::endl;
92134
}
93135

94136
XPTI_CALLBACK_API void syclBufferCallback(uint16_t TraceType,
@@ -148,3 +190,104 @@ XPTI_CALLBACK_API void syclBufferCallback(uint16_t TraceType,
148190
std::cout << "Unknown tracepoint\n";
149191
}
150192
}
193+
194+
template <typename T>
195+
T getMetadataByKey(xpti::metadata_t *Metadata, const char *key) {
196+
for (auto &Item : *Metadata) {
197+
if (std::string(xptiLookupString(Item.first)) == key) {
198+
return xpti::getMetadata<T>(Item).second;
199+
}
200+
}
201+
return {};
202+
}
203+
204+
bool isMetadataPresent(xpti::metadata_t *Metadata, const char *key) {
205+
for (auto &Item : *Metadata) {
206+
if (std::string(xptiLookupString(Item.first)) == key) {
207+
return true;
208+
}
209+
}
210+
return false;
211+
}
212+
void parseMetadata(xpti::trace_event_data_t *Event) {
213+
xpti::metadata_t *Metadata = xptiQueryMetadata(Event);
214+
if (isMetadataPresent(Metadata, "kernel_name")) {
215+
std::cout << getMetadataByKey<std::string>(Metadata, "kernel_name") << "|";
216+
}
217+
if (isMetadataPresent(Metadata, "sym_source_file_name") &&
218+
isMetadataPresent(Metadata, "sym_line_no") &&
219+
isMetadataPresent(Metadata, "sym_column_no")) {
220+
std::cout << getMetadataByKey<std::string>(Metadata, "sym_source_file_name")
221+
<< ":" << getMetadataByKey<int>(Metadata, "sym_line_no") << ":"
222+
<< getMetadataByKey<int>(Metadata, "sym_column_no") << "|";
223+
}
224+
if (isMetadataPresent(Metadata, "enqueue_kernel_data")) {
225+
auto KernelEnqueueData =
226+
getMetadataByKey<xpti::offload_kernel_enqueue_data_t>(
227+
Metadata, "enqueue_kernel_data");
228+
229+
std::cout << "{" << KernelEnqueueData.global_size[0] << ", "
230+
<< KernelEnqueueData.global_size[1] << ", "
231+
<< KernelEnqueueData.global_size[2] << "}, {"
232+
<< KernelEnqueueData.local_size[0] << ", "
233+
<< KernelEnqueueData.local_size[1] << ", "
234+
<< KernelEnqueueData.local_size[2] << "}, {"
235+
<< KernelEnqueueData.offset[0] << ", "
236+
<< KernelEnqueueData.offset[1] << ", "
237+
<< KernelEnqueueData.offset[2] << "}, "
238+
<< KernelEnqueueData.args_num << "|\n";
239+
240+
for (int i = 0; i < KernelEnqueueData.args_num; i++) {
241+
std::string Name("arg" + std::to_string(i));
242+
243+
auto arg = getMetadataByKey<xpti::offload_kernel_arg_data_t>(
244+
Metadata, Name.c_str());
245+
std::cout << " " << Name << " : {" << arg.type << ", " << std::hex
246+
<< "0x" << (uintptr_t)arg.pointer << std::dec << ", "
247+
<< arg.size << ", " << arg.index << "} "
248+
<< "\n";
249+
}
250+
}
251+
}
252+
XPTI_CALLBACK_API void syclCallback(uint16_t TraceType,
253+
xpti::trace_event_data_t *,
254+
xpti::trace_event_data_t *Event, uint64_t,
255+
const void *UserData) {
256+
std::lock_guard Lock{GMutex};
257+
auto Type = static_cast<xpti::trace_point_type_t>(TraceType);
258+
switch (Type) {
259+
case xpti::trace_point_type_t::graph_create:
260+
std::cout << "Graph create|";
261+
break;
262+
case xpti::trace_point_type_t::node_create:
263+
std::cout << "Node create|";
264+
break;
265+
case xpti::trace_point_type_t::edge_create:
266+
std::cout << "Edge create|";
267+
break;
268+
case xpti::trace_point_type_t::task_begin:
269+
std::cout << "Task begin|";
270+
break;
271+
case xpti::trace_point_type_t::task_end:
272+
std::cout << "Task end|";
273+
break;
274+
case xpti::trace_point_type_t::signal:
275+
std::cout << "Signal|";
276+
break;
277+
case xpti::trace_point_type_t::wait_begin:
278+
std::cout << "Wait begin|";
279+
break;
280+
case xpti::trace_point_type_t::wait_end:
281+
std::cout << "Wait end|";
282+
break;
283+
case xpti::trace_point_type_t::barrier_begin:
284+
std::cout << "Barrier begin|";
285+
break;
286+
case xpti::trace_point_type_t::barrier_end:
287+
std::cout << "Barrier end|";
288+
break;
289+
default:
290+
std::cout << "Unknown tracepoint|";
291+
}
292+
parseMetadata(Event);
293+
}

SYCL/XPTI/buffer/accessors.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// REQUIRES: xptifw, opencl
22
// RUN: %clangxx %s -DXPTI_COLLECTOR -DXPTI_CALLBACK_API_EXPORTS %xptifw_lib %shared_lib %fPIC %cxx_std_optionc++17 -o %t_collector.dll
33
// RUN: %clangxx -fsycl %s -o %t.out
4-
// RUN: env XPTI_TRACE_ENABLE=1 XPTI_FRAMEWORK_DISPATCHER=%xptifw_dispatcher XPTI_SUBSCRIBERS=%t_collector.dll SYCL_DEVICE_FILTER=opencl %t.out | FileCheck %s 2>&1
4+
// RUN: env XPTI_TRACE_ENABLE=1 XPTI_FRAMEWORK_DISPATCHER=%xptifw_dispatcher XPTI_SUBSCRIBERS=%t_collector.dll %BE_RUN_PLACEHOLDER %t.out | FileCheck %s 2>&1
55

66
#ifdef XPTI_COLLECTOR
77

SYCL/XPTI/buffer/host_array.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// REQUIRES: xptifw, opencl
22
// RUN: %clangxx %s -DXPTI_COLLECTOR -DXPTI_CALLBACK_API_EXPORTS %xptifw_lib %shared_lib %fPIC %cxx_std_optionc++17 -o %t_collector.dll
33
// RUN: %clangxx -fsycl %s -o %t.out
4-
// RUN: env XPTI_TRACE_ENABLE=1 XPTI_FRAMEWORK_DISPATCHER=%xptifw_dispatcher XPTI_SUBSCRIBERS=%t_collector.dll SYCL_DEVICE_FILTER=opencl %t.out | FileCheck %s 2>&1
4+
// RUN: env XPTI_TRACE_ENABLE=1 XPTI_FRAMEWORK_DISPATCHER=%xptifw_dispatcher XPTI_SUBSCRIBERS=%t_collector.dll %BE_RUN_PLACEHOLDER %t.out | FileCheck %s 2>&1
55

66
#ifdef XPTI_COLLECTOR
77

SYCL/XPTI/buffer/in_cycle.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// REQUIRES: xptifw, opencl
22
// RUN: %clangxx %s -DXPTI_COLLECTOR -DXPTI_CALLBACK_API_EXPORTS %xptifw_lib %shared_lib %fPIC %cxx_std_optionc++17 -o %t_collector.dll
33
// RUN: %clangxx -fsycl %s -o %t.out
4-
// RUN: env XPTI_TRACE_ENABLE=1 XPTI_FRAMEWORK_DISPATCHER=%xptifw_dispatcher XPTI_SUBSCRIBERS=%t_collector.dll SYCL_DEVICE_FILTER=opencl %t.out | FileCheck %s 2>&1
4+
// RUN: env XPTI_TRACE_ENABLE=1 XPTI_FRAMEWORK_DISPATCHER=%xptifw_dispatcher XPTI_SUBSCRIBERS=%t_collector.dll %BE_RUN_PLACEHOLDER %t.out | FileCheck %s 2>&1
55

66
// It looks like order of events diffres on Windows
77
#ifdef XPTI_COLLECTOR

SYCL/XPTI/buffer/multiple_buffers.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// REQUIRES: xptifw, opencl
22
// RUN: %clangxx %s -DXPTI_COLLECTOR -DXPTI_CALLBACK_API_EXPORTS %xptifw_lib %shared_lib %fPIC %cxx_std_optionc++17 -o %t_collector.dll
33
// RUN: %clangxx -fsycl %s -o %t.out
4-
// RUN: env XPTI_TRACE_ENABLE=1 XPTI_FRAMEWORK_DISPATCHER=%xptifw_dispatcher XPTI_SUBSCRIBERS=%t_collector.dll SYCL_DEVICE_FILTER=opencl %t.out | FileCheck %s 2>&1
4+
// RUN: env XPTI_TRACE_ENABLE=1 XPTI_FRAMEWORK_DISPATCHER=%xptifw_dispatcher XPTI_SUBSCRIBERS=%t_collector.dll %BE_RUN_PLACEHOLDER %t.out | FileCheck %s 2>&1
55

66
#ifdef XPTI_COLLECTOR
77

SYCL/XPTI/buffer/multiple_queues.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// REQUIRES: xptifw, opencl, (cpu || acc)
22
// RUN: %clangxx %s -DXPTI_COLLECTOR -DXPTI_CALLBACK_API_EXPORTS %xptifw_lib %shared_lib %fPIC %cxx_std_optionc++17 -o %t_collector.dll
33
// RUN: %clangxx -fsycl %s -o %t.out
4-
// RUN: env XPTI_TRACE_ENABLE=1 XPTI_FRAMEWORK_DISPATCHER=%xptifw_dispatcher XPTI_SUBSCRIBERS=%t_collector.dll SYCL_DEVICE_FILTER=opencl %t.out | FileCheck %s 2>&1
4+
// RUN: env XPTI_TRACE_ENABLE=1 XPTI_FRAMEWORK_DISPATCHER=%xptifw_dispatcher XPTI_SUBSCRIBERS=%t_collector.dll %BE_RUN_PLACEHOLDER %t.out | FileCheck %s 2>&1
55

66
#ifdef XPTI_COLLECTOR
77

SYCL/XPTI/buffer/recursion.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// REQUIRES: xptifw, opencl
22
// RUN: %clangxx %s -DXPTI_COLLECTOR -DXPTI_CALLBACK_API_EXPORTS %xptifw_lib %shared_lib %fPIC %cxx_std_optionc++17 -o %t_collector.dll
33
// RUN: %clangxx -fsycl %s -o %t.out
4-
// RUN: env XPTI_TRACE_ENABLE=1 XPTI_FRAMEWORK_DISPATCHER=%xptifw_dispatcher XPTI_SUBSCRIBERS=%t_collector.dll SYCL_DEVICE_FILTER=opencl %t.out | FileCheck %s 2>&1
4+
// RUN: env XPTI_TRACE_ENABLE=1 XPTI_FRAMEWORK_DISPATCHER=%xptifw_dispatcher XPTI_SUBSCRIBERS=%t_collector.dll %BE_RUN_PLACEHOLDER %t.out | FileCheck %s 2>&1
55

66
// It looks like order of events diffres on Windows
77
#ifdef XPTI_COLLECTOR

SYCL/XPTI/buffer/sub_buffer.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// REQUIRES: xptifw, opencl
22
// RUN: %clangxx %s -DXPTI_COLLECTOR -DXPTI_CALLBACK_API_EXPORTS %xptifw_lib %shared_lib %fPIC %cxx_std_optionc++17 -o %t_collector.dll
33
// RUN: %clangxx -fsycl %s -o %t.out
4-
// RUN: env XPTI_TRACE_ENABLE=1 XPTI_FRAMEWORK_DISPATCHER=%xptifw_dispatcher XPTI_SUBSCRIBERS=%t_collector.dll SYCL_DEVICE_FILTER=opencl %t.out | FileCheck %s 2>&1
4+
// RUN: env XPTI_TRACE_ENABLE=1 XPTI_FRAMEWORK_DISPATCHER=%xptifw_dispatcher XPTI_SUBSCRIBERS=%t_collector.dll %BE_RUN_PLACEHOLDER %t.out | FileCheck %s 2>&1
55

66
#ifdef XPTI_COLLECTOR
77

SYCL/XPTI/buffer/use_host_ptr.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// REQUIRES: xptifw, opencl
22
// RUN: %clangxx %s -DXPTI_COLLECTOR -DXPTI_CALLBACK_API_EXPORTS %xptifw_lib %shared_lib %fPIC %cxx_std_optionc++17 -o %t_collector.dll
33
// RUN: %clangxx -fsycl %s -o %t.out
4-
// RUN: env XPTI_TRACE_ENABLE=1 XPTI_FRAMEWORK_DISPATCHER=%xptifw_dispatcher XPTI_SUBSCRIBERS=%t_collector.dll SYCL_DEVICE_FILTER=opencl %t.out | FileCheck %s 2>&1
4+
// RUN: env XPTI_TRACE_ENABLE=1 XPTI_FRAMEWORK_DISPATCHER=%xptifw_dispatcher XPTI_SUBSCRIBERS=%t_collector.dll %BE_RUN_PLACEHOLDER %t.out | FileCheck %s 2>&1
55

66
#ifdef XPTI_COLLECTOR
77

SYCL/XPTI/kernel/basic.cpp

+104
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
// REQUIRES: xptifw, opencl
2+
// RUN: %clangxx %s -DXPTI_COLLECTOR -DXPTI_CALLBACK_API_EXPORTS %xptifw_lib %shared_lib %fPIC %cxx_std_optionc++17 -o %t_collector.dll
3+
// RUN: %clangxx -fsycl %s -o %t.out
4+
// RUN: env XPTI_TRACE_ENABLE=1 XPTI_FRAMEWORK_DISPATCHER=%xptifw_dispatcher XPTI_SUBSCRIBERS=%t_collector.dll %BE_RUN_PLACEHOLDER %t.out | FileCheck %s 2>&1
5+
6+
#ifdef XPTI_COLLECTOR
7+
8+
#include "../Inputs/buffer_info_collector.cpp"
9+
10+
#else
11+
12+
#include <sycl/sycl.hpp>
13+
14+
using namespace sycl::access;
15+
constexpr sycl::specialization_id<int> int_id(42);
16+
17+
class Functor1 {
18+
public:
19+
Functor1(short X_,
20+
cl::sycl::accessor<int, 1, mode::read_write, target::device> &Acc_)
21+
: X(X_), Acc(Acc_) {}
22+
23+
void operator()() const { Acc[0] += X; }
24+
25+
private:
26+
short X;
27+
cl::sycl::accessor<int, 1, mode::read_write, target::device> Acc;
28+
};
29+
30+
class Functor2 {
31+
public:
32+
Functor2(short X_,
33+
cl::sycl::accessor<int, 1, mode::read_write, target::device> &Acc_)
34+
: X(X_), Acc(Acc_) {}
35+
36+
void operator()(sycl::id<1> id = 0) const { Acc[id] += X; }
37+
38+
private:
39+
short X;
40+
cl::sycl::accessor<int, 1, mode::read_write, target::device> Acc;
41+
};
42+
43+
int main() {
44+
bool MismatchFound = false;
45+
sycl::queue Queue{};
46+
47+
// CHECK:{{[0-9]+}}|Create buffer|[[BUFFERID:[0-9,a-f,x]+]]|0x0|{{i(nt)*}}|4|1|{5,0,0}|{{.*}}.cpp:[[# @LINE + 1]]:24
48+
sycl::buffer<int, 1> Buf(5);
49+
sycl::range<1> Range{Buf.size()};
50+
short Val = Buf.size();
51+
auto PtrDevice = sycl::malloc_device<int>(7, Queue);
52+
auto PtrShared = sycl::malloc_shared<int>(8, Queue);
53+
Queue
54+
.submit([&](sycl::handler &cgh) {
55+
// CHECK: {{[0-9]+}}|Construct accessor|[[BUFFERID]]|[[ACCID1:.+]]|2014|1026|{{.*}}.cpp:[[# @LINE + 1]]:19
56+
auto A1 = Buf.get_access<mode::read_write>(cgh);
57+
// CHECK: {{[0-9]+}}|Construct accessor|0x0|[[ACCID2:.*]]|2016|1026|{{.*}}.cpp:[[# @LINE + 1]]:65
58+
sycl::accessor<int, 1, mode::read_write, target::local> A2(Range, cgh);
59+
// CHECK:Node create|{{.*}}FillBuffer{{.*}}|{{.*}}.cpp:[[# @LINE - 6 ]]:3|{5, 1, 1}, {0, 0, 0}, {0, 0, 0}, 6
60+
cgh.parallel_for<class FillBuffer>(
61+
Range, [=](sycl::id<1> WIid, sycl::kernel_handler kh) {
62+
// CHECK: arg0 : {1, {{[0-9,a-f,x]+}}, 2, 0}
63+
int h = Val;
64+
// CHECK: arg1 : {1, {{.*}}0, 20, 1}
65+
A2[WIid[0]] = h;
66+
// CHECK: arg2 : {0, [[ACCID1]], 4062, 2}
67+
// CHECK: arg3 : {1, [[ACCID1]], 8, 3}
68+
A1[WIid[0]] = A2[WIid[0]];
69+
// CHECK: arg4 : {3, {{.*}}, 8, 4}
70+
PtrDevice[WIid[0]] = WIid[0];
71+
// CHECK: arg5 : {3, {{.*}}, 8, 5}
72+
PtrShared[WIid[0]] = PtrDevice[WIid[0]];
73+
});
74+
})
75+
.wait();
76+
77+
// CHECK: {{[0-9]+}}|Construct accessor|[[BUFFERID]]|[[ACCID3:.*]]|2018|1024|{{.*}}.cpp:[[# @LINE + 1]]:15
78+
{ auto HA = Buf.get_access<mode::read>(); }
79+
80+
Queue.submit([&](cl::sycl::handler &cgh) {
81+
// CHECK: {{[0-9]+}}|Construct accessor|[[BUFFERID]]|[[ACCID4:.+]]|2014|1026|{{.*}}.cpp:[[# @LINE + 1]]:16
82+
auto Acc = Buf.get_access<mode::read_write>(cgh);
83+
Functor1 F(Val, Acc);
84+
// CHECK: Node create|{{.*}}Functor1|{{.*}}.cpp:[[# @LINE - 4 ]]:3|{1, 1, 1}, {0, 0, 0}, {0, 0, 0}, 3
85+
cgh.single_task(F);
86+
// CHECK: arg0 : {1, {{[0-9,a-f,x]+}}, 2, 0}
87+
// CHECK: arg1 : {0, [[ACCID4]], 4062, 1}
88+
// CHECK: arg2 : {1, [[ACCID4]], 8, 2}
89+
});
90+
91+
Queue.submit([&](cl::sycl::handler &cgh) {
92+
// CHECK: {{[0-9]+}}|Construct accessor|[[BUFFERID]]|[[ACCID5:.+]]|2014|1026|{{.*}}.cpp:[[# @LINE + 1]]:16
93+
auto Acc = Buf.get_access<mode::read_write>(cgh);
94+
Functor2 F(Val, Acc);
95+
// CHECK: Node create|{{.*}}Functor2|{{.*}}.cpp:[[# @LINE - 4 ]]:3|{5, 1, 1}, {0, 0, 0}, {0, 0, 0}, 3
96+
cgh.parallel_for(Range, F);
97+
// CHECK: arg0 : {1, {{[0-9,a-f,x]+}}, 2, 0}
98+
// CHECK: arg1 : {0, [[ACCID5]], 4062, 1}
99+
// CHECK: arg2 : {1, [[ACCID5]], 8, 2}
100+
});
101+
102+
return 0;
103+
}
104+
#endif

0 commit comments

Comments
 (0)