Skip to content

[SYCL][Level Zero] Implement sycl_ext_intel_queue_index extension #7599

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 31 commits into from
Dec 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
3e0fe6a
WIP [SYCL][PI] Add PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES
aelovikov-intel Nov 28, 2022
f642f25
WIP Add SYCL-level interface
aelovikov-intel Nov 28, 2022
fedf467
WIP Add property, not passed to the plugin yet
aelovikov-intel Nov 28, 2022
f1ff38b
WIP Define feature macro
aelovikov-intel Nov 28, 2022
a34fc24
WIP Introduce piQueueCreateEx
aelovikov-intel Nov 29, 2022
00fa9bf
WIP Change SYCL RT to use piQueueCreateEx
aelovikov-intel Nov 29, 2022
971ef1a
WIP Final piece?
aelovikov-intel Nov 30, 2022
19ddea0
Fix typo
aelovikov-intel Nov 30, 2022
3d6892b
Add PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES case for ESIMD…
aelovikov-intel Nov 30, 2022
97337cb
Merge remote-tracking branch 'origin/sycl' into queue-index
aelovikov-intel Nov 30, 2022
ebb1f30
Update TODO comment
aelovikov-intel Nov 30, 2022
48c8b81
clang-format
aelovikov-intel Nov 30, 2022
ae6d72a
Fix CUDA/HIP build
aelovikov-intel Nov 30, 2022
0acba89
Same for ESIMD_EMULATOR
aelovikov-intel Nov 30, 2022
b3c49ff
Update Linux symbols
aelovikov-intel Dec 1, 2022
0f36b15
Windows symbols
aelovikov-intel Dec 1, 2022
cc76d2c
Merge remote-tracking branch 'origin/sycl' into queue-index
aelovikov-intel Dec 1, 2022
ca1eea8
Add sycl/unittests/queue/InOrderQueue.cpp
aelovikov-intel Dec 1, 2022
1e199b3
Merge remote-tracking branch 'origin/sycl' into queue-index
aelovikov-intel Dec 1, 2022
fb9841b
Add missing PI entry for opencl plugin
aelovikov-intel Dec 1, 2022
312388e
clang-format
aelovikov-intel Dec 1, 2022
98560e0
Merge remote-tracking branch 'origin/sycl' into queue-index
aelovikov-intel Dec 2, 2022
930fe25
Merge remote-tracking branch 'origin/sycl' into queue-index
aelovikov-intel Dec 5, 2022
fb22a45
Bump _PI_H_VERSION_MINOR
aelovikov-intel Dec 6, 2022
a9fffdd
Merge remote-tracking branch 'origin/sycl' into queue-index
aelovikov-intel Dec 9, 2022
9892278
Move the extension to "supported"
aelovikov-intel Dec 9, 2022
7cfb199
Rename piQueueCreateEx -> piextQueueCreate
aelovikov-intel Dec 11, 2022
622c991
Rename PI QUEUE flags
aelovikov-intel Dec 11, 2022
423a8f2
Extra doc comments
aelovikov-intel Dec 11, 2022
ca9de54
Merge remote-tracking branch 'origin/sycl' into queue-index
aelovikov-intel Dec 11, 2022
7522fa9
Merge remote-tracking branch 'origin/sycl' into queue-index
aelovikov-intel Dec 12, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,7 @@ SYCL specification refer to that revision.

== Status

This is a proposed extension specification, intended to gather community
feedback. Interfaces defined in this specification may not be implemented yet
or may be in a preliminary state. The specification itself may also change in
incompatible ways before it is finalized. *Shipping software products should
not rely on APIs defined in this specification.*
This extension is implemented and fully supported by DPC++.


== Overview
Expand Down
1 change: 1 addition & 0 deletions sycl/include/sycl/detail/pi.def
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ _PI_API(piextContextGetNativeHandle)
_PI_API(piextContextCreateWithNativeHandle)
// Queue
_PI_API(piQueueCreate)
_PI_API(piextQueueCreate)
_PI_API(piQueueGetInfo)
_PI_API(piQueueFinish)
_PI_API(piQueueFlush)
Expand Down
35 changes: 27 additions & 8 deletions sycl/include/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,12 @@
// partitioning by affinity domain is disabled by default and can be temporarily
// restored via SYCL_PI_LEVEL_ZERO_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING
// environment variable.
// 12.20 Added piextQueueCreate API to be used instead of piQueueCreate, also
// added PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES for piDeviceGetInfo.
// Both are needed to support sycl_ext_intel_queue_index extension.

#define _PI_H_VERSION_MAJOR 12
#define _PI_H_VERSION_MINOR 19
#define _PI_H_VERSION_MINOR 20

#define _PI_STRING_HELPER(a) #a
#define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b)
Expand Down Expand Up @@ -295,6 +298,9 @@ typedef enum {
// Return 0 if device doesn't have any memory modules. Return the minimum of
// the bus width values if there are several memory modules on the device.
PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH = 0x10031,
// Return 1 if the device doesn't have a notion of a "queue index". Otherwise,
// return the number of queue indices that are available for this device.
PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES = 0x10032,
PI_DEVICE_INFO_ATOMIC_64 = 0x10110,
PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10111,
PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x11000,
Expand Down Expand Up @@ -587,13 +593,17 @@ constexpr pi_usm_mem_properties PI_MEM_USM_ALLOC_BUFFER_LOCATION = 0x419E;
// NOTE: queue properties are implemented this way to better support bit
// manipulations
using pi_queue_properties = pi_bitfield;
constexpr pi_queue_properties PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE = (1 << 0);
constexpr pi_queue_properties PI_QUEUE_PROFILING_ENABLE = (1 << 1);
constexpr pi_queue_properties PI_QUEUE_ON_DEVICE = (1 << 2);
constexpr pi_queue_properties PI_QUEUE_ON_DEVICE_DEFAULT = (1 << 3);
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_DISCARD_EVENTS = (1 << 4);
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_PRIORITY_LOW = (1 << 5);
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_PRIORITY_HIGH = (1 << 6);
constexpr pi_queue_properties PI_QUEUE_FLAGS = -1;
constexpr pi_queue_properties PI_QUEUE_COMPUTE_INDEX = -2;
// clang-format off
constexpr pi_queue_properties PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE = (1 << 0);
constexpr pi_queue_properties PI_QUEUE_FLAG_PROFILING_ENABLE = (1 << 1);
constexpr pi_queue_properties PI_QUEUE_FLAG_ON_DEVICE = (1 << 2);
constexpr pi_queue_properties PI_QUEUE_FLAG_ON_DEVICE_DEFAULT = (1 << 3);
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS = (1 << 4);
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW = (1 << 5);
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH = (1 << 6);
// clang-format on

using pi_result = _pi_result;
using pi_platform_info = _pi_platform_info;
Expand Down Expand Up @@ -1125,9 +1135,18 @@ __SYCL_EXPORT pi_result piextContextCreateWithNativeHandle(
//
// Queue
//

// TODO: Remove during next ABI break and rename piextQueueCreate to
// piQueueCreate.
__SYCL_EXPORT pi_result piQueueCreate(pi_context context, pi_device device,
pi_queue_properties properties,
pi_queue *queue);
/// \param properties points to a zero-terminated array of extra data describing
/// desired queue properties. Format is
/// {[PROPERTY[, property-specific elements of data]*,]* 0}
__SYCL_EXPORT pi_result piextQueueCreate(pi_context context, pi_device device,
pi_queue_properties *properties,
pi_queue *queue);

__SYCL_EXPORT pi_result piQueueGetInfo(pi_queue command_queue,
pi_queue_info param_name,
Expand Down
3 changes: 2 additions & 1 deletion sycl/include/sycl/detail/property_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ enum PropWithDataKind {
ImageContextBound = 3,
BufferMemChannel = 4,
AccPropBufferLocation = 5,
PropWithDataKindSize = 6,
QueueComputeIndex = 6,
PropWithDataKindSize = 7,
};

// Base class for dataless properties, needed to check that the type of an
Expand Down
1 change: 1 addition & 0 deletions sycl/include/sycl/feature_test.hpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) {
#define SYCL_EXT_INTEL_MEM_CHANNEL_PROPERTY 1
#define SYCL_EXT_INTEL_USM_ADDRESS_SPACES 1
#define SYCL_EXT_INTEL_RUNTIME_BUFFER_LOCATION 1
#define SYCL_EXT_INTEL_QUEUE_INDEX 1
#define SYCL_EXT_ONEAPI_BACKEND_LEVEL_ZERO 3
#define SYCL_EXT_ONEAPI_USM_DEVICE_READ_ONLY 1
#define SYCL_EXT_ONEAPI_KERNEL_PROPERTIES 1
Expand Down
1 change: 1 addition & 0 deletions sycl/include/sycl/info/ext_intel_device_traits.def
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ __SYCL_PARAM_TRAITS_SPEC(ext::intel, device, uuid, detail::uuid_type, PI_DEVICE_
__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, free_memory, pi_uint64, PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY)
__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, memory_clock_rate, pi_uint32, PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE)
__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, memory_bus_width, pi_uint32, PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH)
__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, max_compute_queue_indices, pi_int32, PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES)
#ifdef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF
#undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC
#undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF
Expand Down
21 changes: 21 additions & 0 deletions sycl/include/sycl/properties/queue_properties.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,24 @@ class use_default_stream
// clang-format on
} // namespace property::queue

namespace ext {
namespace intel {
namespace property {
namespace queue {
class compute_index : public sycl::detail::PropertyWithData<
sycl::detail::PropWithDataKind::QueueComputeIndex> {
public:
compute_index(int idx) : idx(idx) {}
int get_index() { return idx; }

private:
int idx;
};
} // namespace queue
} // namespace property
} // namespace intel
} // namespace ext

// Forward declaration
class queue;

Expand Down Expand Up @@ -78,6 +96,9 @@ struct is_property_of<property::queue::cuda::use_default_stream, queue>
template <>
struct is_property_of<ext::oneapi::cuda::property::queue::use_default_stream,
queue> : std::true_type {};
template <>
struct is_property_of<ext::intel::property::queue::compute_index, queue>
: std::true_type {};

} // __SYCL_INLINE_VER_NAMESPACE(_V1)
} // namespace sycl
33 changes: 25 additions & 8 deletions sycl/plugins/cuda/pi_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,7 @@ _pi_event::_pi_event(pi_command_type type, pi_context context, pi_queue queue,
streamToken_{stream_token}, evEnd_{nullptr}, evStart_{nullptr},
evQueued_{nullptr}, queue_{queue}, stream_{stream}, context_{context} {

bool profilingEnabled = queue_->properties_ & PI_QUEUE_PROFILING_ENABLE;
bool profilingEnabled = queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE;

PI_CHECK_ERROR(cuEventCreate(
&evEnd_, profilingEnabled ? CU_EVENT_DEFAULT : CU_EVENT_DISABLE_TIMING));
Expand Down Expand Up @@ -526,7 +526,7 @@ pi_result _pi_event::start() {
pi_result result = PI_SUCCESS;

try {
if (queue_->properties_ & PI_QUEUE_PROFILING_ENABLE) {
if (queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE) {
// NOTE: This relies on the default stream to be unused.
result = PI_CHECK_ERROR(cuEventRecord(evQueued_, 0));
result = PI_CHECK_ERROR(cuEventRecord(evStart_, stream_));
Expand Down Expand Up @@ -633,7 +633,7 @@ pi_result _pi_event::release() {

PI_CHECK_ERROR(cuEventDestroy(evEnd_));

if (queue_->properties_ & PI_QUEUE_PROFILING_ENABLE) {
if (queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE) {
PI_CHECK_ERROR(cuEventDestroy(evQueued_));
PI_CHECK_ERROR(cuEventDestroy(evStart_));
}
Expand Down Expand Up @@ -1681,14 +1681,14 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name,
}
case PI_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: {
// The mandated minimum capability:
auto capability =
PI_QUEUE_PROFILING_ENABLE | PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
auto capability = PI_QUEUE_FLAG_PROFILING_ENABLE |
PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE;
return getInfo(param_value_size, param_value, param_value_size_ret,
capability);
}
case PI_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES: {
// The mandated minimum capability:
auto capability = PI_QUEUE_PROFILING_ENABLE;
auto capability = PI_QUEUE_FLAG_PROFILING_ENABLE;
return getInfo(param_value_size, param_value, param_value_size_ret,
capability);
}
Expand Down Expand Up @@ -1945,6 +1945,10 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name,
sycl::detail::pi::assertion(value >= 0);
return getInfo(param_value_size, param_value, param_value_size_ret, value);
}
case PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES: {
return getInfo(param_value_size, param_value, param_value_size_ret,
pi_int32{1});
}

// TODO: Investigate if this information is available on CUDA.
case PI_DEVICE_INFO_DEVICE_ID:
Expand Down Expand Up @@ -2501,7 +2505,7 @@ pi_result cuda_piQueueCreate(pi_context context, pi_device device,
}

const bool is_out_of_order =
properties & PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
properties & PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE;

std::vector<CUstream> computeCuStreams(
is_out_of_order ? _pi_queue::default_num_compute_streams : 1);
Expand All @@ -2524,6 +2528,17 @@ pi_result cuda_piQueueCreate(pi_context context, pi_device device,
return PI_ERROR_OUT_OF_RESOURCES;
}
}
pi_result cuda_piextQueueCreate(pi_context Context, pi_device Device,
pi_queue_properties *Properties,
pi_queue *Queue) {
assert(Properties);
// Expect flags mask to be passed first.
assert(Properties[0] == PI_QUEUE_FLAGS);
pi_queue_properties Flags = Properties[1];
// Extra data isn't supported yet.
assert(Properties[2] == 0);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct me if I'm wrong, but this assert can be triggered by the user. Would it be better to return PI_ERROR_INVALID_OPERATION instead to make it a recoverable error rather than a failed assert? Note that the call should be changed to call rather than call_nocheck.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Discussed offline. This will be done in a follow-up patch.

return cuda_piQueueCreate(Context, Device, Flags, Queue);
}

pi_result cuda_piQueueGetInfo(pi_queue command_queue, pi_queue_info param_name,
size_t param_value_size, void *param_value,
Expand Down Expand Up @@ -3849,7 +3864,8 @@ pi_result cuda_piEventGetProfilingInfo(pi_event event,
assert(event != nullptr);

pi_queue queue = event->get_queue();
if (queue == nullptr || !(queue->properties_ & PI_QUEUE_PROFILING_ENABLE)) {
if (queue == nullptr ||
!(queue->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE)) {
return PI_ERROR_PROFILING_INFO_NOT_AVAILABLE;
}

Expand Down Expand Up @@ -5473,6 +5489,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
cuda_piextContextCreateWithNativeHandle)
// Queue
_PI_CL(piQueueCreate, cuda_piQueueCreate)
_PI_CL(piextQueueCreate, cuda_piextQueueCreate)
_PI_CL(piQueueGetInfo, cuda_piQueueGetInfo)
_PI_CL(piQueueFinish, cuda_piQueueFinish)
_PI_CL(piQueueFlush, cuda_piQueueFlush)
Expand Down
16 changes: 14 additions & 2 deletions sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -663,7 +663,7 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
case PI_DEVICE_INFO_OPENCL_C_VERSION:
return ReturnValue("");
case PI_DEVICE_INFO_QUEUE_PROPERTIES:
return ReturnValue(pi_queue_properties{PI_QUEUE_ON_DEVICE});
return ReturnValue(pi_queue_properties{PI_QUEUE_FLAG_ON_DEVICE});
case PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES: {
struct {
size_t Arr[3];
Expand Down Expand Up @@ -785,6 +785,8 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
return ReturnValue(pi_uint32{0});
case PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL:
return ReturnValue(size_t{1});
case PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES:
return ReturnValue(pi_int32{1});

CASE_PI_UNSUPPORTED(PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS)
CASE_PI_UNSUPPORTED(PI_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS)
Expand Down Expand Up @@ -923,11 +925,21 @@ bool _pi_context::checkSurfaceArgument(pi_mem_flags Flags, void *HostPtr) {
return true;
}

pi_result piextQueueCreate(pi_context Context, pi_device Device,
pi_queue_properties *Properties, pi_queue *Queue) {
assert(Properties);
// Expect flags mask to be passed first.
assert(Properties[0] == PI_QUEUE_FLAGS);
pi_queue_properties Flags = Properties[1];
// Extra data isn't supported yet.
assert(Properties[2] == 0);
return piQueueCreate(Context, Device, Flags, Queue);
}
pi_result piQueueCreate(pi_context Context, pi_device Device,
pi_queue_properties Properties, pi_queue *Queue) {
ARG_UNUSED(Device);

if (Properties & PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) {
if (Properties & PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) {
// TODO : Support Out-of-order Queue
*Queue = nullptr;
return PI_ERROR_INVALID_QUEUE_PROPERTIES;
Expand Down
33 changes: 25 additions & 8 deletions sycl/plugins/hip/pi_hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,7 @@ _pi_event::_pi_event(pi_command_type type, pi_context context, pi_queue queue,

assert(type != PI_COMMAND_TYPE_USER);

bool profilingEnabled = queue_->properties_ & PI_QUEUE_PROFILING_ENABLE;
bool profilingEnabled = queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE;

PI_CHECK_ERROR(hipEventCreateWithFlags(
&evEnd_, profilingEnabled ? hipEventDefault : hipEventDisableTiming));
Expand Down Expand Up @@ -562,7 +562,7 @@ pi_result _pi_event::start() {
pi_result result = PI_SUCCESS;

try {
if (queue_->properties_ & PI_QUEUE_PROFILING_ENABLE) {
if (queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE) {
// NOTE: This relies on the default stream to be unused.
PI_CHECK_ERROR(hipEventRecord(evQueued_, 0));
PI_CHECK_ERROR(hipEventRecord(evStart_, queue_->get()));
Expand Down Expand Up @@ -663,7 +663,7 @@ pi_result _pi_event::release() {
assert(queue_ != nullptr);
PI_CHECK_ERROR(hipEventDestroy(evEnd_));

if (queue_->properties_ & PI_QUEUE_PROFILING_ENABLE) {
if (queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE) {
PI_CHECK_ERROR(hipEventDestroy(evQueued_));
PI_CHECK_ERROR(hipEventDestroy(evStart_));
}
Expand Down Expand Up @@ -1588,14 +1588,14 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name,
}
case PI_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: {
// The mandated minimum capability:
auto capability =
PI_QUEUE_PROFILING_ENABLE | PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
auto capability = PI_QUEUE_FLAG_PROFILING_ENABLE |
PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE;
return getInfo(param_value_size, param_value, param_value_size_ret,
capability);
}
case PI_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES: {
// The mandated minimum capability:
auto capability = PI_QUEUE_PROFILING_ENABLE;
auto capability = PI_QUEUE_FLAG_PROFILING_ENABLE;
return getInfo(param_value_size, param_value, param_value_size_ret,
capability);
}
Expand Down Expand Up @@ -1841,6 +1841,10 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name,
sycl::detail::pi::assertion(value >= 0);
return getInfo(param_value_size, param_value, param_value_size_ret, value);
}
case PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES: {
return getInfo(param_value_size, param_value, param_value_size_ret,
pi_int32{1});
}

// TODO: Implement.
case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES:
Expand Down Expand Up @@ -2378,7 +2382,7 @@ pi_result hip_piQueueCreate(pi_context context, pi_device device,
unsigned int flags = 0;

const bool is_out_of_order =
properties & PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
properties & PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE;

std::vector<hipStream_t> computeHipStreams(
is_out_of_order ? _pi_queue::default_num_compute_streams : 1);
Expand All @@ -2401,6 +2405,17 @@ pi_result hip_piQueueCreate(pi_context context, pi_device device,
return PI_ERROR_OUT_OF_RESOURCES;
}
}
pi_result hip_piextQueueCreate(pi_context Context, pi_device Device,
pi_queue_properties *Properties,
pi_queue *Queue) {
assert(Properties);
// Expect flags mask to be passed first.
assert(Properties[0] == PI_QUEUE_FLAGS);
pi_queue_properties Flags = Properties[1];
// Extra data isn't supported yet.
assert(Properties[2] == 0);
return hip_piQueueCreate(Context, Device, Flags, Queue);
}

pi_result hip_piQueueGetInfo(pi_queue command_queue, pi_queue_info param_name,
size_t param_value_size, void *param_value,
Expand Down Expand Up @@ -3674,7 +3689,8 @@ pi_result hip_piEventGetProfilingInfo(pi_event event,
assert(event != nullptr);

pi_queue queue = event->get_queue();
if (queue == nullptr || !(queue->properties_ & PI_QUEUE_PROFILING_ENABLE)) {
if (queue == nullptr ||
!(queue->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE)) {
return PI_ERROR_PROFILING_INFO_NOT_AVAILABLE;
}

Expand Down Expand Up @@ -5201,6 +5217,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
hip_piextContextCreateWithNativeHandle)
// Queue
_PI_CL(piQueueCreate, hip_piQueueCreate)
_PI_CL(piextQueueCreate, hip_piextQueueCreate)
_PI_CL(piQueueGetInfo, hip_piQueueGetInfo)
_PI_CL(piQueueFinish, hip_piQueueFinish)
_PI_CL(piQueueFlush, hip_piQueueFlush)
Expand Down
Loading