Skip to content

[L0] Support updating kernel commands in command buffers #1353

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
459 changes: 406 additions & 53 deletions source/adapters/level_zero/command_buffer.cpp

Large diffs are not rendered by default.

17 changes: 17 additions & 0 deletions source/adapters/level_zero/command_buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,4 +78,21 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object {
// Event which a command-buffer waits on until the main command-list event
// has been reset.
ur_event_handle_t AllResetEvent = nullptr;
// Indicates if command-buffer commands can be updated after it is closed.
bool IsUpdatable = false;
// Indicates if command buffer was finalized.
bool IsFinalized = false;
};

// Handle object describing a single command that belongs to a command-buffer.
// It records the owning command-buffer, a 64-bit command identifier and the
// kernel handle passed at construction.
struct ur_exp_command_buffer_command_handle_t_ : public _ur_object {
// Constructed from the owning command-buffer, a 64-bit id and a kernel
// handle. The definition is not in this header.
ur_exp_command_buffer_command_handle_t_(ur_exp_command_buffer_handle_t,
uint64_t, ur_kernel_handle_t);

// Destructor is defined out of line (not visible in this header).
~ur_exp_command_buffer_command_handle_t_();

// Command-buffer of this command.
ur_exp_command_buffer_handle_t CommandBuffer;

// 64-bit identifier of this command.
// NOTE(review): presumably the Level Zero mutable command id obtained when
// the command was appended - confirm against command_buffer.cpp.
uint64_t CommandId;
// Kernel handle associated with this command.
ur_kernel_handle_t Kernel;
};
36 changes: 36 additions & 0 deletions source/adapters/level_zero/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ ur_result_t ze2urResult(ze_result_t ZeResult) {
return UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
case ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY:
return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
case ZE_RESULT_ERROR_UNSUPPORTED_FEATURE:
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
default:
return UR_RESULT_ERROR_UNKNOWN;
}
Expand Down Expand Up @@ -171,6 +173,40 @@ template <> ze_structure_type_t getZeStructureType<ze_fence_desc_t>() {
template <> ze_structure_type_t getZeStructureType<ze_command_list_desc_t>() {
return ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC;
}
// getZeStructureType specializations for the Level Zero mutable command list
// (experimental) structures. Each specialization returns the
// ZE_STRUCTURE_TYPE_* enumerator that corresponds to its template argument.

// Device properties query structure for mutable command lists.
template <>
ze_structure_type_t
getZeStructureType<ze_mutable_command_list_exp_properties_t>() {
return ZE_STRUCTURE_TYPE_MUTABLE_COMMAND_LIST_EXP_PROPERTIES;
}
// Mutable command list descriptor.
template <>
ze_structure_type_t getZeStructureType<ze_mutable_command_list_exp_desc_t>() {
return ZE_STRUCTURE_TYPE_MUTABLE_COMMAND_LIST_EXP_DESC;
}
// Mutable command id descriptor.
template <>
ze_structure_type_t getZeStructureType<ze_mutable_command_id_exp_desc_t>() {
return ZE_STRUCTURE_TYPE_MUTABLE_COMMAND_ID_EXP_DESC;
}
// Mutable group count descriptor.
template <>
ze_structure_type_t getZeStructureType<ze_mutable_group_count_exp_desc_t>() {
return ZE_STRUCTURE_TYPE_MUTABLE_GROUP_COUNT_EXP_DESC;
}
// Mutable group size descriptor.
template <>
ze_structure_type_t getZeStructureType<ze_mutable_group_size_exp_desc_t>() {
return ZE_STRUCTURE_TYPE_MUTABLE_GROUP_SIZE_EXP_DESC;
}
// Mutable global offset descriptor.
template <>
ze_structure_type_t getZeStructureType<ze_mutable_global_offset_exp_desc_t>() {
return ZE_STRUCTURE_TYPE_MUTABLE_GLOBAL_OFFSET_EXP_DESC;
}
// Mutable kernel argument descriptor.
template <>
ze_structure_type_t
getZeStructureType<ze_mutable_kernel_argument_exp_desc_t>() {
return ZE_STRUCTURE_TYPE_MUTABLE_KERNEL_ARGUMENT_EXP_DESC;
}
// Mutable commands (update) descriptor.
template <>
ze_structure_type_t getZeStructureType<ze_mutable_commands_exp_desc_t>() {
return ZE_STRUCTURE_TYPE_MUTABLE_COMMANDS_EXP_DESC;
}
template <> ze_structure_type_t getZeStructureType<ze_context_desc_t>() {
return ZE_STRUCTURE_TYPE_CONTEXT_DESC;
}
Expand Down
27 changes: 25 additions & 2 deletions source/adapters/level_zero/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -917,8 +917,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(
}
case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP:
return ReturnValue(true);
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP:
return ReturnValue(false);
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: {
// TODO: Level Zero API allows to check support for all sub-features:
// ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS,
// ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_COUNT,
// ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_SIZE,
// ZE_MUTABLE_COMMAND_EXP_FLAG_GLOBAL_OFFSET,
// ZE_MUTABLE_COMMAND_EXP_FLAG_SIGNAL_EVENT,
// ZE_MUTABLE_COMMAND_EXP_FLAG_WAIT_EVENTS
// but UR has only one property to check the mutable command lists feature
// support. For now return true if kernel arguments can be updated.
auto KernelArgUpdateSupport =
Device->ZeDeviceMutableCmdListsProperties->mutableCommandFlags &
ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS;
return ReturnValue(KernelArgUpdateSupport &&
Device->Platform->ZeMutableCmdListExt.Supported);
}
case UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP:
return ReturnValue(true);
case UR_DEVICE_INFO_BINDLESS_IMAGES_SHARED_USM_SUPPORT_EXP:
Expand Down Expand Up @@ -1142,6 +1156,15 @@ ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal,
(ZeDevice, &Count, &Properties));
};

// Compute callback for the mutable command list properties.
// The extension structure is chained through pNext of a base
// ze_device_properties_t and filled in by zeDeviceGetProperties.
ZeDeviceMutableCmdListsProperties.Compute =
[ZeDevice](
ZeStruct<ze_mutable_command_list_exp_properties_t> &Properties) {
// Only stype and pNext of the base structure are set before the call;
// the remaining fields of P are left for the driver to write.
ze_device_properties_t P;
P.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
P.pNext = &Properties;
// The call result is not checked here (NOCHECK variant of the macro).
ZE_CALL_NOCHECK(zeDeviceGetProperties, (ZeDevice, &P));
};

ImmCommandListUsed = this->useImmediateCommandLists();

uint32_t numQueueGroups = 0;
Expand Down
2 changes: 2 additions & 0 deletions source/adapters/level_zero/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -195,4 +195,6 @@ struct ur_device_handle_t_ : _ur_object {
ZeCache<ZeStruct<ze_device_cache_properties_t>> ZeDeviceCacheProperties;
ZeCache<ZeStruct<ze_device_ip_version_ext_t>> ZeDeviceIpVersionExt;
ZeCache<struct ze_global_memsize> ZeGlobalMemSize;
ZeCache<ZeStruct<ze_mutable_command_list_exp_properties_t>>
ZeDeviceMutableCmdListsProperties;
};
33 changes: 33 additions & 0 deletions source/adapters/level_zero/platform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,39 @@ ur_result_t ur_platform_handle_t_::initialize() {
// If yes, then set up L0 API pointers if the platform supports it.
ZeUSMImport.setZeUSMImport(this);

// Check if mutable command list extension is supported and initialize
// function pointers.
// The first lookup seeds Supported with |=, and every following lookup
// narrows it with &=, so Supported stays true only if all four function
// addresses are resolved. Each lookup result is compared against 0.
// NOTE(review): 0 is presumably ZE_RESULT_SUCCESS - confirm against ze_api.h.
ZeMutableCmdListExt.Supported |=
(ZE_CALL_NOCHECK(
zeDriverGetExtensionFunctionAddress,
(ZeDriver, "zeCommandListGetNextCommandIdExp",
reinterpret_cast<void **>(
&ZeMutableCmdListExt.zexCommandListGetNextCommandIdExp))) == 0);

// Entry point used to apply updates to mutable commands.
ZeMutableCmdListExt.Supported &=
(ZE_CALL_NOCHECK(zeDriverGetExtensionFunctionAddress,
(ZeDriver, "zeCommandListUpdateMutableCommandsExp",
reinterpret_cast<void **>(
&ZeMutableCmdListExt
.zexCommandListUpdateMutableCommandsExp))) ==
0);

// Entry point used to update the signal event of a mutable command.
ZeMutableCmdListExt.Supported &=
(ZE_CALL_NOCHECK(
zeDriverGetExtensionFunctionAddress,
(ZeDriver, "zeCommandListUpdateMutableCommandSignalEventExp",
reinterpret_cast<void **>(
&ZeMutableCmdListExt
.zexCommandListUpdateMutableCommandSignalEventExp))) == 0);

// Entry point used to update the wait events of a mutable command.
ZeMutableCmdListExt.Supported &=
(ZE_CALL_NOCHECK(
zeDriverGetExtensionFunctionAddress,
(ZeDriver, "zeCommandListUpdateMutableCommandWaitEventsExp",
reinterpret_cast<void **>(
&ZeMutableCmdListExt
.zexCommandListUpdateMutableCommandWaitEventsExp))) == 0);


return UR_RESULT_SUCCESS;
}

Expand Down
18 changes: 18 additions & 0 deletions source/adapters/level_zero/platform.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,22 @@ struct ur_platform_handle_t_ : public _ur_platform {
// in the driver.
std::list<ur_context_handle_t> Contexts;
ur_shared_mutex ContextsMutex;

// Structure with function pointers for mutable command list extension.
// Not all drivers may support it, so considering that the platform object is
// associated with particular Level Zero driver, store this extension here.
// All members default to "not available": Supported is false and every
// function pointer is nullptr.
struct ZeMutableCmdListExtension {
// True when the extension is considered usable on this platform.
bool Supported = false;
// Takes a command list and a command id descriptor; outputs a 64-bit id.
ze_result_t (*zexCommandListGetNextCommandIdExp)(
ze_command_list_handle_t, const ze_mutable_command_id_exp_desc_t *,
uint64_t *) = nullptr;
// Takes a command list and a mutable commands descriptor.
ze_result_t (*zexCommandListUpdateMutableCommandsExp)(
ze_command_list_handle_t,
const ze_mutable_commands_exp_desc_t *) = nullptr;
// Takes a command list, a 64-bit command id and an event handle.
ze_result_t (*zexCommandListUpdateMutableCommandSignalEventExp)(
ze_command_list_handle_t, uint64_t, ze_event_handle_t) = nullptr;
// Takes a command list, a 64-bit command id, an event count and an array of
// event handles.
ze_result_t (*zexCommandListUpdateMutableCommandWaitEventsExp)(
ze_command_list_handle_t, uint64_t, uint32_t,
ze_event_handle_t *) = nullptr;
} ZeMutableCmdListExt;
};
50 changes: 29 additions & 21 deletions test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,10 @@ TEST_P(BufferFillCommandTest, UpdateParameters) {

// Test updating the global size so that the fill outputs to a larger buffer
TEST_P(BufferFillCommandTest, UpdateGlobalSize) {
if (!updatable_execution_range_support) {
GTEST_SKIP() << "Execution range update is not supported.";
}

ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
nullptr, nullptr));
ASSERT_SUCCESS(urQueueFinish(queue));
Expand Down Expand Up @@ -153,7 +157,7 @@ TEST_P(BufferFillCommandTest, UpdateGlobalSize) {
0, // numNewPointerArgs
0, // numNewValueArgs
0, // numNewExecInfos
0, // newWorkDim
1, // newWorkDim
&new_output_desc, // pNewMemObjArgList
nullptr, // pNewPointerArgList
nullptr, // pNewValueArgList
Expand All @@ -180,7 +184,8 @@ TEST_P(BufferFillCommandTest, SeparateUpdateCalls) {
ASSERT_SUCCESS(urQueueFinish(queue));
ValidateBuffer(buffer, sizeof(val) * global_size, val);

size_t new_global_size = 64;
size_t new_global_size =
updatable_execution_range_support ? 64 : global_size;
const size_t new_buffer_size = sizeof(val) * new_global_size;
ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE,
new_buffer_size, nullptr, &new_buffer));
Expand Down Expand Up @@ -247,25 +252,28 @@ TEST_P(BufferFillCommandTest, SeparateUpdateCalls) {
ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handle,
&input_update_desc));

ur_exp_command_buffer_update_kernel_launch_desc_t global_size_update_desc = {
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype
nullptr, // pNext
0, // numNewMemObjArgs
0, // numNewPointerArgs
0, // numNewValueArgs
0, // numNewExecInfos
0, // newWorkDim
nullptr, // pNewMemObjArgList
nullptr, // pNewPointerArgList
nullptr, // pNewValueArgList
nullptr, // pNewExecInfoList
nullptr, // pNewGlobalWorkOffset
&new_global_size, // pNewGlobalWorkSize
nullptr, // pNewLocalWorkSize
};

ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(
command_handle, &global_size_update_desc));
if (updatable_execution_range_support) {
ur_exp_command_buffer_update_kernel_launch_desc_t
global_size_update_desc = {
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype
nullptr, // pNext
0, // numNewMemObjArgs
0, // numNewPointerArgs
0, // numNewValueArgs
0, // numNewExecInfos
0, // newWorkDim
nullptr, // pNewMemObjArgList
nullptr, // pNewPointerArgList
nullptr, // pNewValueArgList
nullptr, // pNewExecInfoList
nullptr, // pNewGlobalWorkOffset
&new_global_size, // pNewGlobalWorkSize
nullptr, // pNewLocalWorkSize
};

ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(
command_handle, &global_size_update_desc));
}

ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
nullptr, nullptr));
Expand Down
26 changes: 26 additions & 0 deletions test/conformance/exp_command_buffer/fixtures.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,24 +112,50 @@ struct urUpdatableCommandBufferExpExecutionTest
GTEST_SKIP() << "Updating EXP command-buffers is not supported.";
}

// Currently level zero driver doesn't support updating execution range.
if (backend == UR_PLATFORM_BACKEND_LEVEL_ZERO) {
updatable_execution_range_support = false;
}

// Create a command-buffer with update enabled.
ur_exp_command_buffer_desc_t desc{
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, nullptr, true};

ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, &desc,
&updatable_cmd_buf_handle));
ASSERT_NE(updatable_cmd_buf_handle, nullptr);

// Currently there is a synchronization issue with immediate submission when used for command buffers.
// So, create a queue with batched submission for this test suite if the backend is Level Zero.
if (backend == UR_PLATFORM_BACKEND_LEVEL_ZERO) {
ur_queue_flags_t flags = UR_QUEUE_FLAG_SUBMISSION_BATCHED;
ur_queue_properties_t props = {
/*.stype =*/UR_STRUCTURE_TYPE_QUEUE_PROPERTIES,
/*.pNext =*/nullptr,
/*.flags =*/flags,
};
ASSERT_SUCCESS(urQueueCreate(context, device, &props, &queue));
ASSERT_NE(queue, nullptr);
} else {
queue = urCommandBufferExpExecutionTest::queue;
}
}

// Releases the resources owned by this fixture, then delegates to the base
// fixture's TearDown().
//
// Release failures are reported with EXPECT_SUCCESS (non-fatal) so that a
// failing release does not return early from this function and skip the
// remaining cleanup, including the base-class TearDown().
void TearDown() override {
    if (updatable_cmd_buf_handle) {
        EXPECT_SUCCESS(urCommandBufferReleaseExp(updatable_cmd_buf_handle));
    }
    // Release the queue only on the Level Zero backend; on other backends
    // `queue` is a copy of the base fixture's queue handle.
    if (backend == UR_PLATFORM_BACKEND_LEVEL_ZERO && queue) {
        EXPECT_SUCCESS(urQueueRelease(queue));
    }

    UUR_RETURN_ON_FATAL_FAILURE(
        urCommandBufferExpExecutionTest::TearDown());
}

ur_exp_command_buffer_handle_t updatable_cmd_buf_handle = nullptr;
ur_bool_t updatable_execution_range_support = true;
ur_queue_handle_t queue = nullptr;
};

struct urCommandBufferCommandExpTest
Expand Down
4 changes: 4 additions & 0 deletions test/conformance/exp_command_buffer/ndrange_update.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ struct NDRangeUpdateTest
UUR_RETURN_ON_FATAL_FAILURE(
urUpdatableCommandBufferExpExecutionTest::SetUp());

if (!updatable_execution_range_support) {
GTEST_SKIP() << "Execution range update is not supported.";
}

ur_device_usm_access_capability_flags_t shared_usm_flags;
ASSERT_SUCCESS(
uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags));
Expand Down
10 changes: 6 additions & 4 deletions test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,9 @@ TEST_P(USMFillCommandTest, UpdateParameters) {
ASSERT_SUCCESS(urQueueFinish(queue));
Validate((uint32_t *)shared_ptr, global_size, val);

// Allocate a new USM pointer of larger size
size_t new_global_size = 64;
// Allocate a new USM pointer of larger size if feature is supported.
size_t new_global_size =
updatable_execution_range_support ? 64 : global_size;
const size_t new_allocation_size = sizeof(val) * new_global_size;
ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr,
new_allocation_size, &new_shared_ptr));
Expand Down Expand Up @@ -128,8 +129,9 @@ TEST_P(USMFillCommandTest, UpdateParameters) {
&new_input_desc, // pNewValueArgList
nullptr, // pNewExecInfoList
nullptr, // pNewGlobalWorkOffset
&new_global_size, // pNewGlobalWorkSize
nullptr, // pNewLocalWorkSize
updatable_execution_range_support ? &new_global_size
: nullptr, // pNewGlobalWorkSize
nullptr, // pNewLocalWorkSize
};

// Update kernel and enqueue command-buffer again
Expand Down