Skip to content

Commit c2e98ef

Browse files
authored
Merge pull request #2444 from isaacault/kernel-cts
Reduce gap between Kernel CTS and Specification.
2 parents cb804bc + 805162e commit c2e98ef

24 files changed

+526
-136
lines changed

include/ur_api.h

-1
Original file line numberDiff line numberDiff line change
@@ -5188,7 +5188,6 @@ typedef struct ur_kernel_arg_pointer_properties_t {
51885188
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
51895189
/// + `NULL == hKernel`
51905190
/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX
5191-
/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE
51925191
UR_APIEXPORT ur_result_t UR_APICALL
51935192
urKernelSetArgPointer(
51945193
ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object

scripts/core/kernel.yml

-1
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,6 @@ params:
352352
desc: "[in][optional] Pointer obtained by USM allocation or virtual memory mapping operation. If null then argument value is considered null."
353353
returns:
354354
- $X_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX
355-
- $X_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE
356355
--- #--------------------------------------------------------------------------
357356
type: struct
358357
desc: "Properties for for $xKernelSetExecInfo."

source/adapters/level_zero/kernel.cpp

+6-5
Original file line numberDiff line numberDiff line change
@@ -813,12 +813,13 @@ ur_result_t urKernelGetGroupInfo(
813813
(ZeKernelDevice, &kernelProperties));
814814
if (ZeResult || workGroupProperties.maxGroupSize == 0) {
815815
return ReturnValue(
816-
uint64_t{Device->ZeDeviceComputeProperties->maxTotalGroupSize});
816+
size_t{Device->ZeDeviceComputeProperties->maxTotalGroupSize});
817817
}
818-
return ReturnValue(workGroupProperties.maxGroupSize);
818+
// Specification states this returns a size_t.
819+
return ReturnValue(size_t{workGroupProperties.maxGroupSize});
819820
} else {
820821
return ReturnValue(
821-
uint64_t{Device->ZeDeviceComputeProperties->maxTotalGroupSize});
822+
size_t{Device->ZeDeviceComputeProperties->maxTotalGroupSize});
822823
}
823824
}
824825
case UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE: {
@@ -830,12 +831,12 @@ ur_result_t urKernelGetGroupInfo(
830831
return ReturnValue(WgSize);
831832
}
832833
case UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE:
833-
return ReturnValue(uint32_t{Kernel->ZeKernelProperties->localMemSize});
834+
return ReturnValue(size_t{Kernel->ZeKernelProperties->localMemSize});
834835
case UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: {
835836
return ReturnValue(size_t{Device->ZeDeviceProperties->physicalEUSimdWidth});
836837
}
837838
case UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE: {
838-
return ReturnValue(uint32_t{Kernel->ZeKernelProperties->privateMemSize});
839+
return ReturnValue(size_t{Kernel->ZeKernelProperties->privateMemSize});
839840
}
840841
case UR_KERNEL_GROUP_INFO_COMPILE_MAX_WORK_GROUP_SIZE:
841842
case UR_KERNEL_GROUP_INFO_COMPILE_MAX_LINEAR_WORK_GROUP_SIZE:

source/adapters/level_zero/v2/kernel.cpp

+5-4
Original file line numberDiff line numberDiff line change
@@ -512,10 +512,11 @@ ur_result_t urKernelGetGroupInfo(
512512
ZE_CALL_NOCHECK(zeKernelGetProperties, (zeDevice, &kernelProperties));
513513
if (zeResult == ZE_RESULT_SUCCESS &&
514514
workGroupProperties.maxGroupSize != 0) {
515-
return returnValue(workGroupProperties.maxGroupSize);
515+
// Specification states this returns a size_t.
516+
return returnValue(size_t{workGroupProperties.maxGroupSize});
516517
}
517518
return returnValue(
518-
uint64_t{hDevice->ZeDeviceComputeProperties->maxTotalGroupSize});
519+
size_t{hDevice->ZeDeviceComputeProperties->maxTotalGroupSize});
519520
}
520521
case UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE: {
521522
auto props = hKernel->getProperties(hDevice);
@@ -527,15 +528,15 @@ ur_result_t urKernelGetGroupInfo(
527528
}
528529
case UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE: {
529530
auto props = hKernel->getProperties(hDevice);
530-
return returnValue(uint32_t{props.localMemSize});
531+
return returnValue(size_t{props.localMemSize});
531532
}
532533
case UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: {
533534
return returnValue(
534535
size_t{hDevice->ZeDeviceProperties->physicalEUSimdWidth});
535536
}
536537
case UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE: {
537538
auto props = hKernel->getProperties(hDevice);
538-
return returnValue(uint32_t{props.privateMemSize});
539+
return returnValue(size_t{props.privateMemSize});
539540
}
540541
case UR_KERNEL_GROUP_INFO_COMPILE_MAX_WORK_GROUP_SIZE:
541542
case UR_KERNEL_GROUP_INFO_COMPILE_MAX_LINEAR_WORK_GROUP_SIZE:

source/loader/ur_libapi.cpp

-1
Original file line numberDiff line numberDiff line change
@@ -3983,7 +3983,6 @@ ur_result_t UR_APICALL urKernelRelease(
39833983
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
39843984
/// + `NULL == hKernel`
39853985
/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX
3986-
/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE
39873986
ur_result_t UR_APICALL urKernelSetArgPointer(
39883987
ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object
39893988
uint32_t argIndex, ///< [in] argument index in range [0, num args - 1]

source/ur_api.cpp

-1
Original file line numberDiff line numberDiff line change
@@ -3395,7 +3395,6 @@ ur_result_t UR_APICALL urKernelRelease(
33953395
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
33963396
/// + `NULL == hKernel`
33973397
/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX
3398-
/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE
33993398
ur_result_t UR_APICALL urKernelSetArgPointer(
34003399
ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object
34013400
uint32_t argIndex, ///< [in] argument index in range [0, num args - 1]

test/conformance/device_code/CMakeLists.txt

+2
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,9 @@ add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/saxpy.cpp)
159159
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/saxpy_usm.cpp)
160160
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/indexers_usm.cpp)
161161
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/build_failure.cpp)
162+
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fixed_sg_size.cpp)
162163
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fixed_wg_size.cpp)
164+
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/max_wg_size.cpp)
163165
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/sequence.cpp)
164166
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/standard_types.cpp)
165167
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/subgroup.cpp)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// Copyright (C) 2024 Intel Corporation
2+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
3+
// See LICENSE.TXT
4+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5+
6+
#include <sycl/sycl.hpp>
7+
8+
struct KernelFunctor {
9+
void operator()(sycl::nd_item<3>) const {}
10+
void operator()(sycl::item<3>) const {}
11+
12+
auto get(sycl::ext::oneapi::experimental::properties_tag) {
13+
return sycl::ext::oneapi::experimental::properties{
14+
sycl::ext::oneapi::experimental::sub_group_size<8>};
15+
}
16+
};
17+
18+
int main() {
19+
sycl::queue myQueue;
20+
myQueue.submit([&](sycl::handler &cgh) {
21+
cgh.parallel_for<class FixedSgSize>(sycl::range<3>(8, 8, 8),
22+
KernelFunctor{});
23+
});
24+
25+
myQueue.wait();
26+
return 0;
27+
}

test/conformance/device_code/fixed_wg_size.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ struct KernelFunctor {
1111

1212
auto get(sycl::ext::oneapi::experimental::properties_tag) {
1313
return sycl::ext::oneapi::experimental::properties{
14-
sycl::ext::oneapi::experimental::work_group_size<4, 4, 4>};
14+
sycl::ext::oneapi::experimental::work_group_size<8, 4, 2>};
1515
}
1616
};
1717

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// Copyright (C) 2024 Intel Corporation
2+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
3+
// See LICENSE.TXT
4+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5+
6+
#include <sycl/sycl.hpp>
7+
8+
struct KernelFunctor {
9+
void operator()(sycl::nd_item<3>) const {}
10+
void operator()(sycl::item<3>) const {}
11+
12+
auto get(sycl::ext::oneapi::experimental::properties_tag) {
13+
return sycl::ext::oneapi::experimental::properties{
14+
sycl::ext::oneapi::experimental::max_work_group_size<8, 4, 2>,
15+
sycl::ext::oneapi::experimental::max_linear_work_group_size<64>};
16+
}
17+
};
18+
19+
int main() {
20+
sycl::queue myQueue;
21+
myQueue.submit([&](sycl::handler &cgh) {
22+
cgh.parallel_for<class MaxWgSize>(sycl::range<3>(8, 8, 8),
23+
KernelFunctor{});
24+
});
25+
26+
myQueue.wait();
27+
return 0;
28+
}

test/conformance/enqueue/urEnqueueKernelLaunch.cpp

+4-2
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,10 @@ struct urEnqueueKernelLaunchKernelWgSizeTest : uur::urKernelExecutionTest {
2727

2828
std::array<size_t, 3> global_size{32, 32, 32};
2929
std::array<size_t, 3> global_offset{0, 0, 0};
30-
// This must match the size in fixed_wg_size.cpp
31-
std::array<size_t, 3> wg_size{4, 4, 4};
30+
// This value correlates to work_group_size<8, 4, 2> in fixed_wg_size.cpp.
31+
// In SYCL, the right-most dimension varies the fastest in linearization.
32+
// In UR, this is on the left, so we reverse the order of these values.
33+
std::array<size_t, 3> wg_size{2, 4, 8};
3234
size_t n_dimensions = 3;
3335
};
3436
UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueKernelLaunchKernelWgSizeTest);

test/conformance/kernel/kernel_adapter_cuda.match

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
urKernelGetGroupInfoWgSizeTest.CompileWorkGroupSize/*
1+
urKernelGetGroupInfoFixedWorkGroupSizeTest.CompileWorkGroupSize/*
2+
urKernelGetGroupInfoMaxWorkGroupSizeTest.CompileMaxWorkGroupSize/*
3+
urKernelGetGroupInfoMaxWorkGroupSizeTest.CompileMaxLinearWorkGroupSize/*
4+
urKernelGetSubGroupInfoFixedSubGroupSizeTest.CompileNumSubGroups/*
25
{{OPT}}urKernelSetArgLocalTest.InvalidKernelArgumentIndex/*
36
{{OPT}}urKernelSetArgMemObjTest.InvalidKernelArgumentIndex/*
47
{{OPT}}urKernelSetArgPointerNegativeTest.InvalidKernelArgumentIndex/*

test/conformance/kernel/kernel_adapter_hip.match

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
urKernelGetGroupInfoWgSizeTest.CompileWorkGroupSize/*
1+
urKernelGetGroupInfoFixedWorkGroupSizeTest.CompileWorkGroupSize/*
2+
urKernelGetSubGroupInfoFixedSubGroupSizeTest.CompileNumSubGroups/*
23
urKernelSetArgLocalTest.InvalidKernelArgumentIndex/*
34
urKernelSetArgMemObjTest.InvalidKernelArgumentIndex/*
45
urKernelSetArgPointerNegativeTest.InvalidKernelArgumentIndex/*

test/conformance/kernel/kernel_adapter_level_zero.match

+4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
# Match tests that use fixed_sg_size.cpp as it fails to compile on some
2+
# hardware.
3+
{{OPT}}urKernelGetSubGroupInfoFixedSubGroupSizeTest.CompileNumSubGroups/*
4+
15
urKernelSetExecInfoTest.SuccessIndirectAccess/*
26
urKernelSetExecInfoUSMPointersTest.SuccessHost/*
37
urKernelSetExecInfoUSMPointersTest.SuccessDevice/*

test/conformance/kernel/kernel_adapter_level_zero_v2.match

+4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
# Match tests that use fixed_sg_size.cpp as it fails to compile on some
2+
# hardware.
3+
{{OPT}}urKernelGetSubGroupInfoFixedSubGroupSizeTest.CompileNumSubGroups/*
4+
15
urKernelSetExecInfoTest.SuccessIndirectAccess/*
26
urKernelSetExecInfoUSMPointersTest.SuccessHost/*
37
urKernelSetExecInfoUSMPointersTest.SuccessDevice/*

test/conformance/kernel/kernel_adapter_native_cpu.match

+27-9
Original file line numberDiff line numberDiff line change
@@ -6,34 +6,51 @@ urKernelCreateTest.InvalidKernelName/*
66
urKernelCreateWithNativeHandleTest.Success/*
77
urKernelCreateWithNativeHandleTest.InvalidNullHandleContext/*
88
urKernelCreateWithNativeHandleTest.InvalidNullPointerNativeKernel/*
9-
urKernelGetGroupInfoTest.Success/*
9+
urKernelGetGroupInfoFixedWorkGroupSizeTest.CompileWorkGroupSize/*
10+
urKernelGetGroupInfoMaxWorkGroupSizeTest.CompileMaxWorkGroupSize/*
11+
urKernelGetGroupInfoMaxWorkGroupSizeTest.CompileMaxLinearWorkGroupSize/*
12+
urKernelGetGroupInfoTest.GlobalWorkSize/*
13+
urKernelGetGroupInfoTest.WorkGroupSize/*
14+
urKernelGetGroupInfoTest.LocalMemSize/*
15+
urKernelGetGroupInfoTest.PreferredWorkGroupSizeMultiple/*
16+
urKernelGetGroupInfoTest.PrivateMemSize/*
17+
urKernelGetGroupInfoTest.CompileWorkGroupSizeEmpty/*
18+
urKernelGetGroupInfoTest.CompileMaxWorkGroupSizeEmpty/*
1019
urKernelGetGroupInfoTest.InvalidNullHandleKernel/*
1120
urKernelGetGroupInfoTest.InvalidNullHandleDevice/*
1221
urKernelGetGroupInfoTest.InvalidEnumeration/*
13-
urKernelGetGroupInfoSingleTest.CompileWorkGroupSizeEmpty/*
14-
urKernelGetGroupInfoSingleTest.CompileMaxWorkGroupSizeEmpty/*
15-
urKernelGetGroupInfoWgSizeTest.CompileWorkGroupSize/*
16-
urKernelGetInfoTest.Success/*
22+
urKernelGetInfoTest.FunctionName/*
23+
urKernelGetInfoTest.NumArgs/*
24+
urKernelGetInfoTest.ReferenceCount/*
25+
urKernelGetInfoTest.Context/*
26+
urKernelGetInfoTest.Program/*
27+
urKernelGetInfoTest.Attributes/*
28+
urKernelGetInfoTest.NumRegs/*
29+
urKernelGetInfoTest.KernelNameCorrect/*
30+
urKernelGetInfoTest.KernelContextCorrect/*
1731
urKernelGetInfoTest.InvalidNullHandleKernel/*
1832
urKernelGetInfoTest.InvalidEnumeration/*
1933
urKernelGetInfoTest.InvalidSizeZero/*
2034
urKernelGetInfoTest.InvalidSizeSmall/*
2135
urKernelGetInfoTest.InvalidNullPointerPropValue/*
2236
urKernelGetInfoTest.InvalidNullPointerPropSizeRet/*
23-
urKernelGetInfoSingleTest.KernelNameCorrect/*
24-
urKernelGetInfoSingleTest.KernelContextCorrect/*
2537
urKernelGetNativeHandleTest.Success/*
2638
urKernelGetNativeHandleTest.InvalidNullHandleKernel/*
2739
urKernelGetNativeHandleTest.InvalidNullPointerNativeKernel/*
28-
urKernelGetSubGroupInfoTest.Success/*
40+
urKernelGetSubGroupInfoFixedSubGroupSizeTest.CompileNumSubGroups/*
41+
urKernelGetSubGroupInfoTest.MaxSubGroupSize/*
42+
urKernelGetSubGroupInfoTest.MaxNumSubGroups/*
43+
urKernelGetSubGroupInfoTest.SubGroupSizeIntel/*
44+
urKernelGetSubGroupInfoTest.CompileNumSubgroupsIsZero/*
2945
urKernelGetSubGroupInfoTest.InvalidNullHandleKernel/*
3046
urKernelGetSubGroupInfoTest.InvalidNullHandleDevice/*
3147
urKernelGetSubGroupInfoTest.InvalidEnumeration/*
32-
urKernelGetSubGroupInfoSingleTest.CompileNumSubgroupsIsZero/*
3348
urKernelReleaseTest.Success/*
49+
urKernelReleaseTest.CheckReferenceCount/*
3450
urKernelReleaseTest.KernelReleaseAfterProgramRelease/*
3551
urKernelReleaseTest.InvalidNullHandleKernel/*
3652
urKernelRetainTest.Success/*
53+
urKernelRetainTest.CheckReferenceCount/*
3754
urKernelRetainTest.InvalidNullHandleKernel/*
3855
urKernelSetArgLocalTest.Success/*
3956
urKernelSetArgLocalTest.InvalidNullHandleKernel/*
@@ -44,6 +61,7 @@ urKernelSetArgLocalMultiTest.Overwrite/*
4461
urKernelSetArgMemObjTest.Success/*
4562
urKernelSetArgMemObjTest.InvalidNullHandleKernel/*
4663
urKernelSetArgMemObjTest.InvalidKernelArgumentIndex/*
64+
urKernelSetArgMemObjTest.InvalidEnumeration/*
4765
urKernelSetArgPointerTest.SuccessHost/*
4866
urKernelSetArgPointerTest.SuccessDevice/*
4967
urKernelSetArgPointerTest.SuccessShared/*
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Match tests that use fixed_wg_size.cpp as it fails to compile on some
2+
# hardware.
3+
{{OPT}}urKernelGetGroupInfoMaxWorkGroupSizeTest.CompileMaxWorkGroupSize/*
4+
{{OPT}}urKernelGetGroupInfoMaxWorkGroupSizeTest.CompileMaxLinearWorkGroupSize/*
5+
6+
# Match tests that use fixed_sg_size.cpp as it fails to compile on some
7+
# hardware.
8+
{{OPT}}urKernelGetSubGroupInfoFixedSubGroupSizeTest.CompileNumSubGroups/*

0 commit comments

Comments
 (0)