Skip to content

Commit a993497

Browse files
committed
Merge branch 'main' into tensormap-exp-api
2 parents d5eb268 + f01741a commit a993497

File tree

102 files changed

+1802
-1024
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

102 files changed

+1802
-1024
lines changed

.github/workflows/multi_device.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,4 +63,4 @@ jobs:
6363

6464
- name: Test adapters
6565
working-directory: ${{github.workspace}}/build
66-
run: env UR_CTS_ADAPTER_PLATFORM="${{matrix.adapter.platform}}" ctest -C ${{matrix.build_type}} --output-on-failure -L "conformance" -E "enqueue|kernel|integration|exp_command_buffer|exp_enqueue_native|exp_launch_properties|exp_usm_p2p" --timeout 180
66+
run: env UR_CTS_ADAPTER_PLATFORM="${{matrix.adapter.platform}}" ctest -C ${{matrix.build_type}} --output-on-failure -L "conformance" -E "exp_command_buffer" --timeout 180

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ if(UR_ENABLE_TRACING)
137137

138138
if (UR_BUILD_XPTI_LIBS)
139139
# fetch xpti proxy library for the tracing layer
140-
FetchContentSparse_Declare(xpti https://github.com/intel/llvm.git "sycl-nightly/20230703" "xpti")
140+
FetchContentSparse_Declare(xpti https://github.com/intel/llvm.git "nightly-2024-10-22" "xpti")
141141
FetchContent_MakeAvailable(xpti)
142142

143143
# set -fPIC for xpti since we are linking it with a shared library
@@ -149,7 +149,7 @@ if(UR_ENABLE_TRACING)
149149
set(XPTI_DIR ${xpti_SOURCE_DIR})
150150
set(XPTI_ENABLE_TESTS OFF CACHE INTERNAL "Turn off xptifw tests")
151151

152-
FetchContentSparse_Declare(xptifw https://github.com/intel/llvm.git "sycl-nightly/20230703" "xptifw")
152+
FetchContentSparse_Declare(xptifw https://github.com/intel/llvm.git "nightly-2024-10-22" "xptifw")
153153

154154
FetchContent_MakeAvailable(xptifw)
155155

cmake/helpers.cmake

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,10 @@ macro(add_sanitizer_flag flag)
5858
set(CMAKE_REQUIRED_LIBRARIES ${SAVED_CMAKE_REQUIRED_LIBRARIES})
5959
endmacro()
6060

61-
check_cxx_compiler_flag("-fcf-protection=full" CXX_HAS_FCF_PROTECTION_FULL)
61+
if(CMAKE_SYSTEM_NAME STREQUAL Linux)
62+
check_cxx_compiler_flag("-fcf-protection=full" CXX_HAS_FCF_PROTECTION_FULL)
63+
check_cxx_compiler_flag("-fstack-clash-protection" CXX_HAS_FSTACK_CLASH_PROTECTION)
64+
endif()
6265

6366
function(add_ur_target_compile_options name)
6467
if(NOT MSVC)
@@ -81,9 +84,7 @@ function(add_ur_target_compile_options name)
8184
# -flto
8285
# $<$<CXX_COMPILER_ID:Clang,AppleClang>:-fsanitize=cfi>
8386
$<$<BOOL:${CXX_HAS_FCF_PROTECTION_FULL}>:-fcf-protection=full>
84-
# -fstack-clash-protection is not supported in apple clang or GCC < 8
85-
$<$<AND:$<CXX_COMPILER_ID:GNU>,$<VERSION_GREATER_EQUAL:$<CXX_COMPILER_VERSION>,8>>:-fstack-clash-protection>
86-
$<$<CXX_COMPILER_ID:Clang>:-fstack-clash-protection>
87+
$<$<BOOL:${CXX_HAS_FSTACK_CLASH_PROTECTION}>:-fstack-clash-protection>
8788

8889
# Colored output
8990
$<$<CXX_COMPILER_ID:GNU>:-fdiagnostics-color=always>

examples/collector/collector.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,8 +125,7 @@ XPTI_CALLBACK_API void xptiTraceInit(unsigned int major_version,
125125
return;
126126
}
127127
if (std::string_view(stream_name) != UR_STREAM_NAME) {
128-
std::cout << "Invalid stream name: " << stream_name << ". Expected "
129-
<< UR_STREAM_NAME << ". Aborting." << std::endl;
128+
// We only handle the ur.call stream; other streams (e.g. xpti.framework) can also reach this callback, so ignore them silently.
130129
return;
131130
}
132131

include/ur_api.h

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1056,6 +1056,8 @@ typedef enum ur_adapter_backend_t {
10561056
/// + `NULL == phAdapters`
10571057
/// - ::UR_RESULT_ERROR_INVALID_SIZE
10581058
/// + `NumEntries == 0 && phPlatforms != NULL`
1059+
/// - ::UR_RESULT_ERROR_INVALID_VALUE
1060+
/// + `pNumPlatforms == NULL && phPlatforms == NULL`
10591061
UR_APIEXPORT ur_result_t UR_APICALL
10601062
urPlatformGet(
10611063
ur_adapter_handle_t *phAdapters, ///< [in][range(0, NumAdapters)] array of adapters to query for platforms.
@@ -1639,6 +1641,9 @@ typedef enum ur_device_info_t {
16391641
UR_DEVICE_INFO_GLOBAL_VARIABLE_SUPPORT = 118, ///< [::ur_bool_t] return true if the device supports the
16401642
///< `EnqueueDeviceGlobalVariableWrite` and
16411643
///< `EnqueueDeviceGlobalVariableRead` entry points.
1644+
UR_DEVICE_INFO_USM_POOL_SUPPORT = 119, ///< [::ur_bool_t] return true if the device supports USM pooling. Pertains
1645+
///< to the `USMPool` entry points and usage of the `pool` parameter of the
1646+
///< USM alloc entry points.
16421647
UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP = 0x1000, ///< [::ur_bool_t] Returns true if the device supports the use of
16431648
///< command-buffers.
16441649
UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP = 0x1001, ///< [::ur_device_command_buffer_update_capability_flags_t] Command-buffer
@@ -3503,7 +3508,6 @@ typedef struct ur_usm_pool_limits_desc_t {
35033508
/// @brief USM allocate host memory
35043509
///
35053510
/// @details
3506-
/// - This function must support memory pooling.
35073511
/// - If pUSMDesc is not NULL and pUSMDesc->pool is not NULL the allocation
35083512
/// will be served from a specified memory pool.
35093513
/// - Otherwise, the behavior is implementation-defined.
@@ -3536,6 +3540,8 @@ typedef struct ur_usm_pool_limits_desc_t {
35363540
/// + `size` is greater than ::UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE.
35373541
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY
35383542
/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES
3543+
/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE
3544+
/// + If any device associated with `hContext` reports `false` for ::UR_DEVICE_INFO_USM_POOL_SUPPORT
35393545
UR_APIEXPORT ur_result_t UR_APICALL
35403546
urUSMHostAlloc(
35413547
ur_context_handle_t hContext, ///< [in] handle of the context object
@@ -3549,7 +3555,6 @@ urUSMHostAlloc(
35493555
/// @brief USM allocate device memory
35503556
///
35513557
/// @details
3552-
/// - This function must support memory pooling.
35533558
/// - If pUSMDesc is not NULL and pUSMDesc->pool is not NULL the allocation
35543559
/// will be served from a specified memory pool.
35553560
/// - Otherwise, the behavior is implementation-defined.
@@ -3583,6 +3588,8 @@ urUSMHostAlloc(
35833588
/// + `size` is greater than ::UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE.
35843589
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY
35853590
/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES
3591+
/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE
3592+
/// + If any device associated with `hContext` reports `false` for ::UR_DEVICE_INFO_USM_POOL_SUPPORT
35863593
UR_APIEXPORT ur_result_t UR_APICALL
35873594
urUSMDeviceAlloc(
35883595
ur_context_handle_t hContext, ///< [in] handle of the context object
@@ -3597,7 +3604,6 @@ urUSMDeviceAlloc(
35973604
/// @brief USM allocate shared memory
35983605
///
35993606
/// @details
3600-
/// - This function must support memory pooling.
36013607
/// - If pUSMDesc is not NULL and pUSMDesc->pool is not NULL the allocation
36023608
/// will be served from a specified memory pool.
36033609
/// - Otherwise, the behavior is implementation-defined.
@@ -3632,6 +3638,8 @@ urUSMDeviceAlloc(
36323638
/// + If `UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT` and `UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT` are both false.
36333639
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY
36343640
/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES
3641+
/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE
3642+
/// + If any device associated with `hContext` reports `false` for ::UR_DEVICE_INFO_USM_POOL_SUPPORT
36353643
UR_APIEXPORT ur_result_t UR_APICALL
36363644
urUSMSharedAlloc(
36373645
ur_context_handle_t hContext, ///< [in] handle of the context object
@@ -3713,6 +3721,8 @@ urUSMGetMemAllocInfo(
37133721
/// + `::UR_USM_POOL_FLAGS_MASK & pPoolDesc->flags`
37143722
/// - ::UR_RESULT_ERROR_INVALID_VALUE
37153723
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY
3724+
/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE
3725+
/// + If any device associated with `hContext` reports `false` for ::UR_DEVICE_INFO_USM_POOL_SUPPORT
37163726
UR_APIEXPORT ur_result_t UR_APICALL
37173727
urUSMPoolCreate(
37183728
ur_context_handle_t hContext, ///< [in] handle of the context object
@@ -3731,6 +3741,7 @@ urUSMPoolCreate(
37313741
/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
37323742
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
37333743
/// + `NULL == pPool`
3744+
/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE
37343745
UR_APIEXPORT ur_result_t UR_APICALL
37353746
urUSMPoolRetain(
37363747
ur_usm_pool_handle_t pPool ///< [in][retain] pointer to USM memory pool
@@ -3753,6 +3764,7 @@ urUSMPoolRetain(
37533764
/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
37543765
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
37553766
/// + `NULL == pPool`
3767+
/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE
37563768
UR_APIEXPORT ur_result_t UR_APICALL
37573769
urUSMPoolRelease(
37583770
ur_usm_pool_handle_t pPool ///< [in][release] pointer to USM memory pool
@@ -3794,6 +3806,7 @@ typedef enum ur_usm_pool_info_t {
37943806
/// + `pPropValue == NULL && pPropSizeRet == NULL`
37953807
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY
37963808
/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES
3809+
/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE
37973810
UR_APIEXPORT ur_result_t UR_APICALL
37983811
urUSMPoolGetInfo(
37993812
ur_usm_pool_handle_t hPool, ///< [in] handle of the USM memory pool
@@ -9819,9 +9832,9 @@ urUSMReleaseExp(
98199832
///////////////////////////////////////////////////////////////////////////////
98209833
/// @brief Supported peer info
98219834
typedef enum ur_exp_peer_info_t {
9822-
UR_EXP_PEER_INFO_UR_PEER_ACCESS_SUPPORTED = 0, ///< [uint32_t] 1 if P2P access is supported otherwise P2P access is not
9835+
UR_EXP_PEER_INFO_UR_PEER_ACCESS_SUPPORTED = 0, ///< [int] 1 if P2P access is supported otherwise P2P access is not
98239836
///< supported.
9824-
UR_EXP_PEER_INFO_UR_PEER_ATOMICS_SUPPORTED = 1, ///< [uint32_t] 1 if atomic operations are supported over the P2P link,
9837+
UR_EXP_PEER_INFO_UR_PEER_ATOMICS_SUPPORTED = 1, ///< [int] 1 if atomic operations are supported over the P2P link,
98259838
///< otherwise such operations are not supported.
98269839
/// @cond
98279840
UR_EXP_PEER_INFO_FORCE_UINT32 = 0x7fffffff

include/ur_print.hpp

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2578,6 +2578,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) {
25782578
case UR_DEVICE_INFO_GLOBAL_VARIABLE_SUPPORT:
25792579
os << "UR_DEVICE_INFO_GLOBAL_VARIABLE_SUPPORT";
25802580
break;
2581+
case UR_DEVICE_INFO_USM_POOL_SUPPORT:
2582+
os << "UR_DEVICE_INFO_USM_POOL_SUPPORT";
2583+
break;
25812584
case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP:
25822585
os << "UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP";
25832586
break;
@@ -4080,6 +4083,18 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info
40804083

40814084
os << ")";
40824085
} break;
4086+
case UR_DEVICE_INFO_USM_POOL_SUPPORT: {
4087+
const ur_bool_t *tptr = (const ur_bool_t *)ptr;
4088+
if (sizeof(ur_bool_t) > size) {
4089+
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")";
4090+
return UR_RESULT_ERROR_INVALID_SIZE;
4091+
}
4092+
os << (const void *)(tptr) << " (";
4093+
4094+
os << *tptr;
4095+
4096+
os << ")";
4097+
} break;
40834098
case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: {
40844099
const ur_bool_t *tptr = (const ur_bool_t *)ptr;
40854100
if (sizeof(ur_bool_t) > size) {
@@ -10336,9 +10351,9 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_exp_peer_in
1033610351

1033710352
switch (value) {
1033810353
case UR_EXP_PEER_INFO_UR_PEER_ACCESS_SUPPORTED: {
10339-
const uint32_t *tptr = (const uint32_t *)ptr;
10340-
if (sizeof(uint32_t) > size) {
10341-
os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")";
10354+
const int *tptr = (const int *)ptr;
10355+
if (sizeof(int) > size) {
10356+
os << "invalid size (is: " << size << ", expected: >=" << sizeof(int) << ")";
1034210357
return UR_RESULT_ERROR_INVALID_SIZE;
1034310358
}
1034410359
os << (const void *)(tptr) << " (";
@@ -10348,9 +10363,9 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_exp_peer_in
1034810363
os << ")";
1034910364
} break;
1035010365
case UR_EXP_PEER_INFO_UR_PEER_ATOMICS_SUPPORTED: {
10351-
const uint32_t *tptr = (const uint32_t *)ptr;
10352-
if (sizeof(uint32_t) > size) {
10353-
os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")";
10366+
const int *tptr = (const int *)ptr;
10367+
if (sizeof(int) > size) {
10368+
os << "invalid size (is: " << size << ", expected: >=" << sizeof(int) << ")";
1035410369
return UR_RESULT_ERROR_INVALID_SIZE;
1035510370
}
1035610371
os << (const void *)(tptr) << " (";

scripts/benchmarks/benches/compute.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def setup(self):
2020
if self.built:
2121
return
2222

23-
repo_path = git_clone(self.directory, "compute-benchmarks-repo", "https://github.com/intel/compute-benchmarks.git", "08c41bb8bc1762ad53c6194df6d36bfcceff4aa2")
23+
repo_path = git_clone(self.directory, "compute-benchmarks-repo", "https://github.com/intel/compute-benchmarks.git", "aa6a3b2108bb86202b654ad28129156fa746d41d")
2424
build_path = create_build_path(self.directory, 'compute-benchmarks-build')
2525

2626
configure_command = [
@@ -231,3 +231,26 @@ def bin_args(self) -> list[str]:
231231
"--numberOfElementsY=256",
232232
"--numberOfElementsZ=256",
233233
]
234+
235+
class MemcpyExecute(ComputeBenchmark):
236+
def __init__(self, bench, numOpsPerThread, numThreads, allocSize, iterations):
237+
self.numOpsPerThread = numOpsPerThread
238+
self.numThreads = numThreads
239+
self.allocSize = allocSize
240+
self.iterations = iterations
241+
super().__init__(bench, "multithread_benchmark_ur", "MemcpyExecute")
242+
243+
def name(self):
244+
return f"multithread_benchmark_ur MemcpyExecute opsPerThread:{self.numOpsPerThread}, numThreads:{self.numThreads}, allocSize:{self.allocSize}"
245+
246+
def bin_args(self) -> list[str]:
247+
return [
248+
"--Ioq=1",
249+
"--UseEvents=1",
250+
"--MeasureCompletion=1",
251+
"--UseQueuePerThread=1",
252+
f"--AllocSize={self.allocSize}",
253+
f"--NumThreads={self.numThreads}",
254+
f"--NumOpsPerThread={self.numOpsPerThread}",
255+
f"--iterations={self.iterations}"
256+
]

scripts/benchmarks/main.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,12 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
4444
ExecImmediateCopyQueue(cb, 0, 1, 'Device', 'Device', 1024),
4545
ExecImmediateCopyQueue(cb, 1, 1, 'Device', 'Host', 1024),
4646
VectorSum(cb),
47+
MemcpyExecute(cb, 400, 8, 1024, 100),
48+
MemcpyExecute(cb, 400, 8, 102400, 10),
49+
MemcpyExecute(cb, 500, 8, 102400, 10),
50+
MemcpyExecute(cb, 400, 1, 1024, 1000),
51+
MemcpyExecute(cb, 10, 16, 1024, 1000),
52+
MemcpyExecute(cb, 10, 16, 102400, 100),
4753

4854
# *** Velocity benchmarks
4955
Hashtable(vb),

scripts/core/CONTRIB.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ Adapter Change Process
129129
.. _UNIFIED_RUNTIME_REPO:
130130
https://github.com/intel/llvm/blob/sycl/sycl/cmake/modules/FetchUnifiedRuntime.cmake#L119
131131
.. _UNIFIED_RUNTIME_TAG:
132-
https://github.com/intel/llvm/blob/sycl/sycl/cmake/modules/FetchUnifiedRuntime.cmake#L126
132+
https://github.com/intel/llvm/blob/sycl/sycl/cmake/modules/UnifiedRuntimeTag.cmake
133133

134134
Build Environment
135135
=================

scripts/core/EXP-USM-P2P.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ Changelog
6969
+-----------+---------------------------------------------+
7070
| 1.1 | Added USM_P2P_EXTENSION_STRING_EXP ID Macro |
7171
+-----------+---------------------------------------------+
72+
| 1.2 | Switched Info types from uint32_t to int |
73+
+-----------+---------------------------------------------+
7274

7375
Contributors
7476
--------------------------------------------------------------------------------

scripts/core/device.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,8 @@ etors:
441441
desc: "[$x_device_handle_t] The composite device containing this component device."
442442
- name: GLOBAL_VARIABLE_SUPPORT
443443
desc: "[$x_bool_t] return true if the device supports the `EnqueueDeviceGlobalVariableWrite` and `EnqueueDeviceGlobalVariableRead` entry points."
444+
- name: USM_POOL_SUPPORT
445+
desc: "[$x_bool_t] return true if the device supports USM pooling. Pertains to the `USMPool` entry points and usage of the `pool` parameter of the USM alloc entry points."
444446
--- #--------------------------------------------------------------------------
445447
type: function
446448
desc: "Retrieves various information about device"

scripts/core/exp-usm-p2p.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@ name: $x_exp_peer_info_t
2424
typed_etors: True
2525
etors:
2626
- name: UR_PEER_ACCESS_SUPPORTED
27-
desc: "[uint32_t] 1 if P2P access is supported otherwise P2P access is not supported."
27+
desc: "[int] 1 if P2P access is supported otherwise P2P access is not supported."
2828
- name: UR_PEER_ATOMICS_SUPPORTED
29-
desc: "[uint32_t] 1 if atomic operations are supported over the P2P link, otherwise such operations are not supported."
29+
desc: "[int] 1 if atomic operations are supported over the P2P link, otherwise such operations are not supported."
3030
--- #--------------------------------------------------------------------------
3131
type: function
3232
desc: "Enable access to peer device memory"

scripts/core/platform.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ params:
4848
returns:
4949
- $X_RESULT_ERROR_INVALID_SIZE:
5050
- "`NumEntries == 0 && phPlatforms != NULL`"
51+
- $X_RESULT_ERROR_INVALID_VALUE:
52+
- "`pNumPlatforms == NULL && phPlatforms == NULL`"
5153
--- #--------------------------------------------------------------------------
5254
type: enum
5355
desc: "Supported platform info"

0 commit comments

Comments
 (0)