Skip to content

Commit fe632e9

Browse files
committed
Merge branch 'sycl' into review/yang/msan_device_global
2 parents 2a85354 + 7eae5c8 commit fe632e9

39 files changed

+949
-488
lines changed

include/ur_api.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5188,7 +5188,6 @@ typedef struct ur_kernel_arg_pointer_properties_t {
51885188
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
51895189
/// + `NULL == hKernel`
51905190
/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX
5191-
/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE
51925191
UR_APIEXPORT ur_result_t UR_APICALL
51935192
urKernelSetArgPointer(
51945193
ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object

scripts/core/kernel.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,6 @@ params:
352352
desc: "[in][optional] Pointer obtained by USM allocation or virtual memory mapping operation. If null then argument value is considered null."
353353
returns:
354354
- $X_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX
355-
- $X_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE
356355
--- #--------------------------------------------------------------------------
357356
type: struct
358357
desc: "Properties for for $xKernelSetExecInfo."

scripts/templates/ldrddi.cpp.mako

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -273,11 +273,17 @@ namespace ur_loader
273273
274274
%endif
275275
%endif
276-
## Before we can re-enable the releases we will need ref-counted object_t.
277-
## See unified-runtime github issue #1784
278-
##%if item['release']:
279-
##// release loader handle
280-
##${item['factory']}.release( ${item['name']} );
276+
## Possibly handle release/retain ref counting - there are no ur_exp-image factories
277+
%if 'factory' in item and '_exp_image_' not in item['factory']:
278+
%if item['release']:
279+
// release loader handle
280+
context->factories.${item['factory']}.release( ${item['name']} );
281+
%endif
282+
%if item['retain']:
283+
// increment refcount of handle
284+
context->factories.${item['factory']}.retain( ${item['name']} );
285+
%endif
286+
%endif
281287
%if not item['release'] and not item['retain'] and not '_native_object_' in item['obj'] or th.make_func_name(n, tags, obj) == 'urPlatformCreateWithNativeHandle':
282288
try
283289
{

source/adapters/level_zero/kernel.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -813,12 +813,13 @@ ur_result_t urKernelGetGroupInfo(
813813
(ZeKernelDevice, &kernelProperties));
814814
if (ZeResult || workGroupProperties.maxGroupSize == 0) {
815815
return ReturnValue(
816-
uint64_t{Device->ZeDeviceComputeProperties->maxTotalGroupSize});
816+
size_t{Device->ZeDeviceComputeProperties->maxTotalGroupSize});
817817
}
818-
return ReturnValue(workGroupProperties.maxGroupSize);
818+
// Specification states this returns a size_t.
819+
return ReturnValue(size_t{workGroupProperties.maxGroupSize});
819820
} else {
820821
return ReturnValue(
821-
uint64_t{Device->ZeDeviceComputeProperties->maxTotalGroupSize});
822+
size_t{Device->ZeDeviceComputeProperties->maxTotalGroupSize});
822823
}
823824
}
824825
case UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE: {
@@ -830,12 +831,12 @@ ur_result_t urKernelGetGroupInfo(
830831
return ReturnValue(WgSize);
831832
}
832833
case UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE:
833-
return ReturnValue(uint32_t{Kernel->ZeKernelProperties->localMemSize});
834+
return ReturnValue(size_t{Kernel->ZeKernelProperties->localMemSize});
834835
case UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: {
835836
return ReturnValue(size_t{Device->ZeDeviceProperties->physicalEUSimdWidth});
836837
}
837838
case UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE: {
838-
return ReturnValue(uint32_t{Kernel->ZeKernelProperties->privateMemSize});
839+
return ReturnValue(size_t{Kernel->ZeKernelProperties->privateMemSize});
839840
}
840841
case UR_KERNEL_GROUP_INFO_COMPILE_MAX_WORK_GROUP_SIZE:
841842
case UR_KERNEL_GROUP_INFO_COMPILE_MAX_LINEAR_WORK_GROUP_SIZE:

source/adapters/level_zero/v2/kernel.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -512,10 +512,11 @@ ur_result_t urKernelGetGroupInfo(
512512
ZE_CALL_NOCHECK(zeKernelGetProperties, (zeDevice, &kernelProperties));
513513
if (zeResult == ZE_RESULT_SUCCESS &&
514514
workGroupProperties.maxGroupSize != 0) {
515-
return returnValue(workGroupProperties.maxGroupSize);
515+
// Specification states this returns a size_t.
516+
return returnValue(size_t{workGroupProperties.maxGroupSize});
516517
}
517518
return returnValue(
518-
uint64_t{hDevice->ZeDeviceComputeProperties->maxTotalGroupSize});
519+
size_t{hDevice->ZeDeviceComputeProperties->maxTotalGroupSize});
519520
}
520521
case UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE: {
521522
auto props = hKernel->getProperties(hDevice);
@@ -527,15 +528,15 @@ ur_result_t urKernelGetGroupInfo(
527528
}
528529
case UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE: {
529530
auto props = hKernel->getProperties(hDevice);
530-
return returnValue(uint32_t{props.localMemSize});
531+
return returnValue(size_t{props.localMemSize});
531532
}
532533
case UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: {
533534
return returnValue(
534535
size_t{hDevice->ZeDeviceProperties->physicalEUSimdWidth});
535536
}
536537
case UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE: {
537538
auto props = hKernel->getProperties(hDevice);
538-
return returnValue(uint32_t{props.privateMemSize});
539+
return returnValue(size_t{props.privateMemSize});
539540
}
540541
case UR_KERNEL_GROUP_INFO_COMPILE_MAX_WORK_GROUP_SIZE:
541542
case UR_KERNEL_GROUP_INFO_COMPILE_MAX_LINEAR_WORK_GROUP_SIZE:

source/adapters/native_cpu/enqueue.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -138,12 +138,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
138138
#else
139139
bool isLocalSizeOne =
140140
ndr.LocalSize[0] == 1 && ndr.LocalSize[1] == 1 && ndr.LocalSize[2] == 1;
141-
if (isLocalSizeOne && ndr.GlobalSize[0] > numParallelThreads) {
141+
if (isLocalSizeOne && ndr.GlobalSize[0] > numParallelThreads &&
142+
!hKernel->hasLocalArgs()) {
142143
// If the local size is one, we make the assumption that we are running a
143144
// parallel_for over a sycl::range.
144-
// Todo: we could add compiler checks and
145-
// kernel properties for this (e.g. check that no barriers are called, no
146-
// local memory args).
145+
// Todo: we could add more compiler checks and
146+
// kernel properties for this (e.g. check that no barriers are called).
147147

148148
// Todo: this assumes that dim 0 is the best dimension over which we want to
149149
// parallelize

source/adapters/native_cpu/kernel.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,9 @@ struct ur_kernel_handle_t_ : RefCounted {
142142
_localMemPoolSize = reqSize;
143143
}
144144

145-
// To be called before executing a work group
145+
bool hasLocalArgs() const { return !_localArgInfo.empty(); }
146+
147+
// To be called before executing a work group if local args are present
146148
void handleLocalArgs(size_t numParallelThread, size_t threadId) {
147149
// For each local argument we have size*numthreads
148150
size_t offset = 0;

source/common/ur_singleton.hpp

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,20 +11,26 @@
1111
#ifndef UR_SINGLETON_H
1212
#define UR_SINGLETON_H 1
1313

14+
#include <cassert>
1415
#include <memory>
1516
#include <mutex>
1617
#include <unordered_map>
1718

1819
//////////////////////////////////////////////////////////////////////////
1920
/// a abstract factory for creation of singleton objects
2021
template <typename singleton_tn, typename key_tn> class singleton_factory_t {
22+
struct entry_t {
23+
std::unique_ptr<singleton_tn> ptr;
24+
size_t ref_count;
25+
};
26+
2127
protected:
2228
using singleton_t = singleton_tn;
2329
using key_t = typename std::conditional<std::is_pointer<key_tn>::value,
2430
size_t, key_tn>::type;
2531

2632
using ptr_t = std::unique_ptr<singleton_t>;
27-
using map_t = std::unordered_map<key_t, ptr_t>;
33+
using map_t = std::unordered_map<key_t, entry_t>;
2834

2935
std::mutex mut; ///< lock for thread-safety
3036
map_t map; ///< single instance of singleton for each unique key
@@ -60,16 +66,31 @@ template <typename singleton_tn, typename key_tn> class singleton_factory_t {
6066
if (map.end() == iter) {
6167
auto ptr =
6268
std::make_unique<singleton_t>(std::forward<Ts>(params)...);
63-
iter = map.emplace(key, std::move(ptr)).first;
69+
iter = map.emplace(key, entry_t{std::move(ptr), 0}).first;
70+
} else {
71+
iter->second.ref_count++;
6472
}
65-
return iter->second.get();
73+
return iter->second.ptr.get();
74+
}
75+
76+
void retain(key_tn key) {
77+
std::lock_guard<std::mutex> lk(mut);
78+
auto iter = map.find(getKey(key));
79+
assert(iter != map.end());
80+
iter->second.ref_count++;
6681
}
6782

6883
//////////////////////////////////////////////////////////////////////////
6984
/// once the key is no longer valid, release the singleton
7085
void release(key_tn key) {
7186
std::lock_guard<std::mutex> lk(mut);
72-
map.erase(getKey(key));
87+
auto iter = map.find(getKey(key));
88+
assert(iter != map.end());
89+
if (iter->second.ref_count == 0) {
90+
map.erase(iter);
91+
} else {
92+
iter->second.ref_count--;
93+
}
7394
}
7495

7596
void clear() {

0 commit comments

Comments
 (0)