Skip to content

Commit 9d45ead

Browse files
authored
[SYCL][CUDA] Using Custom context by default (#1742)
Performance analysis shows is better the default behaviour of SYCL Queues on CUDA backend is to use the non-primary context. Primary context will be exposed for interop with CUDA Runtime API on a separate extension as a property. Signed-off-by: Ruyman Reyes <[email protected]>
1 parent 857ee51 commit 9d45ead

File tree

3 files changed

+32
-9
lines changed

3 files changed

+32
-9
lines changed

sycl/plugins/cuda/pi_cuda.cpp

+20-6
Original file line numberDiff line numberDiff line change
@@ -3340,6 +3340,7 @@ pi_result cuda_piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer,
33403340
pi_event *retEvent, void **ret_map) {
33413341

33423342
assert(ret_map != nullptr);
3343+
assert(command_queue != nullptr);
33433344

33443345
pi_result ret_err = PI_INVALID_OPERATION;
33453346

@@ -3361,9 +3362,15 @@ pi_result cuda_piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer,
33613362
num_events_in_wait_list, event_wait_list, retEvent);
33623363
} else {
33633364
if (retEvent) {
3364-
*retEvent =
3365-
_pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_MAP, command_queue);
3366-
(*retEvent)->record();
3365+
try {
3366+
ScopedContext active(command_queue->get_context());
3367+
3368+
*retEvent = _pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_MAP,
3369+
command_queue);
3370+
(*retEvent)->record();
3371+
} catch (pi_result error) {
3372+
ret_err = error;
3373+
}
33673374
}
33683375
}
33693376

@@ -3380,6 +3387,7 @@ pi_result cuda_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj,
33803387
pi_event *retEvent) {
33813388
pi_result ret_err = PI_SUCCESS;
33823389

3390+
assert(command_queue != nullptr);
33833391
assert(mapped_ptr != nullptr);
33843392
assert(memobj != nullptr);
33853393
assert(memobj->get_map_ptr() != nullptr);
@@ -3393,9 +3401,15 @@ pi_result cuda_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj,
33933401
retEvent);
33943402
} else {
33953403
if (retEvent) {
3396-
*retEvent = _pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_UNMAP,
3397-
command_queue);
3398-
(*retEvent)->record();
3404+
try {
3405+
ScopedContext active(command_queue->get_context());
3406+
3407+
*retEvent = _pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_UNMAP,
3408+
command_queue);
3409+
(*retEvent)->record();
3410+
} catch (pi_result error) {
3411+
ret_err = error;
3412+
}
33993413
}
34003414
}
34013415

sycl/source/detail/context_impl.cpp

+3-2
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,9 @@ context_impl::context_impl(const vector_class<cl::sycl::device> Devices,
4343

4444
if (MPlatform->is_cuda()) {
4545
#if USE_PI_CUDA
46-
const pi_context_properties props[] = {PI_CONTEXT_PROPERTIES_CUDA_PRIMARY,
47-
UseCUDAPrimaryContext, 0};
46+
const pi_context_properties props[] = {
47+
static_cast<pi_context_properties>(PI_CONTEXT_PROPERTIES_CUDA_PRIMARY),
48+
static_cast<pi_context_properties>(UseCUDAPrimaryContext), 0};
4849

4950
getPlugin().call<PiApiKind::piContextCreate>(props, DeviceIds.size(),
5051
DeviceIds.data(), nullptr, nullptr, &MContext);

sycl/source/detail/queue_impl.hpp

+9-1
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,13 @@ using DeviceImplPtr = shared_ptr_class<detail::device_impl>;
3333
/// Sets max number of queues supported by FPGA RT.
3434
const size_t MaxNumQueues = 256;
3535

36+
//// Possible CUDA context types supported by PI CUDA backend
37+
/// TODO: Implement this as a property once there is an extension document
38+
enum class cuda_context_type : char { primary, custom };
39+
40+
/// Default context type created for CUDA backend
41+
constexpr cuda_context_type DefaultContextType = cuda_context_type::custom;
42+
3643
enum QueueOrder { Ordered, OOO };
3744

3845
class queue_impl {
@@ -50,7 +57,8 @@ class queue_impl {
5057
const property_list &PropList)
5158
: queue_impl(Device,
5259
detail::getSyclObjImpl(context(
53-
createSyclObjFromImpl<device>(Device), {}, true)),
60+
createSyclObjFromImpl<device>(Device), {},
61+
(DefaultContextType == cuda_context_type::primary))),
5462
AsyncHandler, Order, PropList){};
5563

5664
/// Constructs a SYCL queue with an async_handler and property_list provided

0 commit comments

Comments
 (0)