Skip to content

Commit f47d1b6

Browse files
authored
Merge pull request #2228 from nrspruit/copy_engine_refactor
[L0] Refactor Copy Engine Usage checks for Performance
2 parents 5df5530 + 781e576 commit f47d1b6

File tree

3 files changed

+31
-42
lines changed

3 files changed

+31
-42
lines changed

source/adapters/level_zero/command_buffer.cpp

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -784,19 +784,10 @@ ur_result_t urCommandBufferAppendUSMMemcpyExp(
784784
std::ignore = Event;
785785
std::ignore = Command;
786786

787-
bool PreferCopyEngine = !IsDevicePointer(CommandBuffer->Context, Src) ||
788-
!IsDevicePointer(CommandBuffer->Context, Dst);
789-
// For better performance, Copy Engines are not preferred given Shared
790-
// pointers on DG2.
791-
if (CommandBuffer->Device->isDG2() &&
792-
(IsSharedPointer(CommandBuffer->Context, Src) ||
793-
IsSharedPointer(CommandBuffer->Context, Dst))) {
794-
PreferCopyEngine = false;
795-
}
796-
PreferCopyEngine |= UseCopyEngineForD2DCopy;
797-
798787
return enqueueCommandBufferMemCopyHelper(
799-
UR_COMMAND_USM_MEMCPY, CommandBuffer, Dst, Src, Size, PreferCopyEngine,
788+
UR_COMMAND_USM_MEMCPY, CommandBuffer, Dst, Src, Size,
789+
PreferCopyEngineUsage(CommandBuffer->Device, CommandBuffer->Context, Src,
790+
Dst),
800791
NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint);
801792
}
802793

source/adapters/level_zero/memory.cpp

Lines changed: 25 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,27 @@ bool IsSharedPointer(ur_context_handle_t Context, const void *Ptr) {
5757
return (ZeMemoryAllocationProperties.type == ZE_MEMORY_TYPE_SHARED);
5858
}
5959

60+
// Helper Function to check if the Copy Engine should be preferred given the
61+
// types of memory used.
62+
bool PreferCopyEngineUsage(ur_device_handle_t Device,
63+
ur_context_handle_t Context, const void *Src,
64+
void *Dst) {
65+
bool PreferCopyEngine = false;
66+
// Given Integrated Devices, Copy Engines are not preferred for any Copy
67+
// operations.
68+
if (!Device->isIntegrated()) {
69+
// Given non D2D Copies, for better performance, Copy Engines are preferred
70+
// only if one has both the Main and Link Copy Engines.
71+
if (Device->hasLinkCopyEngine() && Device->hasMainCopyEngine() &&
72+
(!IsDevicePointer(Context, Src) || !IsDevicePointer(Context, Dst))) {
73+
PreferCopyEngine = true;
74+
}
75+
}
76+
// Temporary option added to force use of the copy engine for D2D copy
77+
PreferCopyEngine |= UseCopyEngineForD2DCopy;
78+
return PreferCopyEngine;
79+
}
80+
6081
// Shared by all memory read/write/copy PI interfaces.
6182
// PI interfaces must have queue's and destination buffer's mutexes locked for
6283
// exclusive use and source buffer's mutex locked for shared use on entry.
@@ -1189,23 +1210,10 @@ ur_result_t urEnqueueUSMMemcpy(
11891210
) {
11901211
std::scoped_lock<ur_shared_mutex> lock(Queue->Mutex);
11911212

1192-
// Device to Device copies are found to execute slower on copy engine
1193-
// (versus compute engine).
1194-
bool PreferCopyEngine = !IsDevicePointer(Queue->Context, Src) ||
1195-
!IsDevicePointer(Queue->Context, Dst);
1196-
// For better performance, Copy Engines are not preferred given Shared
1197-
// pointers on DG2.
1198-
if (Queue->Device->isDG2() && (IsSharedPointer(Queue->Context, Src) ||
1199-
IsSharedPointer(Queue->Context, Dst))) {
1200-
PreferCopyEngine = false;
1201-
}
1202-
1203-
// Temporary option added to use copy engine for D2D copy
1204-
PreferCopyEngine |= UseCopyEngineForD2DCopy;
1205-
12061213
return enqueueMemCopyHelper( // TODO: do we need a new command type for this?
12071214
UR_COMMAND_MEM_BUFFER_COPY, Queue, Dst, Blocking, Size, Src,
1208-
NumEventsInWaitList, EventWaitList, OutEvent, PreferCopyEngine);
1215+
NumEventsInWaitList, EventWaitList, OutEvent,
1216+
PreferCopyEngineUsage(Queue->Device, Queue->Context, Src, Dst));
12091217
}
12101218

12111219
ur_result_t urEnqueueUSMPrefetch(
@@ -1396,26 +1404,13 @@ ur_result_t urEnqueueUSMMemcpy2D(
13961404

13971405
std::scoped_lock<ur_shared_mutex> lock(Queue->Mutex);
13981406

1399-
// Device to Device copies are found to execute slower on copy engine
1400-
// (versus compute engine).
1401-
bool PreferCopyEngine = !IsDevicePointer(Queue->Context, Src) ||
1402-
!IsDevicePointer(Queue->Context, Dst);
1403-
// For better performance, Copy Engines are not preferred given Shared
1404-
// pointers on DG2.
1405-
if (Queue->Device->isDG2() && (IsSharedPointer(Queue->Context, Src) ||
1406-
IsSharedPointer(Queue->Context, Dst))) {
1407-
PreferCopyEngine = false;
1408-
}
1409-
1410-
// Temporary option added to use copy engine for D2D copy
1411-
PreferCopyEngine |= UseCopyEngineForD2DCopy;
1412-
14131407
return enqueueMemCopyRectHelper( // TODO: do we need a new command type for
14141408
// this?
14151409
UR_COMMAND_MEM_BUFFER_COPY_RECT, Queue, Src, Dst, ZeroOffset, ZeroOffset,
14161410
Region, SrcPitch, DstPitch, 0, /*SrcSlicePitch=*/
14171411
0, /*DstSlicePitch=*/
1418-
Blocking, NumEventsInWaitList, EventWaitList, Event, PreferCopyEngine);
1412+
Blocking, NumEventsInWaitList, EventWaitList, Event,
1413+
PreferCopyEngineUsage(Queue->Device, Queue->Context, Src, Dst));
14191414
}
14201415

14211416
static ur_result_t ur2zeImageDesc(const ur_image_format_t *ImageFormat,

source/adapters/level_zero/memory.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ struct ur_device_handle_t_;
3030

3131
bool IsDevicePointer(ur_context_handle_t Context, const void *Ptr);
3232
bool IsSharedPointer(ur_context_handle_t Context, const void *Ptr);
33+
bool PreferCopyEngineUsage(ur_device_handle_t Device,
34+
ur_context_handle_t Context, const void *Src,
35+
void *Dst);
3336

3437
// This is an experimental option to test performance of device to device copy
3538
// operations on copy engines (versus compute engine)

0 commit comments

Comments
 (0)