@@ -57,6 +57,27 @@ bool IsSharedPointer(ur_context_handle_t Context, const void *Ptr) {
57
57
return (ZeMemoryAllocationProperties.type == ZE_MEMORY_TYPE_SHARED);
58
58
}
59
59
60
+ // Helper Function to check if the Copy Engine should be preferred given the
61
+ // types of memory used.
62
+ bool PreferCopyEngineUsage (ur_device_handle_t Device,
63
+ ur_context_handle_t Context, const void *Src,
64
+ void *Dst) {
65
+ bool PreferCopyEngine = false ;
66
+ // Given Integrated Devices, Copy Engines are not preferred for any Copy
67
+ // operations.
68
+ if (!Device->isIntegrated ()) {
69
+ // Given non D2D Copies, for better performance, Copy Engines are preferred
70
+ // only if one has both the Main and Link Copy Engines.
71
+ if (Device->hasLinkCopyEngine () && Device->hasMainCopyEngine () &&
72
+ (!IsDevicePointer (Context, Src) || !IsDevicePointer (Context, Dst))) {
73
+ PreferCopyEngine = true ;
74
+ }
75
+ }
76
+ // Temporary option added to use force engine for D2D copy
77
+ PreferCopyEngine |= UseCopyEngineForD2DCopy;
78
+ return PreferCopyEngine;
79
+ }
80
+
60
81
// Shared by all memory read/write/copy PI interfaces.
61
82
// PI interfaces must have queue's and destination buffer's mutexes locked for
62
83
// exclusive use and source buffer's mutex locked for shared use on entry.
@@ -1189,23 +1210,10 @@ ur_result_t urEnqueueUSMMemcpy(
1189
1210
) {
1190
1211
std::scoped_lock<ur_shared_mutex> lock (Queue->Mutex );
1191
1212
1192
- // Device to Device copies are found to execute slower on copy engine
1193
- // (versus compute engine).
1194
- bool PreferCopyEngine = !IsDevicePointer (Queue->Context , Src) ||
1195
- !IsDevicePointer (Queue->Context , Dst);
1196
- // For better performance, Copy Engines are not preferred given Shared
1197
- // pointers on DG2.
1198
- if (Queue->Device ->isDG2 () && (IsSharedPointer (Queue->Context , Src) ||
1199
- IsSharedPointer (Queue->Context , Dst))) {
1200
- PreferCopyEngine = false ;
1201
- }
1202
-
1203
- // Temporary option added to use copy engine for D2D copy
1204
- PreferCopyEngine |= UseCopyEngineForD2DCopy;
1205
-
1206
1213
return enqueueMemCopyHelper ( // TODO: do we need a new command type for this?
1207
1214
UR_COMMAND_MEM_BUFFER_COPY, Queue, Dst, Blocking, Size, Src,
1208
- NumEventsInWaitList, EventWaitList, OutEvent, PreferCopyEngine);
1215
+ NumEventsInWaitList, EventWaitList, OutEvent,
1216
+ PreferCopyEngineUsage (Queue->Device , Queue->Context , Src, Dst));
1209
1217
}
1210
1218
1211
1219
ur_result_t urEnqueueUSMPrefetch (
@@ -1396,26 +1404,13 @@ ur_result_t urEnqueueUSMMemcpy2D(
1396
1404
1397
1405
std::scoped_lock<ur_shared_mutex> lock (Queue->Mutex );
1398
1406
1399
- // Device to Device copies are found to execute slower on copy engine
1400
- // (versus compute engine).
1401
- bool PreferCopyEngine = !IsDevicePointer (Queue->Context , Src) ||
1402
- !IsDevicePointer (Queue->Context , Dst);
1403
- // For better performance, Copy Engines are not preferred given Shared
1404
- // pointers on DG2.
1405
- if (Queue->Device ->isDG2 () && (IsSharedPointer (Queue->Context , Src) ||
1406
- IsSharedPointer (Queue->Context , Dst))) {
1407
- PreferCopyEngine = false ;
1408
- }
1409
-
1410
- // Temporary option added to use copy engine for D2D copy
1411
- PreferCopyEngine |= UseCopyEngineForD2DCopy;
1412
-
1413
1407
return enqueueMemCopyRectHelper ( // TODO: do we need a new command type for
1414
1408
// this?
1415
1409
UR_COMMAND_MEM_BUFFER_COPY_RECT, Queue, Src, Dst, ZeroOffset, ZeroOffset,
1416
1410
Region, SrcPitch, DstPitch, 0 , /* SrcSlicePitch=*/
1417
1411
0 , /* DstSlicePitch=*/
1418
- Blocking, NumEventsInWaitList, EventWaitList, Event, PreferCopyEngine);
1412
+ Blocking, NumEventsInWaitList, EventWaitList, Event,
1413
+ PreferCopyEngineUsage (Queue->Device , Queue->Context , Src, Dst));
1419
1414
}
1420
1415
1421
1416
static ur_result_t ur2zeImageDesc (const ur_image_format_t *ImageFormat,
0 commit comments