Skip to content

Commit a69e1b5

Browse files
Merge pull request #1906 from nrspruit/flex_gpu_copy_engine
[L0] Add check for Intel Flex/Arc for disabling use of copy engines.
2 parents 6539561 + b816700 commit a69e1b5

File tree

5 files changed

+46
-0
lines changed

5 files changed

+46
-0
lines changed

source/adapters/level_zero/command_buffer.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -921,6 +921,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp(
921921

922922
bool PreferCopyEngine = !IsDevicePointer(CommandBuffer->Context, Src) ||
923923
!IsDevicePointer(CommandBuffer->Context, Dst);
924+
// For better performance, Copy Engines are not preferred given Shared
925+
// pointers on DG2.
926+
if (CommandBuffer->Device->isDG2() &&
927+
(IsSharedPointer(CommandBuffer->Context, Src) ||
928+
IsSharedPointer(CommandBuffer->Context, Dst))) {
929+
PreferCopyEngine = false;
930+
}
924931
PreferCopyEngine |= UseCopyEngineForD2DCopy;
925932

926933
return enqueueCommandBufferMemCopyHelper(

source/adapters/level_zero/device.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,9 @@ struct ur_device_handle_t_ : _ur_object {
189189
(ZeDeviceProperties->deviceId & 0xff0) == 0xb60;
190190
}
191191

192+
// Checks if this GPU is an Intel Flex GPU or Intel Arc Alchemist
193+
bool isDG2() { return (ZeDeviceProperties->deviceId & 0xff00) == 0x5600; }
194+
192195
bool isIntegrated() {
193196
return (ZeDeviceProperties->flags & ZE_DEVICE_PROPERTY_FLAG_INTEGRATED);
194197
}

source/adapters/level_zero/kernel.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -613,6 +613,11 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueDeviceGlobalVariableWrite(
613613
// Copy engine is preferred only for host to device transfer.
614614
// Device to device transfers run faster on compute engines.
615615
bool PreferCopyEngine = !IsDevicePointer(Queue->Context, Src);
616+
// For better performance, Copy Engines are not preferred given Shared
617+
// pointers on DG2.
618+
if (Queue->Device->isDG2() && IsSharedPointer(Queue->Context, Src)) {
619+
PreferCopyEngine = false;
620+
}
616621

617622
// Temporary option added to use copy engine for D2D copy
618623
PreferCopyEngine |= UseCopyEngineForD2DCopy;
@@ -663,6 +668,11 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueDeviceGlobalVariableRead(
663668
// Copy engine is preferred only for host to device transfer.
664669
// Device to device transfers run faster on compute engines.
665670
bool PreferCopyEngine = !IsDevicePointer(Queue->Context, Dst);
671+
// For better performance, Copy Engines are not preferred given Shared
672+
// pointers on DG2.
673+
if (Queue->Device->isDG2() && IsSharedPointer(Queue->Context, Dst)) {
674+
PreferCopyEngine = false;
675+
}
666676

667677
// Temporary option added to use copy engine for D2D copy
668678
PreferCopyEngine |= UseCopyEngineForD2DCopy;

source/adapters/level_zero/memory.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,19 @@ bool IsDevicePointer(ur_context_handle_t Context, const void *Ptr) {
4242
return (ZeMemoryAllocationProperties.type == ZE_MEMORY_TYPE_DEVICE);
4343
}
4444

45+
// Helper function to check if a pointer is a shared pointer.
46+
bool IsSharedPointer(ur_context_handle_t Context, const void *Ptr) {
47+
ze_device_handle_t ZeDeviceHandle;
48+
ZeStruct<ze_memory_allocation_properties_t> ZeMemoryAllocationProperties;
49+
50+
// Query memory type of the pointer
51+
ZE2UR_CALL(zeMemGetAllocProperties,
52+
(Context->ZeContext, Ptr, &ZeMemoryAllocationProperties,
53+
&ZeDeviceHandle));
54+
55+
return (ZeMemoryAllocationProperties.type == ZE_MEMORY_TYPE_SHARED);
56+
}
57+
4558
// Shared by all memory read/write/copy PI interfaces.
4659
// PI interfaces must have queue's and destination buffer's mutexes locked for
4760
// exclusive use and source buffer's mutex locked for shared use on entry.
@@ -1191,6 +1204,12 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMMemcpy(
11911204
// (versus compute engine).
11921205
bool PreferCopyEngine = !IsDevicePointer(Queue->Context, Src) ||
11931206
!IsDevicePointer(Queue->Context, Dst);
1207+
// For better performance, Copy Engines are not preferred given Shared
1208+
// pointers on DG2.
1209+
if (Queue->Device->isDG2() && (IsSharedPointer(Queue->Context, Src) ||
1210+
IsSharedPointer(Queue->Context, Dst))) {
1211+
PreferCopyEngine = false;
1212+
}
11941213

11951214
// Temporary option added to use copy engine for D2D copy
11961215
PreferCopyEngine |= UseCopyEngineForD2DCopy;
@@ -1390,6 +1409,12 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMMemcpy2D(
13901409
// (versus compute engine).
13911410
bool PreferCopyEngine = !IsDevicePointer(Queue->Context, Src) ||
13921411
!IsDevicePointer(Queue->Context, Dst);
1412+
// For better performance, Copy Engines are not preferred given Shared
1413+
// pointers on DG2.
1414+
if (Queue->Device->isDG2() && (IsSharedPointer(Queue->Context, Src) ||
1415+
IsSharedPointer(Queue->Context, Dst))) {
1416+
PreferCopyEngine = false;
1417+
}
13931418

13941419
// Temporary option added to use copy engine for D2D copy
13951420
PreferCopyEngine |= UseCopyEngineForD2DCopy;

source/adapters/level_zero/memory.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ using ur_queue_handle_legacy_t = ur_queue_handle_legacy_t_ *;
3232
struct ur_device_handle_t_;
3333

3434
bool IsDevicePointer(ur_context_handle_t Context, const void *Ptr);
35+
bool IsSharedPointer(ur_context_handle_t Context, const void *Ptr);
3536

3637
// This is an experimental option to test performance of device to device copy
3738
// operations on copy engines (versus compute engine)

0 commit comments

Comments
 (0)