Skip to content

Commit 0e32bb5

Browse files
authored
Merge pull request #1415 from GeorgeWeb/georgi/hip-atomic-memory-caps
[HIP] Enable more ordering and scope capabilities for atomic memory ops
2 parents 2c4303c + 08b19b2 commit 0e32bb5

File tree

1 file changed

+24
-7
lines changed

1 file changed

+24
-7
lines changed

source/adapters/hip/device.cpp

Lines changed: 24 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -779,19 +779,36 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
779779
ur_memory_order_capability_flags_t Capabilities =
780780
UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED |
781781
UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE |
782-
UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE;
782+
UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE |
783+
UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL;
784+
#if __HIP_PLATFORM_NVIDIA__
785+
// fence.sc, which seq_cst relies on, is only available on NVIDIA GPUs from SM 7.0 onwards.
786+
int Major = 0;
787+
UR_CHECK_ERROR(hipDeviceGetAttribute(
788+
&Major, hipDeviceAttributeComputeCapabilityMajor, hDevice->get()));
789+
if (Major >= 7)
790+
Capabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST;
791+
#else
792+
Capabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST;
793+
#endif
783794
return ReturnValue(Capabilities);
784795
}
785796
case UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: {
786-
// SYCL2020 4.6.4.2 minimum mandated capabilities for
787-
// atomic_fence/memory_scope_capabilities.
788-
// Because scopes are hierarchical, wider scopes support all narrower
789-
// scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and
790-
// WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382)
791797
ur_memory_scope_capability_flags_t Capabilities =
792798
UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM |
793799
UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP |
794-
UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP;
800+
UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP |
801+
UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE;
802+
#if __HIP_PLATFORM_NVIDIA__
803+
// System-scope atomics are only available on NVIDIA GPUs from SM 6.0 onwards.
804+
int Major = 0;
805+
UR_CHECK_ERROR(hipDeviceGetAttribute(
806+
&Major, hipDeviceAttributeComputeCapabilityMajor, hDevice->get()));
807+
if (Major >= 6)
808+
Capabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM;
809+
#else
810+
Capabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM;
811+
#endif
795812
return ReturnValue(Capabilities);
796813
}
797814
case UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: {

0 commit comments

Comments
 (0)