Skip to content

Commit 2c4303c

Browse files
authored
Merge pull request #1414 from GeorgeWeb/georgi/hip-fences
[HIP] Enable more ordering and scope capabilities for atomic fences
2 parents abe85cc + 3e011c7 commit 2c4303c

File tree

1 file changed

+20
-2
lines changed

1 file changed

+20
-2
lines changed

source/adapters/hip/device.cpp

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -782,8 +782,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
782782
UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE;
783783
return ReturnValue(Capabilities);
784784
}
785-
case UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
786-
case UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: {
785+
case UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: {
787786
// SYCL2020 4.6.4.2 minimum mandated capabilities for
788787
// atomic_memory_scope_capabilities.
789788
// Because scopes are hierarchical, wider scopes support all narrower
@@ -795,6 +794,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
795794
UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP;
796795
return ReturnValue(Capabilities);
797796
}
797+
case UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: {
798+
constexpr ur_memory_scope_capability_flags_t Capabilities =
799+
UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM |
800+
UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP |
801+
UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP |
802+
UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE |
803+
UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM;
804+
return ReturnValue(Capabilities);
805+
}
798806
case UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: {
799807
// SYCL2020 4.6.4.2 minimum mandated capabilities for
800808
// atomic_fence_order_capabilities.
@@ -803,6 +811,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
803811
UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE |
804812
UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE |
805813
UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL;
814+
#ifdef __HIP_PLATFORM_NVIDIA__
815+
// Nvidia supports fence.sc (needed for seq_cst fences) only from SM 7.0 onwards.
816+
int Major = 0;
817+
UR_CHECK_ERROR(hipDeviceGetAttribute(
818+
&Major, hipDeviceAttributeComputeCapabilityMajor, hDevice->get()));
819+
if (Major >= 7)
820+
Capabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST;
821+
#else
822+
Capabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST;
823+
#endif
806824
return ReturnValue(Capabilities);
807825
}
808826
case UR_DEVICE_INFO_DEVICE_ID: {

0 commit comments

Comments
 (0)