Skip to content

Commit 5216a9b

Browse files
committed
[Libomptarget] Enable the shared allocator for AMDGPU
Previously, the AMDGPU plugin did not support the `TARGET_ALLOC_SHARED` allocation kind. Instead, we used the fine-grained memory allocator for the "host" allocation kind, which is most likely not what is intended. Fine-grained memory can be accessed by all agents, so it should be considered shared. This patch removes the use of fine-grained memory for the host allocator and uses it for the shared allocator instead. A later patch will add support for host allocations via the `hsa_amd_memory_lock` method. Reviewed By: kevinsala Differential Revision: https://reviews.llvm.org/D143771
1 parent fa1eb2e commit 5216a9b

File tree

4 files changed

+11
-16
lines changed

4 files changed

+11
-16
lines changed

openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
#endif
5151
#else
5252
#include "hsa/hsa.h"
53-
#include "hsa_ext_amd.h"
53+
#include "hsa/hsa_ext_amd.h"
5454
#endif
5555

5656
namespace llvm {
@@ -1438,7 +1438,7 @@ struct AMDHostDeviceTy : public AMDGenericDeviceTy {
14381438
if (auto Err = ArgsMemoryManager.init(getArgsMemoryPool()))
14391439
return Err;
14401440

1441-
if (auto Err = PinnedMemoryManager.init(getHostMemoryPool()))
1441+
if (auto Err = PinnedMemoryManager.init(getFineGrainedMemoryPool()))
14421442
return Err;
14431443

14441444
return Plugin::success();
@@ -1478,8 +1478,8 @@ struct AMDHostDeviceTy : public AMDGenericDeviceTy {
14781478
/// Get one of the host agents. Always returns the first agent.
14791479
hsa_agent_t getAgent() const override { return Agents[0]; }
14801480

1481-
/// Get a memory pool for host pinned allocations.
1482-
AMDGPUMemoryPoolTy &getHostMemoryPool() {
1481+
/// Get a memory pool for fine-grained allocations.
1482+
AMDGPUMemoryPoolTy &getFineGrainedMemoryPool() {
14831483
assert(!FineGrainedMemoryPools.empty() && "No fine-grained mempool");
14841484
// Retrieve any memory pool.
14851485
return *FineGrainedMemoryPools[0];
@@ -1762,12 +1762,9 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
17621762
MemoryPool = CoarseGrainedMemoryPools[0];
17631763
break;
17641764
case TARGET_ALLOC_HOST:
1765-
MemoryPool = &HostDevice.getHostMemoryPool();
17661765
break;
17671766
case TARGET_ALLOC_SHARED:
1768-
// TODO: Not supported yet. We could look at fine-grained host memory
1769-
// pools that are accessible by this device. The allocation should be made
1770-
// explicitly accessible if it is not yet.
1767+
MemoryPool = &HostDevice.getFineGrainedMemoryPool();
17711768
break;
17721769
}
17731770

@@ -2626,12 +2623,9 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
26262623
MemoryPool = CoarseGrainedMemoryPools[0];
26272624
break;
26282625
case TARGET_ALLOC_HOST:
2629-
MemoryPool = &HostDevice.getHostMemoryPool();
26302626
break;
26312627
case TARGET_ALLOC_SHARED:
2632-
// TODO: Not supported yet. We could look at fine-grained host memory
2633-
// pools that are accessible by this device. The allocation should be made
2634-
// explicitly accessible if it is not yet.
2628+
MemoryPool = &HostDevice.getFineGrainedMemoryPool();
26352629
break;
26362630
}
26372631

@@ -2647,10 +2641,10 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
26472641
return nullptr;
26482642
}
26492643

2650-
if (Kind == TARGET_ALLOC_HOST && Alloc) {
2644+
if (Alloc && (Kind == TARGET_ALLOC_HOST || Kind == TARGET_ALLOC_SHARED)) {
26512645
auto &KernelAgents = Plugin::get<AMDGPUPluginTy>().getKernelAgents();
26522646

2653-
// Enable all kernel agents to access the host pinned buffer.
2647+
// Enable all kernel agents to access the buffer.
26542648
if (auto Err = MemoryPool->enableAccess(Alloc, Size, KernelAgents)) {
26552649
REPORT("%s\n", toString(std::move(Err)).data());
26562650
return nullptr;

openmp/libomptarget/test/api/omp_device_managed_memory.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda
2-
// REQUIRES: nvptx64-nvidia-cuda
1+
// RUN: %libomptarget-compile-run-and-check-generic
32

43
#include <omp.h>
54
#include <stdio.h>

openmp/libomptarget/test/api/omp_host_pinned_memory.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
// RUN: %libomptarget-compile-run-and-check-generic
2+
// UNSUPPORTED: amdgcn-amd-amdhsa
23

34
#include <omp.h>
45
#include <stdio.h>

openmp/libomptarget/test/api/omp_host_pinned_memory_alloc.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
// RUN: %libomptarget-compile-run-and-check-generic
2+
// UNSUPPORTED: amdgcn-amd-amdhsa
23

34
#include <omp.h>
45
#include <stdio.h>

0 commit comments

Comments
 (0)