Skip to content

Commit 5d560b6

Browse files
committed
[Libomptarget] Implement the host memory allocator with fine-grained memory
This patch should enable the "Host" allocation using fine-grained memory. As far as I understand, this is HSA managed memory that is available to the host, but can be accessed by the device as well. The original patch that introduced these extensions just stipulated that it's "non-migratable" memory, which is most likely true because it's managed by the host but accessible by the device. This should work sufficiently well for what we expect the "host" allocation to do. Depends on D143771 Reviewed By: kevinsala Differential Revision: https://reviews.llvm.org/D143775
1 parent 5216a9b commit 5d560b6

File tree

3 files changed

+9
-3
lines changed

3 files changed

+9
-3
lines changed

openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1485,6 +1485,12 @@ struct AMDHostDeviceTy : public AMDGenericDeviceTy {
14851485
return *FineGrainedMemoryPools[0];
14861486
}
14871487

1488+
AMDGPUMemoryPoolTy &getCoarseGrainedMemoryPool() {
1489+
assert(!CoarseGrainedMemoryPools.empty() && "No coarse-grained mempool");
1490+
// Retrieve any memory pool.
1491+
return *CoarseGrainedMemoryPools[0];
1492+
}
1493+
14881494
/// Get a memory pool for kernel args allocations.
14891495
AMDGPUMemoryPoolTy &getArgsMemoryPool() {
14901496
assert(!ArgsMemoryPools.empty() && "No kernelargs mempool");
@@ -1762,6 +1768,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
17621768
MemoryPool = CoarseGrainedMemoryPools[0];
17631769
break;
17641770
case TARGET_ALLOC_HOST:
1771+
MemoryPool = &HostDevice.getFineGrainedMemoryPool();
17651772
break;
17661773
case TARGET_ALLOC_SHARED:
17671774
MemoryPool = &HostDevice.getFineGrainedMemoryPool();
@@ -2623,6 +2630,7 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
26232630
MemoryPool = CoarseGrainedMemoryPools[0];
26242631
break;
26252632
case TARGET_ALLOC_HOST:
2633+
MemoryPool = &HostDevice.getFineGrainedMemoryPool();
26262634
break;
26272635
case TARGET_ALLOC_SHARED:
26282636
MemoryPool = &HostDevice.getFineGrainedMemoryPool();

openmp/libomptarget/test/api/omp_host_pinned_memory.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
// RUN: %libomptarget-compile-run-and-check-generic
2-
// UNSUPPORTED: amdgcn-amd-amdhsa
32

43
#include <omp.h>
54
#include <stdio.h>

openmp/libomptarget/test/api/omp_host_pinned_memory_alloc.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
// RUN: %libomptarget-compile-run-and-check-generic
2-
// UNSUPPORTED: amdgcn-amd-amdhsa
32

43
#include <omp.h>
54
#include <stdio.h>
@@ -20,7 +19,7 @@ int main() {
2019
for (int i = 0; i < N; ++i)
2120
sum += hst_ptr[i];
2221

23-
omp_free(hst_ptr, llvm_omp_target_shared_mem_alloc);
22+
omp_free(hst_ptr, llvm_omp_target_host_mem_alloc);
2423
// CHECK: PASS
2524
if (sum == N)
2625
printf("PASS\n");

0 commit comments

Comments
 (0)