Skip to content

Commit f0580fd

Browse files
committed
rcache/grdma: Replace cuda functions with accelerator functions
Signed-off-by: William Zhang <wilzhang@amazon.com>
1 parent 0d122f7 commit f0580fd

File tree

4 files changed

+40
-32
lines changed

4 files changed

+40
-32
lines changed

opal/class/opal_free_list.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ static void opal_free_list_construct(opal_free_list_t *fl)
5252
fl->fl_mpool = NULL;
5353
fl->fl_rcache = NULL;
5454
/* default flags */
55-
fl->fl_rcache_reg_flags = MCA_RCACHE_FLAGS_CACHE_BYPASS | MCA_RCACHE_FLAGS_CUDA_REGISTER_MEM;
55+
fl->fl_rcache_reg_flags = MCA_RCACHE_FLAGS_CACHE_BYPASS | MCA_RCACHE_FLAGS_ACCELERATOR_REGISTER_MEM;
5656
fl->ctx = NULL;
5757
OBJ_CONSTRUCT(&(fl->fl_allocations), opal_list_t);
5858
}
@@ -190,7 +190,7 @@ int opal_free_list_grow_st(opal_free_list_t *flist, size_t num_elements,
190190
buffer_size = num_elements * elem_size;
191191
align = flist->fl_payload_buffer_alignment;
192192

193-
if (MCA_RCACHE_FLAGS_CUDA_REGISTER_MEM & flist->fl_rcache_reg_flags) {
193+
if (MCA_RCACHE_FLAGS_ACCELERATOR_REGISTER_MEM & flist->fl_rcache_reg_flags) {
194194
size_t pagesize = opal_getpagesize();
195195
/* CUDA cannot handle registering overlapping regions, so make
196196
* sure each region is page sized and page aligned. */

opal/mca/btl/smcuda/btl_smcuda.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1006,7 +1006,7 @@ mca_btl_smcuda_register_mem(struct mca_btl_base_module_t *btl,
10061006

10071007
#if OPAL_CUDA_GDR_SUPPORT
10081008
if (MCA_BTL_REG_FLAG_CUDA_GPU_MEM & flags) {
1009-
rcache_flags |= MCA_RCACHE_FLAGS_CUDA_GPU_MEM;
1009+
rcache_flags |= MCA_RCACHE_FLAGS_ACCELERATOR_MEM;
10101010
}
10111011
#endif
10121012

opal/mca/rcache/grdma/rcache_grdma_module.c

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,6 @@
4242
#include "opal/mca/rcache/base/base.h"
4343
#include "opal/mca/rcache/rcache.h"
4444
#include "opal/mca/accelerator/accelerator.h"
45-
#if OPAL_CUDA_GDR_SUPPORT
46-
#include "opal/cuda/common_cuda.h"
47-
#endif /* OPAL_CUDA_GDR_SUPPORT */
4845
#include "opal/align.h"
4946
#include "opal/util/sys_limits.h"
5047
#include "rcache_grdma.h"
@@ -61,6 +58,7 @@ static int mca_rcache_grdma_invalidate_range(mca_rcache_base_module_t *rcache, v
6158
static void mca_rcache_grdma_finalize(mca_rcache_base_module_t *rcache);
6259
static bool mca_rcache_grdma_evict(mca_rcache_base_module_t *rcache);
6360
static int mca_rcache_grdma_add_to_gc(mca_rcache_base_registration_t *grdma_reg);
61+
static int check_for_accelerator_freed_memory(mca_rcache_base_module_t *rcache, void *addr, size_t size);
6462

6563
static inline bool registration_flags_cacheable(uint32_t flags)
6664
{
@@ -75,9 +73,6 @@ static inline bool registration_is_cacheable(mca_rcache_base_registration_t *reg
7573
return registration_flags_cacheable(reg->flags);
7674
}
7775

78-
#if OPAL_CUDA_GDR_SUPPORT
79-
static int check_for_cuda_freed_memory(mca_rcache_base_module_t *rcache, void *addr, size_t size);
80-
#endif /* OPAL_CUDA_GDR_SUPPORT */
8176
static void mca_rcache_grdma_cache_contructor(mca_rcache_grdma_cache_t *cache)
8277
{
8378
memset((void *) ((uintptr_t) cache + sizeof(cache->super)), 0,
@@ -328,8 +323,7 @@ static int mca_rcache_grdma_register(mca_rcache_base_module_t *rcache, void *add
328323
base = OPAL_DOWN_ALIGN_PTR(addr, page_size, unsigned char *);
329324
bound = OPAL_ALIGN_PTR((intptr_t) addr + size, page_size, unsigned char *) - 1;
330325

331-
#if OPAL_CUDA_GDR_SUPPORT
332-
if (flags & MCA_RCACHE_FLAGS_CUDA_GPU_MEM) {
326+
if (flags & MCA_RCACHE_FLAGS_ACCELERATOR_MEM) {
333327
size_t psize;
334328
int res = opal_accelerator.get_address_range(MCA_ACCELERATOR_NO_DEVICE_ID, addr, (void **)&base, &psize);
335329
if (OPAL_SUCCESS != res) {
@@ -338,9 +332,8 @@ static int mca_rcache_grdma_register(mca_rcache_base_module_t *rcache, void *add
338332
bound = base + psize - 1;
339333
/* Check to see if this memory is in the cache and if it has been freed. If so,
340334
* this call will boot it out of the cache. */
341-
check_for_cuda_freed_memory(rcache, base, psize);
335+
check_for_accelerator_freed_memory(rcache, base, psize);
342336
}
343-
#endif /* OPAL_CUDA_GDR_SUPPORT */
344337

345338
do_unregistration_gc(rcache);
346339

@@ -378,11 +371,9 @@ static int mca_rcache_grdma_register(mca_rcache_base_module_t *rcache, void *add
378371
grdma_reg->flags = flags;
379372
grdma_reg->access_flags = access_flags;
380373
grdma_reg->ref_count = 1;
381-
#if OPAL_CUDA_GDR_SUPPORT
382-
if (flags & MCA_RCACHE_FLAGS_CUDA_GPU_MEM) {
383-
mca_common_cuda_get_buffer_id(grdma_reg);
374+
if (flags & MCA_RCACHE_FLAGS_ACCELERATOR_MEM) {
375+
opal_accelerator.get_buffer_id(MCA_ACCELERATOR_NO_DEVICE_ID, grdma_reg->base, &grdma_reg->gpu_bufID);
384376
}
385-
#endif /* OPAL_CUDA_GDR_SUPPORT */
386377

387378
while (OPAL_ERR_OUT_OF_RESOURCE
388379
== (rc = rcache_grdma->resources.register_mem(rcache_grdma->resources.reg_data, base,
@@ -538,15 +529,34 @@ static int mca_rcache_grdma_invalidate_range(mca_rcache_base_module_t *rcache, v
538529
&args);
539530
}
540531

532+
/* Check to see if the memory was freed between the time it was stored in
533+
* the registration cache and now. Return true if the memory was previously
534+
* freed. This is indicated by the BUFFER_ID value in the registration cache
535+
* not matching the BUFFER_ID of the buffer we are checking. Return false
536+
* if the registration is still good.
537+
*/
538+
static bool mca_rcache_accelerator_previously_freed_memory(mca_rcache_base_registration_t *reg)
539+
{
540+
int res;
541+
opal_accelerator_buffer_id_t buf_id;
542+
unsigned char *dbuf = reg->base;
543+
opal_accelerator.get_buffer_id(MCA_ACCELERATOR_NO_DEVICE_ID, dbuf, &buf_id);
544+
if (OPAL_UNLIKELY(res != OPAL_SUCCESS)) {
545+
return true;
546+
}
547+
if (buf_id != reg->gpu_bufID) {
548+
return true;
549+
} else {
550+
return false;
551+
}
552+
}
553+
541554
/* Make sure this registration request is not stale. In other words, ensure
542555
* that we do not have a cuMemAlloc, cuMemFree, cuMemAlloc state. If we do
543556
* kick out the registrations and deregister. This function needs to be called
544557
* with the rcache->vma_module->vma_lock held. */
545-
#if OPAL_CUDA_GDR_SUPPORT
546-
547-
static int check_for_cuda_freed_memory(mca_rcache_base_module_t *rcache, void *addr, size_t size)
558+
static int check_for_accelerator_freed_memory(mca_rcache_base_module_t *rcache, void *addr, size_t size)
548559
{
549-
unsigned long long buf_id;
550560
mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t *) rcache;
551561
mca_rcache_base_registration_t *reg;
552562

@@ -556,7 +566,7 @@ static int check_for_cuda_freed_memory(mca_rcache_base_module_t *rcache, void *a
556566
}
557567

558568
/* If not previously freed memory, just return 0 */
559-
if (!(mca_common_cuda_previously_freed_memory(reg))) {
569+
if (!(mca_rcache_accelerator_previously_freed_memory(reg))) {
560570
return OPAL_SUCCESS;
561571
}
562572

@@ -566,7 +576,6 @@ static int check_for_cuda_freed_memory(mca_rcache_base_module_t *rcache, void *a
566576
return mca_rcache_base_vma_iterate(rcache_grdma->cache->vma_module, addr, size, true, gc_add,
567577
NULL);
568578
}
569-
#endif /* OPAL_CUDA_GDR_SUPPORT */
570579

571580
static void mca_rcache_grdma_finalize(mca_rcache_base_module_t *rcache)
572581
{

opal/mca/rcache/rcache.h

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "opal/mca/mca.h"
2929
#include "opal/mca/mpool/mpool.h"
3030
#include "opal/mca/threads/mutex.h"
31+
#include "opal/mca/accelerator/accelerator.h"
3132

3233
/* forward-declaration of rcache module structure */
3334
struct mca_rcache_base_module_t;
@@ -40,10 +41,10 @@ enum {
4041
MCA_RCACHE_FLAGS_PERSIST = 0x0002,
4142
/** registration requires strong ordering (disables relaxed ordering) */
4243
MCA_RCACHE_FLAGS_SO_MEM = 0x0004,
43-
/** address range is cuda buffer */
44-
MCA_RCACHE_FLAGS_CUDA_GPU_MEM = 0x0008,
45-
/** register with common cuda */
46-
MCA_RCACHE_FLAGS_CUDA_REGISTER_MEM = 0x0010,
44+
/** address range is accelerator buffer */
45+
MCA_RCACHE_FLAGS_ACCELERATOR_MEM = 0x0008,
46+
/** register with accelerator framework */
47+
MCA_RCACHE_FLAGS_ACCELERATOR_REGISTER_MEM = 0x0010,
4748
/** invalid registration (not valid for passing to rcache register) */
4849
MCA_RCACHE_FLAGS_INVALID = 0x0080,
4950
/** reserved for rcache module */
@@ -88,18 +89,16 @@ struct mca_rcache_base_registration_t {
8889
unsigned char *base;
8990
/** bound of registered region */
9091
unsigned char *bound;
91-
/** artifact of old mpool/rcache architecture. used by cuda code */
92+
/** artifact of old mpool/rcache architecture. */
9293
unsigned char *alloc_base;
9394
/** number of outstanding references */
9495
opal_atomic_int32_t ref_count;
9596
/** registration flags */
9697
opal_atomic_uint32_t flags;
9798
/** internal rcache context */
9899
void *rcache_context;
99-
#if OPAL_CUDA_GDR_SUPPORT
100-
/** CUDA gpu buffer identifier */
101-
unsigned long long gpu_bufID;
102-
#endif /* OPAL_CUDA_GDR_SUPPORT */
100+
/** Accelerator buffer identifier */
101+
opal_accelerator_buffer_id_t gpu_bufID;
103102
/** registration access flags */
104103
int32_t access_flags;
105104
unsigned char padding[64];

0 commit comments

Comments
 (0)