rcache/grdma: Replace cuda functions with accelerator functions

wckzhang · wckzhang · commit f0580fdd3854 · 2022-09-27T15:13:51.000Z
Signed-off-by: William Zhang <wilzhang@amazon.com>
diff --git a/opal/class/opal_free_list.c b/opal/class/opal_free_list.c
@@ -52,7 +52,7 @@ static void opal_free_list_construct(opal_free_list_t *fl)
     fl->fl_mpool = NULL;
     fl->fl_rcache = NULL;
     /* default flags */
-    fl->fl_rcache_reg_flags = MCA_RCACHE_FLAGS_CACHE_BYPASS | MCA_RCACHE_FLAGS_CUDA_REGISTER_MEM;
+    fl->fl_rcache_reg_flags = MCA_RCACHE_FLAGS_CACHE_BYPASS | MCA_RCACHE_FLAGS_ACCELERATOR_REGISTER_MEM;
     fl->ctx = NULL;
     OBJ_CONSTRUCT(&(fl->fl_allocations), opal_list_t);
 }
@@ -190,7 +190,7 @@ int opal_free_list_grow_st(opal_free_list_t *flist, size_t num_elements,
         buffer_size = num_elements * elem_size;
         align = flist->fl_payload_buffer_alignment;
 
-        if (MCA_RCACHE_FLAGS_CUDA_REGISTER_MEM & flist->fl_rcache_reg_flags) {
+        if (MCA_RCACHE_FLAGS_ACCELERATOR_REGISTER_MEM & flist->fl_rcache_reg_flags) {
             size_t pagesize = opal_getpagesize();
             /* CUDA cannot handle registering overlapping regions, so make
              * sure each region is page sized and page aligned. */
diff --git a/opal/mca/btl/smcuda/btl_smcuda.c b/opal/mca/btl/smcuda/btl_smcuda.c
@@ -1006,7 +1006,7 @@ mca_btl_smcuda_register_mem(struct mca_btl_base_module_t *btl,
 
 #if OPAL_CUDA_GDR_SUPPORT
     if (MCA_BTL_REG_FLAG_CUDA_GPU_MEM & flags) {
-        rcache_flags |= MCA_RCACHE_FLAGS_CUDA_GPU_MEM;
+        rcache_flags |= MCA_RCACHE_FLAGS_ACCELERATOR_MEM;
     }
 #endif
 
diff --git a/opal/mca/rcache/grdma/rcache_grdma_module.c b/opal/mca/rcache/grdma/rcache_grdma_module.c
@@ -42,9 +42,6 @@
 #include "opal/mca/rcache/base/base.h"
 #include "opal/mca/rcache/rcache.h"
 #include "opal/mca/accelerator/accelerator.h"
-#if OPAL_CUDA_GDR_SUPPORT
-#include "opal/cuda/common_cuda.h"
-#endif /* OPAL_CUDA_GDR_SUPPORT */
 #include "opal/align.h"
 #include "opal/util/sys_limits.h"
 #include "rcache_grdma.h"
@@ -61,6 +58,7 @@ static int mca_rcache_grdma_invalidate_range(mca_rcache_base_module_t *rcache, v
 static void mca_rcache_grdma_finalize(mca_rcache_base_module_t *rcache);
 static bool mca_rcache_grdma_evict(mca_rcache_base_module_t *rcache);
 static int mca_rcache_grdma_add_to_gc(mca_rcache_base_registration_t *grdma_reg);
+static int check_for_accelerator_freed_memory(mca_rcache_base_module_t *rcache, void *addr, size_t size);
 
 static inline bool registration_flags_cacheable(uint32_t flags)
 {
@@ -75,9 +73,6 @@ static inline bool registration_is_cacheable(mca_rcache_base_registration_t *reg
     return registration_flags_cacheable(reg->flags);
 }
 
-#if OPAL_CUDA_GDR_SUPPORT
-static int check_for_cuda_freed_memory(mca_rcache_base_module_t *rcache, void *addr, size_t size);
-#endif /* OPAL_CUDA_GDR_SUPPORT */
 static void mca_rcache_grdma_cache_contructor(mca_rcache_grdma_cache_t *cache)
 {
     memset((void *) ((uintptr_t) cache + sizeof(cache->super)), 0,
@@ -328,8 +323,7 @@ static int mca_rcache_grdma_register(mca_rcache_base_module_t *rcache, void *add
     base = OPAL_DOWN_ALIGN_PTR(addr, page_size, unsigned char *);
     bound = OPAL_ALIGN_PTR((intptr_t) addr + size, page_size, unsigned char *) - 1;
 
-#if OPAL_CUDA_GDR_SUPPORT
-    if (flags & MCA_RCACHE_FLAGS_CUDA_GPU_MEM) {
+    if (flags & MCA_RCACHE_FLAGS_ACCELERATOR_MEM) {
         size_t psize;
         int res = opal_accelerator.get_address_range(MCA_ACCELERATOR_NO_DEVICE_ID, addr, (void **)&base, &psize);
         if (OPAL_SUCCESS != res) {
@@ -338,9 +332,8 @@ static int mca_rcache_grdma_register(mca_rcache_base_module_t *rcache, void *add
         bound = base + psize - 1;
         /* Check to see if this memory is in the cache and if it has been freed. If so,
          * this call will boot it out of the cache. */
-        check_for_cuda_freed_memory(rcache, base, psize);
+        check_for_accelerator_freed_memory(rcache, base, psize);
     }
-#endif /* OPAL_CUDA_GDR_SUPPORT */
 
     do_unregistration_gc(rcache);
 
@@ -378,11 +371,9 @@ static int mca_rcache_grdma_register(mca_rcache_base_module_t *rcache, void *add
     grdma_reg->flags = flags;
     grdma_reg->access_flags = access_flags;
     grdma_reg->ref_count = 1;
-#if OPAL_CUDA_GDR_SUPPORT
-    if (flags & MCA_RCACHE_FLAGS_CUDA_GPU_MEM) {
-        mca_common_cuda_get_buffer_id(grdma_reg);
+    if (flags & MCA_RCACHE_FLAGS_ACCELERATOR_MEM) {
+        opal_accelerator.get_buffer_id(MCA_ACCELERATOR_NO_DEVICE_ID, grdma_reg->base, &grdma_reg->gpu_bufID);
     }
-#endif /* OPAL_CUDA_GDR_SUPPORT */
 
     while (OPAL_ERR_OUT_OF_RESOURCE
            == (rc = rcache_grdma->resources.register_mem(rcache_grdma->resources.reg_data, base,
@@ -538,15 +529,34 @@ static int mca_rcache_grdma_invalidate_range(mca_rcache_base_module_t *rcache, v
                                        &args);
 }
 
+/* Check to see if the memory was freed between the time it was stored in
+ * the registration cache and now.  Return true if the memory was previously
+ * freed.  This is indicated by the BUFFER_ID value in the registration cache
+ * not matching the BUFFER_ID of the buffer we are checking, or by the buffer
+ * id lookup itself failing.  Return false if the registration is still good.
+ */
+static bool mca_rcache_accelerator_previously_freed_memory(mca_rcache_base_registration_t *reg)
+{
+    int res;
+    opal_accelerator_buffer_id_t buf_id;
+    unsigned char *dbuf = reg->base;
+
+    /* Keep the status so the failure check below tests a real value. */
+    res = opal_accelerator.get_buffer_id(MCA_ACCELERATOR_NO_DEVICE_ID, dbuf, &buf_id);
+    if (OPAL_UNLIKELY(res != OPAL_SUCCESS)) {
+        /* buf_id is not trustworthy here; report the registration as stale. */
+        return true;
+    }
+    /* A differing id means the cached registration no longer matches this buffer. */
+    return buf_id != reg->gpu_bufID;
+}
+
 /* Make sure this registration request is not stale.  In other words, ensure
  * that we do not have a cuMemAlloc, cuMemFree, cuMemAlloc state.  If we do
  * kick out the regisrations and deregister.  This function needs to be called
  * with the rcache->vma_module->vma_lock held. */
-#if OPAL_CUDA_GDR_SUPPORT
-
-static int check_for_cuda_freed_memory(mca_rcache_base_module_t *rcache, void *addr, size_t size)
+static int check_for_accelerator_freed_memory(mca_rcache_base_module_t *rcache, void *addr, size_t size)
 {
-    unsigned long long buf_id;
     mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t *) rcache;
     mca_rcache_base_registration_t *reg;
 
@@ -556,7 +566,7 @@ static int check_for_cuda_freed_memory(mca_rcache_base_module_t *rcache, void *a
     }
 
     /* If not previously freed memory, just return 0 */
-    if (!(mca_common_cuda_previously_freed_memory(reg))) {
+    if (!(mca_rcache_accelerator_previously_freed_memory(reg))) {
         return OPAL_SUCCESS;
     }
 
@@ -566,7 +576,6 @@ static int check_for_cuda_freed_memory(mca_rcache_base_module_t *rcache, void *a
     return mca_rcache_base_vma_iterate(rcache_grdma->cache->vma_module, addr, size, true, gc_add,
                                        NULL);
 }
-#endif /* OPAL_CUDA_GDR_SUPPORT */
 
 static void mca_rcache_grdma_finalize(mca_rcache_base_module_t *rcache)
 {
diff --git a/opal/mca/rcache/rcache.h b/opal/mca/rcache/rcache.h
@@ -28,6 +28,7 @@
 #include "opal/mca/mca.h"
 #include "opal/mca/mpool/mpool.h"
 #include "opal/mca/threads/mutex.h"
+#include "opal/mca/accelerator/accelerator.h"
 
 /* forward-declaration of rcache module structure */
 struct mca_rcache_base_module_t;
@@ -40,10 +41,10 @@ enum {
     MCA_RCACHE_FLAGS_PERSIST = 0x0002,
     /** registation requires strong ordering (disables relaxed ordering) */
     MCA_RCACHE_FLAGS_SO_MEM = 0x0004,
-    /** address range is cuda buffer */
-    MCA_RCACHE_FLAGS_CUDA_GPU_MEM = 0x0008,
-    /** register with common cuda */
-    MCA_RCACHE_FLAGS_CUDA_REGISTER_MEM = 0x0010,
+    /** address range is accelerator buffer */
+    MCA_RCACHE_FLAGS_ACCELERATOR_MEM = 0x0008,
+    /** register with accelerator framework */
+    MCA_RCACHE_FLAGS_ACCELERATOR_REGISTER_MEM = 0x0010,
     /** invalid registration (no valid for passing to rcache register) */
     MCA_RCACHE_FLAGS_INVALID = 0x0080,
     /** reserved for rcache module */
@@ -88,18 +89,16 @@ struct mca_rcache_base_registration_t {
     unsigned char *base;
     /** bound of registered region */
     unsigned char *bound;
-    /** artifact of old mpool/rcache architecture. used by cuda code */
+    /** artifact of old mpool/rcache architecture. */
     unsigned char *alloc_base;
     /** number of outstanding references */
     opal_atomic_int32_t ref_count;
     /** registration flags */
     opal_atomic_uint32_t flags;
     /** internal rcache context */
     void *rcache_context;
-#if OPAL_CUDA_GDR_SUPPORT
-    /** CUDA gpu buffer identifier */
-    unsigned long long gpu_bufID;
-#endif /* OPAL_CUDA_GDR_SUPPORT */
+    /** Accelerator buffer identifier */
+    opal_accelerator_buffer_id_t gpu_bufID;
     /** registration access flags */
     int32_t access_flags;
     unsigned char padding[64];

Original file line number	Diff line number	Diff line change
`@@ -1006,7 +1006,7 @@ mca_btl_smcuda_register_mem(struct mca_btl_base_module_t *btl,`
`1006`	`1006`
`1007`	`1007`	`#if OPAL_CUDA_GDR_SUPPORT`
`1008`	`1008`	`if (MCA_BTL_REG_FLAG_CUDA_GPU_MEM & flags) {`
`1009`		`- rcache_flags \|= MCA_RCACHE_FLAGS_CUDA_GPU_MEM;`
	`1009`	`+ rcache_flags \|= MCA_RCACHE_FLAGS_ACCELERATOR_MEM;`
`1010`	`1010`	`}`
`1011`	`1011`	`#endif`
`1012`	`1012`