Skip to content

Commit afd23c9

Browse files
authored
Merge pull request #10855 from wckzhang/buffer_id
opal/accelerator: Add get_buffer_id API
2 parents 5e5bacd + f0580fd commit afd23c9

File tree

8 files changed

+110
-36
lines changed

8 files changed

+110
-36
lines changed

opal/class/opal_free_list.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ static void opal_free_list_construct(opal_free_list_t *fl)
5252
fl->fl_mpool = NULL;
5353
fl->fl_rcache = NULL;
5454
/* default flags */
55-
fl->fl_rcache_reg_flags = MCA_RCACHE_FLAGS_CACHE_BYPASS | MCA_RCACHE_FLAGS_CUDA_REGISTER_MEM;
55+
fl->fl_rcache_reg_flags = MCA_RCACHE_FLAGS_CACHE_BYPASS | MCA_RCACHE_FLAGS_ACCELERATOR_REGISTER_MEM;
5656
fl->ctx = NULL;
5757
OBJ_CONSTRUCT(&(fl->fl_allocations), opal_list_t);
5858
}
@@ -190,7 +190,7 @@ int opal_free_list_grow_st(opal_free_list_t *flist, size_t num_elements,
190190
buffer_size = num_elements * elem_size;
191191
align = flist->fl_payload_buffer_alignment;
192192

193-
if (MCA_RCACHE_FLAGS_CUDA_REGISTER_MEM & flist->fl_rcache_reg_flags) {
193+
if (MCA_RCACHE_FLAGS_ACCELERATOR_REGISTER_MEM & flist->fl_rcache_reg_flags) {
194194
size_t pagesize = opal_getpagesize();
195195
/* CUDA cannot handle registering overlapping regions, so make
196196
* sure each region is page sized and page aligned. */

opal/mca/accelerator/accelerator.h

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,6 @@
7979

8080
BEGIN_C_DECLS
8181

82-
8382
#define MCA_ACCELERATOR_NO_DEVICE_ID -1
8483
/**
8584
* Accelerator flags
@@ -103,6 +102,8 @@ typedef enum {
103102
MCA_ACCELERATOR_TRANSFER_DTOD,
104103
} opal_accelerator_transfer_type_t;
105104

105+
typedef uint64_t opal_accelerator_buffer_id_t;
106+
106107
struct opal_accelerator_stream_t {
107108
opal_object_t super;
108109
/* Stream object */
@@ -359,6 +360,20 @@ typedef int (*opal_accelerator_base_module_get_device_fn_t)(
359360
typedef int (*opal_accelerator_base_module_device_can_access_peer_fn_t)(
360361
int *access, int dev1, int dev2);
361362

363+
/**
364+
* Retrieves the buffer ID of the accelerator buffer that contains the given address.
365+
* If MCA_ACCELERATOR_NO_DEVICE_ID is provided, there is no device/process pairing.
366+
*
367+
* @param[IN] dev_id ID of the device or MCA_ACCELERATOR_NO_DEVICE_ID
368+
* @param[IN] addr Buffer pointer to check
369+
* @param[OUT] buf_id ID of the given buffer
370+
*
371+
*
372+
* @return OPAL_SUCCESS or error status on failure
373+
*/
374+
typedef int (*opal_accelerator_base_module_get_buffer_id_fn_t)(
375+
int dev_id, const void *addr, opal_accelerator_buffer_id_t *buf_id);
376+
362377
/*
363378
* the standard public API data structure
364379
*/
@@ -384,6 +399,8 @@ typedef struct {
384399

385400
opal_accelerator_base_module_get_device_fn_t get_device;
386401
opal_accelerator_base_module_device_can_access_peer_fn_t device_can_access_peer;
402+
403+
opal_accelerator_base_module_get_buffer_id_fn_t get_buffer_id;
387404
} opal_accelerator_base_module_t;
388405

389406
/**

opal/mca/accelerator/cuda/accelerator_cuda.c

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ static int accelerator_cuda_host_unregister(int dev_id, void *ptr);
4747
static int accelerator_cuda_get_device(int *dev_id);
4848
static int accelerator_cuda_device_can_access_peer( int *access, int dev1, int dev2);
4949

50+
static int accelerator_cuda_get_buffer_id(int dev_id, const void *addr, opal_accelerator_buffer_id_t *buf_id);
51+
5052
opal_accelerator_base_module_t opal_accelerator_cuda_module =
5153
{
5254
accelerator_cuda_check_addr,
@@ -68,7 +70,9 @@ opal_accelerator_base_module_t opal_accelerator_cuda_module =
6870
accelerator_cuda_host_unregister,
6971

7072
accelerator_cuda_get_device,
71-
accelerator_cuda_device_can_access_peer
73+
accelerator_cuda_device_can_access_peer,
74+
75+
accelerator_cuda_get_buffer_id
7276
};
7377

7478
static int accelerator_cuda_check_addr(const void *addr, int *dev_id, uint64_t *flags)
@@ -538,3 +542,30 @@ static int accelerator_cuda_device_can_access_peer(int *access, int dev1, int de
538542
}
539543
return 0;
540544
}
545+
546+
/*
547+
* Get the buffer ID from the memory.
548+
* This is needed to ensure the cached registration is not stale. If
549+
* we fail to get buffer ID, print an error and set buffer ID to 0.
550+
* Also set SYNC_MEMOPS on any GPU registration to ensure that
551+
* synchronous copies complete before the buffer is accessed.
552+
*/
553+
static int accelerator_cuda_get_buffer_id(int dev_id, const void *addr, opal_accelerator_buffer_id_t *buf_id)
554+
{
555+
CUresult result;
556+
int enable = 1;
557+
result = opal_accelerator_cuda_func.cuPointerGetAttribute((unsigned long long *)buf_id, CU_POINTER_ATTRIBUTE_BUFFER_ID, (CUdeviceptr) addr);
558+
if (OPAL_UNLIKELY(result != CUDA_SUCCESS)) {
559+
opal_show_help("help-accelerator-cuda.txt", "bufferID failed", true, OPAL_PROC_MY_HOSTNAME,
560+
result);
561+
return result;
562+
}
563+
result = opal_accelerator_cuda_func.cuPointerSetAttribute(&enable, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS,
564+
(CUdeviceptr) addr);
565+
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
566+
opal_show_help("help-accelerator-cuda.txt", "cuPointerSetAttribute failed", true,
567+
OPAL_PROC_MY_HOSTNAME, result, addr);
568+
return result;
569+
}
570+
return OPAL_SUCCESS;
571+
}

opal/mca/accelerator/null/accelerator_null_component.c

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ static int accelerator_null_host_unregister(int dev_id, void *ptr);
6161
static int accelerator_null_get_device(int *dev_id);
6262
static int accelerator_null_device_can_access_peer(int *access, int dev1, int dev2);
6363

64+
static int accelerator_null_get_buffer_id(int dev_id, const void *addr, opal_accelerator_buffer_id_t *buf_id);
65+
6466
/*
6567
* Instantiate the public struct with all of our public information
6668
* and pointers to our public functions in it
@@ -120,7 +122,9 @@ opal_accelerator_base_module_t opal_accelerator_null_module =
120122
accelerator_null_host_unregister,
121123

122124
accelerator_null_get_device,
123-
accelerator_null_device_can_access_peer
125+
accelerator_null_device_can_access_peer,
126+
127+
accelerator_null_get_buffer_id
124128
};
125129

126130
static int accelerator_null_open(void)
@@ -235,3 +239,8 @@ static int accelerator_null_device_can_access_peer( int *access, int dev1, int d
235239
{
236240
return OPAL_ERR_NOT_IMPLEMENTED;
237241
}
242+
243+
/*
 * Null-component stub for the get_buffer_id entry point. It never
 * produces a buffer ID and always reports that the operation is not
 * implemented.
 */
static int accelerator_null_get_buffer_id(int dev_id, const void *addr, opal_accelerator_buffer_id_t *buf_id)
{
    /* All arguments are intentionally ignored; *buf_id is left untouched. */
    (void) dev_id;
    (void) addr;
    (void) buf_id;

    return OPAL_ERR_NOT_IMPLEMENTED;
}

opal/mca/accelerator/rocm/accelerator_rocm_module.c

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ static int mca_accelerator_rocm_host_unregister(int dev_id, void *ptr);
3737
static int mca_accelerator_rocm_get_device(int *dev_id);
3838
static int mca_accelerator_rocm_device_can_access_peer( int *access, int dev1, int dev2);
3939

40+
static int mca_accelerator_rocm_get_buffer_id(int dev_id, const void *addr, opal_accelerator_buffer_id_t *buf_id);
4041

4142
opal_accelerator_base_module_t opal_accelerator_rocm_module =
4243
{
@@ -59,7 +60,9 @@ opal_accelerator_base_module_t opal_accelerator_rocm_module =
5960
mca_accelerator_rocm_host_unregister,
6061

6162
mca_accelerator_rocm_get_device,
62-
mca_accelerator_rocm_device_can_access_peer
63+
mca_accelerator_rocm_device_can_access_peer,
64+
65+
mca_accelerator_rocm_get_buffer_id
6366
};
6467

6568

@@ -483,3 +486,9 @@ static int mca_accelerator_rocm_device_can_access_peer(int *access, int dev1, in
483486

484487
return OPAL_SUCCESS;
485488
}
489+
490+
/*
 * ROCm implementation of the get_buffer_id entry point.
 *
 * The name carries the mca_ prefix so that it matches the forward
 * declaration and the opal_accelerator_rocm_module initializer in this file.
 *
 * @param[IN]  dev_id  Unused.
 * @param[IN]  addr    Unused.
 * @param[OUT] buf_id  Always set to 0.
 *
 * @return OPAL_SUCCESS
 */
static int mca_accelerator_rocm_get_buffer_id(int dev_id, const void *addr, opal_accelerator_buffer_id_t *buf_id)
{
    *buf_id = 0;
    return OPAL_SUCCESS;
}

opal/mca/btl/smcuda/btl_smcuda.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1006,7 +1006,7 @@ mca_btl_smcuda_register_mem(struct mca_btl_base_module_t *btl,
10061006

10071007
#if OPAL_CUDA_GDR_SUPPORT
10081008
if (MCA_BTL_REG_FLAG_CUDA_GPU_MEM & flags) {
1009-
rcache_flags |= MCA_RCACHE_FLAGS_CUDA_GPU_MEM;
1009+
rcache_flags |= MCA_RCACHE_FLAGS_ACCELERATOR_MEM;
10101010
}
10111011
#endif
10121012

opal/mca/rcache/grdma/rcache_grdma_module.c

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,6 @@
4242
#include "opal/mca/rcache/base/base.h"
4343
#include "opal/mca/rcache/rcache.h"
4444
#include "opal/mca/accelerator/accelerator.h"
45-
#if OPAL_CUDA_GDR_SUPPORT
46-
#include "opal/cuda/common_cuda.h"
47-
#endif /* OPAL_CUDA_GDR_SUPPORT */
4845
#include "opal/align.h"
4946
#include "opal/util/sys_limits.h"
5047
#include "rcache_grdma.h"
@@ -61,6 +58,7 @@ static int mca_rcache_grdma_invalidate_range(mca_rcache_base_module_t *rcache, v
6158
static void mca_rcache_grdma_finalize(mca_rcache_base_module_t *rcache);
6259
static bool mca_rcache_grdma_evict(mca_rcache_base_module_t *rcache);
6360
static int mca_rcache_grdma_add_to_gc(mca_rcache_base_registration_t *grdma_reg);
61+
static int check_for_accelerator_freed_memory(mca_rcache_base_module_t *rcache, void *addr, size_t size);
6462

6563
static inline bool registration_flags_cacheable(uint32_t flags)
6664
{
@@ -75,9 +73,6 @@ static inline bool registration_is_cacheable(mca_rcache_base_registration_t *reg
7573
return registration_flags_cacheable(reg->flags);
7674
}
7775

78-
#if OPAL_CUDA_GDR_SUPPORT
79-
static int check_for_cuda_freed_memory(mca_rcache_base_module_t *rcache, void *addr, size_t size);
80-
#endif /* OPAL_CUDA_GDR_SUPPORT */
8176
static void mca_rcache_grdma_cache_contructor(mca_rcache_grdma_cache_t *cache)
8277
{
8378
memset((void *) ((uintptr_t) cache + sizeof(cache->super)), 0,
@@ -328,8 +323,7 @@ static int mca_rcache_grdma_register(mca_rcache_base_module_t *rcache, void *add
328323
base = OPAL_DOWN_ALIGN_PTR(addr, page_size, unsigned char *);
329324
bound = OPAL_ALIGN_PTR((intptr_t) addr + size, page_size, unsigned char *) - 1;
330325

331-
#if OPAL_CUDA_GDR_SUPPORT
332-
if (flags & MCA_RCACHE_FLAGS_CUDA_GPU_MEM) {
326+
if (flags & MCA_RCACHE_FLAGS_ACCELERATOR_MEM) {
333327
size_t psize;
334328
int res = opal_accelerator.get_address_range(MCA_ACCELERATOR_NO_DEVICE_ID, addr, (void **)&base, &psize);
335329
if (OPAL_SUCCESS != res) {
@@ -338,9 +332,8 @@ static int mca_rcache_grdma_register(mca_rcache_base_module_t *rcache, void *add
338332
bound = base + psize - 1;
339333
/* Check to see if this memory is in the cache and if it has been freed. If so,
340334
* this call will boot it out of the cache. */
341-
check_for_cuda_freed_memory(rcache, base, psize);
335+
check_for_accelerator_freed_memory(rcache, base, psize);
342336
}
343-
#endif /* OPAL_CUDA_GDR_SUPPORT */
344337

345338
do_unregistration_gc(rcache);
346339

@@ -378,11 +371,9 @@ static int mca_rcache_grdma_register(mca_rcache_base_module_t *rcache, void *add
378371
grdma_reg->flags = flags;
379372
grdma_reg->access_flags = access_flags;
380373
grdma_reg->ref_count = 1;
381-
#if OPAL_CUDA_GDR_SUPPORT
382-
if (flags & MCA_RCACHE_FLAGS_CUDA_GPU_MEM) {
383-
mca_common_cuda_get_buffer_id(grdma_reg);
374+
if (flags & MCA_RCACHE_FLAGS_ACCELERATOR_MEM) {
375+
opal_accelerator.get_buffer_id(MCA_ACCELERATOR_NO_DEVICE_ID, grdma_reg->base, &grdma_reg->gpu_bufID);
384376
}
385-
#endif /* OPAL_CUDA_GDR_SUPPORT */
386377

387378
while (OPAL_ERR_OUT_OF_RESOURCE
388379
== (rc = rcache_grdma->resources.register_mem(rcache_grdma->resources.reg_data, base,
@@ -538,15 +529,34 @@ static int mca_rcache_grdma_invalidate_range(mca_rcache_base_module_t *rcache, v
538529
&args);
539530
}
540531

532+
/* Check to see if the memory was freed between the time it was stored in
533+
* the registration cache and now. Return true if the memory was previously
534+
* freed. This is indicated by the BUFFER_ID value in the registration cache
535+
* not matching the BUFFER_ID of the buffer we are checking. Return false
536+
* if the registration is still good.
537+
*/
538+
static bool mca_rcache_accelerator_previously_freed_memory(mca_rcache_base_registration_t *reg)
539+
{
540+
int res;
541+
opal_accelerator_buffer_id_t buf_id;
542+
unsigned char *dbuf = reg->base;
543+
opal_accelerator.get_buffer_id(MCA_ACCELERATOR_NO_DEVICE_ID, dbuf, &buf_id);
544+
if (OPAL_UNLIKELY(res != OPAL_SUCCESS)) {
545+
return true;
546+
}
547+
if (buf_id != reg->gpu_bufID) {
548+
return true;
549+
} else {
550+
return false;
551+
}
552+
}
553+
541554
/* Make sure this registration request is not stale. In other words, ensure
542555
* that we do not have a cuMemAlloc, cuMemFree, cuMemAlloc state. If we do
543556
* kick out the registrations and deregister. This function needs to be called
544557
* with the rcache->vma_module->vma_lock held. */
545-
#if OPAL_CUDA_GDR_SUPPORT
546-
547-
static int check_for_cuda_freed_memory(mca_rcache_base_module_t *rcache, void *addr, size_t size)
558+
static int check_for_accelerator_freed_memory(mca_rcache_base_module_t *rcache, void *addr, size_t size)
548559
{
549-
unsigned long long buf_id;
550560
mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t *) rcache;
551561
mca_rcache_base_registration_t *reg;
552562

@@ -556,7 +566,7 @@ static int check_for_cuda_freed_memory(mca_rcache_base_module_t *rcache, void *a
556566
}
557567

558568
/* If not previously freed memory, just return 0 */
559-
if (!(mca_common_cuda_previously_freed_memory(reg))) {
569+
if (!(mca_rcache_accelerator_previously_freed_memory(reg))) {
560570
return OPAL_SUCCESS;
561571
}
562572

@@ -566,7 +576,6 @@ static int check_for_cuda_freed_memory(mca_rcache_base_module_t *rcache, void *a
566576
return mca_rcache_base_vma_iterate(rcache_grdma->cache->vma_module, addr, size, true, gc_add,
567577
NULL);
568578
}
569-
#endif /* OPAL_CUDA_GDR_SUPPORT */
570579

571580
static void mca_rcache_grdma_finalize(mca_rcache_base_module_t *rcache)
572581
{

opal/mca/rcache/rcache.h

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "opal/mca/mca.h"
2929
#include "opal/mca/mpool/mpool.h"
3030
#include "opal/mca/threads/mutex.h"
31+
#include "opal/mca/accelerator/accelerator.h"
3132

3233
/* forward-declaration of rcache module structure */
3334
struct mca_rcache_base_module_t;
@@ -40,10 +41,10 @@ enum {
4041
MCA_RCACHE_FLAGS_PERSIST = 0x0002,
4142
/** registration requires strong ordering (disables relaxed ordering) */
4243
MCA_RCACHE_FLAGS_SO_MEM = 0x0004,
43-
/** address range is cuda buffer */
44-
MCA_RCACHE_FLAGS_CUDA_GPU_MEM = 0x0008,
45-
/** register with common cuda */
46-
MCA_RCACHE_FLAGS_CUDA_REGISTER_MEM = 0x0010,
44+
/** address range is accelerator buffer */
45+
MCA_RCACHE_FLAGS_ACCELERATOR_MEM = 0x0008,
46+
/** register with accelerator framework */
47+
MCA_RCACHE_FLAGS_ACCELERATOR_REGISTER_MEM = 0x0010,
4748
/** invalid registration (not valid for passing to rcache register) */
4849
MCA_RCACHE_FLAGS_INVALID = 0x0080,
4950
/** reserved for rcache module */
@@ -88,18 +89,16 @@ struct mca_rcache_base_registration_t {
8889
unsigned char *base;
8990
/** bound of registered region */
9091
unsigned char *bound;
91-
/** artifact of old mpool/rcache architecture. used by cuda code */
92+
/** artifact of old mpool/rcache architecture. */
9293
unsigned char *alloc_base;
9394
/** number of outstanding references */
9495
opal_atomic_int32_t ref_count;
9596
/** registration flags */
9697
opal_atomic_uint32_t flags;
9798
/** internal rcache context */
9899
void *rcache_context;
99-
#if OPAL_CUDA_GDR_SUPPORT
100-
/** CUDA gpu buffer identifier */
101-
unsigned long long gpu_bufID;
102-
#endif /* OPAL_CUDA_GDR_SUPPORT */
100+
/** Accelerator buffer identifier */
101+
opal_accelerator_buffer_id_t gpu_bufID;
103102
/** registration access flags */
104103
int32_t access_flags;
105104
unsigned char padding[64];

0 commit comments

Comments
 (0)