Skip to content

Commit 4002fbc

Browse files
committed
accelerator: introduce compare_ipc_handles fnct
Comparing IPC handles is not always just a memcmp of the two handles. Introduce an abstraction for this functionality. Keep the memcmp that was used so far for the cuda and ze components, but compare only certain parts of the IPC handle in rocm. Signed-off-by: Edgar Gabriel <Edgar.Gabriel@amd.com>
1 parent a55e9b2 commit 4002fbc

File tree

6 files changed

+69
-5
lines changed

6 files changed

+69
-5
lines changed

opal/mca/accelerator/accelerator.h

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,8 @@ typedef int (*opal_accelerator_base_module_get_address_range_fn_t)(
366366
*
367367
* opal_accelerator_base_module_get_ipc_handle_fn_t()
368368
* opal_accelerator_base_module_open_ipc_handle_fn_t()
369-
* opal_accelerator_base_module_import_ipc_event_handle_fn_t()
369+
* opal_accelerator_base_module_import_ipc_handle_fn_t()
370+
* opal_accelerator_base_module_compare_ipc_handles_fn_t()
370371
* opal_accelerator_base_module_get_ipc_event_handle_fn_t()
371372
* opal_accelerator_base_module_open_ipc_event_handle_fn_t()
372373
* opal_accelerator_base_module_import_ipc_event_handle_fn_t()
@@ -427,6 +428,19 @@ typedef int (*opal_accelerator_base_module_import_ipc_handle_fn_t)(
427428
typedef int (*opal_accelerator_base_module_open_ipc_handle_fn_t)(
428429
int dev_id, opal_accelerator_ipc_handle_t *handle, void **dev_ptr);
429430

431+
/**
432+
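* Compare two IPC handles. The comparison is component specific and is not
* necessarily a byte-wise comparison of the complete handle buffers.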
433+
*
434+
* @param[IN] handle_1 First IPC handle
435+
* @param[IN] handle_2 Second IPC handle
436+
*
437+
* @return zero if IPC handles are identical
438+
* non-zero value otherwise
439+
*/
440+
441+
typedef int (*opal_accelerator_base_module_compare_ipc_handles_fn_t)(
442+
uint8_t handle_1[IPC_MAX_HANDLE_SIZE], uint8_t handle_2[IPC_MAX_HANDLE_SIZE]);
443+
430444
/**
431445
* Gets an IPC event handle for an event created by opal_accelerator_base_module_create_event_fn_t.
432446
* This interface assumes that the object has been declared statically,
@@ -568,6 +582,7 @@ typedef struct {
568582
opal_accelerator_base_module_get_ipc_handle_fn_t get_ipc_handle;
569583
opal_accelerator_base_module_import_ipc_handle_fn_t import_ipc_handle;
570584
opal_accelerator_base_module_open_ipc_handle_fn_t open_ipc_handle;
585+
opal_accelerator_base_module_compare_ipc_handles_fn_t compare_ipc_handles;
571586
opal_accelerator_base_module_get_ipc_event_handle_fn_t get_ipc_event_handle;
572587
opal_accelerator_base_module_import_ipc_event_handle_fn_t import_ipc_event_handle;
573588
opal_accelerator_base_module_open_ipc_event_handle_fn_t open_ipc_event_handle;

opal/mca/accelerator/cuda/accelerator_cuda.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ static int accelerator_cuda_import_ipc_handle(int dev_id, uint8_t ipc_handle[IPC
4949
opal_accelerator_ipc_handle_t *handle);
5050
static int accelerator_cuda_open_ipc_handle(int dev_id, opal_accelerator_ipc_handle_t *handle,
5151
void **dev_ptr);
52+
static int accelerator_cuda_compare_ipc_handles(uint8_t handle_1[IPC_MAX_HANDLE_SIZE],
53+
uint8_t handle_2[IPC_MAX_HANDLE_SIZE]);
5254
static int accelerator_cuda_get_ipc_event_handle(opal_accelerator_event_t *event,
5355
opal_accelerator_ipc_event_handle_t *handle);
5456
static int accelerator_cuda_import_ipc_event_handle(uint8_t ipc_handle[IPC_MAX_HANDLE_SIZE],
@@ -89,6 +91,7 @@ opal_accelerator_base_module_t opal_accelerator_cuda_module =
8991
accelerator_cuda_get_ipc_handle,
9092
accelerator_cuda_import_ipc_handle,
9193
accelerator_cuda_open_ipc_handle,
94+
accelerator_cuda_compare_ipc_handles,
9295
accelerator_cuda_get_ipc_event_handle,
9396
accelerator_cuda_import_ipc_event_handle,
9497
accelerator_cuda_open_ipc_event_handle,
@@ -584,6 +587,12 @@ static int accelerator_cuda_open_ipc_handle(int dev_id, opal_accelerator_ipc_han
584587
return OPAL_ERR_NOT_IMPLEMENTED;
585588
}
586589

590+
static int accelerator_cuda_compare_ipc_handles(uint8_t handle_1[IPC_MAX_HANDLE_SIZE],
591+
uint8_t handle_2[IPC_MAX_HANDLE_SIZE])
592+
{
593+
return memcmp(handle_1, handle_2, IPC_MAX_HANDLE_SIZE);
594+
}
595+
587596
static int accelerator_cuda_get_ipc_event_handle(opal_accelerator_event_t *event,
588597
opal_accelerator_ipc_event_handle_t *handle)
589598
{

opal/mca/accelerator/null/accelerator_null_component.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ static int accelerator_null_import_ipc_handle(int dev_id, uint8_t ipc_handle[IPC
6464
opal_accelerator_ipc_handle_t *handle);
6565
static int accelerator_null_open_ipc_handle(int dev_id, opal_accelerator_ipc_handle_t *handle,
6666
void **dev_ptr);
67+
static int accelerator_null_compare_ipc_handles(uint8_t handle_1[IPC_MAX_HANDLE_SIZE],
68+
uint8_t handle_2[IPC_MAX_HANDLE_SIZE]);
6769
static int accelerator_null_get_ipc_event_handle(opal_accelerator_event_t *event,
6870
opal_accelerator_ipc_event_handle_t *handle);
6971
static int accelerator_null_import_ipc_event_handle(uint8_t ipc_handle[IPC_MAX_HANDLE_SIZE],
@@ -140,6 +142,7 @@ opal_accelerator_base_module_t opal_accelerator_null_module =
140142
accelerator_null_get_ipc_handle,
141143
accelerator_null_import_ipc_handle,
142144
accelerator_null_open_ipc_handle,
145+
accelerator_null_compare_ipc_handles,
143146
accelerator_null_get_ipc_event_handle,
144147
accelerator_null_import_ipc_event_handle,
145148
accelerator_null_open_ipc_event_handle,
@@ -275,6 +278,12 @@ static int accelerator_null_open_ipc_handle(int dev_id, opal_accelerator_ipc_han
275278
return OPAL_ERR_NOT_IMPLEMENTED;
276279
}
277280

281+
static int accelerator_null_compare_ipc_handles(uint8_t handle_1[IPC_MAX_HANDLE_SIZE],
282+
uint8_t handle_2[IPC_MAX_HANDLE_SIZE])
283+
{
284+
return OPAL_ERR_NOT_IMPLEMENTED;
285+
}
286+
278287
static int accelerator_null_get_ipc_event_handle(opal_accelerator_event_t *event,
279288
opal_accelerator_ipc_event_handle_t *handle)
280289
{

opal/mca/accelerator/rocm/accelerator_rocm_module.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ static int mca_accelerator_rocm_import_ipc_handle(int dev_id, uint8_t ipc_handle
4141
opal_accelerator_ipc_handle_t *handle);
4242
static int mca_accelerator_rocm_open_ipc_handle(int dev_id, opal_accelerator_ipc_handle_t *handle,
4343
void **dev_ptr);
44+
static int mca_accelerator_rocm_compare_ipc_handles(uint8_t handle_1[IPC_MAX_HANDLE_SIZE],
45+
uint8_t handle_2[IPC_MAX_HANDLE_SIZE]);
4446
static int mca_accelerator_rocm_get_ipc_event_handle(opal_accelerator_event_t *event,
4547
opal_accelerator_ipc_event_handle_t *handle);
4648
static int mca_accelerator_rocm_import_ipc_event_handle(uint8_t ipc_handle[IPC_MAX_HANDLE_SIZE],
@@ -82,6 +84,7 @@ opal_accelerator_base_module_t opal_accelerator_rocm_module =
8284
mca_accelerator_rocm_get_ipc_handle,
8385
mca_accelerator_rocm_import_ipc_handle,
8486
mca_accelerator_rocm_open_ipc_handle,
87+
mca_accelerator_rocm_compare_ipc_handles,
8588
mca_accelerator_rocm_get_ipc_event_handle,
8689
mca_accelerator_rocm_import_ipc_event_handle,
8790
mca_accelerator_rocm_open_ipc_event_handle,
@@ -573,6 +576,27 @@ static int mca_accelerator_rocm_open_ipc_handle(int dev_id, opal_accelerator_ipc
573576
return OPAL_SUCCESS;
574577
}
575578

579+
/**
 * Compare two HIP IPC handles.
 *
 * Only the leading ROCr IPC handle bytes and the process ID stored in the
 * handle buffer take part in the comparison; all other bytes are ignored.
 *
 * @param[IN] handle_1  First IPC handle
 * @param[IN] handle_2  Second IPC handle
 *
 * @return zero if the process IDs and the ROCr IPC handles match,
 *         a non-zero value otherwise
 */
static int mca_accelerator_rocm_compare_ipc_handles(uint8_t handle_1[IPC_MAX_HANDLE_SIZE],
                                                    uint8_t handle_2[IPC_MAX_HANDLE_SIZE])
{
    /*
     * The HIP IPC handle consists of multiple elements.
     * We will only use the ROCr IPC handle (32 bytes, starting at pos 0)
     * and the process ID for comparison.
     * We definitely need to exclude the offset component from the comparison.
     */
    enum { ROCR_IPC_HANDLE_SIZE = 32 };

    /*
     * NOTE(review): the process ID is assumed to be located behind the ROCr
     * handle and two size_t-sized fields -- confirm against the HIP IPC
     * handle layout in use.
     */
    const size_t pid_pos = ROCR_IPC_HANDLE_SIZE + 2 * sizeof(size_t);
    int pid_1;
    int pid_2;

    /*
     * Copy the process IDs out of the byte buffers instead of dereferencing
     * a casted int pointer: the buffers are plain uint8_t arrays, so a direct
     * int access is not guaranteed to be aligned or alias-safe.
     */
    memcpy(&pid_1, &handle_1[pid_pos], sizeof(pid_1));
    memcpy(&pid_2, &handle_2[pid_pos], sizeof(pid_2));

    if (pid_1 != pid_2) {
        return 1;
    }

    return memcmp(handle_1, handle_2, ROCR_IPC_HANDLE_SIZE);
}
599+
576600
static void mca_accelerator_rocm_ipc_event_handle_destruct(opal_accelerator_rocm_ipc_handle_t *handle)
577601
{
578602
// Just a place holder, there is no hipIpcCloseEventHandle.

opal/mca/accelerator/ze/accelerator_ze_module.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ static int mca_accelerator_ze_import_ipc_handle(int dev_id, uint8_t ipc_handle[I
4646
opal_accelerator_ipc_handle_t *handle);
4747
static int mca_accelerator_ze_open_ipc_handle(int dev_id, opal_accelerator_ipc_handle_t *handle,
4848
void **dev_ptr);
49+
static int accelerator_ze_compare_ipc_handles(uint8_t handle_1[IPC_MAX_HANDLE_SIZE],
50+
uint8_t handle_2[IPC_MAX_HANDLE_SIZE]);
4951
static int mca_accelerator_ze_get_ipc_event_handle(opal_accelerator_event_t *event,
5052
opal_accelerator_ipc_event_handle_t *handle);
5153
static int mca_accelerator_ze_import_ipc_event_handle(uint8_t ipc_handle[IPC_MAX_HANDLE_SIZE],
@@ -85,6 +87,7 @@ opal_accelerator_base_module_t opal_accelerator_ze_module =
8587
.get_ipc_handle = mca_accelerator_ze_get_ipc_handle,
8688
.import_ipc_handle = mca_accelerator_ze_import_ipc_handle,
8789
.open_ipc_handle = mca_accelerator_ze_open_ipc_handle,
90+
.compare_ipc_handles = accelerator_ze_compare_ipc_handles,
8891
.get_ipc_event_handle = mca_accelerator_ze_get_ipc_event_handle,
8992
.import_ipc_event_handle = mca_accelerator_ze_import_ipc_event_handle,
9093
.open_ipc_event_handle = mca_accelerator_ze_open_ipc_event_handle,
@@ -650,6 +653,12 @@ static int mca_accelerator_ze_open_ipc_handle(int dev_id, opal_accelerator_ipc_h
650653
return OPAL_ERR_NOT_IMPLEMENTED;
651654
}
652655

656+
static int accelerator_ze_compare_ipc_handles(uint8_t handle_1[IPC_MAX_HANDLE_SIZE],
657+
uint8_t handle_2[IPC_MAX_HANDLE_SIZE])
658+
{
659+
return memcmp(handle_1, handle_2, IPC_MAX_HANDLE_SIZE);
660+
}
661+
653662
static int mca_accelerator_ze_get_ipc_event_handle(opal_accelerator_event_t *event,
654663
opal_accelerator_ipc_event_handle_t *handle)
655664
{

opal/mca/rcache/rgpusm/rcache_rgpusm_module.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,6 @@ int mca_rcache_rgpusm_register(mca_rcache_base_module_t *rcache, void *addr, siz
248248
rgpusm_reg->base.rcache = rcache;
249249
rgpusm_reg->base.base = addr;
250250
rgpusm_reg->base.bound = (unsigned char *) addr + size - 1;
251-
;
252251
rgpusm_reg->base.flags = flags;
253252

254253
/* The rget_reg registration is holding the memory handle needed
@@ -294,9 +293,8 @@ int mca_rcache_rgpusm_register(mca_rcache_base_module_t *rcache, void *addr, siz
294293
"RGPUSM: Found addr=%p,size=%d (base=%p,size=%d) in cache", addr,
295294
(int) size, (void*)(*reg)->base, (int) ((*reg)->bound - (*reg)->base));
296295

297-
if (0 ==
298-
memcmp(((mca_opal_gpu_reg_t *)*reg)->data.ipcHandle.handle, rget_reg->data.ipcHandle.handle,
299-
sizeof(((mca_opal_gpu_reg_t *)*reg)->data.ipcHandle.handle))) {
296+
if (0 == opal_accelerator.compare_ipc_handles(((mca_opal_gpu_reg_t *)*reg)->data.ipcHandle.handle,
297+
rget_reg->data.ipcHandle.handle)) {
300298
/* Registration matches what was requested. All is good. */
301299
rcache_rgpusm->stat_cache_valid++;
302300
} else {

0 commit comments

Comments
 (0)