Skip to content

Commit c7a5af3

Browse files
accelerator/rocm: add rocm component
The code compiles, and all tests that passed in the non-framework version appear to pass again.

Signed-off-by: Edgar Gabriel <edgar.gabriel@amd.com>
Co-authored-by: Edgar Gabriel <edgar.gabriel@amd.com>
Co-authored-by: William Zhang <wilzhang@amazon.com>
1 parent 81408d7 commit c7a5af3

20 files changed

+1119
-527
lines changed

config/opal_config_files.m4

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ AC_DEFUN([OPAL_CONFIG_FILES],[
1818
AC_CONFIG_FILES([
1919
opal/Makefile
2020
opal/cuda/Makefile
21-
opal/rocm/Makefile
2221
opal/etc/Makefile
2322
opal/include/Makefile
2423
opal/datatype/Makefile

configure.ac

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -988,19 +988,6 @@ AC_CACHE_SAVE
988988
opal_show_title "System-specific tests"
989989

990990
OPAL_CHECK_CUDA
991-
##################################
992-
# ROCm support
993-
##################################
994-
OPAL_CHECK_ROCM([opal_rocm],
995-
[opal_rocm_happy="yes"],
996-
[opal_rocm_happy="no"])
997-
OPAL_SUMMARY_ADD([Miscellaneous], [ROCm support], [], [$opal_rocm_happy])
998-
999-
AS_IF([test "$OPAL_CUDA_SUPPORT" = "1" && test "$OPAL_ROCM_SUPPORT" = "1"],
1000-
[AC_MSG_WARN([Cannot support both CUDA and ROCm.])
1001-
AC_MSG_WARN([You must reconfigure Open MPI choosing either CUDA or ROCm .])
1002-
AC_MSG_ERROR([Cannot continue.])])
1003-
1004991
##################################
1005992
OPAL_CHECK_OS_FLAVORS
1006993

ompi/mca/mtl/ofi/mtl_ofi.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,9 @@ int ompi_mtl_ofi_register_buffer(struct opal_convertor_t *convertor,
323323
} else if (0 == strcmp(accelerator_base_selected_component.base_version.mca_component_name, "cuda")) {
324324
attr.iface = FI_HMEM_CUDA;
325325
opal_accelerator.get_device(&attr.device.cuda);
326+
} else if (0 == strcmp(accelerator_base_selected_component.base_version.mca_component_name, "rocm")) {
327+
attr.iface = FI_HMEM_ROCR;
328+
opal_accelerator.get_device(&attr.device.cuda);
326329
} else {
327330
return OPAL_ERROR;
328331
}

ompi/mpiext/rocm/c/mpiext_rocm.c

Lines changed: 1 addition & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -25,29 +25,8 @@
2525
#include "opal/runtime/opal_params.h"
2626
#include "ompi/mpiext/rocm/c/mpiext_rocm_c.h"
2727

28-
#if OPAL_ROCM_SUPPORT
29-
#include "opal/rocm/common_rocm_prototypes.h"
30-
#endif
3128

3229
int MPIX_Query_rocm_support(void)
3330
{
34-
35-
if (!opal_built_with_rocm_support) {
36-
return 0;
37-
} else {
38-
if ( opal_rocm_runtime_initialized ) {
39-
return 1;
40-
}
41-
#if OPAL_ROCM_SUPPORT
42-
// There is a chance that the rocm runtime has simply not
43-
// been initialized yet, since that is done during the first convertor creation
44-
// Invoke a function that will trigger initialization of the rocm runtime and
45-
// check the value again after that.
46-
47-
int val1, val2;
48-
mca_common_rocm_check_bufs((char *)&val1, (char *)&val2);
49-
#endif
50-
}
51-
52-
return opal_rocm_runtime_initialized;
31+
return opal_built_with_rocm_support && opal_rocm_runtime_initialized;
5332
}

opal/Makefile.am

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,6 @@
2525
if OPAL_cuda_support
2626
LIBOPAL_GPU_SUBDIR = cuda
2727
LIBOPAL_GPU_LA = cuda/libopalcuda.la
28-
else
29-
if OPAL_rocm_support
30-
LIBOPAL_GPU_SUBDIR = rocm
31-
LIBOPAL_GPU_LA = rocm/libopalrocm.la
32-
endif
3328
endif
3429

3530

@@ -47,7 +42,6 @@ SUBDIRS = \
4742
DIST_SUBDIRS = \
4843
include \
4944
cuda \
50-
rocm \
5145
datatype \
5246
etc \
5347
util \

opal/datatype/opal_convertor.c

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -70,9 +70,6 @@ static void opal_convertor_construct(opal_convertor_t *convertor)
7070
convertor->remoteArch = opal_local_arch;
7171
convertor->flags = OPAL_DATATYPE_FLAG_NO_GAPS | CONVERTOR_COMPLETED;
7272
convertor->cbmemcpy = &opal_convertor_accelerator_memcpy;
73-
#if OPAL_ROCM_SUPPORT
74-
convertor->cbmemcpy = &mca_common_rocm_memcpy;
75-
#endif
7673
}
7774

7875
static void opal_convertor_destruct(opal_convertor_t *convertor)
@@ -579,11 +576,6 @@ int32_t opal_convertor_prepare_for_recv(opal_convertor_t *convertor,
579576
if (!(convertor->flags & CONVERTOR_SKIP_ACCELERATOR_INIT)) {
580577
opal_convertor_accelerator_init(convertor, pUserBuf);
581578
}
582-
#if OPAL_ROCM_SUPPORT
583-
if (!(convertor->flags & CONVERTOR_SKIP_ACCELERATOR_INIT)) {
584-
mca_common_rocm_convertor_init(convertor, pUserBuf);
585-
}
586-
#endif
587579

588580
assert(!(convertor->flags & CONVERTOR_SEND));
589581
OPAL_CONVERTOR_PREPARE(convertor, datatype, count, pUserBuf);
@@ -624,11 +616,6 @@ int32_t opal_convertor_prepare_for_send(opal_convertor_t *convertor,
624616
if (!(convertor->flags & CONVERTOR_SKIP_ACCELERATOR_INIT)) {
625617
opal_convertor_accelerator_init(convertor, pUserBuf);
626618
}
627-
#if OPAL_ROCM_SUPPORT
628-
if (!(convertor->flags & CONVERTOR_SKIP_ACCELERATOR_INIT)) {
629-
mca_common_rocm_convertor_init(convertor, pUserBuf);
630-
}
631-
#endif
632619

633620
OPAL_CONVERTOR_PREPARE(convertor, datatype, count, pUserBuf);
634621

opal/datatype/opal_convertor.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@ BEGIN_C_DECLS
5050
#define CONVERTOR_NO_OP 0x00100000
5151
#define CONVERTOR_WITH_CHECKSUM 0x00200000
5252
#define CONVERTOR_ACCELERATOR 0x00400000
53-
#define CONVERTOR_ROCM 0x00400000 //same as CUDA on purpose
5453
#define CONVERTOR_ACCELERATOR_ASYNC 0x00800000
5554
#define CONVERTOR_TYPE_MASK 0x10FF0000
5655
#define CONVERTOR_STATE_START 0x01000000

opal/datatype/opal_datatype_pack.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,8 +106,7 @@ static inline void pack_predefined_data(opal_convertor_t *CONVERTOR, const dt_el
106106
*(COUNT) -= cando_count;
107107

108108
if (_elem->blocklen < 9) {
109-
if (!(CONVERTOR->flags & CONVERTOR_ACCELERATOR) &&
110-
!(CONVERTOR->flags & CONVERTOR_ROCM)
109+
if (!(CONVERTOR->flags & CONVERTOR_ACCELERATOR)
111110
&& OPAL_LIKELY(
112111
OPAL_SUCCESS
113112
== opal_datatype_pack_predefined_element(&_memory, &_packed, cando_count, _elem))) {

opal/datatype/opal_datatype_unpack.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,7 @@ static inline void unpack_predefined_data(opal_convertor_t *CONVERTOR, const dt_
103103
*(COUNT) -= cando_count;
104104

105105
if (_elem->blocklen < 9) {
106-
if (!(CONVERTOR->flags & CONVERTOR_ACCELERATOR) &&
107-
!(CONVERTOR->flags & CONVERTOR_ROCM)
106+
if (!(CONVERTOR->flags & CONVERTOR_ACCELERATOR)
108107
&& OPAL_LIKELY(OPAL_SUCCESS
109108
== opal_datatype_unpack_predefined_element(&_packed, &_memory,
110109
cando_count, _elem))) {

opal/mca/accelerator/rocm/Makefile.am

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
#
2+
# Copyright (c) 2014 Intel, Inc. All rights reserved.
3+
# Copyright (c) 2017 IBM Corporation. All rights reserved.
4+
# Copyright (c) 2022 Amazon.com, Inc. or its affiliates.
5+
# All Rights reserved.
6+
# Copyright (c) 2022 Advanced Micro Devices, Inc.
7+
# All Rights reserved.
8+
# $COPYRIGHT$
9+
#
10+
# Additional copyrights may follow
11+
#
12+
# $HEADER$
13+
#
14+
15+
16+
AM_CPPFLAGS = $(common_rocm_CPPFLAGS)
17+
18+
sources = \
19+
accelerator_rocm.h \
20+
accelerator_rocm_component.c \
21+
accelerator_rocm_module.c
22+
23+
# Make the output library in this directory, and name it either
24+
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
25+
# (for static builds).
26+
27+
if MCA_BUILD_opal_accelerator_rocm_DSO
28+
component_noinst =
29+
component_install = mca_accelerator_rocm.la
30+
else
31+
component_noinst = libmca_accelerator_rocm.la
32+
component_install =
33+
endif
34+
35+
mcacomponentdir = $(opallibdir)
36+
mcacomponent_LTLIBRARIES = $(component_install)
37+
mca_accelerator_rocm_la_SOURCES = $(sources)
38+
mca_accelerator_rocm_la_LDFLAGS = -module -avoid-version
39+
mca_accelerator_rocm_la_LIBADD = $(top_builddir)/opal/lib@OPAL_LIB_NAME@.la
40+
41+
noinst_LTLIBRARIES = $(component_noinst)
42+
libmca_accelerator_rocm_la_SOURCES =$(sources)
43+
libmca_accelerator_rocm_la_LDFLAGS = -module -avoid-version

0 commit comments

Comments
 (0)