Skip to content

Commit 97f88e3

Browse files
authored
Merge pull request #10250 from edgargabriel/topic/rocm-support
Improve support for AMD ROCm GPU devices
2 parents 450ae3a + a0193bd commit 97f88e3

26 files changed

+921
-35
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,8 @@ ompi/mpiext/cuda/c/MPIX_Query_cuda_support.3
290290
ompi/mpiext/cuda/c/mpiext_cuda_c.h
291291
ompi/mpiext/cuda/c/cuda_c.h
292292

293+
ompi/mpiext/rocm/c/mpiext_rocm_c.h
294+
293295
ompi/mpiext/pcollreq/c/MPIX_*.3
294296
ompi/mpiext/pcollreq/c/profile/pallgather_init.c
295297
ompi/mpiext/pcollreq/c/profile/pallgatherv_init.c

config/opal_check_rocm.m4

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
dnl
2+
dnl Copyright (C) 2022 Advanced Micro Devices, Inc. All rights reserved.
3+
dnl $COPYRIGHT$
4+
dnl
5+
dnl Additional copyrights may follow
6+
dnl
7+
dnl $HEADER$
8+
dnl
9+
10+
11+
# OMPI_CHECK_ROCM(prefix, [action-if-found], [action-if-not-found])
12+
# --------------------------------------------------------
13+
# check if ROCM support can be found. sets prefix_{CPPFLAGS,
14+
# LDFLAGS, LIBS} as needed and runs action-if-found if there is
15+
# support, otherwise executes action-if-not-found
16+
17+
18+
#
19+
# Check for ROCm support
20+
#
21+
AC_DEFUN([OPAL_CHECK_ROCM],[
22+
23+
OPAL_VAR_SCOPE_PUSH([opal_check_rocm_happy rocm_save_CPPFLAGS rocm_save_LDFLAGS rocm_CPPFLAGS rocm_LDFLAGS])
24+
25+
rocm_save_CPPFLAGS="$CPPFLAGS"
26+
rocm_save_LDFLAGS="$LDFLAGS"
27+
28+
# Get some configuration information
29+
AC_ARG_WITH([rocm],
30+
[AS_HELP_STRING([--with-rocm(=DIR)],
31+
[Build ROCm support, optionally adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries])])
32+
33+
34+
AS_IF([ test -n "$with_rocm" && test "$with_rocm" = "yes" ],
35+
[ with_rocm="/opt/rocm"] )
36+
37+
rocm_CPPFLAGS="-D__HIP_PLATFORM_AMD__"
38+
rocm_LDFLAGS="-L${with_rocm}/lib/hip"
39+
40+
AS_IF([ test -n "$with_rocm" && test "$with_rocm" != "no" ],
41+
[ OPAL_APPEND([CPPFLAGS], [$rocm_CPPFLAGS])
42+
OPAL_APPEND([LDFLAGS], [$rocm_LDFLAGS]) ])
43+
44+
OAC_CHECK_PACKAGE([rocm],
45+
[$1],
46+
[hip/hip_runtime.h],
47+
[amdhip64],
48+
[hipFree],
49+
[opal_check_rocm_happy="yes"],
50+
[opal_check_rocm_happy="no"])
51+
52+
LDFLAGS="$rocm_save_LDFLAGS"
53+
OPAL_APPEND([CPPFLAGS], [${$1_CPPFLAGS}] )
54+
55+
AS_IF([ test "$opal_check_rocm_happy" = "no" ],
56+
[ CPPFLAGS="$rocm_save_CPPFLAGS"])
57+
58+
AS_IF([ test "$opal_check_rocm_happy" = "yes" ],
59+
[ AC_DEFINE_UNQUOTED([OPAL_ROCM_SUPPORT], [1], [Enable ROCm support])
60+
ROCM_SUPPORT=1 ],
61+
[ AC_DEFINE_UNQUOTED([OPAL_ROCM_SUPPORT], [0], [Disable ROCm support])
62+
ROCM_SUPPORT=0 ])
63+
64+
AS_IF([ test "$opal_check_rocm_happy" = "yes" ],
65+
[$2],
66+
[AS_IF([test -n "$with_rocm" && test "$with_rocm" != "no"],
67+
[AC_MSG_ERROR([ROCm support requested but not found. Aborting])])
68+
$3])
69+
70+
AM_CONDITIONAL([OPAL_rocm_support], [test "$opal_check_rocm_happy" = "yes"])
71+
OPAL_VAR_SCOPE_POP
72+
])
73+
74+
AC_DEFUN([OPAL_CHECK_ROCM_AFTER_OPAL_DL],[
75+
# We cannot have ROCm support without OPAL DL support. Error out
76+
# if the user wants Rocm but we do not have OPAL DL support.
77+
AS_IF([test $OPAL_HAVE_DL_SUPPORT -eq 0 && test "$opal_check_rocm_happy" = "yes"],
78+
[AC_MSG_WARN([--with-rocm was specified, but dlopen support is disabled.])
79+
AC_MSG_WARN([You must reconfigure Open MPI with dlopen ("dl") support.])
80+
AC_MSG_ERROR([Cannot continue.])])
81+
82+
])

config/opal_config_files.m4

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ AC_DEFUN([OPAL_CONFIG_FILES],[
1818
AC_CONFIG_FILES([
1919
opal/Makefile
2020
opal/cuda/Makefile
21+
opal/rocm/Makefile
2122
opal/etc/Makefile
2223
opal/include/Makefile
2324
opal/datatype/Makefile

configure.ac

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1003,8 +1003,24 @@ AC_CACHE_SAVE
10031003
opal_show_title "System-specific tests"
10041004

10051005
OPAL_CHECK_CUDA
1006+
##################################
1007+
# ROCm support
1008+
##################################
1009+
OPAL_CHECK_ROCM([opal_rocm],
1010+
[opal_rocm_happy="yes"],
1011+
[opal_rocm_happy="no"])
1012+
OPAL_SUMMARY_ADD([Miscellaneous], [ROCm suport], [], [$opal_rocm_happy])
1013+
1014+
AS_IF([test "$OPAL_CUDA_SUPPORT" = "1" && test "$OPAL_ROCM_SUPPORT" = "1"],
1015+
[AC_MSG_WARN([Cannot support both CUDA and ROCm.])
1016+
AC_MSG_WARN([You must reconfigure Open MPI choosing either CUDA or ROCm .])
1017+
AC_MSG_ERROR([Cannot continue.])])
1018+
1019+
##################################
10061020
OPAL_CHECK_OS_FLAVORS
10071021

1022+
1023+
10081024
# Do we have _SC_NPROCESSORS_ONLN? (only going to pass if we also have
10091025
# <unistd.h> and sysconf(), which is ok) OS X 10.4 has <unistd.h> and
10101026
# sysconf(), but does not have _SC_NPROCESSORS_ONLN. Doh!
@@ -1247,6 +1263,8 @@ AC_CACHE_SAVE
12471263

12481264
OPAL_CHECK_CUDA_AFTER_OPAL_DL
12491265

1266+
OPAL_CHECK_ROCM_AFTER_OPAL_DL
1267+
12501268
##################################
12511269
# MPI Extended Interfaces
12521270
##################################
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
.. _mpix_query_rocm_support:
2+
3+
4+
MPIX_Query_rocm_support
5+
=======================
6+
7+
.. include_body
8+
9+
**MPIX_Query_rocm_support** - Returns 1 if there is AMD ROCm aware support
10+
and 0 if there is not.
11+
12+
13+
SYNTAX
14+
------
15+
16+
17+
C Syntax
18+
^^^^^^^^
19+
20+
.. code-block:: c
21+
22+
#include <mpi.h>
23+
#include <mpi-ext.h>
24+
25+
int MPIX_Query_rocm_support(void)
26+
27+
28+
Fortran Syntax
29+
^^^^^^^^^^^^^^
30+
31+
There is no Fortran binding for this function.
32+
33+
34+
C++ Syntax
35+
^^^^^^^^^^
36+
37+
There is no C++ binding for this function.
38+
39+
40+
DESCRIPTION
41+
-----------
42+
43+
This routine return 1 if MPI library is build with ROCm and runtime
44+
supports ROCm buffers. This routine must be called after MPI is
45+
initialized by a call to :ref:`MPI_Init` or :ref:`MPI_Init_thread`.
46+
47+
48+
Examples
49+
^^^^^^^^
50+
51+
::
52+
53+
54+
#include <stdio.h>
55+
#include "mpi.h"
56+
57+
#include "mpi-ext.h" /* Needed for ROCm-aware check */
58+
59+
int main(int argc, char *argv[])
60+
{
61+
62+
MPI_Init(&argc, &argv);
63+
64+
if (MPIX_Query_rocm_support()) {
65+
printf("This MPI library has ROCm-aware support.);
66+
} else {
67+
printf("This MPI library does not have ROCm-aware support.);
68+
}
69+
MPI_Finalize();
70+
71+
return 0;
72+
}

docs/man-openmpi/man3/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -466,4 +466,5 @@ MPI API manual pages (section 3)
466466
MPI_Wtick.3.rst
467467
MPI_Wtime.3.rst
468468
MPIX_Query_cuda_support.3.rst
469+
MPIX_Query_rocm_support.3.rst
469470
OMPI_Affinity_str.3.rst

ompi/mpiext/rocm/Makefile.am

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#
2+
# Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana
3+
# University Research and Technology
4+
# Corporation. All rights reserved.
5+
# Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved.
6+
# Copyright (c) 2015 NVIDIA, Inc. All rights reserved
7+
# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved
8+
# $COPYRIGHT$
9+
#
10+
# Additional copyrights may follow
11+
#
12+
# $HEADER$
13+
#
14+
15+
# This Makefile is not traversed during a normal "make all" in an OMPI
16+
# build. It *is* traversed during "make dist", however. So you can
17+
# put EXTRA_DIST targets in here.
18+
#
19+
# You can also use this as a convenience for building this MPI
20+
# extension (i.e., "make all" in this directory to invoke "make all"
21+
# in all the subdirectories).
22+
23+
SUBDIRS = c

ompi/mpiext/rocm/c/Makefile.am

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#
2+
# Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana
3+
# University Research and Technology
4+
# Corporation. All rights reserved.
5+
# Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved.
6+
# Copyright (c) 2015 NVIDIA, Inc. All rights reserved.
7+
# Copyright (c) 2018 Research Organization for Information Science
8+
# and Technology (RIST). All rights reserved.
9+
# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
10+
#
11+
# $COPYRIGHT$
12+
#
13+
# Additional copyrights may follow
14+
#
15+
# $HEADER$
16+
#
17+
18+
# This file builds the C bindings for MPI extensions. It must be
19+
# present in all MPI extensions.
20+
21+
# We must set these #defines so that the inner OMPI MPI prototype
22+
# header files do the Right Thing.
23+
AM_CPPFLAGS = -DOMPI_PROFILE_LAYER=0 -DOMPI_COMPILING_FORTRAN_WRAPPERS=1
24+
25+
# Convenience libtool library that will be slurped up into libmpi.la.
26+
noinst_LTLIBRARIES = libmpiext_rocm_c.la
27+
28+
# This is where the top-level header file (that is included in
29+
# <mpi-ext.h>) must be installed.
30+
ompidir = $(ompiincludedir)/mpiext
31+
32+
# This is the header file that is installed.
33+
nodist_ompi_HEADERS = mpiext_rocm_c.h
34+
35+
# Sources for the convenience libtool library. Other than the one
36+
# header file, all source files in the extension have no file naming
37+
# conventions.
38+
libmpiext_rocm_c_la_SOURCES = \
39+
$(ompi_HEADERS) \
40+
mpiext_rocm.c
41+
libmpiext_rocm_c_la_LDFLAGS = -module -avoid-version
42+

ompi/mpiext/rocm/c/mpiext_rocm.c

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/*
2+
* Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana
3+
* University Research and Technology
4+
* Corporation. All rights reserved.
5+
* Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved.
6+
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
7+
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
8+
* reserved.
9+
* Copyright (c) 2015 NVIDIA, Inc. All rights reserved.
10+
* Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
11+
* $COPYRIGHT$
12+
*
13+
* Additional copyrights may follow
14+
*
15+
* $HEADER$
16+
*
17+
*/
18+
19+
#include "ompi_config.h"
20+
21+
#include <stdio.h>
22+
#include <string.h>
23+
24+
#include "opal/constants.h"
25+
#include "opal/runtime/opal_params.h"
26+
#include "ompi/mpiext/rocm/c/mpiext_rocm_c.h"
27+
28+
#if OPAL_ROCM_SUPPORT
29+
#include "opal/rocm/common_rocm_prototypes.h"
30+
#endif
31+
32+
int MPIX_Query_rocm_support(void)
33+
{
34+
35+
if (!opal_built_with_rocm_support) {
36+
return 0;
37+
} else {
38+
if ( opal_rocm_runtime_initialized ) {
39+
return 1;
40+
}
41+
#if OPAL_ROCM_SUPPORT
42+
// There is a chance that the rocm runtime has simply not
43+
// been initialized yet, since that is done during the first convertor creation
44+
// Invoke a function that will trigger the rocm runtime initialized and
45+
// check the value again after that.
46+
47+
int val1, val2;
48+
mca_common_rocm_check_bufs((char *)&val1, (char *)&val2);
49+
#endif
50+
}
51+
52+
return opal_rocm_runtime_initialized;
53+
}

ompi/mpiext/rocm/c/mpiext_rocm_c.h.in

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
/*
2+
* Copyright (c) 2004-2009 The Trustees of Indiana University.
3+
* All rights reserved.
4+
* Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved.
5+
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
6+
* Copyright (c) 2015 NVIDIA, Inc. All rights reserved.
7+
* Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
8+
* $COPYRIGHT$
9+
*
10+
* Additional copyrights may follow
11+
*
12+
* $HEADER$
13+
*
14+
*/
15+
16+
#define MPIX_ROCM_AWARE_SUPPORT @MPIX_ROCM_AWARE_SUPPORT@
17+
OMPI_DECLSPEC int MPIX_Query_rocm_support(void);

0 commit comments

Comments
 (0)