Skip to content

Commit 6153301

Browse files
authored
Merge pull request #12559 from bosilca/topic/add_SVE_op_support
Add AARCH64 support for MPI_OP.
2 parents b38f53a + 3e02132 commit 6153301

File tree

6 files changed

+1129
-1
lines changed

6 files changed

+1129
-1
lines changed

ompi/mca/op/aarch64/Makefile.am

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
#
2+
# Copyright (c) 2019 The University of Tennessee and The University
3+
# of Tennessee Research Foundation. All rights
4+
# reserved.
5+
# Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
6+
# $COPYRIGHT$
7+
#
8+
# Additional copyrights may follow
9+
#
10+
# $HEADER$
11+
#
12+
13+
# This is an aarch64 op component. This Makefile.am is a typical
14+
# example of how to integrate into Open MPI's Automake-based build
15+
# system.
16+
#
17+
# See https://github.com/open-mpi/ompi/wiki/devel-CreateComponent
18+
# for more details on how to make Open MPI components.
19+
20+
# First, list all .h and .c sources. It is necessary to list all .h
21+
# files so that they will be picked up in the distribution tarball.
22+
23+
sources = \
24+
op_aarch64.h \
25+
op_aarch64_component.c
26+
27+
# Open MPI components can be compiled two ways:
28+
#
29+
# 1. As a standalone dynamic shared object (DSO), sometimes called a
30+
# dynamically loadable library (DLL).
31+
#
32+
# 2. As a static library that is slurped up into the upper-level
33+
# libmpi library (regardless of whether libmpi is a static or dynamic
34+
# library). This is called a "Libtool convenience library".
35+
#
36+
# The component needs to create an output library in this top-level
37+
# component directory, and named either mca_<type>_<name>.la (for DSO
38+
# builds) or libmca_<type>_<name>.la (for static builds). The OMPI
39+
# build system will have set the
40+
# MCA_BUILD_ompi_<framework>_<component>_DSO AM_CONDITIONAL to indicate
41+
# which way this component should be built.
42+
# Convenience libraries holding the vectorized op implementations. Each
# enabled instruction set compiles the same source file
# (op_aarch64_functions.c) into its own library with a different
# -DGENERATE_*_CODE preprocessor define.
specialized_op_libs =
43+
if MCA_BUILD_ompi_op_has_neon_support
44+
specialized_op_libs += liblocal_ops_neon.la
45+
liblocal_ops_neon_la_SOURCES = op_aarch64_functions.c
46+
liblocal_ops_neon_la_CPPFLAGS = -DGENERATE_NEON_CODE
47+
endif
48+
if MCA_BUILD_ompi_op_has_sve_support
49+
specialized_op_libs += liblocal_ops_sve.la
50+
liblocal_ops_sve_la_SOURCES = op_aarch64_functions.c
51+
liblocal_ops_sve_la_CPPFLAGS = -DGENERATE_SVE_CODE
52+
endif
53+
54+
# The specialized libraries are always listed as noinst. The component
# itself is either installed as mca_op_aarch64.la (DSO build) or added
# to the noinst list as libmca_op_aarch64.la (static build).
component_noinst = $(specialized_op_libs)
55+
if MCA_BUILD_ompi_op_aarch64_DSO
56+
component_install = mca_op_aarch64.la
57+
else
58+
component_install =
59+
component_noinst += libmca_op_aarch64.la
60+
endif
61+
62+
# Specific information for DSO builds.
63+
#
64+
# The DSO should install itself in $(ompilibdir) (by default,
65+
# $prefix/lib/openmpi).
66+
67+
mcacomponentdir = $(ompilibdir)
68+
mcacomponent_LTLIBRARIES = $(component_install)
69+
mca_op_aarch64_la_SOURCES = $(sources)
70+
mca_op_aarch64_la_LIBADD = $(specialized_op_libs)
71+
mca_op_aarch64_la_LDFLAGS = -module -avoid-version $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la
72+
73+
74+
# Specific information for static builds.
75+
#
76+
# Note that we *must* "noinst"; the upper-layer Makefile.am's will
77+
# slurp in the resulting .la library into libmpi.
78+
79+
noinst_LTLIBRARIES = $(component_noinst)
80+
libmca_op_aarch64_la_SOURCES = $(sources)
81+
libmca_op_aarch64_la_LIBADD = $(specialized_op_libs)
82+
libmca_op_aarch64_la_LDFLAGS = -module -avoid-version

ompi/mca/op/aarch64/configure.m4

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
# -*- shell-script -*-
2+
#
3+
# Copyright (c) 2019-2020 The University of Tennessee and The University
4+
# of Tennessee Research Foundation. All rights
5+
# reserved.
6+
# Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
7+
#
8+
# $COPYRIGHT$
9+
#
10+
# Additional copyrights may follow
11+
#
12+
# $HEADER$
13+
#
14+
15+
# MCA_ompi_op_aarch64_CONFIG([action-if-can-compile],
16+
# [action-if-cant-compile])
17+
# ------------------------------------------------
18+
AC_DEFUN([MCA_ompi_op_aarch64_CONFIG],[
19+
AC_CONFIG_FILES([ompi/mca/op/aarch64/Makefile])
20+
case "${host}" in
21+
aarch64*|arm64*)
22+
op_aarch64_check="yes";;
23+
*)
24+
op_aarch64_check="no";;
25+
esac
26+
AS_IF([test "$op_aarch64_check" = "yes"],
27+
[AC_LANG_PUSH([C])
28+
29+
#
30+
# Check for NEON support
31+
#
32+
AC_CACHE_CHECK([for NEON support], op_cv_neon_support,
33+
[
34+
AC_LINK_IFELSE(
35+
[AC_LANG_PROGRAM([[
36+
#if defined(__aarch64__) && defined(__ARM_NEON)
37+
#include <arm_neon.h>
38+
#else
39+
#error "No support for __aarch64__"
40+
#endif
41+
]],
42+
[[
43+
#if defined(__aarch64__) && defined(__ARM_NEON)
44+
int32x4_t vA;
45+
vA = vmovq_n_s32(0)
46+
#endif
47+
]])],
48+
[op_cv_neon_support=yes],
49+
[op_cv_neon_support=no])])
50+
51+
#
52+
# Check for NEON FP support
53+
#
54+
AC_CACHE_CHECK([for NEON FP support], op_cv_neon_fp_support,
55+
[AS_IF([test "$op_cv_neon_support" = "yes"],
56+
[
57+
AC_LINK_IFELSE(
58+
[AC_LANG_PROGRAM([[
59+
#if defined(__aarch64__) && defined(__ARM_NEON) && (defined(__ARM_NEON_FP) || defined(__ARM_FP))
60+
#include <arm_neon.h>
61+
#else
62+
#error "No support for __aarch64__ or NEON FP"
63+
#endif
64+
]],
65+
[[
66+
#if defined(__aarch64__) && defined(__ARM_NEON) && (defined(__ARM_NEON_FP) || defined(__ARM_FP))
67+
float32x4_t vA;
68+
vA = vmovq_n_f32(0)
69+
#endif
70+
]])],
71+
[op_cv_neon_fp_support=yes],
72+
[op_cv_neon_fp_support=no])])])
73+
74+
#
75+
# Check for SVE support. Only attempted when NEON was detected. The test body is guarded by __ARM_FEATURE_SVE - the same macro as the header test - so that the svdup_n_f32 intrinsic is really compiled and linked instead of being preprocessed away.
76+
#
77+
AC_CACHE_CHECK([for SVE support], op_cv_sve_support,
78+
[AS_IF([test "$op_cv_neon_support" = "yes"],
79+
[
80+
AC_LINK_IFELSE(
81+
[AC_LANG_PROGRAM([[
82+
#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE)
83+
#include <arm_sve.h>
84+
#else
85+
#error "No support for __aarch64__ or SVE"
86+
#endif
87+
]],
88+
[[
89+
#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE)
90+
svfloat32_t vA;
91+
vA = svdup_n_f32(0)
92+
#endif
93+
]])],
94+
[op_cv_sve_support=yes],
95+
[op_cv_sve_support=no])])])
96+
])
97+
98+
AM_CONDITIONAL([MCA_BUILD_ompi_op_has_neon_support],
99+
[test "$op_cv_neon_support" = "yes"])
100+
AM_CONDITIONAL([MCA_BUILD_ompi_op_has_neon_fp_support],
101+
[test "$op_cv_neon_fp_support" = "yes"])
102+
AM_CONDITIONAL([MCA_BUILD_ompi_op_has_sve_support],
103+
[test "$op_cv_sve_support" = "yes"])
104+
AC_SUBST(MCA_BUILD_ompi_op_has_neon_support)
105+
AC_SUBST(MCA_BUILD_ompi_op_has_neon_fp_support)
106+
AC_SUBST(MCA_BUILD_ompi_op_has_sve_support)
107+
108+
AS_IF([test "$op_cv_neon_support" = "yes"],
109+
[AC_DEFINE([OMPI_MCA_OP_HAVE_NEON], [1],[NEON supported in the current build])])
110+
AS_IF([test "$op_cv_neon_fp_support" = "yes"],
111+
[AC_DEFINE([OMPI_MCA_OP_HAVE_NEON_FP], [1],[NEON FP supported in the current build])])
112+
AS_IF([test "$op_cv_sve_support" = "yes"],
113+
[AC_DEFINE([OMPI_MCA_OP_HAVE_SVE], [1],[SVE supported in the current build])])
114+
115+
# If we have at least support for Neon
116+
AS_IF([test "$op_cv_neon_support" = "yes"],
117+
[$1],
118+
[$2])
119+
])dnl

ompi/mca/op/aarch64/op_aarch64.h

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
/*
2+
* Copyright (c) 2019 The University of Tennessee and The University
3+
* of Tennessee Research Foundation. All rights
4+
* reserved.
5+
* Copyright (c) 2019 Arm Ltd. All rights reserved.
6+
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
7+
*
8+
* $COPYRIGHT$
9+
*
10+
* Additional copyrights may follow
11+
*
12+
* $HEADER$
13+
*/
14+
15+
#ifndef MCA_OP_AARCH64_EXPORT_H
16+
#define MCA_OP_AARCH64_EXPORT_H
17+
18+
#include "ompi_config.h"
19+
20+
#include "ompi/mca/mca.h"
21+
#include "opal/class/opal_object.h"
22+
23+
#include "ompi/mca/op/op.h"
24+
25+
BEGIN_C_DECLS
26+
27+
/**
28+
* aarch64 op component type: the base op component struct followed by
29+
* component-specific cached state, so that per-component information
30+
* lives on this well-known struct rather than in separate globals.
31+
*/
32+
typedef struct {
33+
/** The base op component struct; kept as the first member. */
34+
ompi_op_base_component_1_0_0_t super;
35+
36+
/* What follows is aarch64-component-specific cached information. We
37+
tend to use this scheme (caching information on the aarch64
38+
component itself) instead of lots of individual global
39+
variables for the component. */
40+
41+
/** Indicates that the hardware is available. NOTE(review): described as a boolean but declared uint32_t - confirm whether this holds a simple flag or a capability bitmask. */
42+
uint32_t hardware_available;
43+
44+
/** A simple boolean indicating whether double precision is
45+
supported. */
46+
bool double_supported;
47+
} ompi_op_aarch64_component_t;
48+
49+
/**
50+
* Globally exported variable. Note that it is a *aarch64* component
51+
* (defined above), which has the ompi_op_base_component_t as its
52+
* first member. Hence, the MCA/op framework will find the data that
53+
* it expects in the first memory locations, but then the component
54+
* itself can cache additional information after that that can be used
55+
* by both the component and modules.
56+
*/
57+
OMPI_DECLSPEC extern ompi_op_aarch64_component_t
58+
mca_op_aarch64_component;
59+
60+
END_C_DECLS
61+
62+
#endif /* MCA_OP_AARCH64_EXPORT_H */

0 commit comments

Comments
 (0)