Skip to content

Commit 1f237f5

Browse files
authored
Merge pull request #7419 from bosilca/topic/avx512
Add support for AVX512/AVX2/SSE/MMX
2 parents d0c0cb7 + 14b3c70 commit 1f237f5

File tree

12 files changed

+3543
-23
lines changed

12 files changed

+3543
-23
lines changed

config/opal_config_asm.m4

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ dnl
22
dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
33
dnl University Research and Technology
44
dnl Corporation. All rights reserved.
5-
dnl Copyright (c) 2004-2018 The University of Tennessee and The University
5+
dnl Copyright (c) 2004-2020 The University of Tennessee and The University
66
dnl of Tennessee Research Foundation. All rights
77
dnl reserved.
88
dnl Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
@@ -1255,7 +1255,7 @@ AC_DEFUN([OPAL_CONFIG_ASM],[
12551255
12561256
# Check for RDTSCP support
12571257
result=0
1258-
AS_IF([test "$opal_cv_asm_arch" = "OPAL_X86_64" || test "$opal_cv_asm_arch" = "OPAL_IA32"],
1258+
AS_IF([test "$opal_cv_asm_arch" = "X86_64" || test "$opal_cv_asm_arch" = "IA32"],
12591259
[AC_MSG_CHECKING([for RDTSCP assembly support])
12601260
AC_LANG_PUSH([C])
12611261
AC_TRY_RUN([[

ompi/mca/op/avx/Makefile.am

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
#
2+
# Copyright (c) 2019-2020 The University of Tennessee and The University
3+
# of Tennessee Research Foundation. All rights
4+
# reserved.
5+
# Copyright (c) 2020 Research Organization for Information Science
6+
# and Technology (RIST). All rights reserved.
7+
# $COPYRIGHT$
8+
#
9+
# Additional copyrights may follow
10+
#
11+
# $HEADER$
12+
#
13+
14+
# This component provide support for the Advanced Vector Extensions (AVX)
15+
# available in recent versions of x86 processors.
16+
#
17+
# See https://github.com/open-mpi/ompi/wiki/devel-CreateComponent
18+
# for more details on how to make Open MPI components.
19+
20+
# First, list all .h and .c sources. It is necessary to list all .h
21+
# files so that they will be picked up in the distribution tarball.
22+
23+
sources = op_avx_component.c op_avx.h
24+
sources_extended = op_avx_functions.c
25+
26+
# Open MPI components can be compiled two ways:
27+
#
28+
# 1. As a standalone dynamic shared object (DSO), sometimes called a
29+
# dynamically loadable library (DLL).
30+
#
31+
# 2. As a static library that is slurped up into the upper-level
32+
# libmpi library (regardless of whether libmpi is a static or dynamic
33+
# library). This is called a "Libtool convenience library".
34+
#
35+
# The component needs to create an output library in this top-level
36+
# component directory, and named either mca_<type>_<name>.la (for DSO
37+
# builds) or libmca_<type>_<name>.la (for static builds). The OMPI
38+
# build system will have set the
39+
# MCA_BUILD_ompi_<framework>_<component>_DSO AM_CONDITIONAL to indicate
40+
# which way this component should be built.
41+
42+
# We need to support all processors from early AVX to full AVX512 support, based on
43+
# a decision made at runtime. So, we generate all combinations of capabilities, and
44+
# we will select the most suitable (based on the processor flags) during the
45+
# component initialization.
46+
specialized_op_libs =
47+
if MCA_BUILD_ompi_op_has_avx_support
48+
specialized_op_libs += liblocal_ops_avx.la
49+
liblocal_ops_avx_la_SOURCES = $(sources_extended)
50+
liblocal_ops_avx_la_CFLAGS = @MCA_BUILD_OP_AVX_FLAGS@
51+
liblocal_ops_avx_la_CPPFLAGS = -DGENERATE_AVX_CODE
52+
if MCA_BUILD_ompi_op_has_sse3_support
53+
liblocal_ops_avx_la_CPPFLAGS += -DGENERATE_SSE3_CODE
54+
endif
55+
if MCA_BUILD_ompi_op_has_sse41_support
56+
liblocal_ops_avx_la_CPPFLAGS += -DGENERATE_SSE41_CODE
57+
endif
58+
endif
59+
if MCA_BUILD_ompi_op_has_avx2_support
60+
specialized_op_libs += liblocal_ops_avx2.la
61+
liblocal_ops_avx2_la_SOURCES = $(sources_extended)
62+
liblocal_ops_avx2_la_CFLAGS = @MCA_BUILD_OP_AVX2_FLAGS@
63+
liblocal_ops_avx2_la_CPPFLAGS = -DGENERATE_SSE3_CODE -DGENERATE_SSE41_CODE -DGENERATE_AVX_CODE -DGENERATE_AVX2_CODE
64+
endif
65+
if MCA_BUILD_ompi_op_has_avx512_support
66+
specialized_op_libs += liblocal_ops_avx512.la
67+
liblocal_ops_avx512_la_SOURCES = $(sources_extended)
68+
liblocal_ops_avx512_la_CFLAGS = @MCA_BUILD_OP_AVX512_FLAGS@
69+
liblocal_ops_avx512_la_CPPFLAGS = -DGENERATE_SSE3_CODE -DGENERATE_SSE41_CODE -DGENERATE_AVX_CODE -DGENERATE_AVX2_CODE -DGENERATE_AVX512_CODE
70+
endif
71+
72+
component_noinst = $(specialized_op_libs)
73+
if MCA_BUILD_ompi_op_avx_DSO
74+
component_install = mca_op_avx.la
75+
else
76+
component_install =
77+
component_noinst += libmca_op_avx.la
78+
endif
79+
80+
# Specific information for DSO builds.
81+
#
82+
# The DSO should install itself in $(ompilibdir) (by default,
83+
# $prefix/lib/openmpi).
84+
85+
mcacomponentdir = $(ompilibdir)
86+
mcacomponent_LTLIBRARIES = $(component_install)
87+
mca_op_avx_la_SOURCES = $(sources)
88+
mca_op_avx_la_LIBADD = $(specialized_op_libs)
89+
mca_op_avx_la_LDFLAGS = -module -avoid-version
90+
91+
92+
# Specific information for static builds.
93+
#
94+
# Note that we *must* "noinst"; the upper-layer Makefile.am's will
95+
# slurp in the resulting .la library into libmpi.
96+
97+
noinst_LTLIBRARIES = $(component_noinst)
98+
libmca_op_avx_la_SOURCES = $(sources)
99+
libmca_op_avx_la_LIBADD = $(specialized_op_libs)
100+
libmca_op_avx_la_LDFLAGS = -module -avoid-version
101+

ompi/mca/op/avx/configure.m4

Lines changed: 265 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,265 @@
1+
# -*- shell-script -*-
2+
#
3+
# Copyright (c) 2019-2020 The University of Tennessee and The University
4+
# of Tennessee Research Foundation. All rights
5+
# reserved.
6+
# Copyright (c) 2020 Cisco Systems, Inc. All rights reserved.
7+
#
8+
# $COPYRIGHT$
9+
#
10+
# Additional copyrights may follow
11+
#
12+
# $HEADER$
13+
#
14+
15+
# MCA_ompi_op_avx_CONFIG([action-if-can-compile],
16+
# [action-if-cant-compile])
17+
# ------------------------------------------------
18+
# We can always build, unless we were explicitly disabled.
19+
AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
20+
AC_CONFIG_FILES([ompi/mca/op/avx/Makefile])
21+
22+
MCA_BUILD_OP_AVX_FLAGS=""
23+
MCA_BUILD_OP_AVX2_FLAGS=""
24+
MCA_BUILD_OP_AVX512_FLAGS=""
25+
op_sse3_support=0
26+
op_sse41_support=0
27+
op_avx_support=0
28+
op_avx2_support=0
29+
op_avx512_support=0
30+
OPAL_VAR_SCOPE_PUSH([op_avx_cflags_save])
31+
32+
AS_IF([test "$opal_cv_asm_arch" = "X86_64"],
33+
[AC_LANG_PUSH([C])
34+
35+
#
36+
# Check for AVX512 support
37+
#
38+
AC_MSG_CHECKING([for AVX512 support (no additional flags)])
39+
AC_LINK_IFELSE(
40+
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
41+
[[
42+
__m512 vA, vB;
43+
_mm512_add_ps(vA, vB)
44+
]])],
45+
[op_avx512_support=1
46+
AC_MSG_RESULT([yes])],
47+
[AC_MSG_RESULT([no])])
48+
49+
AS_IF([test $op_avx512_support -eq 0],
50+
[AC_MSG_CHECKING([for AVX512 support (with -march=skylake-avx512)])
51+
op_avx_cflags_save="$CFLAGS"
52+
CFLAGS="$CFLAGS -march=skylake-avx512"
53+
AC_LINK_IFELSE(
54+
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
55+
[[
56+
__m512 vA, vB;
57+
_mm512_add_ps(vA, vB)
58+
]])],
59+
[op_avx512_support=1
60+
MCA_BUILD_OP_AVX512_FLAGS="-march=skylake-avx512"
61+
AC_MSG_RESULT([yes])],
62+
[AC_MSG_RESULT([no])])
63+
CFLAGS="$op_avx_cflags_save"
64+
])
65+
#
66+
# Some combination of gcc and older as would not correctly build the code generated by
67+
# _mm256_loadu_si256. Screen them out.
68+
#
69+
AS_IF([test $op_avx512_support -eq 1],
70+
[AC_MSG_CHECKING([if _mm512_loadu_si512 generates code that can be compiled])
71+
op_avx_cflags_save="$CFLAGS"
72+
CFLAGS="$CFLAGS_WITHOUT_OPTFLAGS -O0 $MCA_BUILD_OP_AVX512_FLAGS"
73+
AC_LINK_IFELSE(
74+
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
75+
[[
76+
int A[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
77+
__m512i vA = _mm512_loadu_si512((__m512i*)&(A[1]))
78+
]])],
79+
[AC_MSG_RESULT([yes])],
80+
[op_avx512_support=0
81+
MCA_BUILD_OP_AVX512_FLAGS=""
82+
AC_MSG_RESULT([no])])
83+
CFLAGS="$op_avx_cflags_save"
84+
])
85+
#
86+
# Check support for AVX2
87+
#
88+
AC_MSG_CHECKING([for AVX2 support (no additional flags)])
89+
AC_LINK_IFELSE(
90+
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
91+
[[
92+
__m256 vA, vB;
93+
_mm256_add_ps(vA, vB)
94+
]])],
95+
[op_avx2_support=1
96+
AC_MSG_RESULT([yes])],
97+
[AC_MSG_RESULT([no])])
98+
AS_IF([test $op_avx2_support -eq 0],
99+
[AC_MSG_CHECKING([for AVX2 support (with -mavx2)])
100+
op_avx_cflags_save="$CFLAGS"
101+
CFLAGS="$CFLAGS -mavx2"
102+
AC_LINK_IFELSE(
103+
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
104+
[[
105+
__m256 vA, vB;
106+
_mm256_add_ps(vA, vB)
107+
]])],
108+
[op_avx2_support=1
109+
MCA_BUILD_OP_AVX2_FLAGS="-mavx2"
110+
AC_MSG_RESULT([yes])],
111+
[AC_MSG_RESULT([no])])
112+
CFLAGS="$op_avx_cflags_save"
113+
])
114+
#
115+
# Some combination of gcc and older as would not correctly build the code generated by
116+
# _mm256_loadu_si256. Screen them out.
117+
#
118+
AS_IF([test $op_avx2_support -eq 1],
119+
[AC_MSG_CHECKING([if _mm256_loadu_si256 generates code that can be compiled])
120+
op_avx_cflags_save="$CFLAGS"
121+
CFLAGS="$CFLAGS_WITHOUT_OPTFLAGS -O0 $MCA_BUILD_OP_AVX2_FLAGS"
122+
AC_LINK_IFELSE(
123+
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
124+
[[
125+
int A[8] = {0, 1, 2, 3, 4, 5, 6, 7};
126+
__m256i vA = _mm256_loadu_si256((__m256i*)&A)
127+
]])],
128+
[AC_MSG_RESULT([yes])],
129+
[op_avx2_support=0
130+
MCA_BUILD_OP_AVX2_FLAGS=""
131+
AC_MSG_RESULT([no])])
132+
CFLAGS="$op_avx_cflags_save"
133+
])
134+
#
135+
# What about early AVX support. The rest of the logic is slightly different as
136+
# we need to include some of the SSE4.1 and SSE3 instructions. So, we first check
137+
# if we can compile AVX code without a flag, then we validate that we have support
138+
# for the SSE4.1 and SSE3 instructions we need. If not, we check for the usage of
139+
# the AVX flag, and then recheck if we have support for the SSE4.1 and SSE3
140+
# instructions.
141+
#
142+
AC_MSG_CHECKING([for AVX support (no additional flags)])
143+
AC_LINK_IFELSE(
144+
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
145+
[[
146+
__m128 vA, vB;
147+
_mm_add_ps(vA, vB)
148+
]])],
149+
[op_avx_support=1
150+
AC_MSG_RESULT([yes])],
151+
[AC_MSG_RESULT([no])])
152+
#
153+
# Check for SSE4.1 support
154+
#
155+
AS_IF([test $op_avx_support -eq 1],
156+
[AC_MSG_CHECKING([for SSE4.1 support])
157+
AC_LINK_IFELSE(
158+
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
159+
[[
160+
__m128i vA, vB;
161+
(void)_mm_max_epi8(vA, vB)
162+
]])],
163+
[op_sse41_support=1
164+
AC_MSG_RESULT([yes])],
165+
[AC_MSG_RESULT([no])])
166+
])
167+
#
168+
# Check for SSE3 support
169+
#
170+
AS_IF([test $op_avx_support -eq 1],
171+
[AC_MSG_CHECKING([for SSE3 support])
172+
AC_LINK_IFELSE(
173+
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
174+
[[
175+
int A[4] = {0, 1, 2, 3};
176+
__m128i vA = _mm_lddqu_si128((__m128i*)&A)
177+
]])],
178+
[op_sse3_support=1
179+
AC_MSG_RESULT([yes])],
180+
[AC_MSG_RESULT([no])])
181+
])
182+
# Second pass, do we need to add the AVX flag ?
183+
AS_IF([test $op_avx_support -eq 0 || test $op_sse41_support -eq 0 || test $op_sse3_support -eq 0],
184+
[AC_MSG_CHECKING([for AVX support (with -mavx)])
185+
op_avx_cflags_save="$CFLAGS"
186+
CFLAGS="$CFLAGS -mavx"
187+
AC_LINK_IFELSE(
188+
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
189+
[[
190+
__m128 vA, vB;
191+
_mm_add_ps(vA, vB)
192+
]])],
193+
[op_avx_support=1
194+
MCA_BUILD_OP_AVX_FLAGS="-mavx"
195+
op_sse41_support=0
196+
op_sse3_support=0
197+
AC_MSG_RESULT([yes])],
198+
[AC_MSG_RESULT([no])])
199+
200+
AS_IF([test $op_sse41_support -eq 0],
201+
[AC_MSG_CHECKING([for SSE4.1 support])
202+
AC_LINK_IFELSE(
203+
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
204+
[[
205+
__m128i vA, vB;
206+
(void)_mm_max_epi8(vA, vB)
207+
]])],
208+
[op_sse41_support=1
209+
AC_MSG_RESULT([yes])],
210+
[AC_MSG_RESULT([no])])
211+
])
212+
AS_IF([test $op_sse3_support -eq 0],
213+
[AC_MSG_CHECKING([for SSE3 support])
214+
AC_LINK_IFELSE(
215+
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
216+
[[
217+
int A[4] = {0, 1, 2, 3};
218+
__m128i vA = _mm_lddqu_si128((__m128i*)&A)
219+
]])],
220+
[op_sse3_support=1
221+
AC_MSG_RESULT([yes])],
222+
[AC_MSG_RESULT([no])])
223+
])
224+
CFLAGS="$op_avx_cflags_save"
225+
])
226+
227+
AC_LANG_POP([C])
228+
])
229+
AC_DEFINE_UNQUOTED([OMPI_MCA_OP_HAVE_AVX512],
230+
[$op_avx512_support],
231+
[AVX512 supported in the current build])
232+
AC_DEFINE_UNQUOTED([OMPI_MCA_OP_HAVE_AVX2],
233+
[$op_avx2_support],
234+
[AVX2 supported in the current build])
235+
AC_DEFINE_UNQUOTED([OMPI_MCA_OP_HAVE_AVX],
236+
[$op_avx_support],
237+
[AVX supported in the current build])
238+
AC_DEFINE_UNQUOTED([OMPI_MCA_OP_HAVE_SSE41],
239+
[$op_sse41_support],
240+
[SSE4.1 supported in the current build])
241+
AC_DEFINE_UNQUOTED([OMPI_MCA_OP_HAVE_SSE3],
242+
[$op_sse3_support],
243+
[SSE3 supported in the current build])
244+
AM_CONDITIONAL([MCA_BUILD_ompi_op_has_avx512_support],
245+
[test "$op_avx512_support" == "1"])
246+
AM_CONDITIONAL([MCA_BUILD_ompi_op_has_avx2_support],
247+
[test "$op_avx2_support" == "1"])
248+
AM_CONDITIONAL([MCA_BUILD_ompi_op_has_avx_support],
249+
[test "$op_avx_support" == "1"])
250+
AM_CONDITIONAL([MCA_BUILD_ompi_op_has_sse41_support],
251+
[test "$op_sse41_support" == "1"])
252+
AM_CONDITIONAL([MCA_BUILD_ompi_op_has_sse3_support],
253+
[test "$op_sse3_support" == "1"])
254+
AC_SUBST(MCA_BUILD_OP_AVX512_FLAGS)
255+
AC_SUBST(MCA_BUILD_OP_AVX2_FLAGS)
256+
AC_SUBST(MCA_BUILD_OP_AVX_FLAGS)
257+
258+
OPAL_VAR_SCOPE_POP
259+
# Enable this component iff we have at least the most basic form of support
260+
# for vectorial ISA
261+
AS_IF([test $op_avx_support -eq 1 || test $op_avx2_support -eq 1 || test $op_avx512_support -eq 1],
262+
[$1],
263+
[$2])
264+
265+
])dnl

0 commit comments

Comments
 (0)