Skip to content

Commit fcf2766

Browse files
committed
AVX code generation improvements
1. Allow fallback to a lesser AVX support during make. Due to the fact that some distros restrict the compile architecture during make (while not setting any restrictions during configure), we need to detect the target architecture also during make in order to restrict the code we generate. 2. Add comments and better protect the arch-specific code. Identify all the vectorial functions used and classify them according to the necessary hardware capabilities. Use these requirements to protect the code for loads and stores (the rest of the code being automatically generated, it is more difficult to protect). 3. Correctly check for AVX* support. Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
1 parent 31068e0 commit fcf2766

File tree

2 files changed

+288
-62
lines changed

2 files changed

+288
-62
lines changed

ompi/mca/op/avx/configure.m4

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
4444
#
4545
# Check for AVX512 support
4646
#
47-
AC_CACHE_CHECK([if we are checking for AVX512 support], op_avx_check_avx512, AS_VAR_SET(op_avx_check_avx512, yes))
47+
AC_CACHE_CHECK([for AVX512 support], op_avx_check_avx512, AS_VAR_SET(op_avx_check_avx512, yes))
4848
AS_IF([test "$op_avx_check_avx512" = "yes"],
4949
[AC_MSG_CHECKING([for AVX512 support (no additional flags)])
5050
AC_LINK_IFELSE(
@@ -115,14 +115,14 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
115115
#
116116
# Check support for AVX2
117117
#
118-
AC_CACHE_CHECK([if we are checking for AVX2 support], op_avx_check_avx2, AS_VAR_SET(op_avx_check_avx2, yes))
118+
AC_CACHE_CHECK([for AVX2 support], op_avx_check_avx2, AS_VAR_SET(op_avx_check_avx2, yes))
119119
AS_IF([test "$op_avx_check_avx2" = "yes"],
120120
[AC_MSG_CHECKING([for AVX2 support (no additional flags)])
121121
AC_LINK_IFELSE(
122122
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
123123
[[
124-
__m256 vA, vB;
125-
_mm256_add_ps(vA, vB)
124+
__m256i vA, vB, vC;
125+
vC = _mm256_and_si256(vA, vB)
126126
]])],
127127
[op_avx2_support=1
128128
AC_MSG_RESULT([yes])],
@@ -134,8 +134,8 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
134134
AC_LINK_IFELSE(
135135
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
136136
[[
137-
__m256 vA, vB;
138-
_mm256_add_ps(vA, vB)
137+
__m256i vA, vB, vC;
138+
vC = _mm256_and_si256(vA, vB)
139139
]])],
140140
[op_avx2_support=1
141141
MCA_BUILD_OP_AVX2_FLAGS="-mavx2"
@@ -164,29 +164,29 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
164164
CFLAGS="$op_avx_cflags_save"
165165
])])
166166
#
167-
# What about early AVX support. The rest of the logic is slightly different as
167+
# What about early AVX support? The rest of the logic is slightly different as
168168
# we need to include some of the SSE4.1 and SSE3 instructions. So, we first check
169169
# if we can compile AVX code without a flag, then we validate that we have support
170170
# for the SSE4.1 and SSE3 instructions we need. If not, we check for the usage of
171171
# the AVX flag, and then recheck if we have support for the SSE4.1 and SSE3
172172
# instructions.
173173
#
174-
AC_CACHE_CHECK([if we are checking for AVX support], op_avx_check_avx, AS_VAR_SET(op_avx_check_avx, yes))
174+
AC_CACHE_CHECK([for AVX support], op_avx_check_avx, AS_VAR_SET(op_avx_check_avx, yes))
175175
AS_IF([test "$op_avx_check_avx" = "yes"],
176176
[AC_MSG_CHECKING([for AVX support (no additional flags)])
177177
AC_LINK_IFELSE(
178178
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
179179
[[
180-
__m128 vA, vB;
181-
_mm_add_ps(vA, vB)
180+
__m256 vA, vB, vC;
181+
vC = _mm256_add_ps(vA, vB)
182182
]])],
183183
[op_avx_support=1
184184
AC_MSG_RESULT([yes])],
185185
[AC_MSG_RESULT([no])])])
186186
#
187187
# Check for SSE4.1 support
188188
#
189-
AC_CACHE_CHECK([if we are checking for SSE4.1 support], op_avx_check_sse41, AS_VAR_SET(op_avx_check_sse41, yes))
189+
AC_CACHE_CHECK([for SSE4.1 support], op_avx_check_sse41, AS_VAR_SET(op_avx_check_sse41, yes))
190190
AS_IF([test $op_avx_support -eq 1 && test "$op_avx_check_sse41" = "yes"],
191191
[AC_MSG_CHECKING([for SSE4.1 support])
192192
AC_LINK_IFELSE(
@@ -202,7 +202,7 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
202202
#
203203
# Check for SSE3 support
204204
#
205-
AC_CACHE_CHECK([if we are checking for SSE3 support], op_avx_check_sse3, AS_VAR_SET(op_avx_check_sse3, yes))
205+
AC_CACHE_CHECK([for SSE3 support], op_avx_check_sse3, AS_VAR_SET(op_avx_check_sse3, yes))
206206
AS_IF([test $op_avx_support -eq 1 && test "$op_avx_check_sse3" = "yes"],
207207
[AC_MSG_CHECKING([for SSE3 support])
208208
AC_LINK_IFELSE(
@@ -224,8 +224,8 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
224224
AC_LINK_IFELSE(
225225
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
226226
[[
227-
__m128 vA, vB;
228-
_mm_add_ps(vA, vB)
227+
__m256 vA, vB, vC;
228+
vC = _mm256_add_ps(vA, vB)
229229
]])],
230230
[op_avx_support=1
231231
MCA_BUILD_OP_AVX_FLAGS="-mavx"

0 commit comments

Comments
 (0)