Skip to content

Commit c3b2104

Browse files
committed
Improve AVX* detection.
Check for all the necessary capabilities before allowing the build system to generate the AVX512 code.

Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
1 parent f881440 commit c3b2104

File tree

2 files changed

+33
-3
lines changed

2 files changed

+33
-3
lines changed

ompi/mca/op/avx/configure.m4

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,13 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
3838

3939
OPAL_VAR_SCOPE_PUSH([op_avx_cflags_save])
4040

41-
AS_IF([test "$opal_cv_asm_arch" = "X86_64"],
41+
case "${host}" in
42+
x86_64-*x32|i?86-*|x86_64*|amd64*)
43+
check_avx="yes";;
44+
*)
45+
check_avx="no";;
46+
esac
47+
AS_IF([test "$check_avx" = "yes"],
4248
[AC_LANG_PUSH([C])
4349

4450
#
@@ -123,6 +129,28 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
123129
MCA_BUILD_OP_AVX512_FLAGS=""
124130
AC_MSG_RESULT([no])])
125131
CFLAGS="$op_avx_cflags_save"
132+
])
133+
#
134+
# Check for combination of AVX512F + AVX512VL (the guard below also requires AVX512BW)
135+
#
136+
AS_IF([test $op_avx512_support -eq 1],
137+
[AC_MSG_CHECKING([if _mm_max_epi64 generates code that can be compiled])
138+
op_avx_cflags_save="$CFLAGS"
139+
CFLAGS="$CFLAGS_WITHOUT_OPTFLAGS -O0 $MCA_BUILD_OP_AVX512_FLAGS"
140+
AC_LINK_IFELSE(
141+
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
142+
[[
143+
#if !defined(__AVX512F__) || !defined(__AVX512VL__) || !defined(__AVX512BW__)
144+
#error "icc needs the -m flags to provide the AVX* detection macros"
145+
#endif
146+
__m128i vA, vB;
147+
_mm_max_epi64(vA, vB)
148+
]])],
149+
[AC_MSG_RESULT([yes])],
150+
[op_avx512_support=0
151+
MCA_BUILD_OP_AVX512_FLAGS=""
152+
AC_MSG_RESULT([no])])
153+
CFLAGS="$op_avx_cflags_save"
126154
])])
127155
#
128156
# Check support for AVX2

ompi/mca/op/avx/op_avx_functions.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,16 +32,18 @@
3232
* to a lesser support (AVX512 -> AVX2, AVX2 -> AVX, AVX -> error out).
3333
*/
3434
#if defined(GENERATE_AVX512_CODE)
35+
# define PREPEND _avx512
3536
# if defined(__AVX512BW__) && defined(__AVX512F__) && defined(__AVX512VL__)
36-
# define PREPEND _avx512
37+
/* all good */
3738
# else
3839
# undef GENERATE_AVX512_CODE
3940
# endif /* defined(__AVX512BW__) && defined(__AVX512F__) && defined(__AVX512VL__) */
4041
#endif /* defined(GENERATE_AVX512_CODE) */
4142

4243
#if !defined(PREPEND) && defined(GENERATE_AVX2_CODE)
44+
# define PREPEND _avx2
4345
# if defined(__AVX2__)
44-
# define PREPEND _avx2
46+
/* all good */
4547
# else
4648
# undef GENERATE_AVX2_CODE
4749
# endif /* defined(__AVX2__) */

0 commit comments

Comments
 (0)