@@ -44,7 +44,7 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
44
44
#
45
45
# Check for AVX512 support
46
46
#
47
- AC_CACHE_CHECK([if we are checking for AVX512 support], op_avx_check_avx512, AS_VAR_SET(op_avx_check_avx512, yes))
47
+ AC_CACHE_CHECK([for AVX512 support], op_avx_check_avx512, AS_VAR_SET(op_avx_check_avx512, yes))
48
48
AS_IF([test " $op_avx_check_avx512 " = " yes" ],
49
49
[AC_MSG_CHECKING([for AVX512 support (no additional flags)])
50
50
AC_LINK_IFELSE(
@@ -115,14 +115,14 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
115
115
#
116
116
# Check support for AVX2
117
117
#
118
- AC_CACHE_CHECK([if we are checking for AVX2 support], op_avx_check_avx2, AS_VAR_SET(op_avx_check_avx2, yes))
118
+ AC_CACHE_CHECK([for AVX2 support], op_avx_check_avx2, AS_VAR_SET(op_avx_check_avx2, yes))
119
119
AS_IF([test " $op_avx_check_avx2 " = " yes" ],
120
120
[AC_MSG_CHECKING([for AVX2 support (no additional flags)])
121
121
AC_LINK_IFELSE(
122
122
[AC_LANG_PROGRAM([[# include <immintrin.h>]],
123
123
[[
124
- __m256 vA, vB;
125
- _mm256_add_ps (vA, vB)
124
+ __m256i vA, vB, vC ;
125
+ vC = _mm256_and_si256 (vA, vB)
126
126
]])],
127
127
[op_avx2_support= 1
128
128
AC_MSG_RESULT([yes])],
@@ -134,8 +134,8 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
134
134
AC_LINK_IFELSE(
135
135
[AC_LANG_PROGRAM([[# include <immintrin.h>]],
136
136
[[
137
- __m256 vA, vB;
138
- _mm256_add_ps (vA, vB)
137
+ __m256i vA, vB, vC ;
138
+ vC = _mm256_and_si256 (vA, vB)
139
139
]])],
140
140
[op_avx2_support= 1
141
141
MCA_BUILD_OP_AVX2_FLAGS= " -mavx2"
@@ -164,29 +164,29 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
164
164
CFLAGS="$op_avx_cflags_save "
165
165
])])
166
166
#
167
- # What about early AVX support. The rest of the logic is slightly different as
167
+ # What about early AVX support? The rest of the logic is slightly different as
168
168
# we need to include some of the SSE4.1 and SSE3 instructions. So, we first check
169
169
# if we can compile AVX code without a flag, then we validate that we have support
170
170
# for the SSE4.1 and SSE3 instructions we need. If not, we check for the usage of
171
171
# the AVX flag, and then recheck if we have support for the SSE4.1 and SSE3
172
172
# instructions.
173
173
#
174
- AC_CACHE_CHECK([if we are checking for AVX support], op_avx_check_avx, AS_VAR_SET(op_avx_check_avx, yes))
174
+ AC_CACHE_CHECK([for AVX support], op_avx_check_avx, AS_VAR_SET(op_avx_check_avx, yes))
175
175
AS_IF([test " $op_avx_check_avx " = " yes" ],
176
176
[AC_MSG_CHECKING([for AVX support (no additional flags)])
177
177
AC_LINK_IFELSE(
178
178
[AC_LANG_PROGRAM([[# include <immintrin.h>]],
179
179
[[
180
- __m128 vA, vB;
181
- _mm_add_ps (vA, vB)
180
+ __m256 vA, vB, vC ;
181
+ vC = _mm256_add_ps (vA, vB)
182
182
]])],
183
183
[op_avx_support= 1
184
184
AC_MSG_RESULT([yes])],
185
185
[AC_MSG_RESULT([no])])])
186
186
#
187
187
# Check for SSE4.1 support
188
188
#
189
- AC_CACHE_CHECK([if we are checking for SSE4.1 support], op_avx_check_sse41, AS_VAR_SET(op_avx_check_sse41, yes))
189
+ AC_CACHE_CHECK([for SSE4.1 support], op_avx_check_sse41, AS_VAR_SET(op_avx_check_sse41, yes))
190
190
AS_IF([test $op_avx_support -eq 1 && test " $op_avx_check_sse41 " = " yes" ],
191
191
[AC_MSG_CHECKING([for SSE4.1 support])
192
192
AC_LINK_IFELSE(
@@ -202,7 +202,7 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
202
202
#
203
203
# Check for SSE3 support
204
204
#
205
- AC_CACHE_CHECK([if we are checking for SSE3 support], op_avx_check_sse3, AS_VAR_SET(op_avx_check_sse3, yes))
205
+ AC_CACHE_CHECK([for SSE3 support], op_avx_check_sse3, AS_VAR_SET(op_avx_check_sse3, yes))
206
206
AS_IF([test $op_avx_support -eq 1 && test " $op_avx_check_sse3 " = " yes" ],
207
207
[AC_MSG_CHECKING([for SSE3 support])
208
208
AC_LINK_IFELSE(
@@ -224,8 +224,8 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
224
224
AC_LINK_IFELSE(
225
225
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
226
226
[[
227
- __m 128 vA, vB;
228
- _mm_add_ps (vA, vB)
227
+ __m 256 vA, vB, vC ;
228
+ vC = _mm 256 _add_ps (vA, vB)
229
229
]])],
230
230
[op_avx_support=1
231
231
MCA_BUILD_OP_AVX_FLAGS="-mavx"
0 commit comments